View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.mapreduce;
19  
20  import java.io.IOException;
21  import java.util.Iterator;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.conf.Configuration;
26  import org.apache.hadoop.conf.Configured;
27  import org.apache.hadoop.fs.FileStatus;
28  import org.apache.hadoop.fs.FileSystem;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.hbase.Cell;
31  import org.apache.hadoop.hbase.CellComparator;
32  import org.apache.hadoop.hbase.CellUtil;
33  import org.apache.hadoop.hbase.HBaseConfiguration;
34  import org.apache.hadoop.hbase.KeyValue;
35  import org.apache.hadoop.hbase.TableName;
36  import org.apache.hadoop.hbase.client.Connection;
37  import org.apache.hadoop.hbase.client.ConnectionFactory;
38  import org.apache.hadoop.hbase.client.Delete;
39  import org.apache.hadoop.hbase.client.Mutation;
40  import org.apache.hadoop.hbase.client.Put;
41  import org.apache.hadoop.hbase.client.Result;
42  import org.apache.hadoop.hbase.client.ResultScanner;
43  import org.apache.hadoop.hbase.client.Scan;
44  import org.apache.hadoop.hbase.client.Table;
45  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
46  import org.apache.hadoop.hbase.util.Bytes;
47  import org.apache.hadoop.mapreduce.Counters;
48  import org.apache.hadoop.mapreduce.Job;
49  import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
50  import org.apache.hadoop.mapreduce.security.TokenCache;
51  import org.apache.hadoop.util.GenericOptionsParser;
52  import org.apache.hadoop.util.Tool;
53  import org.apache.hadoop.util.ToolRunner;
54  
55  import com.google.common.base.Throwables;
56  import com.google.common.collect.Iterators;
57  
public class SyncTable extends Configured implements Tool {

  private static final Log LOG = LogFactory.getLog(SyncTable.class);

  // Keys used to carry the driver's command-line options to SyncMapper via the
  // job configuration.
  static final String SOURCE_HASH_DIR_CONF_KEY = "sync.table.source.hash.dir";
  static final String SOURCE_TABLE_CONF_KEY = "sync.table.source.table.name";
  static final String TARGET_TABLE_CONF_KEY = "sync.table.target.table.name";
  static final String SOURCE_ZK_CLUSTER_CONF_KEY = "sync.table.source.zk.cluster";
  static final String TARGET_ZK_CLUSTER_CONF_KEY = "sync.table.target.zk.cluster";
  static final String DRY_RUN_CONF_KEY = "sync.table.dry.run";
  static final String DO_DELETES_CONF_KEY = "sync.table.do.deletes";
  static final String DO_PUTS_CONF_KEY = "sync.table.do.puts";
  static final String IGNORE_TIMESTAMPS = "sync.table.ignore.timestamps";

  // Values parsed from the command line (see doCommandLine).
  Path sourceHashDir;        // HashTable output dir describing the source table
  String sourceTableName;
  String targetTableName;

  String sourceZkCluster;    // null means: use the cluster from the classpath config
  String targetZkCluster;
  boolean dryRun;            // if true, only count diffs; write nothing
  boolean doDeletes = true;  // emit Deletes for cells present only on the target
  boolean doPuts = true;     // emit Puts for cells missing/different on the target
  boolean ignoreTimestamps;

  Counters counters;         // job counters, available after the job completes

  public SyncTable(Configuration conf) {
    super(conf);
  }
88  
89    private void initCredentialsForHBase(String zookeeper, Job job) throws IOException {
90      Configuration peerConf = HBaseConfiguration.createClusterConf(job
91              .getConfiguration(), zookeeper);
92      if("kerberos".equalsIgnoreCase(peerConf.get("hbase.security.authentication"))){
93        TableMapReduceUtil.initCredentialsForCluster(job, peerConf);
94      }
95    }
96  
97    public Job createSubmittableJob(String[] args) throws IOException {
98      FileSystem fs = sourceHashDir.getFileSystem(getConf());
99      if (!fs.exists(sourceHashDir)) {
100       throw new IOException("Source hash dir not found: " + sourceHashDir);
101     }
102 
103     Job job = Job.getInstance(getConf(),getConf().get("mapreduce.job.name",
104         "syncTable_" + sourceTableName + "-" + targetTableName));
105     Configuration jobConf = job.getConfiguration();
106     if ("kerberos".equalsIgnoreCase(jobConf.get("hadoop.security.authentication"))) {
107       TokenCache.obtainTokensForNamenodes(job.getCredentials(), new
108           Path[] { sourceHashDir }, getConf());
109     }
110 
111     HashTable.TableHash tableHash = HashTable.TableHash.read(getConf(), sourceHashDir);
112     LOG.info("Read source hash manifest: " + tableHash);
113     LOG.info("Read " + tableHash.partitions.size() + " partition keys");
114     if (!tableHash.tableName.equals(sourceTableName)) {
115       LOG.warn("Table name mismatch - manifest indicates hash was taken from: "
116           + tableHash.tableName + " but job is reading from: " + sourceTableName);
117     }
118     if (tableHash.numHashFiles != tableHash.partitions.size() + 1) {
119       throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"
120           + " should be 1 more than the number of partition keys.  However, the manifest file "
121           + " says numHashFiles=" + tableHash.numHashFiles + " but the number of partition keys"
122           + " found in the partitions file is " + tableHash.partitions.size());
123     }
124 
125     Path dataDir = new Path(sourceHashDir, HashTable.HASH_DATA_DIR);
126     int dataSubdirCount = 0;
127     for (FileStatus file : fs.listStatus(dataDir)) {
128       if (file.getPath().getName().startsWith(HashTable.OUTPUT_DATA_FILE_PREFIX)) {
129         dataSubdirCount++;
130       }
131     }
132 
133     if (dataSubdirCount != tableHash.numHashFiles) {
134       throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"
135           + " should be 1 more than the number of partition keys.  However, the number of data dirs"
136           + " found is " + dataSubdirCount + " but the number of partition keys"
137           + " found in the partitions file is " + tableHash.partitions.size());
138     }
139 
140     job.setJarByClass(HashTable.class);
141     jobConf.set(SOURCE_HASH_DIR_CONF_KEY, sourceHashDir.toString());
142     jobConf.set(SOURCE_TABLE_CONF_KEY, sourceTableName);
143     jobConf.set(TARGET_TABLE_CONF_KEY, targetTableName);
144     if (sourceZkCluster != null) {
145       jobConf.set(SOURCE_ZK_CLUSTER_CONF_KEY, sourceZkCluster);
146       initCredentialsForHBase(sourceZkCluster, job);
147     }
148     if (targetZkCluster != null) {
149       jobConf.set(TARGET_ZK_CLUSTER_CONF_KEY, targetZkCluster);
150       initCredentialsForHBase(targetZkCluster, job);
151     }
152     jobConf.setBoolean(DRY_RUN_CONF_KEY, dryRun);
153     jobConf.setBoolean(DO_DELETES_CONF_KEY, doDeletes);
154     jobConf.setBoolean(DO_PUTS_CONF_KEY, doPuts);
155     jobConf.setBoolean(IGNORE_TIMESTAMPS, ignoreTimestamps);
156 
157     TableMapReduceUtil.initTableMapperJob(targetTableName, tableHash.initScan(),
158         SyncMapper.class, null, null, job);
159 
160     job.setNumReduceTasks(0);
161 
162     if (dryRun) {
163       job.setOutputFormatClass(NullOutputFormat.class);
164     } else {
165       // No reducers.  Just write straight to table.  Call initTableReducerJob
166       // because it sets up the TableOutputFormat.
167       TableMapReduceUtil.initTableReducerJob(targetTableName, null, job, null,
168           targetZkCluster, null, null);
169 
170       // would be nice to add an option for bulk load instead
171     }
172 
173     // Obtain an authentication token, for the specified cluster, on behalf of the current user
174     if (sourceZkCluster != null) {
175       Configuration peerConf =
176           HBaseConfiguration.createClusterConf(job.getConfiguration(), sourceZkCluster);
177       TableMapReduceUtil.initCredentialsForCluster(job, peerConf);
178     }
179     return job;
180   }
181 
  public static class SyncMapper extends TableMapper<ImmutableBytesWritable, Mutation> {
    Path sourceHashDir;

    // Connections/tables to both clusters; opened in setup(), closed in cleanup().
    Connection sourceConnection;
    Connection targetConnection;
    Table sourceTable;
    Table targetTable;
    boolean dryRun;
    boolean doDeletes = true;
    boolean doPuts = true;
    boolean ignoreTimestamp;

    // Hash manifest of the source table and a reader positioned at this split's
    // first key/hash pair.
    HashTable.TableHash sourceTableHash;
    HashTable.TableHash.Reader sourceHashReader;
    ImmutableBytesWritable currentSourceHash;  // hash of the batch being re-computed
    ImmutableBytesWritable nextSourceKey;      // start key of the NEXT batch; null at the end
    HashTable.ResultHasher targetHasher;       // recomputes hashes over the target's rows

    // First failure seen in map(); rethrown by cleanup() after closing resources.
    Throwable mapperException;

    public static enum Counter {BATCHES, HASHES_MATCHED, HASHES_NOT_MATCHED, SOURCEMISSINGROWS,
      SOURCEMISSINGCELLS, TARGETMISSINGROWS, TARGETMISSINGCELLS, ROWSWITHDIFFS, DIFFERENTCELLVALUES,
      MATCHINGROWS, MATCHINGCELLS, EMPTY_BATCHES, RANGESMATCHED, RANGESNOTMATCHED};
205 
    /**
     * Opens connections/tables to both clusters, reads the source hash
     * manifest, and positions the hash reader at this split's start row.
     */
    @Override
    protected void setup(Context context) throws IOException {

      Configuration conf = context.getConfiguration();
      sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY));
      sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null);
      // the target connection reuses any overrides TableOutputFormat registered
      targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY,
          TableOutputFormat.OUTPUT_CONF_PREFIX);
      sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY);
      targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY);
      dryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
      doDeletes = conf.getBoolean(DO_DELETES_CONF_KEY, true);
      doPuts = conf.getBoolean(DO_PUTS_CONF_KEY, true);
      ignoreTimestamp = conf.getBoolean(IGNORE_TIMESTAMPS, false);

      sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir);
      LOG.info("Read source hash manifest: " + sourceTableHash);
      LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys");

      TableSplit split = (TableSplit) context.getInputSplit();
      ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow());

      sourceHashReader = sourceTableHash.newReader(conf, splitStartKey);
      findNextKeyHashPair();

      // create a hasher, but don't start it right away
      // instead, find the first hash batch at or after the start row
      // and skip any rows that come before.  they will be caught by the previous task
      targetHasher = new HashTable.ResultHasher();
      targetHasher.ignoreTimestamps = ignoreTimestamp;
    }
237 
238     private static Connection openConnection(Configuration conf, String zkClusterConfKey,
239                                              String configPrefix)
240       throws IOException {
241         String zkCluster = conf.get(zkClusterConfKey);
242         Configuration clusterConf = HBaseConfiguration.createClusterConf(conf,
243             zkCluster, configPrefix);
244         return ConnectionFactory.createConnection(clusterConf);
245     }
246 
247     private static Table openTable(Connection connection, Configuration conf,
248         String tableNameConfKey) throws IOException {
249       return connection.getTable(TableName.valueOf(conf.get(tableNameConfKey)));
250     }
251 
252     /**
253      * Attempt to read the next source key/hash pair.
254      * If there are no more, set nextSourceKey to null
255      */
256     private void findNextKeyHashPair() throws IOException {
257       boolean hasNext = sourceHashReader.next();
258       if (hasNext) {
259         nextSourceKey = sourceHashReader.getCurrentKey();
260       } else {
261         // no more keys - last hash goes to the end
262         nextSourceKey = null;
263       }
264     }
265 
    /**
     * For each target-table row: close out any hash batches ending before this
     * row, then fold the row into the current batch's hash.
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
      try {
        // first, finish any hash batches that end before the scanned row
        while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) {
          moveToNextBatch(context);
        }

        // next, add the scanned row (as long as we've reached the first batch)
        if (targetHasher.isBatchStarted()) {
          targetHasher.hashResult(value);
        }
      } catch (Throwable t) {
        // Remember the failure so cleanup() can skip the final range work and
        // rethrow it; propagate preserves IOException/InterruptedException types.
        mapperException = t;
        Throwables.propagateIfInstanceOf(t, IOException.class);
        Throwables.propagateIfInstanceOf(t, InterruptedException.class);
        Throwables.propagate(t);
      }
    }
286 
    /**
     * If there is an open hash batch, complete it and sync if there are diffs.
     * Start a new batch, and seek to read the next key/hash pair from the
     * source hash data.
     */
    private void moveToNextBatch(Context context) throws IOException, InterruptedException {
      if (targetHasher.isBatchStarted()) {
        finishBatchAndCompareHashes(context);
      }
      // the batch that starts at nextSourceKey is hashed against currentSourceHash
      targetHasher.startBatch(nextSourceKey);
      currentSourceHash = sourceHashReader.getCurrentHash();

      findNextKeyHashPair();
    }
300 
    /**
     * Finish the currently open hash batch.
     * Compare the target hash to the given source hash.
     * If they do not match, then sync the covered key range.
     */
    private void finishBatchAndCompareHashes(Context context)
        throws IOException, InterruptedException {
      targetHasher.finishBatch();
      context.getCounter(Counter.BATCHES).increment(1);
      if (targetHasher.getBatchSize() == 0) {
        context.getCounter(Counter.EMPTY_BATCHES).increment(1);
      }
      ImmutableBytesWritable targetHash = targetHasher.getBatchHash();
      if (targetHash.equals(currentSourceHash)) {
        context.getCounter(Counter.HASHES_MATCHED).increment(1);
      } else {
        context.getCounter(Counter.HASHES_NOT_MATCHED).increment(1);

        // a null nextSourceKey means the batch runs to the end of the hashed range
        ImmutableBytesWritable stopRow = nextSourceKey == null
                                          ? new ImmutableBytesWritable(sourceTableHash.stopRow)
                                          : nextSourceKey;

        if (LOG.isDebugEnabled()) {
          LOG.debug("Hash mismatch.  Key range: " + toHex(targetHasher.getBatchStartKey())
              + " to " + toHex(stopRow)
              + " sourceHash: " + toHex(currentSourceHash)
              + " targetHash: " + toHex(targetHash));
        }

        syncRange(context, targetHasher.getBatchStartKey(), stopRow);
      }
    }
333     private static String toHex(ImmutableBytesWritable bytes) {
334       return Bytes.toHex(bytes.get(), bytes.getOffset(), bytes.getLength());
335     }
336 
    // Shared empty scanner used as the "other side" when one table is missing a row.
    private static final CellScanner EMPTY_CELL_SCANNER
      = new CellScanner(Iterators.<Result>emptyIterator());
339 
340     /**
341      * Rescan the given range directly from the source and target tables.
342      * Count and log differences, and if this is not a dry run, output Puts and Deletes
343      * to make the target table match the source table for this range
344      */
345     private void syncRange(Context context, ImmutableBytesWritable startRow,
346         ImmutableBytesWritable stopRow) throws IOException, InterruptedException {
347       Scan scan = sourceTableHash.initScan();
348       scan.setStartRow(startRow.copyBytes());
349       scan.setStopRow(stopRow.copyBytes());
350 
351       ResultScanner sourceScanner = sourceTable.getScanner(scan);
352       CellScanner sourceCells = new CellScanner(sourceScanner.iterator());
353 
354       ResultScanner targetScanner = targetTable.getScanner(new Scan(scan));
355       CellScanner targetCells = new CellScanner(targetScanner.iterator());
356 
357       boolean rangeMatched = true;
358       byte[] nextSourceRow = sourceCells.nextRow();
359       byte[] nextTargetRow = targetCells.nextRow();
360       while(nextSourceRow != null || nextTargetRow != null) {
361         boolean rowMatched;
362         int rowComparison = compareRowKeys(nextSourceRow, nextTargetRow);
363         if (rowComparison < 0) {
364           if (LOG.isInfoEnabled()) {
365             LOG.info("Target missing row: " + Bytes.toHex(nextSourceRow));
366           }
367           context.getCounter(Counter.TARGETMISSINGROWS).increment(1);
368 
369           rowMatched = syncRowCells(context, nextSourceRow, sourceCells, EMPTY_CELL_SCANNER);
370           nextSourceRow = sourceCells.nextRow();  // advance only source to next row
371         } else if (rowComparison > 0) {
372           if (LOG.isInfoEnabled()) {
373             LOG.info("Source missing row: " + Bytes.toHex(nextTargetRow));
374           }
375           context.getCounter(Counter.SOURCEMISSINGROWS).increment(1);
376 
377           rowMatched = syncRowCells(context, nextTargetRow, EMPTY_CELL_SCANNER, targetCells);
378           nextTargetRow = targetCells.nextRow();  // advance only target to next row
379         } else {
380           // current row is the same on both sides, compare cell by cell
381           rowMatched = syncRowCells(context, nextSourceRow, sourceCells, targetCells);
382           nextSourceRow = sourceCells.nextRow();
383           nextTargetRow = targetCells.nextRow();
384         }
385 
386         if (!rowMatched) {
387           rangeMatched = false;
388         }
389       }
390 
391       sourceScanner.close();
392       targetScanner.close();
393 
394       context.getCounter(rangeMatched ? Counter.RANGESMATCHED : Counter.RANGESNOTMATCHED)
395         .increment(1);
396     }
397 
    /**
     * Iterates the Cells of a {@code ResultScanner} one row at a time, merging
     * consecutive Results that belong to the same row (a wide row may span
     * multiple Results when scan batching is on). At most one Result of
     * lookahead is buffered in {@code nextRowResult}.
     */
    private static class CellScanner {
      private final Iterator<Result> results;

      private byte[] currentRow;        // row key of the row being iterated, or null
      private Result currentRowResult;  // Result currently serving cells, or null when row done
      private int nextCellInRow;        // index of the next cell within currentRowResult

      private Result nextRowResult;     // buffered first Result of the NEXT row, if seen

      public CellScanner(Iterator<Result> results) {
        this.results = results;
      }

      /**
       * Advance to the next row and return its row key.
       * Returns null iff there are no more rows.
       */
      public byte[] nextRow() {
        if (nextRowResult == null) {
          // no cached row - check scanner for more
          while (results.hasNext()) {
            nextRowResult = results.next();
            Cell nextCell = nextRowResult.rawCells()[0];
            if (currentRow == null
                || !Bytes.equals(currentRow, 0, currentRow.length, nextCell.getRowArray(),
                nextCell.getRowOffset(), nextCell.getRowLength())) {
              // found next row
              break;
            } else {
              // found another result from current row, keep scanning
              nextRowResult = null;
            }
          }

          if (nextRowResult == null) {
            // end of data, no more rows
            currentRowResult = null;
            currentRow = null;
            return null;
          }
        }

        // advance to cached result for next row
        currentRowResult = nextRowResult;
        nextCellInRow = 0;
        currentRow = currentRowResult.getRow();
        nextRowResult = null;
        return currentRow;
      }

      /**
       * Returns the next Cell in the current row or null iff none remain.
       */
      public Cell nextCellInRow() {
        if (currentRowResult == null) {
          // nothing left in current row
          return null;
        }

        Cell nextCell = currentRowResult.rawCells()[nextCellInRow];
        nextCellInRow++;
        if (nextCellInRow == currentRowResult.size()) {
          // exhausted this Result; peek at the next one to see if the row continues
          if (results.hasNext()) {
            Result result = results.next();
            Cell cell = result.rawCells()[0];
            if (Bytes.equals(currentRow, 0, currentRow.length, cell.getRowArray(),
                cell.getRowOffset(), cell.getRowLength())) {
              // result is part of current row
              currentRowResult = result;
              nextCellInRow = 0;
            } else {
              // result is part of next row, cache it
              nextRowResult = result;
              // current row is complete
              currentRowResult = null;
            }
          } else {
            // end of data
            currentRowResult = null;
          }
        }
        return nextCell;
      }
    }
482 
483     private Cell checkAndResetTimestamp(Cell sourceCell){
484       if (ignoreTimestamp) {
485         sourceCell = new KeyValue(sourceCell);
486         ((KeyValue) sourceCell).setTimestamp(System.currentTimeMillis());
487       }
488       return sourceCell;
489     }
490 
    /**
     * Compare the cells for the given row from the source and target tables.
     * Count and log any differences.
     * If not a dry run, output a Put and/or Delete needed to sync the target table
     * to match the source table.
     */
    private boolean syncRowCells(Context context, byte[] rowKey, CellScanner sourceCells,
        CellScanner targetCells) throws IOException, InterruptedException {
      Put put = null;
      Delete delete = null;
      long matchingCells = 0;
      boolean matchingRow = true;
      Cell sourceCell = sourceCells.nextCellInRow();
      Cell targetCell = targetCells.nextCellInRow();
      // Merge-walk the two sorted cell streams; a null cell means that side is done.
      while (sourceCell != null || targetCell != null) {

        int cellKeyComparison = compareCellKeysWithinRow(sourceCell, targetCell);
        if (cellKeyComparison < 0) {
          // source cell sorts first: the target is missing it
          if (LOG.isDebugEnabled()) {
            LOG.debug("Target missing cell: " + sourceCell);
          }
          context.getCounter(Counter.TARGETMISSINGCELLS).increment(1);
          matchingRow = false;

          if (!dryRun && doPuts) {
            if (put == null) {
              put = new Put(rowKey);
            }
            sourceCell = checkAndResetTimestamp(sourceCell);
            put.add(sourceCell);
          }

          sourceCell = sourceCells.nextCellInRow();
        } else if (cellKeyComparison > 0) {
          // target cell sorts first: the source is missing it
          if (LOG.isDebugEnabled()) {
            LOG.debug("Source missing cell: " + targetCell);
          }
          context.getCounter(Counter.SOURCEMISSINGCELLS).increment(1);
          matchingRow = false;

          if (!dryRun && doDeletes) {
            if (delete == null) {
              delete = new Delete(rowKey);
            }
            // add a tombstone to exactly match the target cell that is missing on the source
            delete.addColumn(CellUtil.cloneFamily(targetCell),
                CellUtil.cloneQualifier(targetCell), targetCell.getTimestamp());
          }

          targetCell = targetCells.nextCellInRow();
        } else {
          // the cell keys are equal, now check values
          if (CellUtil.matchingValue(sourceCell, targetCell)) {
            matchingCells++;
          } else {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Different values: ");
              LOG.debug("  source cell: " + sourceCell
                  + " value: " + Bytes.toHex(sourceCell.getValueArray(),
                      sourceCell.getValueOffset(), sourceCell.getValueLength()));
              LOG.debug("  target cell: " + targetCell
                  + " value: " + Bytes.toHex(targetCell.getValueArray(),
                      targetCell.getValueOffset(), targetCell.getValueLength()));
            }
            context.getCounter(Counter.DIFFERENTCELLVALUES).increment(1);
            matchingRow = false;

            if (!dryRun && doPuts) {
              // overwrite target cell
              if (put == null) {
                put = new Put(rowKey);
              }
              sourceCell = checkAndResetTimestamp(sourceCell);
              put.add(sourceCell);
            }
          }
          sourceCell = sourceCells.nextCellInRow();
          targetCell = targetCells.nextCellInRow();
        }

        // Flush mutations once they reach the scan batch size so a very wide
        // row does not accumulate one huge Put/Delete in memory.
        if (!dryRun && sourceTableHash.scanBatch > 0) {
          if (put != null && put.size() >= sourceTableHash.scanBatch) {
            context.write(new ImmutableBytesWritable(rowKey), put);
            put = null;
          }
          if (delete != null && delete.size() >= sourceTableHash.scanBatch) {
            context.write(new ImmutableBytesWritable(rowKey), delete);
            delete = null;
          }
        }
      }

      // Emit any remaining mutations for this row.
      if (!dryRun) {
        if (put != null) {
          context.write(new ImmutableBytesWritable(rowKey), put);
        }
        if (delete != null) {
          context.write(new ImmutableBytesWritable(rowKey), delete);
        }
      }

      if (matchingCells > 0) {
        context.getCounter(Counter.MATCHINGCELLS).increment(matchingCells);
      }
      if (matchingRow) {
        context.getCounter(Counter.MATCHINGROWS).increment(1);
        return true;
      } else {
        context.getCounter(Counter.ROWSWITHDIFFS).increment(1);
        return false;
      }
    }
603 
    private static final CellComparator cellComparator = new CellComparator();
    /**
     * Compare the given row keys (raw bytes).
     * Nulls are after non-nulls, so an exhausted side always sorts last.
     */
    private static int compareRowKeys(byte[] r1, byte[] r2) {
      if (r1 == null) {
        return 1;  // source missing row
      } else if (r2 == null) {
        return -1; // target missing row
      } else {
        return cellComparator.compareRows(r1, 0, r1.length, r2, 0, r2.length);
      }
    }
618 
619     /**
620      * Compare families, qualifiers, and timestamps of the given Cells.
621      * They are assumed to be of the same row.
622      * Nulls are after non-nulls.
623      */
624     private int compareCellKeysWithinRow(Cell c1, Cell c2) {
625       if (c1 == null) {
626         return 1; // source missing cell
627       }
628       if (c2 == null) {
629         return -1; // target missing cell
630       }
631 
632       int result = CellComparator.compareFamilies(c1, c2);
633       if (result != 0) {
634         return result;
635       }
636 
637       result = CellComparator.compareQualifiers(c1, c2);
638       if (result != 0) {
639         return result;
640       }
641       if (this.ignoreTimestamp) {
642         return 0;
643       } else{
644         // note timestamp comparison is inverted - more recent cells first
645         return CellComparator.compareTimestamps(c1, c2);
646       }
647     }
648 
649     @Override
650     protected void cleanup(Context context)
651         throws IOException, InterruptedException {
652       if (mapperException == null) {
653         try {
654           finishRemainingHashRanges(context);
655         } catch (Throwable t) {
656           mapperException = t;
657         }
658       }
659 
660       try {
661         sourceTable.close();
662         targetTable.close();
663         sourceConnection.close();
664         targetConnection.close();
665       } catch (Throwable t) {
666         if (mapperException == null) {
667           mapperException = t;
668         } else {
669           LOG.error("Suppressing exception from closing tables", t);
670         }
671       }
672 
673       // propagate first exception
674       if (mapperException != null) {
675         Throwables.propagateIfInstanceOf(mapperException, IOException.class);
676         Throwables.propagateIfInstanceOf(mapperException, InterruptedException.class);
677         Throwables.propagate(mapperException);
678       }
679     }
680 
    /**
     * Closes out the hash batches that remain open when the split's scan ends.
     * If the final batch extends past this split's end row, the tail of that
     * range is scanned here so the batch hash covers the full source range.
     */
    private void finishRemainingHashRanges(Context context) throws IOException,
        InterruptedException {
      TableSplit split = (TableSplit) context.getInputSplit();
      byte[] splitEndRow = split.getEndRow();
      boolean reachedEndOfTable = HashTable.isTableEndRow(splitEndRow);

      // if there are more hash batches that begin before the end of this split move to them
      while (nextSourceKey != null
          && (nextSourceKey.compareTo(splitEndRow) < 0 || reachedEndOfTable)) {
        moveToNextBatch(context);
      }

      if (targetHasher.isBatchStarted()) {
        // need to complete the final open hash batch

        if ((nextSourceKey != null && nextSourceKey.compareTo(splitEndRow) > 0)
              || (nextSourceKey == null && !Bytes.equals(splitEndRow, sourceTableHash.stopRow))) {
          // the open hash range continues past the end of this region
          // add a scan to complete the current hash range
          Scan scan = sourceTableHash.initScan();
          scan.setStartRow(splitEndRow);
          if (nextSourceKey == null) {
            scan.setStopRow(sourceTableHash.stopRow);
          } else {
            scan.setStopRow(nextSourceKey.copyBytes());
          }

          ResultScanner targetScanner = null;
          try {
            targetScanner = targetTable.getScanner(scan);
            for (Result row : targetScanner) {
              targetHasher.hashResult(row);
            }
          } finally {
            if (targetScanner != null) {
              targetScanner.close();
            }
          }
        } // else current batch ends exactly at split end row

        finishBatchAndCompareHashes(context);
      }
    }
724   }
725 
  // Number of required positional arguments: <sourcehashdir> <sourcetable> <targettable>.
  private static final int NUM_ARGS = 3;
  /**
   * Prints usage to stderr, preceded by {@code errorMsg} when one is given.
   */
  private static void printUsage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
      System.err.println();
    }
    System.err.println("Usage: SyncTable [options] <sourcehashdir> <sourcetable> <targettable>");
    System.err.println();
    System.err.println("Options:");

    System.err.println(" sourcezkcluster  ZK cluster key of the source table");
    System.err.println("                  (defaults to cluster in classpath's config)");
    System.err.println(" targetzkcluster  ZK cluster key of the target table");
    System.err.println("                  (defaults to cluster in classpath's config)");
    System.err.println(" dryrun           if true, output counters but no writes");
    System.err.println("                  (defaults to false)");
    System.err.println(" doDeletes        if false, does not perform deletes");
    System.err.println("                  (defaults to true)");
    System.err.println(" doPuts           if false, does not perform puts ");
    System.err.println("                  (defaults to true)");
    System.err.println(" ignoreTimestamps if true, ignores cells timestamps while comparing ");
    System.err.println("                  cell values. Any missing cell on target then gets");
    System.err.println("                  added with current time as timestamp ");
    System.err.println("                  (defaults to false)");
    System.err.println();
    System.err.println("Args:");
    System.err.println(" sourcehashdir    path to HashTable output dir for source table");
    System.err.println("                  (see org.apache.hadoop.hbase.mapreduce.HashTable)");
    System.err.println(" sourcetable      Name of the source table to sync from");
    System.err.println(" targettable      Name of the target table to sync to");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" For a dry run SyncTable of tableA from a remote source cluster");
    System.err.println(" to a local target cluster:");
    System.err.println(" $ bin/hbase " +
        "org.apache.hadoop.hbase.mapreduce.SyncTable --dryrun=true"
        + " --sourcezkcluster=zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase"
        + " hdfs://nn:9000/hashes/tableA tableA tableA");
  }
765 
766   private boolean doCommandLine(final String[] args) {
767     if (args.length < NUM_ARGS) {
768       printUsage(null);
769       return false;
770     }
771     try {
772       sourceHashDir = new Path(args[args.length - 3]);
773       sourceTableName = args[args.length - 2];
774       targetTableName = args[args.length - 1];
775 
776       for (int i = 0; i < args.length - NUM_ARGS; i++) {
777         String cmd = args[i];
778         if (cmd.equals("-h") || cmd.startsWith("--h")) {
779           printUsage(null);
780           return false;
781         }
782 
783         final String sourceZkClusterKey = "--sourcezkcluster=";
784         if (cmd.startsWith(sourceZkClusterKey)) {
785           sourceZkCluster = cmd.substring(sourceZkClusterKey.length());
786           continue;
787         }
788 
789         final String targetZkClusterKey = "--targetzkcluster=";
790         if (cmd.startsWith(targetZkClusterKey)) {
791           targetZkCluster = cmd.substring(targetZkClusterKey.length());
792           continue;
793         }
794 
795         final String dryRunKey = "--dryrun=";
796         if (cmd.startsWith(dryRunKey)) {
797           dryRun = Boolean.parseBoolean(cmd.substring(dryRunKey.length()));
798           continue;
799         }
800 
801         final String doDeletesKey = "--doDeletes=";
802         if (cmd.startsWith(doDeletesKey)) {
803           doDeletes = Boolean.parseBoolean(cmd.substring(doDeletesKey.length()));
804           continue;
805         }
806 
807         final String doPutsKey = "--doPuts=";
808         if (cmd.startsWith(doPutsKey)) {
809           doPuts = Boolean.parseBoolean(cmd.substring(doPutsKey.length()));
810           continue;
811         }
812 
813         final String ignoreTimestampsKey = "--ignoreTimestamps=";
814         if (cmd.startsWith(ignoreTimestampsKey)) {
815           ignoreTimestamps = Boolean.parseBoolean(cmd.substring(ignoreTimestampsKey.length()));
816           continue;
817         }
818 
819         printUsage("Invalid argument '" + cmd + "'");
820         return false;
821       }
822 
823 
824     } catch (Exception e) {
825       e.printStackTrace();
826       printUsage("Can't start because " + e.getMessage());
827       return false;
828     }
829     return true;
830   }
831 
832   /**
833    * Main entry point.
834    */
835   public static void main(String[] args) throws Exception {
836     int ret = ToolRunner.run(new SyncTable(HBaseConfiguration.create()), args);
837     System.exit(ret);
838   }
839 
840   @Override
841   public int run(String[] args) throws Exception {
842     String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
843     if (!doCommandLine(otherArgs)) {
844       return 1;
845     }
846 
847     Job job = createSubmittableJob(otherArgs);
848     if (!job.waitForCompletion(true)) {
849       LOG.info("Map-reduce job failed!");
850       return 1;
851     }
852     counters = job.getCounters();
853     return 0;
854   }
855 
856 }