1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import com.google.common.base.Throwables;
import com.google.common.collect.Iterators;
57
/**
 * MapReduce-based tool that synchronizes a target HBase table with a source
 * table, using hashes previously generated by the {@code HashTable} job to
 * limit the amount of data rescanned: only key ranges whose hashes differ are
 * compared cell-by-cell and repaired with Puts/Deletes.
 */
public class SyncTable extends Configured implements Tool {

  private static final Log LOG = LogFactory.getLog(SyncTable.class);

  /** Conf key: path to the HashTable output dir for the source table. */
  static final String SOURCE_HASH_DIR_CONF_KEY = "sync.table.source.hash.dir";
  /** Conf key: name of the source table (the table to sync FROM). */
  static final String SOURCE_TABLE_CONF_KEY = "sync.table.source.table.name";
  /** Conf key: name of the target table (the table to sync TO). */
  static final String TARGET_TABLE_CONF_KEY = "sync.table.target.table.name";
  /** Conf key: ZK cluster key of the source cluster (optional). */
  static final String SOURCE_ZK_CLUSTER_CONF_KEY = "sync.table.source.zk.cluster";
  /** Conf key: ZK cluster key of the target cluster (optional). */
  static final String TARGET_ZK_CLUSTER_CONF_KEY = "sync.table.target.zk.cluster";
  /** Conf key: when true, only update counters; perform no writes. */
  static final String DRY_RUN_CONF_KEY = "sync.table.dry.run";
  /** Conf key: when false, suppress Deletes on the target. */
  static final String DO_DELETES_CONF_KEY = "sync.table.do.deletes";
  /** Conf key: when false, suppress Puts on the target. */
  static final String DO_PUTS_CONF_KEY = "sync.table.do.puts";
  /** Conf key: when true, compare cells ignoring their timestamps. */
  static final String IGNORE_TIMESTAMPS = "sync.table.ignore.timestamps";

  // populated from the command line by doCommandLine()
  Path sourceHashDir;
  String sourceTableName;
  String targetTableName;

  String sourceZkCluster;
  String targetZkCluster;
  boolean dryRun;
  boolean doDeletes = true;
  boolean doPuts = true;
  boolean ignoreTimestamps;

  // counters of the completed job, captured in run() for callers/tests
  Counters counters;

  public SyncTable(Configuration conf) {
    super(conf);
  }
88
89 private void initCredentialsForHBase(String zookeeper, Job job) throws IOException {
90 Configuration peerConf = HBaseConfiguration.createClusterConf(job
91 .getConfiguration(), zookeeper);
92 if("kerberos".equalsIgnoreCase(peerConf.get("hbase.security.authentication"))){
93 TableMapReduceUtil.initCredentialsForCluster(job, peerConf);
94 }
95 }
96
97 public Job createSubmittableJob(String[] args) throws IOException {
98 FileSystem fs = sourceHashDir.getFileSystem(getConf());
99 if (!fs.exists(sourceHashDir)) {
100 throw new IOException("Source hash dir not found: " + sourceHashDir);
101 }
102
103 Job job = Job.getInstance(getConf(),getConf().get("mapreduce.job.name",
104 "syncTable_" + sourceTableName + "-" + targetTableName));
105 Configuration jobConf = job.getConfiguration();
106 if ("kerberos".equalsIgnoreCase(jobConf.get("hadoop.security.authentication"))) {
107 TokenCache.obtainTokensForNamenodes(job.getCredentials(), new
108 Path[] { sourceHashDir }, getConf());
109 }
110
111 HashTable.TableHash tableHash = HashTable.TableHash.read(getConf(), sourceHashDir);
112 LOG.info("Read source hash manifest: " + tableHash);
113 LOG.info("Read " + tableHash.partitions.size() + " partition keys");
114 if (!tableHash.tableName.equals(sourceTableName)) {
115 LOG.warn("Table name mismatch - manifest indicates hash was taken from: "
116 + tableHash.tableName + " but job is reading from: " + sourceTableName);
117 }
118 if (tableHash.numHashFiles != tableHash.partitions.size() + 1) {
119 throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"
120 + " should be 1 more than the number of partition keys. However, the manifest file "
121 + " says numHashFiles=" + tableHash.numHashFiles + " but the number of partition keys"
122 + " found in the partitions file is " + tableHash.partitions.size());
123 }
124
125 Path dataDir = new Path(sourceHashDir, HashTable.HASH_DATA_DIR);
126 int dataSubdirCount = 0;
127 for (FileStatus file : fs.listStatus(dataDir)) {
128 if (file.getPath().getName().startsWith(HashTable.OUTPUT_DATA_FILE_PREFIX)) {
129 dataSubdirCount++;
130 }
131 }
132
133 if (dataSubdirCount != tableHash.numHashFiles) {
134 throw new RuntimeException("Hash data appears corrupt. The number of of hash files created"
135 + " should be 1 more than the number of partition keys. However, the number of data dirs"
136 + " found is " + dataSubdirCount + " but the number of partition keys"
137 + " found in the partitions file is " + tableHash.partitions.size());
138 }
139
140 job.setJarByClass(HashTable.class);
141 jobConf.set(SOURCE_HASH_DIR_CONF_KEY, sourceHashDir.toString());
142 jobConf.set(SOURCE_TABLE_CONF_KEY, sourceTableName);
143 jobConf.set(TARGET_TABLE_CONF_KEY, targetTableName);
144 if (sourceZkCluster != null) {
145 jobConf.set(SOURCE_ZK_CLUSTER_CONF_KEY, sourceZkCluster);
146 initCredentialsForHBase(sourceZkCluster, job);
147 }
148 if (targetZkCluster != null) {
149 jobConf.set(TARGET_ZK_CLUSTER_CONF_KEY, targetZkCluster);
150 initCredentialsForHBase(targetZkCluster, job);
151 }
152 jobConf.setBoolean(DRY_RUN_CONF_KEY, dryRun);
153 jobConf.setBoolean(DO_DELETES_CONF_KEY, doDeletes);
154 jobConf.setBoolean(DO_PUTS_CONF_KEY, doPuts);
155 jobConf.setBoolean(IGNORE_TIMESTAMPS, ignoreTimestamps);
156
157 TableMapReduceUtil.initTableMapperJob(targetTableName, tableHash.initScan(),
158 SyncMapper.class, null, null, job);
159
160 job.setNumReduceTasks(0);
161
162 if (dryRun) {
163 job.setOutputFormatClass(NullOutputFormat.class);
164 } else {
165
166
167 TableMapReduceUtil.initTableReducerJob(targetTableName, null, job, null,
168 targetZkCluster, null, null);
169
170
171 }
172
173
174 if (sourceZkCluster != null) {
175 Configuration peerConf =
176 HBaseConfiguration.createClusterConf(job.getConfiguration(), sourceZkCluster);
177 TableMapReduceUtil.initCredentialsForCluster(job, peerConf);
178 }
179 return job;
180 }
181
  /**
   * Mapper over the TARGET table. Re-computes the hash of each key-range
   * batch of target rows and compares it against the corresponding source
   * hash read from the HashTable output; only ranges whose hashes mismatch
   * are rescanned from the source table and reconciled.
   */
  public static class SyncMapper extends TableMapper<ImmutableBytesWritable, Mutation> {
    Path sourceHashDir;

    Connection sourceConnection;
    Connection targetConnection;
    Table sourceTable;
    Table targetTable;
    boolean dryRun;
    boolean doDeletes = true;
    boolean doPuts = true;
    boolean ignoreTimestamp;

    HashTable.TableHash sourceTableHash;
    HashTable.TableHash.Reader sourceHashReader;
    // source hash of the batch currently being recomputed on the target side
    ImmutableBytesWritable currentSourceHash;
    // start key of the NEXT batch, or null once all batches are consumed
    ImmutableBytesWritable nextSourceKey;
    HashTable.ResultHasher targetHasher;

    // first exception seen in map(); deferred and rethrown from cleanup()
    Throwable mapperException;

    public static enum Counter {BATCHES, HASHES_MATCHED, HASHES_NOT_MATCHED, SOURCEMISSINGROWS,
      SOURCEMISSINGCELLS, TARGETMISSINGROWS, TARGETMISSINGCELLS, ROWSWITHDIFFS, DIFFERENTCELLVALUES,
      MATCHINGROWS, MATCHINGCELLS, EMPTY_BATCHES, RANGESMATCHED, RANGESNOTMATCHED};
205
    @Override
    protected void setup(Context context) throws IOException {
      // Open connections/tables to both clusters and position the hash
      // reader at this split's start key.
      Configuration conf = context.getConfiguration();
      sourceHashDir = new Path(conf.get(SOURCE_HASH_DIR_CONF_KEY));
      sourceConnection = openConnection(conf, SOURCE_ZK_CLUSTER_CONF_KEY, null);
      targetConnection = openConnection(conf, TARGET_ZK_CLUSTER_CONF_KEY,
          TableOutputFormat.OUTPUT_CONF_PREFIX);
      sourceTable = openTable(sourceConnection, conf, SOURCE_TABLE_CONF_KEY);
      targetTable = openTable(targetConnection, conf, TARGET_TABLE_CONF_KEY);
      dryRun = conf.getBoolean(DRY_RUN_CONF_KEY, false);
      doDeletes = conf.getBoolean(DO_DELETES_CONF_KEY, true);
      doPuts = conf.getBoolean(DO_PUTS_CONF_KEY, true);
      ignoreTimestamp = conf.getBoolean(IGNORE_TIMESTAMPS, false);

      sourceTableHash = HashTable.TableHash.read(conf, sourceHashDir);
      LOG.info("Read source hash manifest: " + sourceTableHash);
      LOG.info("Read " + sourceTableHash.partitions.size() + " partition keys");

      TableSplit split = (TableSplit) context.getInputSplit();
      ImmutableBytesWritable splitStartKey = new ImmutableBytesWritable(split.getStartRow());

      // open the hash reader at the batch covering this split's first row
      sourceHashReader = sourceTableHash.newReader(conf, splitStartKey);
      findNextKeyHashPair();

      // The hasher is created here but no batch is started yet: batches are
      // opened lazily in moveToNextBatch() as rows arrive in map().
      targetHasher = new HashTable.ResultHasher();
      targetHasher.ignoreTimestamps = ignoreTimestamp;
    }
237
238 private static Connection openConnection(Configuration conf, String zkClusterConfKey,
239 String configPrefix)
240 throws IOException {
241 String zkCluster = conf.get(zkClusterConfKey);
242 Configuration clusterConf = HBaseConfiguration.createClusterConf(conf,
243 zkCluster, configPrefix);
244 return ConnectionFactory.createConnection(clusterConf);
245 }
246
247 private static Table openTable(Connection connection, Configuration conf,
248 String tableNameConfKey) throws IOException {
249 return connection.getTable(TableName.valueOf(conf.get(tableNameConfKey)));
250 }
251
252
253
254
255
256 private void findNextKeyHashPair() throws IOException {
257 boolean hasNext = sourceHashReader.next();
258 if (hasNext) {
259 nextSourceKey = sourceHashReader.getCurrentKey();
260 } else {
261
262 nextSourceKey = null;
263 }
264 }
265
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
      try {
        // If this row is at or past the next batch boundary, close out the
        // open batch (comparing its hashes) and advance - possibly through
        // several consecutive batches that contain no target rows.
        while (nextSourceKey != null && key.compareTo(nextSourceKey) >= 0) {
          moveToNextBatch(context);
        }

        // only hash the row once a batch is open, i.e. once there is a
        // source hash to compare the result against
        if (targetHasher.isBatchStarted()) {
          targetHasher.hashResult(value);
        }
      } catch (Throwable t) {
        // remember the failure so cleanup() rethrows it rather than masking
        // it with a secondary error from finishRemainingHashRanges()
        mapperException = t;
        Throwables.propagateIfInstanceOf(t, IOException.class);
        Throwables.propagateIfInstanceOf(t, InterruptedException.class);
        Throwables.propagate(t);
      }
    }
286
287
288
289
290
    /**
     * If a hash batch is open, completes it and compares hashes; then opens a
     * new batch starting at nextSourceKey, records that batch's source hash,
     * and advances the reader to the following key/hash pair.
     */
    private void moveToNextBatch(Context context) throws IOException, InterruptedException {
      if (targetHasher.isBatchStarted()) {
        finishBatchAndCompareHashes(context);
      }
      targetHasher.startBatch(nextSourceKey);
      currentSourceHash = sourceHashReader.getCurrentHash();

      findNextKeyHashPair();
    }
300
301
302
303
304
305
    /**
     * Closes the open hash batch and compares the recomputed target hash with
     * the source hash from the manifest. On mismatch, rescans and reconciles
     * the batch's full key range via syncRange().
     */
    private void finishBatchAndCompareHashes(Context context)
        throws IOException, InterruptedException {
      targetHasher.finishBatch();
      context.getCounter(Counter.BATCHES).increment(1);
      if (targetHasher.getBatchSize() == 0) {
        context.getCounter(Counter.EMPTY_BATCHES).increment(1);
      }
      ImmutableBytesWritable targetHash = targetHasher.getBatchHash();
      if (targetHash.equals(currentSourceHash)) {
        context.getCounter(Counter.HASHES_MATCHED).increment(1);
      } else {
        context.getCounter(Counter.HASHES_NOT_MATCHED).increment(1);

        // the batch ends where the next batch begins, or at the table hash's
        // stop row when this was the last batch
        ImmutableBytesWritable stopRow = nextSourceKey == null
            ? new ImmutableBytesWritable(sourceTableHash.stopRow)
            : nextSourceKey;

        if (LOG.isDebugEnabled()) {
          LOG.debug("Hash mismatch. Key range: " + toHex(targetHasher.getBatchStartKey())
              + " to " + toHex(stopRow)
              + " sourceHash: " + toHex(currentSourceHash)
              + " targetHash: " + toHex(targetHash));
        }

        syncRange(context, targetHasher.getBatchStartKey(), stopRow);
      }
    }
333 private static String toHex(ImmutableBytesWritable bytes) {
334 return Bytes.toHex(bytes.get(), bytes.getOffset(), bytes.getLength());
335 }
336
337 private static final CellScanner EMPTY_CELL_SCANNER
338 = new CellScanner(Iterators.<Result>emptyIterator());
339
340
341
342
343
344
345 private void syncRange(Context context, ImmutableBytesWritable startRow,
346 ImmutableBytesWritable stopRow) throws IOException, InterruptedException {
347 Scan scan = sourceTableHash.initScan();
348 scan.setStartRow(startRow.copyBytes());
349 scan.setStopRow(stopRow.copyBytes());
350
351 ResultScanner sourceScanner = sourceTable.getScanner(scan);
352 CellScanner sourceCells = new CellScanner(sourceScanner.iterator());
353
354 ResultScanner targetScanner = targetTable.getScanner(new Scan(scan));
355 CellScanner targetCells = new CellScanner(targetScanner.iterator());
356
357 boolean rangeMatched = true;
358 byte[] nextSourceRow = sourceCells.nextRow();
359 byte[] nextTargetRow = targetCells.nextRow();
360 while(nextSourceRow != null || nextTargetRow != null) {
361 boolean rowMatched;
362 int rowComparison = compareRowKeys(nextSourceRow, nextTargetRow);
363 if (rowComparison < 0) {
364 if (LOG.isInfoEnabled()) {
365 LOG.info("Target missing row: " + Bytes.toHex(nextSourceRow));
366 }
367 context.getCounter(Counter.TARGETMISSINGROWS).increment(1);
368
369 rowMatched = syncRowCells(context, nextSourceRow, sourceCells, EMPTY_CELL_SCANNER);
370 nextSourceRow = sourceCells.nextRow();
371 } else if (rowComparison > 0) {
372 if (LOG.isInfoEnabled()) {
373 LOG.info("Source missing row: " + Bytes.toHex(nextTargetRow));
374 }
375 context.getCounter(Counter.SOURCEMISSINGROWS).increment(1);
376
377 rowMatched = syncRowCells(context, nextTargetRow, EMPTY_CELL_SCANNER, targetCells);
378 nextTargetRow = targetCells.nextRow();
379 } else {
380
381 rowMatched = syncRowCells(context, nextSourceRow, sourceCells, targetCells);
382 nextSourceRow = sourceCells.nextRow();
383 nextTargetRow = targetCells.nextRow();
384 }
385
386 if (!rowMatched) {
387 rangeMatched = false;
388 }
389 }
390
391 sourceScanner.close();
392 targetScanner.close();
393
394 context.getCounter(rangeMatched ? Counter.RANGESMATCHED : Counter.RANGESNOTMATCHED)
395 .increment(1);
396 }
397
    /**
     * Presents a stream of scan Results row-by-row, then cell-by-cell within
     * the current row. Handles scan batching, where a single row's cells may
     * be split across several consecutive Results.
     */
    private static class CellScanner {
      private final Iterator<Result> results;

      // row key of the row currently being iterated; null before the first row
      private byte[] currentRow;
      // Result supplying cells for the current row; null once the row is done
      private Result currentRowResult;
      // index into currentRowResult.rawCells() of the next cell to return
      private int nextCellInRow;

      // a Result read ahead while finishing a row; it belongs to the NEXT row
      private Result nextRowResult;

      public CellScanner(Iterator<Result> results) {
        this.results = results;
      }

      /**
       * Advances to the next row and returns its key, or null when the
       * underlying iterator is exhausted.
       */
      public byte[] nextRow() {
        if (nextRowResult == null) {
          // no cached read-ahead; scan forward until a new row begins
          while (results.hasNext()) {
            nextRowResult = results.next();
            Cell nextCell = nextRowResult.rawCells()[0];
            if (currentRow == null
                || !Bytes.equals(currentRow, 0, currentRow.length, nextCell.getRowArray(),
                nextCell.getRowOffset(), nextCell.getRowLength())) {
              // this Result belongs to a different row - stop here
              break;
            } else {
              // leftover Result for the current row (caller may not have
              // consumed all of its cells) - skip it
              nextRowResult = null;
            }
          }

          if (nextRowResult == null) {
            // end of data
            currentRowResult = null;
            currentRow = null;
            return null;
          }
        }

        // promote the read-ahead Result to be the current row
        currentRowResult = nextRowResult;
        nextCellInRow = 0;
        currentRow = currentRowResult.getRow();
        nextRowResult = null;
        return currentRow;
      }

      /**
       * Returns the next cell of the current row, or null when the row has no
       * further cells (or no row is current).
       */
      public Cell nextCellInRow() {
        if (currentRowResult == null) {
          // row exhausted; nextRow() must be called to continue
          return null;
        }

        Cell nextCell = currentRowResult.rawCells()[nextCellInRow];
        nextCellInRow++;
        if (nextCellInRow == currentRowResult.size()) {
          if (results.hasNext()) {
            Result result = results.next();
            Cell cell = result.rawCells()[0];
            if (Bytes.equals(currentRow, 0, currentRow.length, cell.getRowArray(),
                cell.getRowOffset(), cell.getRowLength())) {
              // batched scan: this Result continues the same row
              currentRowResult = result;
              nextCellInRow = 0;
            } else {
              // the Result starts a new row; cache it for the next nextRow()
              nextRowResult = result;
              // and mark the current row as fully consumed
              currentRowResult = null;
            }
          } else {
            // iterator exhausted; the current row is done
            currentRowResult = null;
          }
        }
        return nextCell;
      }
    }
482
483 private Cell checkAndResetTimestamp(Cell sourceCell){
484 if (ignoreTimestamp) {
485 sourceCell = new KeyValue(sourceCell);
486 ((KeyValue) sourceCell).setTimestamp(System.currentTimeMillis());
487 }
488 return sourceCell;
489 }
490
491
492
493
494
495
496
    /**
     * Compares one row's cells from both tables and reconciles differences by
     * accumulating a Put (for cells missing/different on the target) and a
     * Delete (for cells missing on the source). Exactly one of
     * sourceCells/targetCells may be EMPTY_CELL_SCANNER when the row exists
     * on only one side.
     *
     * @return true if the row matched exactly, false if any difference was found
     */
    private boolean syncRowCells(Context context, byte[] rowKey, CellScanner sourceCells,
        CellScanner targetCells) throws IOException, InterruptedException {
      Put put = null;
      Delete delete = null;
      long matchingCells = 0;
      boolean matchingRow = true;
      Cell sourceCell = sourceCells.nextCellInRow();
      Cell targetCell = targetCells.nextCellInRow();
      while (sourceCell != null || targetCell != null) {
        // order cells by family/qualifier (and timestamp unless ignored)
        int cellKeyComparison = compareCellKeysWithinRow(sourceCell, targetCell);
        if (cellKeyComparison < 0) {
          // target is missing this source cell
          if (LOG.isDebugEnabled()) {
            LOG.debug("Target missing cell: " + sourceCell);
          }
          context.getCounter(Counter.TARGETMISSINGCELLS).increment(1);
          matchingRow = false;

          if (!dryRun && doPuts) {
            if (put == null) {
              put = new Put(rowKey);
            }
            sourceCell = checkAndResetTimestamp(sourceCell);
            put.add(sourceCell);
          }

          sourceCell = sourceCells.nextCellInRow();
        } else if (cellKeyComparison > 0) {
          // source is missing this target cell
          if (LOG.isDebugEnabled()) {
            LOG.debug("Source missing cell: " + targetCell);
          }
          context.getCounter(Counter.SOURCEMISSINGCELLS).increment(1);
          matchingRow = false;

          if (!dryRun && doDeletes) {
            if (delete == null) {
              delete = new Delete(rowKey);
            }
            // tombstone the exact version (family/qualifier/timestamp)
            delete.addColumn(CellUtil.cloneFamily(targetCell),
                CellUtil.cloneQualifier(targetCell), targetCell.getTimestamp());
          }

          targetCell = targetCells.nextCellInRow();
        } else {
          // same cell key on both sides; compare values
          if (CellUtil.matchingValue(sourceCell, targetCell)) {
            matchingCells++;
          } else {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Different values: ");
              LOG.debug(" source cell: " + sourceCell
                  + " value: " + Bytes.toHex(sourceCell.getValueArray(),
                  sourceCell.getValueOffset(), sourceCell.getValueLength()));
              LOG.debug(" target cell: " + targetCell
                  + " value: " + Bytes.toHex(targetCell.getValueArray(),
                  targetCell.getValueOffset(), targetCell.getValueLength()));
            }
            context.getCounter(Counter.DIFFERENTCELLVALUES).increment(1);
            matchingRow = false;

            if (!dryRun && doPuts) {
              // overwrite the target's value with the source's
              if (put == null) {
                put = new Put(rowKey);
              }
              sourceCell = checkAndResetTimestamp(sourceCell);
              put.add(sourceCell);
            }
          }
          sourceCell = sourceCells.nextCellInRow();
          targetCell = targetCells.nextCellInRow();
        }

        // flush partial mutations once they reach the scan batch size so very
        // wide rows don't accumulate unbounded in memory
        if (!dryRun && sourceTableHash.scanBatch > 0) {
          if (put != null && put.size() >= sourceTableHash.scanBatch) {
            context.write(new ImmutableBytesWritable(rowKey), put);
            put = null;
          }
          if (delete != null && delete.size() >= sourceTableHash.scanBatch) {
            context.write(new ImmutableBytesWritable(rowKey), delete);
            delete = null;
          }
        }
      }

      // emit whatever mutations remain for this row
      if (!dryRun) {
        if (put != null) {
          context.write(new ImmutableBytesWritable(rowKey), put);
        }
        if (delete != null) {
          context.write(new ImmutableBytesWritable(rowKey), delete);
        }
      }

      if (matchingCells > 0) {
        context.getCounter(Counter.MATCHINGCELLS).increment(matchingCells);
      }
      if (matchingRow) {
        context.getCounter(Counter.MATCHINGROWS).increment(1);
        return true;
      } else {
        context.getCounter(Counter.ROWSWITHDIFFS).increment(1);
        return false;
      }
    }
603
604 private static final CellComparator cellComparator = new CellComparator();
605
606
607
608
609 private static int compareRowKeys(byte[] r1, byte[] r2) {
610 if (r1 == null) {
611 return 1;
612 } else if (r2 == null) {
613 return -1;
614 } else {
615 return cellComparator.compareRows(r1, 0, r1.length, r2, 0, r2.length);
616 }
617 }
618
619
620
621
622
623
624 private int compareCellKeysWithinRow(Cell c1, Cell c2) {
625 if (c1 == null) {
626 return 1;
627 }
628 if (c2 == null) {
629 return -1;
630 }
631
632 int result = CellComparator.compareFamilies(c1, c2);
633 if (result != 0) {
634 return result;
635 }
636
637 result = CellComparator.compareQualifiers(c1, c2);
638 if (result != 0) {
639 return result;
640 }
641 if (this.ignoreTimestamp) {
642 return 0;
643 } else{
644
645 return CellComparator.compareTimestamps(c1, c2);
646 }
647 }
648
    @Override
    protected void cleanup(Context context)
        throws IOException, InterruptedException {
      // finish any hash ranges past the last mapped row, but only when map()
      // itself did not already fail
      if (mapperException == null) {
        try {
          finishRemainingHashRanges(context);
        } catch (Throwable t) {
          mapperException = t;
        }
      }

      try {
        sourceTable.close();
        targetTable.close();
        sourceConnection.close();
        targetConnection.close();
      } catch (Throwable t) {
        if (mapperException == null) {
          mapperException = t;
        } else {
          // don't let a close() failure mask the original error
          LOG.error("Suppressing exception from closing tables", t);
        }
      }

      // rethrow any deferred failure, preserving checked types where possible
      if (mapperException != null) {
        Throwables.propagateIfInstanceOf(mapperException, IOException.class);
        Throwables.propagateIfInstanceOf(mapperException, InterruptedException.class);
        Throwables.propagate(mapperException);
      }
    }
680
    /**
     * Called from cleanup(): an input split can end before the last source
     * hash batch does, so advance through all batches covered by this split
     * and, if the final open batch extends past the split's end row, scan the
     * remaining target rows directly so its hash covers the full batch range
     * before the final comparison.
     */
    private void finishRemainingHashRanges(Context context) throws IOException,
        InterruptedException {
      TableSplit split = (TableSplit) context.getInputSplit();
      byte[] splitEndRow = split.getEndRow();
      boolean reachedEndOfTable = HashTable.isTableEndRow(splitEndRow);

      // iterate through any remaining batches that start inside this split
      // (they may simply have no rows on the target side)
      while (nextSourceKey != null
          && (nextSourceKey.compareTo(splitEndRow) < 0 || reachedEndOfTable)) {
        moveToNextBatch(context);
      }

      if (targetHasher.isBatchStarted()) {
        // does the open batch run past the end of this split?
        if ((nextSourceKey != null && nextSourceKey.compareTo(splitEndRow) > 0)
            || (nextSourceKey == null && !Bytes.equals(splitEndRow, sourceTableHash.stopRow))) {
          // scan the target rows between the split end and the batch end so
          // the recomputed hash covers the same range as the source hash
          Scan scan = sourceTableHash.initScan();
          scan.setStartRow(splitEndRow);
          if (nextSourceKey == null) {
            scan.setStopRow(sourceTableHash.stopRow);
          } else {
            scan.setStopRow(nextSourceKey.copyBytes());
          }

          ResultScanner targetScanner = null;
          try {
            targetScanner = targetTable.getScanner(scan);
            for (Result row : targetScanner) {
              targetHasher.hashResult(row);
            }
          } finally {
            if (targetScanner != null) {
              targetScanner.close();
            }
          }
        }

        finishBatchAndCompareHashes(context);
      }
    }
724 }
725
  // number of required positional arguments (hash dir, source table, target table)
  private static final int NUM_ARGS = 3;

  /** Prints tool usage to stderr, optionally preceded by an error message. */
  private static void printUsage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
      System.err.println();
    }
    System.err.println("Usage: SyncTable [options] <sourcehashdir> <sourcetable> <targettable>");
    System.err.println();
    System.err.println("Options:");

    System.err.println(" sourcezkcluster ZK cluster key of the source table");
    System.err.println(" (defaults to cluster in classpath's config)");
    System.err.println(" targetzkcluster ZK cluster key of the target table");
    System.err.println(" (defaults to cluster in classpath's config)");
    System.err.println(" dryrun if true, output counters but no writes");
    System.err.println(" (defaults to false)");
    System.err.println(" doDeletes if false, does not perform deletes");
    System.err.println(" (defaults to true)");
    System.err.println(" doPuts if false, does not perform puts ");
    System.err.println(" (defaults to true)");
    System.err.println(" ignoreTimestamps if true, ignores cells timestamps while comparing ");
    System.err.println(" cell values. Any missing cell on target then gets");
    System.err.println(" added with current time as timestamp ");
    System.err.println(" (defaults to false)");
    System.err.println();
    System.err.println("Args:");
    System.err.println(" sourcehashdir path to HashTable output dir for source table");
    System.err.println(" (see org.apache.hadoop.hbase.mapreduce.HashTable)");
    System.err.println(" sourcetable Name of the source table to sync from");
    System.err.println(" targettable Name of the target table to sync to");
    System.err.println();
    System.err.println("Examples:");
    System.err.println(" For a dry run SyncTable of tableA from a remote source cluster");
    System.err.println(" to a local target cluster:");
    System.err.println(" $ bin/hbase " +
        "org.apache.hadoop.hbase.mapreduce.SyncTable --dryrun=true"
        + " --sourcezkcluster=zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase"
        + " hdfs://nn:9000/hashes/tableA tableA tableA");
  }
765
  /**
   * Parses the command line: optional {@code --flag=value} options followed
   * by exactly three positional args (hash dir, source table, target table),
   * populating the corresponding fields.
   *
   * @return false when parsing failed or help was requested (usage printed)
   */
  private boolean doCommandLine(final String[] args) {
    if (args.length < NUM_ARGS) {
      printUsage(null);
      return false;
    }
    try {
      // positional arguments are always the last three
      sourceHashDir = new Path(args[args.length - 3]);
      sourceTableName = args[args.length - 2];
      targetTableName = args[args.length - 1];

      // everything before the positional args must be a recognized option
      for (int i = 0; i < args.length - NUM_ARGS; i++) {
        String cmd = args[i];
        if (cmd.equals("-h") || cmd.startsWith("--h")) {
          printUsage(null);
          return false;
        }

        final String sourceZkClusterKey = "--sourcezkcluster=";
        if (cmd.startsWith(sourceZkClusterKey)) {
          sourceZkCluster = cmd.substring(sourceZkClusterKey.length());
          continue;
        }

        final String targetZkClusterKey = "--targetzkcluster=";
        if (cmd.startsWith(targetZkClusterKey)) {
          targetZkCluster = cmd.substring(targetZkClusterKey.length());
          continue;
        }

        final String dryRunKey = "--dryrun=";
        if (cmd.startsWith(dryRunKey)) {
          dryRun = Boolean.parseBoolean(cmd.substring(dryRunKey.length()));
          continue;
        }

        final String doDeletesKey = "--doDeletes=";
        if (cmd.startsWith(doDeletesKey)) {
          doDeletes = Boolean.parseBoolean(cmd.substring(doDeletesKey.length()));
          continue;
        }

        final String doPutsKey = "--doPuts=";
        if (cmd.startsWith(doPutsKey)) {
          doPuts = Boolean.parseBoolean(cmd.substring(doPutsKey.length()));
          continue;
        }

        final String ignoreTimestampsKey = "--ignoreTimestamps=";
        if (cmd.startsWith(ignoreTimestampsKey)) {
          ignoreTimestamps = Boolean.parseBoolean(cmd.substring(ignoreTimestampsKey.length()));
          continue;
        }

        printUsage("Invalid argument '" + cmd + "'");
        return false;
      }


    } catch (Exception e) {
      e.printStackTrace();
      printUsage("Can't start because " + e.getMessage());
      return false;
    }
    return true;
  }
831
832
833
834
835 public static void main(String[] args) throws Exception {
836 int ret = ToolRunner.run(new SyncTable(HBaseConfiguration.create()), args);
837 System.exit(ret);
838 }
839
840 @Override
841 public int run(String[] args) throws Exception {
842 String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
843 if (!doCommandLine(otherArgs)) {
844 return 1;
845 }
846
847 Job job = createSubmittableJob(otherArgs);
848 if (!job.waitForCompletion(true)) {
849 LOG.info("Map-reduce job failed!");
850 return 1;
851 }
852 counters = job.getCounters();
853 return 0;
854 }
855
856 }