1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.snapshot;
20
21 import java.io.BufferedInputStream;
22 import java.io.FileNotFoundException;
23 import java.io.DataInput;
24 import java.io.DataOutput;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.util.ArrayList;
28 import java.util.Collections;
29 import java.util.Comparator;
30 import java.util.LinkedList;
31 import java.util.List;
32 import java.util.Random;
33
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.hbase.classification.InterfaceAudience;
37 import org.apache.hadoop.hbase.classification.InterfaceStability;
38 import org.apache.hadoop.conf.Configuration;
39 import org.apache.hadoop.conf.Configured;
40 import org.apache.hadoop.fs.FSDataInputStream;
41 import org.apache.hadoop.fs.FSDataOutputStream;
42 import org.apache.hadoop.fs.FileChecksum;
43 import org.apache.hadoop.fs.FileStatus;
44 import org.apache.hadoop.fs.FileSystem;
45 import org.apache.hadoop.fs.FileUtil;
46 import org.apache.hadoop.fs.Path;
47 import org.apache.hadoop.fs.permission.FsPermission;
48 import org.apache.hadoop.hbase.TableName;
49 import org.apache.hadoop.hbase.HBaseConfiguration;
50 import org.apache.hadoop.hbase.HConstants;
51 import org.apache.hadoop.hbase.HRegionInfo;
52 import org.apache.hadoop.hbase.io.FileLink;
53 import org.apache.hadoop.hbase.io.HFileLink;
54 import org.apache.hadoop.hbase.io.WALLink;
55 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
56 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
57 import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
58 import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
59 import org.apache.hadoop.hbase.util.FSUtils;
60 import org.apache.hadoop.hbase.util.Pair;
61 import org.apache.hadoop.io.BytesWritable;
62 import org.apache.hadoop.io.IOUtils;
63 import org.apache.hadoop.io.NullWritable;
64 import org.apache.hadoop.io.Writable;
65 import org.apache.hadoop.mapreduce.Job;
66 import org.apache.hadoop.mapreduce.JobContext;
67 import org.apache.hadoop.mapreduce.Mapper;
68 import org.apache.hadoop.mapreduce.InputFormat;
69 import org.apache.hadoop.mapreduce.InputSplit;
70 import org.apache.hadoop.mapreduce.RecordReader;
71 import org.apache.hadoop.mapreduce.TaskAttemptContext;
72 import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
73 import org.apache.hadoop.mapreduce.security.TokenCache;
74 import org.apache.hadoop.hbase.io.hadoopbackport.ThrottledInputStream;
75 import org.apache.hadoop.util.StringUtils;
76 import org.apache.hadoop.util.Tool;
77 import org.apache.hadoop.util.ToolRunner;
78
79
80
81
82
83
84
85
86 @InterfaceAudience.Public
87 @InterfaceStability.Evolving
88 public class ExportSnapshot extends Configured implements Tool {
/** Name of this tool; also the base of the configuration prefixes below. */
public static final String NAME = "exportsnapshot";

/** Configuration key prefix used to build the source cluster configuration. */
public static final String CONF_SOURCE_PREFIX = NAME + ".from.";

/** Configuration key prefix used to build the destination cluster configuration. */
public static final String CONF_DEST_PREFIX = NAME + ".to.";

private static final Log LOG = LogFactory.getLog(ExportSnapshot.class);

// Configuration keys used to hand the export settings over to the copy job and its map tasks.
private static final String MR_NUM_MAPS = "mapreduce.job.maps";
private static final String CONF_NUM_SPLITS = "snapshot.export.format.splits";
private static final String CONF_SNAPSHOT_NAME = "snapshot.export.format.snapshot.name";
private static final String CONF_SNAPSHOT_DIR = "snapshot.export.format.snapshot.dir";
private static final String CONF_FILES_USER = "snapshot.export.files.attributes.user";
private static final String CONF_FILES_GROUP = "snapshot.export.files.attributes.group";
private static final String CONF_FILES_MODE = "snapshot.export.files.attributes.mode";
private static final String CONF_CHECKSUM_VERIFY = "snapshot.export.checksum.verify";
private static final String CONF_OUTPUT_ROOT = "snapshot.export.output.root";
private static final String CONF_INPUT_ROOT = "snapshot.export.input.root";
private static final String CONF_BUFFER_SIZE = "snapshot.export.buffer.size";
private static final String CONF_MAP_GROUP = "snapshot.export.default.map.group";
private static final String CONF_BANDWIDTH_MB = "snapshot.export.map.bandwidth.mb";
// When true, the snapshot manifest is copied straight into the completed snapshot directory
// instead of the working (tmp) snapshot directory.
protected static final String CONF_SKIP_TMP = "snapshot.export.skip.tmp";

// Test hooks read by ExportMapper.injectTestFailure to make a copy fail on purpose.
static final String CONF_TEST_FAILURE = "test.snapshot.export.failure";
static final String CONF_TEST_RETRY = "test.snapshot.export.failure.retry";

private static final String INPUT_FOLDER_PREFIX = "export-files.";
116
117
/** Job counters maintained by the export map tasks. */
public enum Counter {
  /** Incremented when a source file cannot be opened or is not found. */
  MISSING_FILES,
  /** Incremented once per file whose copy completed. */
  FILES_COPIED,
  /** Incremented once per file skipped because the destination already had the same file. */
  FILES_SKIPPED,
  /** Incremented when copying a file fails. */
  COPY_FAILED,
  /** Sum of the source lengths of the files that were about to be copied. */
  BYTES_EXPECTED,
  /** Sum of the source lengths of the skipped files. */
  BYTES_SKIPPED,
  /** Number of bytes actually written to the destination. */
  BYTES_COPIED
}
122
123 private static class ExportMapper extends Mapper<BytesWritable, NullWritable,
124 NullWritable, NullWritable> {
125 final static int REPORT_SIZE = 1 * 1024 * 1024;
126 final static int BUFFER_SIZE = 64 * 1024;
127
128 private boolean testFailures;
129 private Random random;
130
131 private boolean verifyChecksum;
132 private String filesGroup;
133 private String filesUser;
134 private short filesMode;
135 private int bufferSize;
136
137 private FileSystem outputFs;
138 private Path outputArchive;
139 private Path outputRoot;
140
141 private FileSystem inputFs;
142 private Path inputArchive;
143 private Path inputRoot;
144
145 @Override
146 public void setup(Context context) throws IOException {
147 Configuration conf = context.getConfiguration();
148 Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
149 Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
150
151 verifyChecksum = conf.getBoolean(CONF_CHECKSUM_VERIFY, true);
152
153 filesGroup = conf.get(CONF_FILES_GROUP);
154 filesUser = conf.get(CONF_FILES_USER);
155 filesMode = (short)conf.getInt(CONF_FILES_MODE, 0);
156 outputRoot = new Path(conf.get(CONF_OUTPUT_ROOT));
157 inputRoot = new Path(conf.get(CONF_INPUT_ROOT));
158
159 inputArchive = new Path(inputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
160 outputArchive = new Path(outputRoot, HConstants.HFILE_ARCHIVE_DIRECTORY);
161
162 testFailures = conf.getBoolean(CONF_TEST_FAILURE, false);
163
164 try {
165 srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
166 inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
167 } catch (IOException e) {
168 throw new IOException("Could not get the input FileSystem with root=" + inputRoot, e);
169 }
170
171 try {
172 destConf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
173 outputFs = FileSystem.get(outputRoot.toUri(), destConf);
174 } catch (IOException e) {
175 throw new IOException("Could not get the output FileSystem with root="+ outputRoot, e);
176 }
177
178
179 int defaultBlockSize = Math.max((int) outputFs.getDefaultBlockSize(outputRoot), BUFFER_SIZE);
180 bufferSize = conf.getInt(CONF_BUFFER_SIZE, defaultBlockSize);
181 LOG.info("Using bufferSize=" + StringUtils.humanReadableInt(bufferSize));
182
183 for (Counter c : Counter.values()) {
184 context.getCounter(c).increment(0);
185 }
186 }
187
188 @Override
189 protected void cleanup(Context context) {
190 IOUtils.closeStream(inputFs);
191 IOUtils.closeStream(outputFs);
192 }
193
194 @Override
195 public void map(BytesWritable key, NullWritable value, Context context)
196 throws InterruptedException, IOException {
197 SnapshotFileInfo inputInfo = SnapshotFileInfo.parseFrom(key.copyBytes());
198 Path outputPath = getOutputPath(inputInfo);
199
200 copyFile(context, inputInfo, outputPath);
201 }
202
203
204
205
206 private Path getOutputPath(final SnapshotFileInfo inputInfo) throws IOException {
207 Path path = null;
208 switch (inputInfo.getType()) {
209 case HFILE:
210 Path inputPath = new Path(inputInfo.getHfile());
211 String family = inputPath.getParent().getName();
212 TableName table =HFileLink.getReferencedTableName(inputPath.getName());
213 String region = HFileLink.getReferencedRegionName(inputPath.getName());
214 String hfile = HFileLink.getReferencedHFileName(inputPath.getName());
215 path = new Path(FSUtils.getTableDir(new Path("./"), table),
216 new Path(region, new Path(family, hfile)));
217 break;
218 case WAL:
219 LOG.warn("snapshot does not keeps WALs: " + inputInfo);
220 break;
221 default:
222 throw new IOException("Invalid File Type: " + inputInfo.getType().toString());
223 }
224 return new Path(outputArchive, path);
225 }
226
227
228
229
230 private void injectTestFailure(final Context context, final SnapshotFileInfo inputInfo)
231 throws IOException {
232 if (testFailures) {
233 if (context.getConfiguration().getBoolean(CONF_TEST_RETRY, false)) {
234 if (random == null) {
235 random = new Random();
236 }
237
238
239
240
241 if (random.nextFloat() < 0.03) {
242 throw new IOException("TEST RETRY FAILURE: Unable to copy input=" + inputInfo
243 + " time=" + System.currentTimeMillis());
244 }
245 } else {
246 context.getCounter(Counter.COPY_FAILED).increment(1);
247 throw new IOException("TEST FAILURE: Unable to copy input=" + inputInfo);
248 }
249 }
250 }
251
252 private void copyFile(final Context context, final SnapshotFileInfo inputInfo,
253 final Path outputPath) throws IOException {
254 injectTestFailure(context, inputInfo);
255
256
257 FileStatus inputStat = getSourceFileStatus(context, inputInfo);
258
259
260 if (outputFs.exists(outputPath)) {
261 FileStatus outputStat = outputFs.getFileStatus(outputPath);
262 if (outputStat != null && sameFile(inputStat, outputStat)) {
263 LOG.info("Skip copy " + inputStat.getPath() + " to " + outputPath + ", same file.");
264 context.getCounter(Counter.FILES_SKIPPED).increment(1);
265 context.getCounter(Counter.BYTES_SKIPPED).increment(inputStat.getLen());
266 return;
267 }
268 }
269
270 InputStream in = openSourceFile(context, inputInfo);
271 int bandwidthMB = context.getConfiguration().getInt(CONF_BANDWIDTH_MB, 100);
272 if (Integer.MAX_VALUE != bandwidthMB) {
273 in = new ThrottledInputStream(new BufferedInputStream(in), bandwidthMB * 1024L * 1024L);
274 }
275
276 try {
277 context.getCounter(Counter.BYTES_EXPECTED).increment(inputStat.getLen());
278
279
280 createOutputPath(outputPath.getParent());
281 FSDataOutputStream out = outputFs.create(outputPath, true);
282 try {
283 copyData(context, inputStat.getPath(), in, outputPath, out, inputStat.getLen());
284 } finally {
285 out.close();
286 }
287
288
289 if (!preserveAttributes(outputPath, inputStat)) {
290 LOG.warn("You may have to run manually chown on: " + outputPath);
291 }
292 } finally {
293 in.close();
294 }
295 }
296
297
298
299
300 private void createOutputPath(final Path path) throws IOException {
301 if (filesUser == null && filesGroup == null) {
302 outputFs.mkdirs(path);
303 } else {
304 Path parent = path.getParent();
305 if (!outputFs.exists(parent) && !parent.isRoot()) {
306 createOutputPath(parent);
307 }
308 outputFs.mkdirs(path);
309 if (filesUser != null || filesGroup != null) {
310
311 outputFs.setOwner(path, filesUser, filesGroup);
312 }
313 if (filesMode > 0) {
314 outputFs.setPermission(path, new FsPermission(filesMode));
315 }
316 }
317 }
318
319
320
321
322
323
324
325
326
327 private boolean preserveAttributes(final Path path, final FileStatus refStat) {
328 FileStatus stat;
329 try {
330 stat = outputFs.getFileStatus(path);
331 } catch (IOException e) {
332 LOG.warn("Unable to get the status for file=" + path);
333 return false;
334 }
335
336 try {
337 if (filesMode > 0 && stat.getPermission().toShort() != filesMode) {
338 outputFs.setPermission(path, new FsPermission(filesMode));
339 } else if (refStat != null && !stat.getPermission().equals(refStat.getPermission())) {
340 outputFs.setPermission(path, refStat.getPermission());
341 }
342 } catch (IOException e) {
343 LOG.warn("Unable to set the permission for file="+ stat.getPath() +": "+ e.getMessage());
344 return false;
345 }
346
347 boolean hasRefStat = (refStat != null);
348 String user = stringIsNotEmpty(filesUser) || !hasRefStat ? filesUser : refStat.getOwner();
349 String group = stringIsNotEmpty(filesGroup) || !hasRefStat ? filesGroup : refStat.getGroup();
350 if (stringIsNotEmpty(user) || stringIsNotEmpty(group)) {
351 try {
352 if (!(user.equals(stat.getOwner()) && group.equals(stat.getGroup()))) {
353 outputFs.setOwner(path, user, group);
354 }
355 } catch (IOException e) {
356 LOG.warn("Unable to set the owner/group for file="+ stat.getPath() +": "+ e.getMessage());
357 LOG.warn("The user/group may not exist on the destination cluster: user=" +
358 user + " group=" + group);
359 return false;
360 }
361 }
362
363 return true;
364 }
365
366 private boolean stringIsNotEmpty(final String str) {
367 return str != null && str.length() > 0;
368 }
369
370 private void copyData(final Context context,
371 final Path inputPath, final InputStream in,
372 final Path outputPath, final FSDataOutputStream out,
373 final long inputFileSize)
374 throws IOException {
375 final String statusMessage = "copied %s/" + StringUtils.humanReadableInt(inputFileSize) +
376 " (%.1f%%)";
377
378 try {
379 byte[] buffer = new byte[bufferSize];
380 long totalBytesWritten = 0;
381 int reportBytes = 0;
382 int bytesRead;
383
384 long stime = System.currentTimeMillis();
385 while ((bytesRead = in.read(buffer)) > 0) {
386 out.write(buffer, 0, bytesRead);
387 totalBytesWritten += bytesRead;
388 reportBytes += bytesRead;
389
390 if (reportBytes >= REPORT_SIZE) {
391 context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
392 context.setStatus(String.format(statusMessage,
393 StringUtils.humanReadableInt(totalBytesWritten),
394 (totalBytesWritten/(float)inputFileSize) * 100.0f) +
395 " from " + inputPath + " to " + outputPath);
396 reportBytes = 0;
397 }
398 }
399 long etime = System.currentTimeMillis();
400
401 context.getCounter(Counter.BYTES_COPIED).increment(reportBytes);
402 context.setStatus(String.format(statusMessage,
403 StringUtils.humanReadableInt(totalBytesWritten),
404 (totalBytesWritten/(float)inputFileSize) * 100.0f) +
405 " from " + inputPath + " to " + outputPath);
406
407
408 if (totalBytesWritten != inputFileSize) {
409 String msg = "number of bytes copied not matching copied=" + totalBytesWritten +
410 " expected=" + inputFileSize + " for file=" + inputPath;
411 throw new IOException(msg);
412 }
413
414 LOG.info("copy completed for input=" + inputPath + " output=" + outputPath);
415 LOG.info("size=" + totalBytesWritten +
416 " (" + StringUtils.humanReadableInt(totalBytesWritten) + ")" +
417 " time=" + StringUtils.formatTimeDiff(etime, stime) +
418 String.format(" %.3fM/sec", (totalBytesWritten / ((etime - stime)/1000.0))/1048576.0));
419 context.getCounter(Counter.FILES_COPIED).increment(1);
420 } catch (IOException e) {
421 LOG.error("Error copying " + inputPath + " to " + outputPath, e);
422 context.getCounter(Counter.COPY_FAILED).increment(1);
423 throw e;
424 }
425 }
426
427
428
429
430
431
432 private FSDataInputStream openSourceFile(Context context, final SnapshotFileInfo fileInfo)
433 throws IOException {
434 try {
435 Configuration conf = context.getConfiguration();
436 FileLink link = null;
437 switch (fileInfo.getType()) {
438 case HFILE:
439 Path inputPath = new Path(fileInfo.getHfile());
440 link = HFileLink.buildFromHFileLinkPattern(conf, inputPath);
441 break;
442 case WAL:
443 String serverName = fileInfo.getWalServer();
444 String logName = fileInfo.getWalName();
445 link = new WALLink(inputRoot, serverName, logName);
446 break;
447 default:
448 throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
449 }
450 return link.open(inputFs);
451 } catch (IOException e) {
452 context.getCounter(Counter.MISSING_FILES).increment(1);
453 LOG.error("Unable to open source file=" + fileInfo.toString(), e);
454 throw e;
455 }
456 }
457
458 private FileStatus getSourceFileStatus(Context context, final SnapshotFileInfo fileInfo)
459 throws IOException {
460 try {
461 Configuration conf = context.getConfiguration();
462 FileLink link = null;
463 switch (fileInfo.getType()) {
464 case HFILE:
465 Path inputPath = new Path(fileInfo.getHfile());
466 link = HFileLink.buildFromHFileLinkPattern(conf, inputPath);
467 break;
468 case WAL:
469 link = new WALLink(inputRoot, fileInfo.getWalServer(), fileInfo.getWalName());
470 break;
471 default:
472 throw new IOException("Invalid File Type: " + fileInfo.getType().toString());
473 }
474 return link.getFileStatus(inputFs);
475 } catch (FileNotFoundException e) {
476 context.getCounter(Counter.MISSING_FILES).increment(1);
477 LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
478 throw e;
479 } catch (IOException e) {
480 LOG.error("Unable to get the status for source file=" + fileInfo.toString(), e);
481 throw e;
482 }
483 }
484
485 private FileChecksum getFileChecksum(final FileSystem fs, final Path path) {
486 try {
487 return fs.getFileChecksum(path);
488 } catch (IOException e) {
489 LOG.warn("Unable to get checksum for file=" + path, e);
490 return null;
491 }
492 }
493
494
495
496
497
498 private boolean sameFile(final FileStatus inputStat, final FileStatus outputStat) {
499
500 if (inputStat.getLen() != outputStat.getLen()) return false;
501
502
503 if (!verifyChecksum) return true;
504
505
506 FileChecksum inChecksum = getFileChecksum(inputFs, inputStat.getPath());
507 if (inChecksum == null) return false;
508
509 FileChecksum outChecksum = getFileChecksum(outputFs, outputStat.getPath());
510 if (outChecksum == null) return false;
511
512 return inChecksum.equals(outChecksum);
513 }
514 }
515
516
517
518
519
520
521
522
523
524 private static List<Pair<SnapshotFileInfo, Long>> getSnapshotFiles(final Configuration conf,
525 final FileSystem fs, final Path snapshotDir) throws IOException {
526 SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
527
528 final List<Pair<SnapshotFileInfo, Long>> files = new ArrayList<Pair<SnapshotFileInfo, Long>>();
529 final TableName table = TableName.valueOf(snapshotDesc.getTable());
530
531
532 LOG.info("Loading Snapshot '" + snapshotDesc.getName() + "' hfile list");
533 SnapshotReferenceUtil.visitReferencedFiles(conf, fs, snapshotDir, snapshotDesc,
534 new SnapshotReferenceUtil.SnapshotVisitor() {
535 @Override
536 public void storeFile(final HRegionInfo regionInfo, final String family,
537 final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
538 if (storeFile.hasReference()) {
539
540 } else {
541 String region = regionInfo.getEncodedName();
542 String hfile = storeFile.getName();
543 Path path = HFileLink.createPath(table, region, family, hfile);
544
545 SnapshotFileInfo fileInfo = SnapshotFileInfo.newBuilder()
546 .setType(SnapshotFileInfo.Type.HFILE)
547 .setHfile(path.toString())
548 .build();
549
550 long size;
551 if (storeFile.hasFileSize()) {
552 size = storeFile.getFileSize();
553 } else {
554 size = HFileLink.buildFromHFileLinkPattern(conf, path).getFileStatus(fs).getLen();
555 }
556 files.add(new Pair<SnapshotFileInfo, Long>(fileInfo, size));
557 }
558 }
559 });
560
561 return files;
562 }
563
564
565
566
567
568
569
570
571
572 static List<List<Pair<SnapshotFileInfo, Long>>> getBalancedSplits(
573 final List<Pair<SnapshotFileInfo, Long>> files, final int ngroups) {
574
575 Collections.sort(files, new Comparator<Pair<SnapshotFileInfo, Long>>() {
576 @Override
577 public int compare(Pair<SnapshotFileInfo, Long> a, Pair<SnapshotFileInfo, Long> b) {
578 long r = a.getSecond() - b.getSecond();
579 return (r < 0) ? -1 : ((r > 0) ? 1 : 0);
580 }
581 });
582
583
584 List<List<Pair<SnapshotFileInfo, Long>>> fileGroups =
585 new LinkedList<List<Pair<SnapshotFileInfo, Long>>>();
586 long[] sizeGroups = new long[ngroups];
587 int hi = files.size() - 1;
588 int lo = 0;
589
590 List<Pair<SnapshotFileInfo, Long>> group;
591 int dir = 1;
592 int g = 0;
593
594 while (hi >= lo) {
595 if (g == fileGroups.size()) {
596 group = new LinkedList<Pair<SnapshotFileInfo, Long>>();
597 fileGroups.add(group);
598 } else {
599 group = fileGroups.get(g);
600 }
601
602 Pair<SnapshotFileInfo, Long> fileInfo = files.get(hi--);
603
604
605 sizeGroups[g] += fileInfo.getSecond();
606 group.add(fileInfo);
607
608
609 g += dir;
610 if (g == ngroups) {
611 dir = -1;
612 g = ngroups - 1;
613 } else if (g < 0) {
614 dir = 1;
615 g = 0;
616 }
617 }
618
619 if (LOG.isDebugEnabled()) {
620 for (int i = 0; i < sizeGroups.length; ++i) {
621 LOG.debug("export split=" + i + " size=" + StringUtils.humanReadableInt(sizeGroups[i]));
622 }
623 }
624
625 return fileGroups;
626 }
627
628 private static class ExportSnapshotInputFormat extends InputFormat<BytesWritable, NullWritable> {
629 @Override
630 public RecordReader<BytesWritable, NullWritable> createRecordReader(InputSplit split,
631 TaskAttemptContext tac) throws IOException, InterruptedException {
632 return new ExportSnapshotRecordReader(((ExportSnapshotInputSplit)split).getSplitKeys());
633 }
634
635 @Override
636 public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
637 Configuration conf = context.getConfiguration();
638 Path snapshotDir = new Path(conf.get(CONF_SNAPSHOT_DIR));
639 FileSystem fs = FileSystem.get(snapshotDir.toUri(), conf);
640
641 List<Pair<SnapshotFileInfo, Long>> snapshotFiles = getSnapshotFiles(conf, fs, snapshotDir);
642 int mappers = conf.getInt(CONF_NUM_SPLITS, 0);
643 if (mappers == 0 && snapshotFiles.size() > 0) {
644 mappers = 1 + (snapshotFiles.size() / conf.getInt(CONF_MAP_GROUP, 10));
645 mappers = Math.min(mappers, snapshotFiles.size());
646 conf.setInt(CONF_NUM_SPLITS, mappers);
647 conf.setInt(MR_NUM_MAPS, mappers);
648 }
649
650 List<List<Pair<SnapshotFileInfo, Long>>> groups = getBalancedSplits(snapshotFiles, mappers);
651 List<InputSplit> splits = new ArrayList(groups.size());
652 for (List<Pair<SnapshotFileInfo, Long>> files: groups) {
653 splits.add(new ExportSnapshotInputSplit(files));
654 }
655 return splits;
656 }
657
658 private static class ExportSnapshotInputSplit extends InputSplit implements Writable {
659 private List<Pair<BytesWritable, Long>> files;
660 private long length;
661
662 public ExportSnapshotInputSplit() {
663 this.files = null;
664 }
665
666 public ExportSnapshotInputSplit(final List<Pair<SnapshotFileInfo, Long>> snapshotFiles) {
667 this.files = new ArrayList(snapshotFiles.size());
668 for (Pair<SnapshotFileInfo, Long> fileInfo: snapshotFiles) {
669 this.files.add(new Pair<BytesWritable, Long>(
670 new BytesWritable(fileInfo.getFirst().toByteArray()), fileInfo.getSecond()));
671 this.length += fileInfo.getSecond();
672 }
673 }
674
675 private List<Pair<BytesWritable, Long>> getSplitKeys() {
676 return files;
677 }
678
679 @Override
680 public long getLength() throws IOException, InterruptedException {
681 return length;
682 }
683
684 @Override
685 public String[] getLocations() throws IOException, InterruptedException {
686 return new String[] {};
687 }
688
689 @Override
690 public void readFields(DataInput in) throws IOException {
691 int count = in.readInt();
692 files = new ArrayList<Pair<BytesWritable, Long>>(count);
693 length = 0;
694 for (int i = 0; i < count; ++i) {
695 BytesWritable fileInfo = new BytesWritable();
696 fileInfo.readFields(in);
697 long size = in.readLong();
698 files.add(new Pair<BytesWritable, Long>(fileInfo, size));
699 length += size;
700 }
701 }
702
703 @Override
704 public void write(DataOutput out) throws IOException {
705 out.writeInt(files.size());
706 for (final Pair<BytesWritable, Long> fileInfo: files) {
707 fileInfo.getFirst().write(out);
708 out.writeLong(fileInfo.getSecond());
709 }
710 }
711 }
712
713 private static class ExportSnapshotRecordReader
714 extends RecordReader<BytesWritable, NullWritable> {
715 private final List<Pair<BytesWritable, Long>> files;
716 private long totalSize = 0;
717 private long procSize = 0;
718 private int index = -1;
719
720 ExportSnapshotRecordReader(final List<Pair<BytesWritable, Long>> files) {
721 this.files = files;
722 for (Pair<BytesWritable, Long> fileInfo: files) {
723 totalSize += fileInfo.getSecond();
724 }
725 }
726
727 @Override
728 public void close() { }
729
730 @Override
731 public BytesWritable getCurrentKey() { return files.get(index).getFirst(); }
732
733 @Override
734 public NullWritable getCurrentValue() { return NullWritable.get(); }
735
736 @Override
737 public float getProgress() { return (float)procSize / totalSize; }
738
739 @Override
740 public void initialize(InputSplit split, TaskAttemptContext tac) { }
741
742 @Override
743 public boolean nextKeyValue() {
744 if (index >= 0) {
745 procSize += files.get(index).getSecond();
746 }
747 return(++index < files.size());
748 }
749 }
750 }
751
752
753
754
755
756
757
758
759 private void runCopyJob(final Path inputRoot, final Path outputRoot,
760 final String snapshotName, final Path snapshotDir, final boolean verifyChecksum,
761 final String filesUser, final String filesGroup, final int filesMode,
762 final int mappers, final int bandwidthMB)
763 throws IOException, InterruptedException, ClassNotFoundException {
764 Configuration conf = getConf();
765 if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup);
766 if (filesUser != null) conf.set(CONF_FILES_USER, filesUser);
767 if (mappers > 0) {
768 conf.setInt(CONF_NUM_SPLITS, mappers);
769 conf.setInt(MR_NUM_MAPS, mappers);
770 }
771 conf.setInt(CONF_FILES_MODE, filesMode);
772 conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum);
773 conf.set(CONF_OUTPUT_ROOT, outputRoot.toString());
774 conf.set(CONF_INPUT_ROOT, inputRoot.toString());
775 conf.setInt(CONF_BANDWIDTH_MB, bandwidthMB);
776 conf.set(CONF_SNAPSHOT_NAME, snapshotName);
777 conf.set(CONF_SNAPSHOT_DIR, snapshotDir.toString());
778
779 Job job = new Job(conf);
780 job.setJobName("ExportSnapshot-" + snapshotName);
781 job.setJarByClass(ExportSnapshot.class);
782 TableMapReduceUtil.addDependencyJars(job);
783 job.setMapperClass(ExportMapper.class);
784 job.setInputFormatClass(ExportSnapshotInputFormat.class);
785 job.setOutputFormatClass(NullOutputFormat.class);
786 job.setMapSpeculativeExecution(false);
787 job.setNumReduceTasks(0);
788
789
790 Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
791 TokenCache.obtainTokensForNamenodes(job.getCredentials(),
792 new Path[] { inputRoot }, srcConf);
793 Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
794 TokenCache.obtainTokensForNamenodes(job.getCredentials(),
795 new Path[] { outputRoot }, destConf);
796
797
798 if (!job.waitForCompletion(true)) {
799 throw new ExportSnapshotException(job.getStatus().getFailureInfo());
800 }
801 }
802
803 private void verifySnapshot(final Configuration baseConf,
804 final FileSystem fs, final Path rootDir, final Path snapshotDir) throws IOException {
805
806 Configuration conf = new Configuration(baseConf);
807 FSUtils.setRootDir(conf, rootDir);
808 FSUtils.setFsDefault(conf, FSUtils.getRootDir(conf));
809 SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
810 SnapshotReferenceUtil.verifySnapshot(conf, fs, snapshotDir, snapshotDesc);
811 }
812
813
814
815
816 private void setOwner(final FileSystem fs, final Path path, final String user,
817 final String group, final boolean recursive) throws IOException {
818 if (user != null || group != null) {
819 if (recursive && fs.isDirectory(path)) {
820 for (FileStatus child : fs.listStatus(path)) {
821 setOwner(fs, child.getPath(), user, group, recursive);
822 }
823 }
824 fs.setOwner(path, user, group);
825 }
826 }
827
828
829
830
831 private void setPermission(final FileSystem fs, final Path path, final short filesMode,
832 final boolean recursive) throws IOException {
833 if (filesMode > 0) {
834 FsPermission perm = new FsPermission(filesMode);
835 if (recursive && fs.isDirectory(path)) {
836 for (FileStatus child : fs.listStatus(path)) {
837 setPermission(fs, child.getPath(), filesMode, recursive);
838 }
839 }
840 fs.setPermission(path, perm);
841 }
842 }
843
844
845
846
847
848 @Override
849 public int run(String[] args) throws IOException {
850 boolean verifyTarget = true;
851 boolean verifyChecksum = true;
852 String snapshotName = null;
853 String targetName = null;
854 boolean overwrite = false;
855 String filesGroup = null;
856 String filesUser = null;
857 Path outputRoot = null;
858 int bandwidthMB = Integer.MAX_VALUE;
859 int filesMode = 0;
860 int mappers = 0;
861
862 Configuration conf = getConf();
863 Path inputRoot = FSUtils.getRootDir(conf);
864
865
866 for (int i = 0; i < args.length; i++) {
867 String cmd = args[i];
868 if (cmd.equals("-snapshot")) {
869 snapshotName = args[++i];
870 } else if (cmd.equals("-target")) {
871 targetName = args[++i];
872 } else if (cmd.equals("-copy-to")) {
873 outputRoot = new Path(args[++i]);
874 } else if (cmd.equals("-copy-from")) {
875 inputRoot = new Path(args[++i]);
876 FSUtils.setRootDir(conf, inputRoot);
877 } else if (cmd.equals("-no-checksum-verify")) {
878 verifyChecksum = false;
879 } else if (cmd.equals("-no-target-verify")) {
880 verifyTarget = false;
881 } else if (cmd.equals("-mappers")) {
882 mappers = Integer.parseInt(args[++i]);
883 } else if (cmd.equals("-chuser")) {
884 filesUser = args[++i];
885 } else if (cmd.equals("-chgroup")) {
886 filesGroup = args[++i];
887 } else if (cmd.equals("-bandwidth")) {
888 bandwidthMB = Integer.parseInt(args[++i]);
889 } else if (cmd.equals("-chmod")) {
890 filesMode = Integer.parseInt(args[++i], 8);
891 } else if (cmd.equals("-overwrite")) {
892 overwrite = true;
893 } else if (cmd.equals("-h") || cmd.equals("--help")) {
894 printUsageAndExit();
895 } else {
896 System.err.println("UNEXPECTED: " + cmd);
897 printUsageAndExit();
898 }
899 }
900
901
902 if (snapshotName == null) {
903 System.err.println("Snapshot name not provided.");
904 printUsageAndExit();
905 }
906
907 if (outputRoot == null) {
908 System.err.println("Destination file-system not provided.");
909 printUsageAndExit();
910 }
911
912 if (targetName == null) {
913 targetName = snapshotName;
914 }
915
916 Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
917 srcConf.setBoolean("fs." + inputRoot.toUri().getScheme() + ".impl.disable.cache", true);
918 FileSystem inputFs = FileSystem.get(inputRoot.toUri(), srcConf);
919 LOG.debug("inputFs=" + inputFs.getUri().toString() + " inputRoot=" + inputRoot);
920 Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
921 destConf.setBoolean("fs." + outputRoot.toUri().getScheme() + ".impl.disable.cache", true);
922 FileSystem outputFs = FileSystem.get(outputRoot.toUri(), destConf);
923 LOG.debug("outputFs=" + outputFs.getUri().toString() + " outputRoot=" + outputRoot.toString());
924
925 boolean skipTmp = conf.getBoolean(CONF_SKIP_TMP, false) ||
926 conf.get(SnapshotDescriptionUtils.SNAPSHOT_WORKING_DIR) != null;
927
928 Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, inputRoot);
929 Path snapshotTmpDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(targetName, outputRoot,
930 destConf);
931 Path outputSnapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(targetName, outputRoot);
932 Path initialOutputSnapshotDir = skipTmp ? outputSnapshotDir : snapshotTmpDir;
933
934
935 Path needSetOwnerDir = SnapshotDescriptionUtils.getSnapshotRootDir(outputRoot);
936 if (outputFs.exists(needSetOwnerDir)) {
937 if (skipTmp) {
938 needSetOwnerDir = outputSnapshotDir;
939 } else {
940 needSetOwnerDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(outputRoot, destConf);
941 if (outputFs.exists(needSetOwnerDir)) {
942 needSetOwnerDir = snapshotTmpDir;
943 }
944 }
945 }
946
947
948 if (outputFs.exists(outputSnapshotDir)) {
949 if (overwrite) {
950 if (!outputFs.delete(outputSnapshotDir, true)) {
951 System.err.println("Unable to remove existing snapshot directory: " + outputSnapshotDir);
952 return 1;
953 }
954 } else {
955 System.err.println("The snapshot '" + targetName +
956 "' already exists in the destination: " + outputSnapshotDir);
957 return 1;
958 }
959 }
960
961 if (!skipTmp) {
962
963 if (outputFs.exists(snapshotTmpDir)) {
964 if (overwrite) {
965 if (!outputFs.delete(snapshotTmpDir, true)) {
966 System.err.println("Unable to remove existing snapshot tmp directory: "+snapshotTmpDir);
967 return 1;
968 }
969 } else {
970 System.err.println("A snapshot with the same name '"+ targetName +"' may be in-progress");
971 System.err.println("Please check "+snapshotTmpDir+". If the snapshot has completed, ");
972 System.err.println("consider removing "+snapshotTmpDir+" by using the -overwrite option");
973 return 1;
974 }
975 }
976 }
977
978
979
980
981 try {
982 LOG.info("Copy Snapshot Manifest");
983 FileUtil.copy(inputFs, snapshotDir, outputFs, initialOutputSnapshotDir, false, false, conf);
984 } catch (IOException e) {
985 throw new ExportSnapshotException("Failed to copy the snapshot directory: from=" +
986 snapshotDir + " to=" + initialOutputSnapshotDir, e);
987 } finally {
988 if (filesUser != null || filesGroup != null) {
989 LOG.warn((filesUser == null ? "" : "Change the owner of " + needSetOwnerDir + " to "
990 + filesUser)
991 + (filesGroup == null ? "" : ", Change the group of " + needSetOwnerDir + " to "
992 + filesGroup));
993 setOwner(outputFs, needSetOwnerDir, filesUser, filesGroup, true);
994 }
995 if (filesMode > 0) {
996 LOG.warn("Change the permission of " + needSetOwnerDir + " to " + filesMode);
997 setPermission(outputFs, needSetOwnerDir, (short)filesMode, true);
998 }
999 }
1000
1001
1002 if (!targetName.equals(snapshotName)) {
1003 SnapshotDescription snapshotDesc =
1004 SnapshotDescriptionUtils.readSnapshotInfo(inputFs, snapshotDir)
1005 .toBuilder()
1006 .setName(targetName)
1007 .build();
1008 SnapshotDescriptionUtils.writeSnapshotInfo(snapshotDesc, initialOutputSnapshotDir, outputFs);
1009 if (filesUser != null || filesGroup != null) {
1010 outputFs.setOwner(new Path(initialOutputSnapshotDir,
1011 SnapshotDescriptionUtils.SNAPSHOTINFO_FILE), filesUser, filesGroup);
1012 }
1013 if (filesMode > 0) {
1014 outputFs.setPermission(new Path(initialOutputSnapshotDir,
1015 SnapshotDescriptionUtils.SNAPSHOTINFO_FILE), new FsPermission((short)filesMode));
1016 }
1017 }
1018
1019
1020
1021
1022 try {
1023 runCopyJob(inputRoot, outputRoot, snapshotName, snapshotDir, verifyChecksum,
1024 filesUser, filesGroup, filesMode, mappers, bandwidthMB);
1025
1026 LOG.info("Finalize the Snapshot Export");
1027 if (!skipTmp) {
1028
1029 if (!outputFs.rename(snapshotTmpDir, outputSnapshotDir)) {
1030 throw new ExportSnapshotException("Unable to rename snapshot directory from=" +
1031 snapshotTmpDir + " to=" + outputSnapshotDir);
1032 }
1033 }
1034
1035
1036 if (verifyTarget) {
1037 LOG.info("Verify snapshot integrity");
1038 verifySnapshot(destConf, outputFs, outputRoot, outputSnapshotDir);
1039 }
1040
1041 LOG.info("Export Completed: " + targetName);
1042 return 0;
1043 } catch (Exception e) {
1044 LOG.error("Snapshot export failed", e);
1045 if (!skipTmp) {
1046 outputFs.delete(snapshotTmpDir, true);
1047 }
1048 outputFs.delete(outputSnapshotDir, true);
1049 return 1;
1050 } finally {
1051 IOUtils.closeStream(inputFs);
1052 IOUtils.closeStream(outputFs);
1053 }
1054 }
1055
1056
1057 private void printUsageAndExit() {
1058 System.err.printf("Usage: bin/hbase %s [options]%n", getClass().getName());
1059 System.err.println(" where [options] are:");
1060 System.err.println(" -h|-help Show this help and exit.");
1061 System.err.println(" -snapshot NAME Snapshot to restore.");
1062 System.err.println(" -copy-to NAME Remote destination hdfs://");
1063 System.err.println(" -copy-from NAME Input folder hdfs:// (default hbase.rootdir)");
1064 System.err.println(" -no-checksum-verify Do not verify checksum, use name+length only.");
1065 System.err.println(" -no-target-verify Do not verify the integrity of the \\" +
1066 "exported snapshot.");
1067 System.err.println(" -overwrite Rewrite the snapshot manifest if already exists");
1068 System.err.println(" -chuser USERNAME Change the owner of the files " +
1069 "to the specified one.");
1070 System.err.println(" -chgroup GROUP Change the group of the files to " +
1071 "the specified one.");
1072 System.err.println(" -chmod MODE Change the permission of the files " +
1073 "to the specified one.");
1074 System.err.println(" -mappers Number of mappers to use during the " +
1075 "copy (mapreduce.job.maps).");
1076 System.err.println(" -bandwidth Limit bandwidth to this value in MB/second.");
1077 System.err.println();
1078 System.err.println("Examples:");
1079 System.err.println(" hbase snapshot export \\");
1080 System.err.println(" -snapshot MySnapshot -copy-to hdfs://srv2:8082/hbase \\");
1081 System.err.println(" -chuser MyUser -chgroup MyGroup -chmod 700 -mappers 16");
1082 System.err.println();
1083 System.err.println(" hbase snapshot export \\");
1084 System.err.println(" -snapshot MySnapshot -copy-from hdfs://srv2:8082/hbase \\");
1085 System.err.println(" -copy-to hdfs://srv1:50070/hbase \\");
1086 System.exit(1);
1087 }
1088
1089
1090
1091
1092
1093
1094
1095
1096 static int innerMain(final Configuration conf, final String [] args) throws Exception {
1097 return ToolRunner.run(conf, new ExportSnapshot(), args);
1098 }
1099
1100 public static void main(String[] args) throws Exception {
1101 System.exit(innerMain(HBaseConfiguration.create(), args));
1102 }
1103 }