1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import com.google.common.collect.Lists;
22 import org.apache.commons.logging.Log;
23 import org.apache.commons.logging.LogFactory;
24 import org.apache.hadoop.conf.Configuration;
25 import org.apache.hadoop.fs.FileSystem;
26 import org.apache.hadoop.fs.Path;
27 import org.apache.hadoop.hbase.CellUtil;
28 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
29 import org.apache.hadoop.hbase.HDFSBlocksDistribution.HostAndWeight;
30 import org.apache.hadoop.hbase.HRegionInfo;
31 import org.apache.hadoop.hbase.HTableDescriptor;
32 import org.apache.hadoop.hbase.classification.InterfaceAudience;
33 import org.apache.hadoop.hbase.classification.InterfaceStability;
34 import org.apache.hadoop.hbase.client.ClientSideRegionScanner;
35 import org.apache.hadoop.hbase.client.IsolationLevel;
36 import org.apache.hadoop.hbase.client.Result;
37 import org.apache.hadoop.hbase.client.Scan;
38 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
39 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
40 import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos.TableSnapshotRegionSplit;
41 import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
42 import org.apache.hadoop.hbase.regionserver.HRegion;
43 import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
44 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
45 import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
46 import org.apache.hadoop.hbase.util.Bytes;
47 import org.apache.hadoop.hbase.util.FSUtils;
48 import org.apache.hadoop.hbase.util.RegionSplitter;
49 import org.apache.hadoop.io.Writable;
50
51 import java.io.ByteArrayOutputStream;
52 import java.io.DataInput;
53 import java.io.DataOutput;
54 import java.io.IOException;
55 import java.util.ArrayList;
56 import java.util.List;
57 import java.util.UUID;
58
59
60
61
62 @InterfaceAudience.Private
63 @InterfaceStability.Evolving
64 public class TableSnapshotInputFormatImpl {
65
66
67
68 public static final Log LOG = LogFactory.getLog(TableSnapshotInputFormatImpl.class);
69
70 private static final String SNAPSHOT_NAME_KEY = "hbase.TableSnapshotInputFormat.snapshot.name";
71
72 protected static final String RESTORE_DIR_KEY = "hbase.TableSnapshotInputFormat.restore.dir";
73
74
75 private static final String LOCALITY_CUTOFF_MULTIPLIER =
76 "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
77 private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER = 0.8f;
78
79
80
81
82
83 public static final String SPLIT_ALGO = "hbase.mapreduce.split.algorithm";
84
85
86
87
88 public static final String NUM_SPLITS_PER_REGION = "hbase.mapreduce.splits.per.region";
89
90
91
92
93 public static class InputSplit implements Writable {
94
95 private HTableDescriptor htd;
96 private HRegionInfo regionInfo;
97 private String[] locations;
98 private String scan;
99 private String restoreDir;
100
101
102 public InputSplit() {}
103
104 public InputSplit(HTableDescriptor htd, HRegionInfo regionInfo, List<String> locations,
105 Scan scan, Path restoreDir) {
106 this.htd = htd;
107 this.regionInfo = regionInfo;
108 if (locations == null || locations.isEmpty()) {
109 this.locations = new String[0];
110 } else {
111 this.locations = locations.toArray(new String[locations.size()]);
112 }
113 try {
114 this.scan = scan != null ? TableMapReduceUtil.convertScanToString(scan) : "";
115 } catch (IOException e) {
116 LOG.warn("Failed to convert Scan to String", e);
117 }
118
119 this.restoreDir = restoreDir.toString();
120 }
121
122 public HTableDescriptor getHtd() {
123 return htd;
124 }
125
126 public String getScan() {
127 return scan;
128 }
129
130 public String getRestoreDir() {
131 return restoreDir;
132 }
133
134 public long getLength() {
135
136 return 0;
137 }
138
139 public String[] getLocations() {
140 return locations;
141 }
142
143 public HTableDescriptor getTableDescriptor() {
144 return htd;
145 }
146
147 public HRegionInfo getRegionInfo() {
148 return regionInfo;
149 }
150
151
152
153 @Override
154 public void write(DataOutput out) throws IOException {
155 TableSnapshotRegionSplit.Builder builder = TableSnapshotRegionSplit.newBuilder()
156 .setTable(htd.convert())
157 .setRegion(HRegionInfo.convert(regionInfo));
158
159 for (String location : locations) {
160 builder.addLocations(location);
161 }
162
163 TableSnapshotRegionSplit split = builder.build();
164
165 ByteArrayOutputStream baos = new ByteArrayOutputStream();
166 split.writeTo(baos);
167 baos.close();
168 byte[] buf = baos.toByteArray();
169 out.writeInt(buf.length);
170 out.write(buf);
171
172 Bytes.writeByteArray(out, Bytes.toBytes(scan));
173 Bytes.writeByteArray(out, Bytes.toBytes(restoreDir));
174
175 }
176
177 @Override
178 public void readFields(DataInput in) throws IOException {
179 int len = in.readInt();
180 byte[] buf = new byte[len];
181 in.readFully(buf);
182 TableSnapshotRegionSplit split = TableSnapshotRegionSplit.PARSER.parseFrom(buf);
183 this.htd = HTableDescriptor.convert(split.getTable());
184 this.regionInfo = HRegionInfo.convert(split.getRegion());
185 List<String> locationsList = split.getLocationsList();
186 this.locations = locationsList.toArray(new String[locationsList.size()]);
187
188 this.scan = Bytes.toString(Bytes.readByteArray(in));
189 this.restoreDir = Bytes.toString(Bytes.readByteArray(in));
190 }
191 }
192
193
194
195
196 public static class RecordReader {
197 private InputSplit split;
198 private Scan scan;
199 private Result result = null;
200 private ImmutableBytesWritable row = null;
201 private ClientSideRegionScanner scanner;
202
203 public ClientSideRegionScanner getScanner() {
204 return scanner;
205 }
206
207 public void initialize(InputSplit split, Configuration conf) throws IOException {
208 this.scan = TableMapReduceUtil.convertStringToScan(split.getScan());
209 this.split = split;
210 HTableDescriptor htd = split.htd;
211 HRegionInfo hri = this.split.getRegionInfo();
212 FileSystem fs = FSUtils.getCurrentFileSystem(conf);
213
214
215
216
217 scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);
218
219 scan.setCacheBlocks(false);
220 scan.setScanMetricsEnabled(true);
221
222 scanner =
223 new ClientSideRegionScanner(conf, fs, new Path(split.restoreDir), htd, hri, scan, null);
224 }
225
226 public boolean nextKeyValue() throws IOException {
227 result = scanner.next();
228 if (result == null) {
229
230 return false;
231 }
232
233 if (this.row == null) {
234 this.row = new ImmutableBytesWritable();
235 }
236 this.row.set(result.getRow());
237 return true;
238 }
239
240 public ImmutableBytesWritable getCurrentKey() {
241 return row;
242 }
243
244 public Result getCurrentValue() {
245 return result;
246 }
247
248 public long getPos() {
249 return 0;
250 }
251
252 public float getProgress() {
253 return 0;
254 }
255
256 public void close() {
257 if (this.scanner != null) {
258 this.scanner.close();
259 }
260 }
261 }
262
263 public static List<InputSplit> getSplits(Configuration conf) throws IOException {
264 String snapshotName = getSnapshotName(conf);
265
266 Path rootDir = FSUtils.getRootDir(conf);
267 FileSystem fs = rootDir.getFileSystem(conf);
268
269 SnapshotManifest manifest = getSnapshotManifest(conf, snapshotName, rootDir, fs);
270
271 List<HRegionInfo> regionInfos = getRegionInfosFromManifest(manifest);
272
273
274 Scan scan = extractScanFromConf(conf);
275
276 Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY));
277
278 RegionSplitter.SplitAlgorithm splitAlgo = getSplitAlgo(conf);
279
280 int numSplits = conf.getInt(NUM_SPLITS_PER_REGION, 1);
281
282 return getSplits(scan, manifest, regionInfos, restoreDir, conf, splitAlgo, numSplits);
283 }
284
285 public static RegionSplitter.SplitAlgorithm getSplitAlgo(Configuration conf) throws IOException{
286 String splitAlgoClassName = conf.get(SPLIT_ALGO);
287 if (splitAlgoClassName == null)
288 return null;
289 try {
290 return ((Class<? extends RegionSplitter.SplitAlgorithm>)
291 Class.forName(splitAlgoClassName)).newInstance();
292 } catch (ClassNotFoundException e) {
293 throw new IOException("SplitAlgo class " + splitAlgoClassName +
294 " is not found", e);
295 } catch (InstantiationException e) {
296 throw new IOException("SplitAlgo class " + splitAlgoClassName +
297 " is not instantiable", e);
298 } catch (IllegalAccessException e) {
299 throw new IOException("SplitAlgo class " + splitAlgoClassName +
300 " is not instantiable", e);
301 }
302 }
303
304 public static List<HRegionInfo> getRegionInfosFromManifest(SnapshotManifest manifest) {
305 List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
306 if (regionManifests == null) {
307 throw new IllegalArgumentException("Snapshot seems empty");
308 }
309
310 List<HRegionInfo> regionInfos = Lists.newArrayListWithCapacity(regionManifests.size());
311
312 for (SnapshotRegionManifest regionManifest : regionManifests) {
313 HRegionInfo hri = HRegionInfo.convert(regionManifest.getRegionInfo());
314 if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
315 continue;
316 }
317 regionInfos.add(hri);
318 }
319 return regionInfos;
320 }
321
322 public static SnapshotManifest getSnapshotManifest(Configuration conf, String snapshotName,
323 Path rootDir, FileSystem fs) throws IOException {
324 Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
325 SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
326 return SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
327 }
328
329 public static Scan extractScanFromConf(Configuration conf) throws IOException {
330 Scan scan = null;
331 if (conf.get(TableInputFormat.SCAN) != null) {
332 scan = TableMapReduceUtil.convertStringToScan(conf.get(TableInputFormat.SCAN));
333 } else if (conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST) != null) {
334 String[] columns =
335 conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST).split(" ");
336 scan = new Scan();
337 for (String col : columns) {
338 scan.addFamily(Bytes.toBytes(col));
339 }
340 } else {
341 throw new IllegalArgumentException("Unable to create scan");
342 }
343 return scan;
344 }
345
346 public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
347 List<HRegionInfo> regionManifests, Path restoreDir, Configuration conf) throws IOException {
348 return getSplits(scan, manifest, regionManifests, restoreDir, conf, null, 1);
349 }
350
351 public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
352 List<HRegionInfo> regionManifests, Path restoreDir,
353 Configuration conf, RegionSplitter.SplitAlgorithm sa, int numSplits) throws IOException {
354
355 HTableDescriptor htd = manifest.getTableDescriptor();
356
357 Path tableDir = FSUtils.getTableDir(restoreDir, htd.getTableName());
358
359 List<InputSplit> splits = new ArrayList<InputSplit>();
360 for (HRegionInfo hri : regionManifests) {
361
362
363 if (numSplits > 1) {
364 byte[][] sp = sa.split(hri.getStartKey(), hri.getEndKey(), numSplits, true);
365 for (int i = 0; i < sp.length - 1; i++) {
366 if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), sp[i],
367 sp[i + 1])) {
368
369
370 List<String> hosts = getBestLocations(conf,
371 HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
372
373 int len = Math.min(3, hosts.size());
374 hosts = hosts.subList(0, len);
375 Scan boundedScan = new Scan(scan);
376 if (scan.getStartRow().length == 0) {
377 boundedScan.withStartRow(sp[i]);
378 } else {
379 boundedScan.withStartRow(
380 Bytes.compareTo(scan.getStartRow(), sp[i]) > 0 ? scan.getStartRow() : sp[i]);
381 }
382
383 if (scan.getStopRow().length == 0) {
384 boundedScan.withStopRow(sp[i + 1]);
385 } else {
386 boundedScan.withStopRow(
387 Bytes.compareTo(scan.getStopRow(), sp[i + 1]) < 0 ? scan.getStopRow() : sp[i + 1]);
388 }
389 splits.add(new InputSplit(htd, hri, hosts, boundedScan, restoreDir));
390 }
391 }
392 } else {
393 if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
394 hri.getEndKey())) {
395
396
397 List<String> hosts = getBestLocations(conf,
398 HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
399
400 int len = Math.min(3, hosts.size());
401 hosts = hosts.subList(0, len);
402 splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
403 }
404 }
405 }
406
407 return splits;
408
409 }
410
411
412
413
414
415
416
417
418
419
420
421
422
423 public static List<String> getBestLocations(
424 Configuration conf, HDFSBlocksDistribution blockDistribution) {
425 List<String> locations = new ArrayList<String>(3);
426
427 HostAndWeight[] hostAndWeights = blockDistribution.getTopHostsWithWeights();
428
429 if (hostAndWeights.length == 0) {
430 return locations;
431 }
432
433 HostAndWeight topHost = hostAndWeights[0];
434 locations.add(topHost.getHost());
435
436
437 double cutoffMultiplier
438 = conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER);
439
440 double filterWeight = topHost.getWeight() * cutoffMultiplier;
441
442 for (int i = 1; i < hostAndWeights.length; i++) {
443 if (hostAndWeights[i].getWeight() >= filterWeight) {
444 locations.add(hostAndWeights[i].getHost());
445 } else {
446 break;
447 }
448 }
449
450 return locations;
451 }
452
453 private static String getSnapshotName(Configuration conf) {
454 String snapshotName = conf.get(SNAPSHOT_NAME_KEY);
455 if (snapshotName == null) {
456 throw new IllegalArgumentException("Snapshot name must be provided");
457 }
458 return snapshotName;
459 }
460
461
462
463
464
465
466
467
468
469
470 public static void setInput(Configuration conf, String snapshotName, Path restoreDir)
471 throws IOException {
472 setInput(conf, snapshotName, restoreDir, null, 1);
473 }
474
475
476
477
478
479
480
481
482
483
484
485
486 public static void setInput(Configuration conf, String snapshotName, Path restoreDir,
487 RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion)
488 throws IOException {
489 conf.set(SNAPSHOT_NAME_KEY, snapshotName);
490 if (numSplitsPerRegion < 1) {
491 throw new IllegalArgumentException("numSplits must be >= 1, " +
492 "illegal numSplits : " + numSplitsPerRegion);
493 }
494 if (splitAlgo == null && numSplitsPerRegion > 1) {
495 throw new IllegalArgumentException("Split algo can't be null when numSplits > 1");
496 }
497 if (splitAlgo != null) {
498 conf.set(SPLIT_ALGO, splitAlgo.getClass().getName());
499 }
500 conf.setInt(NUM_SPLITS_PER_REGION, numSplitsPerRegion);
501 conf.set(SNAPSHOT_NAME_KEY, snapshotName);
502
503 Path rootDir = FSUtils.getRootDir(conf);
504 FileSystem fs = rootDir.getFileSystem(conf);
505
506 restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
507
508
509 RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
510
511 conf.set(RESTORE_DIR_KEY, restoreDir.toString());
512 }
513 }