View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import com.google.common.collect.Lists;
22  import org.apache.commons.logging.Log;
23  import org.apache.commons.logging.LogFactory;
24  import org.apache.hadoop.conf.Configuration;
25  import org.apache.hadoop.fs.FileSystem;
26  import org.apache.hadoop.fs.Path;
27  import org.apache.hadoop.hbase.CellUtil;
28  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
29  import org.apache.hadoop.hbase.HDFSBlocksDistribution.HostAndWeight;
30  import org.apache.hadoop.hbase.HRegionInfo;
31  import org.apache.hadoop.hbase.HTableDescriptor;
32  import org.apache.hadoop.hbase.classification.InterfaceAudience;
33  import org.apache.hadoop.hbase.classification.InterfaceStability;
34  import org.apache.hadoop.hbase.client.ClientSideRegionScanner;
35  import org.apache.hadoop.hbase.client.IsolationLevel;
36  import org.apache.hadoop.hbase.client.Result;
37  import org.apache.hadoop.hbase.client.Scan;
38  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
39  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
40  import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos.TableSnapshotRegionSplit;
41  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
42  import org.apache.hadoop.hbase.regionserver.HRegion;
43  import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
44  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
45  import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
46  import org.apache.hadoop.hbase.util.Bytes;
47  import org.apache.hadoop.hbase.util.FSUtils;
48  import org.apache.hadoop.hbase.util.RegionSplitter;
49  import org.apache.hadoop.io.Writable;
50  
51  import java.io.ByteArrayOutputStream;
52  import java.io.DataInput;
53  import java.io.DataOutput;
54  import java.io.IOException;
55  import java.util.ArrayList;
56  import java.util.List;
57  import java.util.UUID;
58  
/**
 * Hadoop MR API-agnostic implementation for mapreduce over table snapshots.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class TableSnapshotInputFormatImpl {
  // TODO: Snapshots files are owned in fs by the hbase user. There is no
  // easy way to delegate access.

  public static final Log LOG = LogFactory.getLog(TableSnapshotInputFormatImpl.class);

  /** Configuration key naming the snapshot this input format reads from. */
  private static final String SNAPSHOT_NAME_KEY = "hbase.TableSnapshotInputFormat.snapshot.name";
  // key for specifying the root dir of the restored snapshot
  protected static final String RESTORE_DIR_KEY = "hbase.TableSnapshotInputFormat.restore.dir";

  /** See {@link #getBestLocations(Configuration, HDFSBlocksDistribution)} */
  private static final String LOCALITY_CUTOFF_MULTIPLIER =
    "hbase.tablesnapshotinputformat.locality.cutoff.multiplier";
  // Hosts with at least this fraction of the top host's block weight are reported as locations.
  private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER = 0.8f;

  /**
   * For MapReduce jobs running multiple mappers per region, determines
   * what split algorithm we should be using to find split points for scanners.
   */
  public static final String SPLIT_ALGO = "hbase.mapreduce.split.algorithm";
  /**
   * For MapReduce jobs running multiple mappers per region, determines
   * number of splits to generate per region.
   */
  public static final String NUM_SPLITS_PER_REGION = "hbase.mapreduce.splits.per.region";
90    /**
91     * Implementation class for InputSplit logic common between mapred and mapreduce.
92     */
93    public static class InputSplit implements Writable {
94  
95      private HTableDescriptor htd;
96      private HRegionInfo regionInfo;
97      private String[] locations;
98      private String scan;
99      private String restoreDir;
100 
101     // constructor for mapreduce framework / Writable
102     public InputSplit() {}
103 
104     public InputSplit(HTableDescriptor htd, HRegionInfo regionInfo, List<String> locations,
105         Scan scan, Path restoreDir) {
106       this.htd = htd;
107       this.regionInfo = regionInfo;
108       if (locations == null || locations.isEmpty()) {
109         this.locations = new String[0];
110       } else {
111         this.locations = locations.toArray(new String[locations.size()]);
112       }
113       try {
114         this.scan = scan != null ? TableMapReduceUtil.convertScanToString(scan) : "";
115       } catch (IOException e) {
116         LOG.warn("Failed to convert Scan to String", e);
117       }
118 
119       this.restoreDir = restoreDir.toString();
120     }
121 
122     public HTableDescriptor getHtd() {
123       return htd;
124     }
125 
126     public String getScan() {
127       return scan;
128     }
129 
130     public String getRestoreDir() {
131       return restoreDir;
132     }
133 
134     public long getLength() {
135       //TODO: We can obtain the file sizes of the snapshot here.
136       return 0;
137     }
138 
139     public String[] getLocations() {
140       return locations;
141     }
142 
143     public HTableDescriptor getTableDescriptor() {
144       return htd;
145     }
146 
147     public HRegionInfo getRegionInfo() {
148       return regionInfo;
149     }
150 
151     // TODO: We should have ProtobufSerialization in Hadoop, and directly use PB objects instead of
152     // doing this wrapping with Writables.
153     @Override
154     public void write(DataOutput out) throws IOException {
155       TableSnapshotRegionSplit.Builder builder = TableSnapshotRegionSplit.newBuilder()
156           .setTable(htd.convert())
157           .setRegion(HRegionInfo.convert(regionInfo));
158 
159       for (String location : locations) {
160         builder.addLocations(location);
161       }
162 
163       TableSnapshotRegionSplit split = builder.build();
164 
165       ByteArrayOutputStream baos = new ByteArrayOutputStream();
166       split.writeTo(baos);
167       baos.close();
168       byte[] buf = baos.toByteArray();
169       out.writeInt(buf.length);
170       out.write(buf);
171 
172       Bytes.writeByteArray(out, Bytes.toBytes(scan));
173       Bytes.writeByteArray(out, Bytes.toBytes(restoreDir));
174 
175     }
176 
177     @Override
178     public void readFields(DataInput in) throws IOException {
179       int len = in.readInt();
180       byte[] buf = new byte[len];
181       in.readFully(buf);
182       TableSnapshotRegionSplit split = TableSnapshotRegionSplit.PARSER.parseFrom(buf);
183       this.htd = HTableDescriptor.convert(split.getTable());
184       this.regionInfo = HRegionInfo.convert(split.getRegion());
185       List<String> locationsList = split.getLocationsList();
186       this.locations = locationsList.toArray(new String[locationsList.size()]);
187 
188       this.scan = Bytes.toString(Bytes.readByteArray(in));
189       this.restoreDir = Bytes.toString(Bytes.readByteArray(in));
190     }
191   }
192 
193   /**
194    * Implementation class for RecordReader logic common between mapred and mapreduce.
195    */
196   public static class RecordReader {
197     private InputSplit split;
198     private Scan scan;
199     private Result result = null;
200     private ImmutableBytesWritable row = null;
201     private ClientSideRegionScanner scanner;
202 
203     public ClientSideRegionScanner getScanner() {
204       return scanner;
205     }
206 
207     public void initialize(InputSplit split, Configuration conf) throws IOException {
208       this.scan = TableMapReduceUtil.convertStringToScan(split.getScan());
209       this.split = split;
210       HTableDescriptor htd = split.htd;
211       HRegionInfo hri = this.split.getRegionInfo();
212       FileSystem fs = FSUtils.getCurrentFileSystem(conf);
213 
214 
215       // region is immutable, this should be fine,
216       // otherwise we have to set the thread read point
217       scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);
218       // disable caching of data blocks
219       scan.setCacheBlocks(false);
220       scan.setScanMetricsEnabled(true);
221 
222       scanner =
223           new ClientSideRegionScanner(conf, fs, new Path(split.restoreDir), htd, hri, scan, null);
224     }
225 
226     public boolean nextKeyValue() throws IOException {
227       result = scanner.next();
228       if (result == null) {
229         //we are done
230         return false;
231       }
232 
233       if (this.row == null) {
234         this.row = new ImmutableBytesWritable();
235       }
236       this.row.set(result.getRow());
237       return true;
238     }
239 
240     public ImmutableBytesWritable getCurrentKey() {
241       return row;
242     }
243 
244     public Result getCurrentValue() {
245       return result;
246     }
247 
248     public long getPos() {
249       return 0;
250     }
251 
252     public float getProgress() {
253       return 0; // TODO: use total bytes to estimate
254     }
255 
256     public void close() {
257       if (this.scanner != null) {
258         this.scanner.close();
259       }
260     }
261   }
262 
263   public static List<InputSplit> getSplits(Configuration conf) throws IOException {
264     String snapshotName = getSnapshotName(conf);
265 
266     Path rootDir = FSUtils.getRootDir(conf);
267     FileSystem fs = rootDir.getFileSystem(conf);
268 
269     SnapshotManifest manifest = getSnapshotManifest(conf, snapshotName, rootDir, fs);
270 
271     List<HRegionInfo> regionInfos = getRegionInfosFromManifest(manifest);
272 
273     // TODO: mapred does not support scan as input API. Work around for now.
274     Scan scan = extractScanFromConf(conf);
275     // the temp dir where the snapshot is restored
276     Path restoreDir = new Path(conf.get(RESTORE_DIR_KEY));
277 
278     RegionSplitter.SplitAlgorithm splitAlgo = getSplitAlgo(conf);
279 
280     int numSplits = conf.getInt(NUM_SPLITS_PER_REGION, 1);
281 
282     return getSplits(scan, manifest, regionInfos, restoreDir, conf, splitAlgo, numSplits);
283   }
284 
285   public static RegionSplitter.SplitAlgorithm getSplitAlgo(Configuration conf) throws IOException{
286     String splitAlgoClassName = conf.get(SPLIT_ALGO);
287     if (splitAlgoClassName == null)
288       return null;
289     try {
290       return ((Class<? extends RegionSplitter.SplitAlgorithm>)
291               Class.forName(splitAlgoClassName)).newInstance();
292     } catch (ClassNotFoundException e) {
293       throw new IOException("SplitAlgo class " + splitAlgoClassName +
294               " is not found", e);
295     } catch (InstantiationException e) {
296       throw new IOException("SplitAlgo class " + splitAlgoClassName +
297               " is not instantiable", e);
298     } catch (IllegalAccessException e) {
299       throw new IOException("SplitAlgo class " + splitAlgoClassName +
300               " is not instantiable", e);
301     }
302   }
303 
304   public static List<HRegionInfo> getRegionInfosFromManifest(SnapshotManifest manifest) {
305     List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
306     if (regionManifests == null) {
307       throw new IllegalArgumentException("Snapshot seems empty");
308     }
309 
310     List<HRegionInfo> regionInfos = Lists.newArrayListWithCapacity(regionManifests.size());
311 
312     for (SnapshotRegionManifest regionManifest : regionManifests) {
313       HRegionInfo hri = HRegionInfo.convert(regionManifest.getRegionInfo());
314       if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
315         continue;
316       }
317       regionInfos.add(hri);
318     }
319     return regionInfos;
320   }
321 
322   public static SnapshotManifest getSnapshotManifest(Configuration conf, String snapshotName,
323       Path rootDir, FileSystem fs) throws IOException {
324     Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
325     SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
326     return SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
327   }
328 
329   public static Scan extractScanFromConf(Configuration conf) throws IOException {
330     Scan scan = null;
331     if (conf.get(TableInputFormat.SCAN) != null) {
332       scan = TableMapReduceUtil.convertStringToScan(conf.get(TableInputFormat.SCAN));
333     } else if (conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST) != null) {
334       String[] columns =
335         conf.get(org.apache.hadoop.hbase.mapred.TableInputFormat.COLUMN_LIST).split(" ");
336       scan = new Scan();
337       for (String col : columns) {
338         scan.addFamily(Bytes.toBytes(col));
339       }
340     } else {
341       throw new IllegalArgumentException("Unable to create scan");
342     }
343     return scan;
344   }
345 
  /**
   * Convenience overload: generates exactly one input split per region
   * (no split algorithm, numSplits fixed at 1).
   */
  public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
      List<HRegionInfo> regionManifests, Path restoreDir, Configuration conf) throws IOException {
    return getSplits(scan, manifest, regionManifests, restoreDir, conf, null, 1);
  }
350 
351   public static List<InputSplit> getSplits(Scan scan, SnapshotManifest manifest,
352                                            List<HRegionInfo> regionManifests, Path restoreDir,
353                                            Configuration conf, RegionSplitter.SplitAlgorithm sa, int numSplits) throws IOException {
354     // load table descriptor
355     HTableDescriptor htd = manifest.getTableDescriptor();
356 
357     Path tableDir = FSUtils.getTableDir(restoreDir, htd.getTableName());
358 
359     List<InputSplit> splits = new ArrayList<InputSplit>();
360     for (HRegionInfo hri : regionManifests) {
361       // load region descriptor
362 
363       if (numSplits > 1) {
364         byte[][] sp = sa.split(hri.getStartKey(), hri.getEndKey(), numSplits, true);
365         for (int i = 0; i < sp.length - 1; i++) {
366           if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), sp[i],
367                   sp[i + 1])) {
368             // compute HDFS locations from snapshot files (which will get the locations for
369             // referred hfiles)
370             List<String> hosts = getBestLocations(conf,
371                     HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
372 
373             int len = Math.min(3, hosts.size());
374             hosts = hosts.subList(0, len);
375             Scan boundedScan = new Scan(scan);
376             if (scan.getStartRow().length == 0) {
377               boundedScan.withStartRow(sp[i]);
378             } else {
379               boundedScan.withStartRow(
380                 Bytes.compareTo(scan.getStartRow(), sp[i]) > 0 ? scan.getStartRow() : sp[i]);
381             }
382 
383             if (scan.getStopRow().length == 0) {
384               boundedScan.withStopRow(sp[i + 1]);
385             } else {
386               boundedScan.withStopRow(
387                 Bytes.compareTo(scan.getStopRow(), sp[i + 1]) < 0 ? scan.getStopRow() : sp[i + 1]);
388             }
389             splits.add(new InputSplit(htd, hri, hosts, boundedScan, restoreDir));
390           }
391         }
392       } else {
393         if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
394             hri.getEndKey())) {
395           // compute HDFS locations from snapshot files (which will get the locations for
396           // referred hfiles)
397           List<String> hosts = getBestLocations(conf,
398               HRegion.computeHDFSBlocksDistribution(conf, htd, hri, tableDir));
399 
400           int len = Math.min(3, hosts.size());
401           hosts = hosts.subList(0, len);
402           splits.add(new InputSplit(htd, hri, hosts, scan, restoreDir));
403         }
404       }
405     }
406 
407     return splits;
408 
409   }
410 
411   /**
412    * This computes the locations to be passed from the InputSplit. MR/Yarn schedulers does not take
413    * weights into account, thus will treat every location passed from the input split as equal. We
414    * do not want to blindly pass all the locations, since we are creating one split per region, and
415    * the region's blocks are all distributed throughout the cluster unless favorite node assignment
416    * is used. On the expected stable case, only one location will contain most of the blocks as
417    * local.
418    * On the other hand, in favored node assignment, 3 nodes will contain highly local blocks. Here
419    * we are doing a simple heuristic, where we will pass all hosts which have at least 80%
420    * (hbase.tablesnapshotinputformat.locality.cutoff.multiplier) as much block locality as the top
421    * host with the best locality.
422    */
423   public static List<String> getBestLocations(
424       Configuration conf, HDFSBlocksDistribution blockDistribution) {
425     List<String> locations = new ArrayList<String>(3);
426 
427     HostAndWeight[] hostAndWeights = blockDistribution.getTopHostsWithWeights();
428 
429     if (hostAndWeights.length == 0) {
430       return locations;
431     }
432 
433     HostAndWeight topHost = hostAndWeights[0];
434     locations.add(topHost.getHost());
435 
436     // Heuristic: filter all hosts which have at least cutoffMultiplier % of block locality
437     double cutoffMultiplier
438       = conf.getFloat(LOCALITY_CUTOFF_MULTIPLIER, DEFAULT_LOCALITY_CUTOFF_MULTIPLIER);
439 
440     double filterWeight = topHost.getWeight() * cutoffMultiplier;
441 
442     for (int i = 1; i < hostAndWeights.length; i++) {
443       if (hostAndWeights[i].getWeight() >= filterWeight) {
444         locations.add(hostAndWeights[i].getHost());
445       } else {
446         break;
447       }
448     }
449 
450     return locations;
451   }
452 
453   private static String getSnapshotName(Configuration conf) {
454     String snapshotName = conf.get(SNAPSHOT_NAME_KEY);
455     if (snapshotName == null) {
456       throw new IllegalArgumentException("Snapshot name must be provided");
457     }
458     return snapshotName;
459   }
460 
  /**
   * Configures the job to use TableSnapshotInputFormat to read from a snapshot,
   * generating a single input split per region.
   * @param conf the job to configure
   * @param snapshotName the name of the snapshot to read from
   * @param restoreDir a temporary directory to restore the snapshot into. Current user should
   * have write permissions to this directory, and this should not be a subdirectory of rootdir.
   * After the job is finished, restoreDir can be deleted.
   * @throws IOException if an error occurs
   */
  public static void setInput(Configuration conf, String snapshotName, Path restoreDir)
      throws IOException {
    setInput(conf, snapshotName, restoreDir, null, 1);
  }
474 
475   /**
476    * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
477    * @param conf the job to configure
478    * @param snapshotName the name of the snapshot to read from
479    * @param restoreDir a temporary directory to restore the snapshot into. Current user should
480    * have write permissions to this directory, and this should not be a subdirectory of rootdir.
481    * After the job is finished, restoreDir can be deleted.
482    * @param numSplitsPerRegion how many input splits to generate per one region
483    * @param splitAlgo SplitAlgorithm to be used when generating InputSplits
484    * @throws IOException if an error occurs
485    */
486   public static void setInput(Configuration conf, String snapshotName, Path restoreDir,
487                               RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion)
488           throws IOException {
489     conf.set(SNAPSHOT_NAME_KEY, snapshotName);
490     if (numSplitsPerRegion < 1) {
491       throw new IllegalArgumentException("numSplits must be >= 1, " +
492               "illegal numSplits : " + numSplitsPerRegion);
493     }
494     if (splitAlgo == null && numSplitsPerRegion > 1) {
495       throw new IllegalArgumentException("Split algo can't be null when numSplits > 1");
496     }
497     if (splitAlgo != null) {
498       conf.set(SPLIT_ALGO, splitAlgo.getClass().getName());
499     }
500     conf.setInt(NUM_SPLITS_PER_REGION, numSplitsPerRegion);
501     conf.set(SNAPSHOT_NAME_KEY, snapshotName);
502 
503     Path rootDir = FSUtils.getRootDir(conf);
504     FileSystem fs = rootDir.getFileSystem(conf);
505 
506     restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
507 
508     // TODO: restore from record readers to parallelize.
509     RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
510 
511     conf.set(RESTORE_DIR_KEY, restoreDir.toString());
512   }
513 }