View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.snapshot;
20  
21  import com.google.protobuf.CodedInputStream;
22  import com.google.protobuf.InvalidProtocolBufferException;
23  import java.io.FileNotFoundException;
24  import java.io.IOException;
25  import java.io.InterruptedIOException;
26  import java.util.ArrayList;
27  import java.util.Collection;
28  import java.util.HashMap;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.concurrent.Callable;
32  import java.util.concurrent.ExecutionException;
33  import java.util.concurrent.ExecutorCompletionService;
34  import java.util.concurrent.ThreadPoolExecutor;
35  import java.util.concurrent.TimeUnit;
36  import org.apache.commons.logging.Log;
37  import org.apache.commons.logging.LogFactory;
38  import org.apache.hadoop.conf.Configuration;
39  import org.apache.hadoop.fs.FSDataInputStream;
40  import org.apache.hadoop.fs.FSDataOutputStream;
41  import org.apache.hadoop.fs.FileSystem;
42  import org.apache.hadoop.fs.Path;
43  import org.apache.hadoop.hbase.HRegionInfo;
44  import org.apache.hadoop.hbase.HTableDescriptor;
45  import org.apache.hadoop.hbase.classification.InterfaceAudience;
46  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
47  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
48  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
49  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
50  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotDataManifest;
51  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
52  import org.apache.hadoop.hbase.regionserver.HRegion;
53  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
54  import org.apache.hadoop.hbase.regionserver.Store;
55  import org.apache.hadoop.hbase.regionserver.StoreFile;
56  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
57  import org.apache.hadoop.hbase.util.Bytes;
58  import org.apache.hadoop.hbase.util.FSTableDescriptors;
59  import org.apache.hadoop.hbase.util.FSUtils;
60  import org.apache.hadoop.hbase.util.Threads;
61  
62  /**
63   * Utility class to help read/write the Snapshot Manifest.
64   *
65   * The snapshot format is transparent for the users of this class,
66   * once the snapshot is written, it will never be modified.
67   * On open() the snapshot will be loaded to the current in-memory format.
68   */
69  @InterfaceAudience.Private
70  public final class SnapshotManifest {
  // Logger shared by all instances of this class.
  private static final Log LOG = LogFactory.getLog(SnapshotManifest.class);

  /**
   * Configuration key of the manifest size limit. The constructor reads it with a default of
   * 64 * 1024 * 1024, and the value is applied as the CodedInputStream size limit when the
   * data manifest is parsed.
   */
  public static final String SNAPSHOT_MANIFEST_SIZE_LIMIT_CONF_KEY = "snapshot.manifest.size.limit";

  /** File name of the single data manifest, resolved against the working directory. */
  public static final String DATA_MANIFEST_NAME = "data.manifest";

  // Region manifests; assigned by load() and by convertToV2SingleManifest().
  private List<SnapshotRegionManifest> regionManifests;
  // Snapshot description handed to the constructor; its version selects the manifest format.
  private SnapshotDescription desc;
  // Table descriptor; assigned by addTableDescriptor() or by load().
  private HTableDescriptor htd;

  // Checked via rethrowException() while regions are added; open() passes null here.
  private final ForeignExceptionSnare monitor;
  private final Configuration conf;
  // Directory that holds the manifest files of this snapshot.
  private final Path workingDir;
  // File system handed to the manifest builders and to the region loaders used by load().
  private final FileSystem rootFs;
  // File system of workingDir, resolved from the configuration in the constructor.
  private final FileSystem workingDirFs;
  // Value of SNAPSHOT_MANIFEST_SIZE_LIMIT_CONF_KEY read in the constructor.
  private int manifestSizeLimit;
  // Optional status sink; setStatusMsg() does nothing when this is null.
  private final MonitoredTask statusTask;
88  
89    /**
90     *
91     * @param conf configuration file for HBase setup
92     * @param rootFs root filesystem containing HFiles
93     * @param workingDir file path of where the manifest should be located
94     * @param desc description of snapshot being taken
95     * @param monitor monitor of foreign exceptions
96     * @throws IOException if the working directory file system cannot be
97     *                     determined from the config file
98     */
99    private SnapshotManifest(final Configuration conf, final FileSystem rootFs,
100       final Path workingDir, final SnapshotDescription desc,
101       final ForeignExceptionSnare monitor, final MonitoredTask statusTask) throws IOException {
102     this.monitor = monitor;
103     this.desc = desc;
104     this.workingDir = workingDir;
105     this.conf = conf;
106     this.rootFs = rootFs;
107     this.statusTask = statusTask;
108     this.workingDirFs = this.workingDir.getFileSystem(this.conf);
109     this.manifestSizeLimit = conf.getInt(SNAPSHOT_MANIFEST_SIZE_LIMIT_CONF_KEY, 64 * 1024 * 1024);
110   }
111 
112   /**
113    * Return a SnapshotManifest instance, used for writing a snapshot.
114    *
115    * There are two usage pattern:
116    *  - The Master will create a manifest, add the descriptor, offline regions
117    *    and consolidate the snapshot by writing all the pending stuff on-disk.
118    *      manifest = SnapshotManifest.create(...)
119    *      manifest.addRegion(tableDir, hri)
120    *      manifest.consolidate()
121    *  - The RegionServer will create a single region manifest
122    *      manifest = SnapshotManifest.create(...)
123    *      manifest.addRegion(region)
124    */
125   public static SnapshotManifest create(final Configuration conf, final FileSystem fs,
126       final Path workingDir, final SnapshotDescription desc,
127       final ForeignExceptionSnare monitor) throws IOException {
128     return create(conf, fs, workingDir, desc, monitor, null);
129 
130   }
131 
132   public static SnapshotManifest create(final Configuration conf, final FileSystem fs,
133       final Path workingDir, final SnapshotDescription desc, final ForeignExceptionSnare monitor,
134       final MonitoredTask statusTask) throws IOException {
135     return new SnapshotManifest(conf, fs, workingDir, desc, monitor, statusTask);
136   }
137 
138   /**
139    * Return a SnapshotManifest instance with the information already loaded in-memory.
140    *    SnapshotManifest manifest = SnapshotManifest.open(...)
141    *    HTableDescriptor htd = manifest.getTableDescriptor()
142    *    for (SnapshotRegionManifest regionManifest: manifest.getRegionManifests())
143    *      hri = regionManifest.getRegionInfo()
144    *      for (regionManifest.getFamilyFiles())
145    *        ...
146    */
147   public static SnapshotManifest open(final Configuration conf, final FileSystem fs,
148       final Path workingDir, final SnapshotDescription desc) throws IOException {
149     SnapshotManifest manifest = new SnapshotManifest(conf, fs, workingDir, desc, null, null);
150     manifest.load();
151     return manifest;
152   }
153 
154 
  /**
   * Add the table descriptor to the snapshot manifest.
   * The descriptor is only kept in memory here; consolidate() is the method that
   * writes it out.
   *
   * @param htd table descriptor to associate with this manifest
   */
  public void addTableDescriptor(final HTableDescriptor htd) throws IOException {
    this.htd = htd;
  }
161 
162   interface RegionVisitor<TRegion, TFamily> {
163     TRegion regionOpen(final HRegionInfo regionInfo) throws IOException;
164     void regionClose(final TRegion region) throws IOException;
165 
166     TFamily familyOpen(final TRegion region, final byte[] familyName) throws IOException;
167     void familyClose(final TRegion region, final TFamily family) throws IOException;
168 
169     void storeFile(final TRegion region, final TFamily family, final StoreFileInfo storeFile)
170       throws IOException;
171   }
172 
173   private RegionVisitor createRegionVisitor(final SnapshotDescription desc) throws IOException {
174     switch (getSnapshotFormat(desc)) {
175       case SnapshotManifestV1.DESCRIPTOR_VERSION:
176         return new SnapshotManifestV1.ManifestBuilder(conf, rootFs, workingDir);
177       case SnapshotManifestV2.DESCRIPTOR_VERSION:
178         return new SnapshotManifestV2.ManifestBuilder(conf, rootFs, workingDir);
179       default:
180         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
181     }
182   }
183 
184   /**
185    * Creates a 'manifest' for the specified region, by reading directly from the HRegion object.
186    * This is used by the "online snapshot" when the table is enabled.
187    */
188   public void addRegion(final HRegion region) throws IOException {
189     // Get the ManifestBuilder/RegionVisitor
190     RegionVisitor visitor = createRegionVisitor(desc);
191 
192     // Visit the region and add it to the manifest
193     addRegion(region, visitor);
194   }
195 
196   protected void addRegion(final HRegion region, RegionVisitor visitor) throws IOException {
197     // 1. dump region meta info into the snapshot directory
198     LOG.debug("Storing '" + region + "' region-info for snapshot.");
199     Object regionData = visitor.regionOpen(region.getRegionInfo());
200     monitor.rethrowException();
201 
202     // 2. iterate through all the stores in the region
203     LOG.debug("Creating references for hfiles");
204 
205     for (Store store : region.getStores()) {
206       // 2.1. build the snapshot reference for the store
207       Object familyData = visitor.familyOpen(regionData, store.getFamily().getName());
208       monitor.rethrowException();
209 
210       List<StoreFile> storeFiles = new ArrayList<StoreFile>(store.getStorefiles());
211       if (LOG.isDebugEnabled()) {
212         LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
213       }
214 
215       // 2.2. iterate through all the store's files and create "references".
216       for (int i = 0, sz = storeFiles.size(); i < sz; i++) {
217         StoreFile storeFile = storeFiles.get(i);
218         monitor.rethrowException();
219 
220         // create "reference" to this store file.
221         LOG.debug("Adding reference for file (" + (i+1) + "/" + sz + "): " + storeFile.getPath());
222         visitor.storeFile(regionData, familyData, storeFile.getFileInfo());
223       }
224       visitor.familyClose(regionData, familyData);
225     }
226     visitor.regionClose(regionData);
227   }
228 
229   /**
230    * Creates a 'manifest' for the specified region, by reading directly from the disk.
231    * This is used by the "offline snapshot" when the table is disabled.
232    */
233   public void addRegion(final Path tableDir, final HRegionInfo regionInfo) throws IOException {
234     // Get the ManifestBuilder/RegionVisitor
235     RegionVisitor visitor = createRegionVisitor(desc);
236 
237     // Visit the region and add it to the manifest
238     addRegion(tableDir, regionInfo, visitor);
239   }
240 
241   protected void addRegion(final Path tableDir, final HRegionInfo regionInfo, RegionVisitor visitor)
242       throws IOException {
243 
244     // Open the RegionFS
245     HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(conf, rootFs,
246         tableDir, regionInfo, true);
247     monitor.rethrowException();
248 
249     // 1. dump region meta info into the snapshot directory
250     LOG.debug("Storing region-info for snapshot.");
251     Object regionData = visitor.regionOpen(regionInfo);
252     monitor.rethrowException();
253 
254     // 2. iterate through all the stores in the region
255     LOG.debug("Creating references for hfiles");
256 
257     // This ensures that we have an atomic view of the directory as long as we have < ls limit
258     // (batch size of the files in a directory) on the namenode. Otherwise, we get back the files in
259     // batches and may miss files being added/deleted. This could be more robust (iteratively
260     // checking to see if we have all the files until we are sure), but the limit is currently 1000
261     // files/batch, far more than the number of store files under a single column family.
262     Collection<String> familyNames = regionFs.getFamilies();
263     if (familyNames != null) {
264       for (String familyName: familyNames) {
265         Object familyData = visitor.familyOpen(regionData, Bytes.toBytes(familyName));
266         monitor.rethrowException();
267 
268         Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(familyName);
269         if (storeFiles == null) {
270           LOG.debug("No files under family: " + familyName);
271           continue;
272         }
273 
274         // 2.1. build the snapshot reference for the store
275         if (LOG.isDebugEnabled()) {
276           LOG.debug("Adding snapshot references for " + storeFiles  + " hfiles");
277         }
278 
279         // 2.2. iterate through all the store's files and create "references".
280         int i = 0;
281         int sz = storeFiles.size();
282         for (StoreFileInfo storeFile: storeFiles) {
283           monitor.rethrowException();
284 
285           // create "reference" to this store file.
286           LOG.debug("Adding reference for file ("+ (++i) +"/" + sz + "): " + storeFile.getPath());
287           visitor.storeFile(regionData, familyData, storeFile);
288         }
289         visitor.familyClose(regionData, familyData);
290       }
291     }
292     visitor.regionClose(regionData);
293   }
294 
295   /**
296    * Load the information in the SnapshotManifest. Called by SnapshotManifest.open()
297    *
298    * If the format is v2 and there is no data-manifest, means that we are loading an
299    * in-progress snapshot. Since we support rolling-upgrades, we loook for v1 and v2
300    * regions format.
301    */
302   private void load() throws IOException {
303     switch (getSnapshotFormat(desc)) {
304       case SnapshotManifestV1.DESCRIPTOR_VERSION: {
305         this.htd = FSTableDescriptors.getTableDescriptorFromFs(workingDirFs, workingDir);
306         ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
307         try {
308           this.regionManifests =
309             SnapshotManifestV1.loadRegionManifests(conf, tpool, rootFs, workingDir, desc);
310         } finally {
311           tpool.shutdown();
312         }
313         break;
314       }
315       case SnapshotManifestV2.DESCRIPTOR_VERSION: {
316         SnapshotDataManifest dataManifest = readDataManifest();
317         if (dataManifest != null) {
318           htd = HTableDescriptor.convert(dataManifest.getTableSchema());
319           regionManifests = dataManifest.getRegionManifestsList();
320         } else {
321           // Compatibility, load the v1 regions
322           // This happens only when the snapshot is in-progress and the cache wants to refresh.
323           List<SnapshotRegionManifest> v1Regions, v2Regions;
324           ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
325           try {
326             v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, rootFs,
327                 workingDir, desc);
328             v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, rootFs,
329                 workingDir, desc, manifestSizeLimit);
330           } catch (InvalidProtocolBufferException e) {
331             throw new CorruptedSnapshotException("unable to parse region manifest " +
332                 e.getMessage(), e);
333           } finally {
334             tpool.shutdown();
335           }
336           if (v1Regions != null && v2Regions != null) {
337             regionManifests =
338               new ArrayList<SnapshotRegionManifest>(v1Regions.size() + v2Regions.size());
339             regionManifests.addAll(v1Regions);
340             regionManifests.addAll(v2Regions);
341           } else if (v1Regions != null) {
342             regionManifests = v1Regions;
343           } else /* if (v2Regions != null) */ {
344             regionManifests = v2Regions;
345           }
346         }
347         break;
348       }
349       default:
350         throw new CorruptedSnapshotException("Invalid Snapshot version: "+ desc.getVersion(), desc);
351     }
352   }
353 
354   /**
355    * Get the current snapshot working dir
356    */
357   public Path getSnapshotDir() {
358     return this.workingDir;
359   }
360 
361   /**
362    * Get the SnapshotDescription
363    */
364   public SnapshotDescription getSnapshotDescription() {
365     return this.desc;
366   }
367 
368   /**
369    * Get the table descriptor from the Snapshot
370    */
371   public HTableDescriptor getTableDescriptor() {
372     return this.htd;
373   }
374 
375   /**
376    * Get all the Region Manifest from the snapshot
377    */
378   public List<SnapshotRegionManifest> getRegionManifests() {
379     return this.regionManifests;
380   }
381 
382   private void setStatusMsg(String msg) {
383     if (this.statusTask != null) {
384       statusTask.setStatus(msg);
385     }
386   }
387 
388   /**
389    * Get all the Region Manifest from the snapshot.
390    * This is an helper to get a map with the region encoded name
391    */
392   public Map<String, SnapshotRegionManifest> getRegionManifestsMap() {
393     if (regionManifests == null || regionManifests.size() == 0) return null;
394 
395     HashMap<String, SnapshotRegionManifest> regionsMap =
396         new HashMap<String, SnapshotRegionManifest>(regionManifests.size());
397     for (SnapshotRegionManifest manifest: regionManifests) {
398       String regionName = getRegionNameFromManifest(manifest);
399       regionsMap.put(regionName, manifest);
400     }
401     return regionsMap;
402   }
403 
404   public void consolidate() throws IOException {
405     if (getSnapshotFormat(desc) == SnapshotManifestV1.DESCRIPTOR_VERSION) {
406       Path rootDir = FSUtils.getRootDir(conf);
407       LOG.info("Using old Snapshot Format");
408       // write a copy of descriptor to the snapshot directory
409       new FSTableDescriptors(conf, workingDirFs, rootDir)
410         .createTableDescriptorForTableDirectory(workingDir, htd, false);
411     } else {
412       LOG.debug("Convert to Single Snapshot Manifest for " + this.desc.getName());
413       convertToV2SingleManifest();
414     }
415   }
416 
417   /*
418    * In case of rolling-upgrade, we try to read all the formats and build
419    * the snapshot with the latest format.
420    */
421   private void convertToV2SingleManifest() throws IOException {
422     // Try to load v1 and v2 regions
423     List<SnapshotRegionManifest> v1Regions, v2Regions;
424     ThreadPoolExecutor tpool = createExecutor("SnapshotManifestLoader");
425     setStatusMsg("Loading Region manifests for " + this.desc.getName());
426     try {
427       v1Regions = SnapshotManifestV1.loadRegionManifests(conf, tpool, workingDirFs,
428           workingDir, desc);
429       v2Regions = SnapshotManifestV2.loadRegionManifests(conf, tpool, workingDirFs,
430           workingDir, desc, manifestSizeLimit);
431 
432       SnapshotDataManifest.Builder dataManifestBuilder = SnapshotDataManifest.newBuilder();
433       dataManifestBuilder.setTableSchema(htd.convert());
434 
435       if (v1Regions != null && v1Regions.size() > 0) {
436         dataManifestBuilder.addAllRegionManifests(v1Regions);
437       }
438       if (v2Regions != null && v2Regions.size() > 0) {
439         dataManifestBuilder.addAllRegionManifests(v2Regions);
440       }
441 
442       // Write the v2 Data Manifest.
443       // Once the data-manifest is written, the snapshot can be considered complete.
444       // Currently snapshots are written in a "temporary" directory and later
445       // moved to the "complated" snapshot directory.
446       setStatusMsg("Writing data manifest for " + this.desc.getName());
447       SnapshotDataManifest dataManifest = dataManifestBuilder.build();
448       writeDataManifest(dataManifest);
449       this.regionManifests = dataManifest.getRegionManifestsList();
450 
451       // Remove the region manifests. Everything is now in the data-manifest.
452       // The delete operation is "relaxed", unless we get an exception we keep going.
453       // The extra files in the snapshot directory will not give any problem,
454       // since they have the same content as the data manifest, and even by re-reading
455       // them we will get the same information.
456       int totalDeletes = 0;
457       ExecutorCompletionService<Void> completionService = new ExecutorCompletionService<>(tpool);
458       if (v1Regions != null) {
459         for (final SnapshotRegionManifest regionManifest: v1Regions) {
460           ++totalDeletes;
461           completionService.submit(new Callable<Void>() {
462             @Override
463             public Void call() throws Exception {
464               SnapshotManifestV1.deleteRegionManifest(workingDirFs, workingDir, regionManifest);
465               return null;
466             }
467           });
468         }
469       }
470       if (v2Regions != null) {
471         for (final SnapshotRegionManifest regionManifest: v2Regions) {
472           ++totalDeletes;
473           completionService.submit(new Callable<Void>() {
474             @Override
475             public Void call() throws Exception {
476               SnapshotManifestV2.deleteRegionManifest(workingDirFs, workingDir, regionManifest);
477               return null;
478             }
479           });
480         }
481       }
482       // Wait for the deletes to finish.
483       for (int i = 0; i < totalDeletes; i++) {
484         try {
485           completionService.take().get();
486         } catch (InterruptedException ie) {
487           throw new InterruptedIOException(ie.getMessage());
488         } catch (ExecutionException e) {
489           throw new IOException("Error deleting region manifests", e.getCause());
490         }
491       }
492     } finally {
493       tpool.shutdown();
494     }
495   }
496 
497   /*
498    * Write the SnapshotDataManifest file
499    */
500   private void writeDataManifest(final SnapshotDataManifest manifest)
501       throws IOException {
502     FSDataOutputStream stream = workingDirFs.create(new Path(workingDir, DATA_MANIFEST_NAME));
503     try {
504       manifest.writeTo(stream);
505     } finally {
506       stream.close();
507     }
508   }
509 
510   /*
511    * Read the SnapshotDataManifest file
512    */
513   private SnapshotDataManifest readDataManifest() throws IOException {
514     FSDataInputStream in = null;
515     try {
516       in = workingDirFs.open(new Path(workingDir, DATA_MANIFEST_NAME));
517       CodedInputStream cin = CodedInputStream.newInstance(in);
518       cin.setSizeLimit(manifestSizeLimit);
519       return SnapshotDataManifest.parseFrom(cin);
520     } catch (FileNotFoundException e) {
521       return null;
522     } catch (InvalidProtocolBufferException e) {
523       throw new CorruptedSnapshotException("unable to parse data manifest " + e.getMessage(), e);
524     } finally {
525       if (in != null) in.close();
526     }
527   }
528 
529   private ThreadPoolExecutor createExecutor(final String name) {
530     return createExecutor(conf, name);
531   }
532 
533   public static ThreadPoolExecutor createExecutor(final Configuration conf, final String name) {
534     int maxThreads = conf.getInt("hbase.snapshot.thread.pool.max", 8);
535     return Threads.getBoundedCachedThreadPool(maxThreads, 30L, TimeUnit.SECONDS,
536               Threads.getNamedThreadFactory(name));
537   }
538 
539   /**
540    * Extract the region encoded name from the region manifest
541    */
542   static String getRegionNameFromManifest(final SnapshotRegionManifest manifest) {
543     byte[] regionName = HRegionInfo.createRegionName(
544             ProtobufUtil.toTableName(manifest.getRegionInfo().getTableName()),
545             manifest.getRegionInfo().getStartKey().toByteArray(),
546             manifest.getRegionInfo().getRegionId(), true);
547     return HRegionInfo.encodeRegionName(regionName);
548   }
549 
550   /*
551    * Return the snapshot format
552    */
553   private static int getSnapshotFormat(final SnapshotDescription desc) {
554     return desc.hasVersion() ? desc.getVersion() : SnapshotManifestV1.DESCRIPTOR_VERSION;
555   }
556 }