View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.snapshot;
19  
20  import com.google.common.base.Preconditions;
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.HashSet;
24  import java.util.List;
25  import java.util.Set;
26  import java.util.concurrent.CancellationException;
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FileSystem;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.HTableDescriptor;
34  import org.apache.hadoop.hbase.MetaTableAccessor;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.TableName;
37  import org.apache.hadoop.hbase.classification.InterfaceAudience;
38  import org.apache.hadoop.hbase.errorhandling.ForeignException;
39  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
40  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
41  import org.apache.hadoop.hbase.executor.EventHandler;
42  import org.apache.hadoop.hbase.executor.EventType;
43  import org.apache.hadoop.hbase.master.MasterServices;
44  import org.apache.hadoop.hbase.master.MetricsSnapshot;
45  import org.apache.hadoop.hbase.master.SnapshotSentinel;
46  import org.apache.hadoop.hbase.master.TableLockManager;
47  import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
48  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
49  import org.apache.hadoop.hbase.monitoring.TaskMonitor;
50  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
51  import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
52  import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
53  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
54  import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
55  import org.apache.hadoop.hbase.util.FSUtils;
56  import org.apache.hadoop.hbase.util.Pair;
57  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
58  import org.apache.zookeeper.KeeperException;
59  
60  /**
61   * A handler for taking snapshots from the master.
62   *
63   * This is not a subclass of TableEventHandler because using that would incur an extra hbase:meta scan.
64   *
65   * The {@link #snapshotRegions(List)} call should get implemented for each snapshot flavor.
66   */
67  @InterfaceAudience.Private
68  public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel,
69      ForeignExceptionSnare {
70    private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class);
71  
72    private volatile boolean finished;
73  
74    // none of these should ever be null
75    protected final MasterServices master;
76    protected final MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
77    protected final SnapshotDescription snapshot;
78    protected final Configuration conf;
79    protected final FileSystem rootFs;
80    protected final FileSystem workingDirFs;
81    protected final Path rootDir;
82    private final Path snapshotDir;
83    protected final Path workingDir;
84    private final MasterSnapshotVerifier verifier;
85    protected final ForeignExceptionDispatcher monitor;
86    protected final TableLockManager tableLockManager;
87    protected final TableLock tableLock;
88    protected final MonitoredTask status;
89    protected final TableName snapshotTable;
90    protected final SnapshotManifest snapshotManifest;
91    protected final SnapshotManager snapshotManager;
92  
93    protected HTableDescriptor htd;
94  
95    /**
96     * @param snapshot descriptor of the snapshot to take
97     * @param masterServices master services provider
98     * @throws IllegalArgumentException if the working snapshot directory set from the
99     *   configuration is the same as the completed snapshot directory
100    * @throws IOException if the file system of the working snapshot directory cannot be
101    *   determined
102    */
103   public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices,
104                              final SnapshotManager snapshotManager) throws IOException {
105     super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
106     assert snapshot != null : "SnapshotDescription must not be nul1";
107     assert masterServices != null : "MasterServices must not be nul1";
108     this.master = masterServices;
109     this.conf = this.master.getConfiguration();
110     this.rootDir = this.master.getMasterFileSystem().getRootDir();
111     this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir, conf);
112     Preconditions.checkArgument(!SnapshotDescriptionUtils.isSubDirectoryOf(workingDir, rootDir) ||
113             SnapshotDescriptionUtils.isWithinDefaultWorkingDir(workingDir, conf),
114         "The working directory " + workingDir + " cannot be in the root directory unless it is "
115             + "within the default working directory");
116 
117     this.snapshot = snapshot;
118     this.snapshotManager = snapshotManager;
119     this.snapshotTable = TableName.valueOf(snapshot.getTable());
120     this.rootFs = this.master.getMasterFileSystem().getFileSystem();
121     this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
122     this.workingDirFs = this.workingDir.getFileSystem(this.conf);
123     this.monitor = new ForeignExceptionDispatcher(snapshot.getName());
124 
125     this.tableLockManager = master.getTableLockManager();
126     this.tableLock = this.tableLockManager.writeLock(
127         snapshotTable,
128         EventType.C_M_SNAPSHOT_TABLE.toString());
129 
130     // prepare the verify
131     this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, workingDirFs);
132     // update the running tasks
133     this.status = TaskMonitor.get().createStatus(
134       "Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable);
135     this.status.enableStatusJournal(true);
136 
137     this.snapshotManifest =
138         SnapshotManifest.create(conf, rootFs, workingDir, snapshot, monitor, status);
139   }
140 
141   private HTableDescriptor loadTableDescriptor()
142       throws FileNotFoundException, IOException {
143     HTableDescriptor htd =
144       this.master.getTableDescriptors().get(snapshotTable);
145     if (htd == null) {
146       throw new IOException("HTableDescriptor missing for " + snapshotTable);
147     }
148     return htd;
149   }
150 
151   @Override
152   public TakeSnapshotHandler prepare() throws Exception {
153     super.prepare();
154     this.tableLock.acquire(); // after this, you should ensure to release this lock in
155                               // case of exceptions
156     boolean success = false;
157     try {
158       this.htd = loadTableDescriptor(); // check that .tableinfo is present
159       success = true;
160     } finally {
161       if (!success) {
162         releaseTableLock();
163       }
164     }
165 
166     return this;
167   }
168 
169   /**
170    * Execute the core common portions of taking a snapshot. The {@link #snapshotRegions(List)}
171    * call should get implemented for each snapshot flavor.
172    */
173   @Override
174   @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
175     justification="Intentional")
176   public void process() {
177     String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
178         + eventType + " on table " + snapshotTable;
179     LOG.info(msg);
180     status.setStatus(msg);
181     try {
182       // If regions move after this meta scan, the region specific snapshot should fail, triggering
183       // an external exception that gets captured here.
184 
185       // write down the snapshot info in the working directory
186       SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, workingDirFs);
187       snapshotManifest.addTableDescriptor(this.htd);
188       monitor.rethrowException();
189 
190       List<Pair<HRegionInfo, ServerName>> regionsAndLocations;
191       if (TableName.META_TABLE_NAME.equals(snapshotTable)) {
192         regionsAndLocations = new MetaTableLocator().getMetaRegionsAndLocations(
193           server.getZooKeeper());
194       } else {
195         regionsAndLocations = MetaTableAccessor.getTableRegionsAndLocations(
196           server.getZooKeeper(), server.getConnection(), snapshotTable, false);
197       }
198 
199       // run the snapshot
200       snapshotRegions(regionsAndLocations);
201       monitor.rethrowException();
202 
203       // extract each pair to separate lists
204       Set<String> serverNames = new HashSet<String>();
205       for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
206         if (p != null && p.getFirst() != null && p.getSecond() != null) {
207           HRegionInfo hri = p.getFirst();
208           if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue;
209           serverNames.add(p.getSecond().toString());
210         }
211       }
212 
213       // flush the in-memory state, and write the single manifest
214       status.setStatus("Consolidate snapshot: " + snapshot.getName());
215       snapshotManifest.consolidate();
216 
217       // verify the snapshot is valid
218       status.setStatus("Verifying snapshot: " + snapshot.getName());
219       verifier.verifySnapshot(this.workingDir, serverNames);
220 
221       // complete the snapshot, atomically moving from tmp to .snapshot dir.
222       completeSnapshot(this.snapshotDir, this.workingDir, this.rootFs, this.workingDirFs);
223       msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
224       status.markComplete(msg);
225       LOG.info(msg);
226       metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
227     } catch (Exception e) { // FindBugs: REC_CATCH_EXCEPTION
228       status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " +
229           snapshotTable + " because " + e.getMessage());
230       String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
231           + " due to exception:" + e.getMessage();
232       LOG.error(reason, e);
233       ForeignException ee = new ForeignException(reason, e);
234       monitor.receive(ee);
235       // need to mark this completed to close off and allow cleanup to happen.
236       cancel(reason);
237     } finally {
238       LOG.debug("Launching cleanup of working dir:" + workingDir);
239       try {
240         // if the working dir is still present, the snapshot has failed.  it is present we delete
241         // it.
242         if (!workingDirFs.delete(workingDir, true)) {
243           LOG.error("Couldn't delete snapshot working directory:" + workingDir);
244         }
245       } catch (IOException e) {
246         LOG.error("Couldn't delete snapshot working directory:" + workingDir);
247       }
248       if (LOG.isDebugEnabled()) {
249         LOG.debug("Table snapshot journal : \n" + status.prettyPrintJournal());
250       }
251       releaseTableLock();
252     }
253   }
254 
255   protected void releaseTableLock() {
256     if (this.tableLock != null) {
257       try {
258         this.tableLock.release();
259       } catch (IOException ex) {
260         LOG.warn("Could not release the table lock", ex);
261       }
262     }
263   }
264 
265   /**
266    * Reset the manager to allow another snapshot to proceed.
267    * Commits the snapshot process by moving the working snapshot
268    * to the finalized filepath
269    *
270    * @param snapshotDir The file path of the completed snapshots
271    * @param workingDir  The file path of the in progress snapshots
272    * @param fs The file system of the completed snapshots
273    * @param workingDirFs The file system of the in progress snapshots
274    *
275    * @throws SnapshotCreationException if the snapshot could not be moved
276    * @throws IOException the filesystem could not be reached
277    */
278   public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs,
279       FileSystem workingDirFs) throws SnapshotCreationException, IOException {
280     SnapshotDescriptionUtils.completeSnapshot(snapshotDir, workingDir, fs, workingDirFs, conf);
281     finished = true;
282   }
283 
  /**
   * Snapshot the specified regions. Implemented by each snapshot flavor and invoked from
   * {@link #process()} after the snapshot info and table descriptor have been written.
   *
   * @param regions pairs of region info and the server recorded for that region, as looked up
   *   by {@link #process()}
   * @throws IOException if snapshotting the regions fails
   * @throws KeeperException declared for implementations; presumably raised on ZooKeeper
   *   failures (confirm against the concrete subclasses)
   */
  protected abstract void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regions)
      throws IOException, KeeperException;
289 
290   /**
291    * Take a snapshot of the specified disabled region
292    */
293   protected void snapshotDisabledRegion(final HRegionInfo regionInfo)
294       throws IOException {
295     snapshotManifest.addRegion(FSUtils.getTableDir(rootDir, snapshotTable), regionInfo);
296     monitor.rethrowException();
297     status.setStatus("Completed referencing HFiles for offline region " + regionInfo.toString() +
298         " of table: " + snapshotTable);
299   }
300 
301   @Override
302   public void cancel(String why) {
303     if (finished) return;
304 
305     this.finished = true;
306     LOG.info("Stop taking snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
307         " because: " + why);
308     CancellationException ce = new CancellationException(why);
309     monitor.receive(new ForeignException(master.getServerName().toString(), ce));
310   }
311 
312   @Override
313   public boolean isFinished() {
314     return finished;
315   }
316 
317   @Override
318   public long getCompletionTimestamp() {
319     return this.status.getCompletionTimestamp();
320   }
321 
322   @Override
323   public SnapshotDescription getSnapshot() {
324     return snapshot;
325   }
326 
327   @Override
328   public ForeignException getExceptionIfFailed() {
329     return monitor.getException();
330   }
331 
332   @Override
333   public void rethrowExceptionIfFailed() throws ForeignException {
334     monitor.rethrowException();
335   }
336 
337   @Override
338   public void rethrowException() throws ForeignException {
339     monitor.rethrowException();
340   }
341 
342   @Override
343   public boolean hasException() {
344     return monitor.hasException();
345   }
346 
347   @Override
348   public ForeignException getException() {
349     return monitor.getException();
350   }
351 
352 }