View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.snapshot;
19  
20  import com.google.common.util.concurrent.ThreadFactoryBuilder;
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.Collections;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.Iterator;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.Set;
31  import java.util.concurrent.Executors;
32  import java.util.concurrent.ScheduledExecutorService;
33  import java.util.concurrent.ScheduledFuture;
34  import java.util.concurrent.ThreadPoolExecutor;
35  import java.util.concurrent.TimeUnit;
36  import java.util.concurrent.locks.ReadWriteLock;
37  import java.util.concurrent.locks.ReentrantReadWriteLock;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.fs.FSDataInputStream;
42  import org.apache.hadoop.fs.FileStatus;
43  import org.apache.hadoop.fs.FileSystem;
44  import org.apache.hadoop.fs.Path;
45  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
46  import org.apache.hadoop.hbase.HConstants;
47  import org.apache.hadoop.hbase.HTableDescriptor;
48  import org.apache.hadoop.hbase.MetaTableAccessor;
49  import org.apache.hadoop.hbase.Stoppable;
50  import org.apache.hadoop.hbase.TableName;
51  import org.apache.hadoop.hbase.classification.InterfaceAudience;
52  import org.apache.hadoop.hbase.classification.InterfaceStability;
53  import org.apache.hadoop.hbase.errorhandling.ForeignException;
54  import org.apache.hadoop.hbase.executor.ExecutorService;
55  import org.apache.hadoop.hbase.ipc.RpcServer;
56  import org.apache.hadoop.hbase.master.AssignmentManager;
57  import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
58  import org.apache.hadoop.hbase.master.MasterFileSystem;
59  import org.apache.hadoop.hbase.master.MasterServices;
60  import org.apache.hadoop.hbase.master.MetricsMaster;
61  import org.apache.hadoop.hbase.master.SnapshotSentinel;
62  import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
63  import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner;
64  import org.apache.hadoop.hbase.procedure.MasterProcedureManager;
65  import org.apache.hadoop.hbase.procedure.Procedure;
66  import org.apache.hadoop.hbase.procedure.ProcedureCoordinator;
67  import org.apache.hadoop.hbase.procedure.ProcedureCoordinatorRpcs;
68  import org.apache.hadoop.hbase.procedure.ZKProcedureCoordinatorRpcs;
69  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.NameStringPair;
70  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ProcedureDescription;
71  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
72  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type;
73  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
74  import org.apache.hadoop.hbase.quotas.QuotaExceededException;
75  import org.apache.hadoop.hbase.security.AccessDeniedException;
76  import org.apache.hadoop.hbase.security.User;
77  import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
78  import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException;
79  import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
80  import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
81  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
82  import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException;
83  import org.apache.hadoop.hbase.snapshot.SnapshotExistsException;
84  import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
85  import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
86  import org.apache.hadoop.hbase.snapshot.TablePartiallyOpenException;
87  import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException;
88  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
89  import org.apache.hadoop.hbase.util.FSUtils;
90  import org.apache.zookeeper.KeeperException;
91  
92  /**
93   * This class manages the procedure of taking and restoring snapshots. There is only one
94   * SnapshotManager for the master.
95   * <p>
96   * The class provides methods for monitoring in-progress snapshot actions.
97   * <p>
98   * Note: Currently there can only be one snapshot being taken at a time over the cluster. This is a
99   * simplification in the current implementation.
100  */
101 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
102 @InterfaceStability.Unstable
103 public class SnapshotManager extends MasterProcedureManager implements Stoppable {
104   private static final Log LOG = LogFactory.getLog(SnapshotManager.class);
105 
106   /** By default, check to see if the snapshot is complete every WAKE MILLIS (ms) */
107   private static final int SNAPSHOT_WAKE_MILLIS_DEFAULT = 500;
108 
109   /**
110    * Wait time before removing a finished sentinel from the in-progress map
111    *
112    * NOTE: This is used as a safety auto cleanup.
113    * The snapshot and restore handlers map entries are removed when a user asks if a snapshot or
114    * restore is completed. This operation is part of the HBaseAdmin snapshot/restore API flow.
115    * In case something fails on the client side and the snapshot/restore state is not reclaimed
116    * after a default timeout, the entry is removed from the in-progress map.
117    * At this point, if the user asks for the snapshot/restore status, the result will be
118    * "done" if the snapshot exists, or "failed" if it doesn't exist.
119    */
120   public static final String HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS =
121       "hbase.snapshot.sentinels.cleanup.timeoutMillis";
122   public static final long SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT = 60 * 1000L;
123 
124   /** Enable or disable snapshot support */
125   public static final String HBASE_SNAPSHOT_ENABLED = "hbase.snapshot.enabled";
126 
127   /**
128    * Conf key for # of ms elapsed between checks for snapshot errors while waiting for
129    * completion.
130    */
131   private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis";
132 
133   /** Name of the operation to use in the controller */
134   public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot";
135 
136   /** Conf key for # of threads used by the SnapshotManager thread pool */
137   public static final String SNAPSHOT_POOL_THREADS_KEY = "hbase.snapshot.master.threads";
138 
139   /** Default number of threads (concurrent operations) in the SnapshotManager thread pool */
140   public static final int SNAPSHOT_POOL_THREADS_DEFAULT = 1;
141 
142   private boolean stopped;
143   private MasterServices master;  // Needed by TableEventHandlers
144   private ProcedureCoordinator coordinator;
145 
146   // Is snapshot feature enabled?
147   private boolean isSnapshotSupported = false;
148 
149   // Snapshot handlers map, with table name as key.
150   // The map is always accessed and modified under the object lock using synchronized.
151   // snapshotTable() will insert a handler into the map.
152   // isSnapshotDone() will remove the handler requested if the operation is finished.
153   private final Map<TableName, SnapshotSentinel> snapshotHandlers =
154       new HashMap<TableName, SnapshotSentinel>();
155   private final ScheduledExecutorService scheduleThreadPool =
156         Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder()
157               .setNameFormat("SnapshotHandlerChoreCleaner").setDaemon(true).build());
158   private ScheduledFuture<?> snapshotHandlerChoreCleanerTask;
159 
160   // Restore Sentinels map, with table name as key.
161   // The map is always accessed and modified under the object lock using synchronized.
162   // restoreSnapshot()/cloneSnapshot() will insert a handler into the map.
163   // isRestoreDone() will remove the handler requested if the operation is finished.
164   private Map<TableName, SnapshotSentinel> restoreHandlers =
165       new HashMap<TableName, SnapshotSentinel>();
166 
167   private Path rootDir;
168   private ExecutorService executorService;
169 
170   /**
171    * Read-write lock between taking a snapshot and the snapshot HFile cleaner. The cleaner should
172    * skip checking the HFiles if any snapshot is in progress; otherwise it may clean an HFile
173    * which belongs to the snapshot being created. So the cleaner should grab the write lock
174    * first when it starts to work. (See HBASE-21387)
175    */
176   private ReentrantReadWriteLock takingSnapshotLock = new ReentrantReadWriteLock(true);
177 
178   public SnapshotManager() {}
179 
180   /**
181    * Fully specify all necessary components of a snapshot manager. Exposed for testing.
182    * @param master services for the master where the manager is running
183    * @param coordinator procedure coordinator instance.  exposed for testing.
184    * @param pool HBase ExecutorServcie instance, exposed for testing.
185    */
186   @InterfaceAudience.Private
187   SnapshotManager(final MasterServices master, ProcedureCoordinator coordinator,
188       ExecutorService pool, int sentinelCleanInterval)
189       throws IOException, UnsupportedOperationException {
190     this.master = master;
191 
192     this.rootDir = master.getMasterFileSystem().getRootDir();
193     Configuration conf = master.getConfiguration();
194     checkSnapshotSupport(conf, master.getMasterFileSystem());
195 
196     this.coordinator = coordinator;
197     this.executorService = pool;
198     resetTempDir();
199     initSnapshotHandlerChoreCleanerTask(sentinelCleanInterval);
200   }
201 
202   private void initSnapshotHandlerChoreCleanerTask(long sentinelCleanInterval) {
203     snapshotHandlerChoreCleanerTask = this.scheduleThreadPool.scheduleAtFixedRate(new Runnable() {
204       @Override
205       public void run() {
206         cleanupSentinels();
207       }
208     }, sentinelCleanInterval, sentinelCleanInterval, TimeUnit.SECONDS);
209   }
210 
211   /**
212    * Gets the list of all completed snapshots.
213    * @return list of SnapshotDescriptions
214    * @throws IOException File system exception
215    */
216   public List<SnapshotDescription> getCompletedSnapshots() throws IOException {
217     return getCompletedSnapshots(SnapshotDescriptionUtils.getSnapshotsDir(rootDir));
218   }
219 
220   /**
221    * Gets the list of all completed snapshots.
222    * @param snapshotDir snapshot directory
223    * @return list of SnapshotDescriptions
224    * @throws IOException File system exception
225    */
226   private List<SnapshotDescription> getCompletedSnapshots(Path snapshotDir) throws IOException {
227     List<SnapshotDescription> snapshotDescs = new ArrayList<SnapshotDescription>();
228     // first create the snapshot root path and check to see if it exists
229     FileSystem fs = master.getMasterFileSystem().getFileSystem();
230     if (snapshotDir == null) snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir);
231 
232     // if there are no snapshots, return an empty list
233     if (!fs.exists(snapshotDir)) {
234       return snapshotDescs;
235     }
236 
237     // ignore all the snapshots in progress
238     FileStatus[] snapshots = fs.listStatus(snapshotDir,
239       new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
240     MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
241     // loop through all the completed snapshots
242     for (FileStatus snapshot : snapshots) {
243       Path info = new Path(snapshot.getPath(), SnapshotDescriptionUtils.SNAPSHOTINFO_FILE);
244       // if the snapshot is bad
245       if (!fs.exists(info)) {
246         LOG.error("Snapshot information for " + snapshot.getPath() + " doesn't exist");
247         continue;
248       }
249       FSDataInputStream in = null;
250       try {
251         in = fs.open(info);
252         SnapshotDescription desc = SnapshotDescription.parseFrom(in);
253         if (cpHost != null) {
254           try {
255             cpHost.preListSnapshot(desc);
256           } catch (AccessDeniedException e) {
257             LOG.warn("Current user does not have access to " + desc.getName() + " snapshot. "
258                 + "Either you should be owner of this snapshot or admin user.");
259             // Skip this and try for next snapshot
260             continue;
261           }
262         }
263         snapshotDescs.add(desc);
264 
265         // call coproc post hook
266         if (cpHost != null) {
267           cpHost.postListSnapshot(desc);
268         }
269       } catch (IOException e) {
270         LOG.warn("Found a corrupted snapshot " + snapshot.getPath(), e);
271       } finally {
272         if (in != null) {
273           in.close();
274         }
275       }
276     }
277     return snapshotDescs;
278   }
279 
280   /**
281    * Cleans up any snapshots in the snapshot/.tmp directory that were left from failed
282    * snapshot attempts.
283    *
284    * @throws IOException if we can't reach the filesystem
285    */
286   private void resetTempDir() throws IOException {
287     // cleanup any existing snapshots.
288     Path tmpdir = SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir,
289         master.getConfiguration());
290     FileSystem tmpFs = tmpdir.getFileSystem(master.getConfiguration());
291     if (!tmpFs.delete(tmpdir, true)) {
292       LOG.warn("Couldn't delete working snapshot directory: " + tmpdir);
293     }
294   }
295 
296   /**
297    * Delete the specified snapshot
298    * @param snapshot
299    * @throws SnapshotDoesNotExistException If the specified snapshot does not exist.
300    * @throws IOException For filesystem IOExceptions
301    */
302   public void deleteSnapshot(SnapshotDescription snapshot) throws IOException {
303     // check to see if it is completed
304     if (!isSnapshotCompleted(snapshot)) {
305       throw new SnapshotDoesNotExistException(snapshot);
306     }
307 
308     String snapshotName = snapshot.getName();
309     // first create the snapshot description and check to see if it exists
310     FileSystem fs = master.getMasterFileSystem().getFileSystem();
311     Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
312     // Get snapshot info from file system. The one passed as parameter is a "fake" snapshotInfo with
313     // just the "name" and it does not contains the "real" snapshot information
314     snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
315 
316     // call coproc pre hook
317     MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
318     if (cpHost != null) {
319       cpHost.preDeleteSnapshot(snapshot);
320     }
321 
322     LOG.debug("Deleting snapshot: " + snapshotName);
323     // delete the existing snapshot
324     if (!fs.delete(snapshotDir, true)) {
325       throw new HBaseSnapshotException("Failed to delete snapshot directory: " + snapshotDir);
326     }
327 
328     // call coproc post hook
329     if (cpHost != null) {
330       cpHost.postDeleteSnapshot(snapshot);
331     }
332 
333   }
334 
335   /**
336    * Check if the specified snapshot is done
337    *
338    * @param expected
339    * @return true if snapshot is ready to be restored, false if it is still being taken.
340    * @throws IOException IOException if error from HDFS or RPC
341    * @throws UnknownSnapshotException if snapshot is invalid or does not exist.
342    */
343   public boolean isSnapshotDone(SnapshotDescription expected) throws IOException {
344     // check the request to make sure it has a snapshot
345     if (expected == null) {
346       throw new UnknownSnapshotException(
347          "No snapshot name passed in request, can't figure out which snapshot you want to check.");
348     }
349 
350     String ssString = ClientSnapshotDescriptionUtils.toString(expected);
351 
352     // check to see if the sentinel exists,
353     // and if the task is complete removes it from the in-progress snapshots map.
354     SnapshotSentinel handler = removeSentinelIfFinished(this.snapshotHandlers, expected);
355 
356     // stop tracking "abandoned" handlers
357     cleanupSentinels();
358 
359     if (handler == null) {
360       // If there's no handler in the in-progress map, it means one of the following:
361       //   - someone has already requested the snapshot state
362       //   - the requested snapshot was completed long time ago (cleanupSentinels() timeout)
363       //   - the snapshot was never requested
364       // In those cases returns to the user the "done state" if the snapshots exists on disk,
365       // otherwise raise an exception saying that the snapshot is not running and doesn't exist.
366       if (!isSnapshotCompleted(expected)) {
367         throw new UnknownSnapshotException("Snapshot " + ssString
368             + " is not currently running or one of the known completed snapshots.");
369       }
370       // was done, return true;
371       return true;
372     }
373 
374     // pass on any failure we find in the sentinel
375     try {
376       handler.rethrowExceptionIfFailed();
377     } catch (ForeignException e) {
378       // Give some procedure info on an exception.
379       String status;
380       Procedure p = coordinator.getProcedure(expected.getName());
381       if (p != null) {
382         status = p.getStatus();
383       } else {
384         status = expected.getName() + " not found in proclist " + coordinator.getProcedureNames();
385       }
386       throw new HBaseSnapshotException("Snapshot " + ssString +  " had an error.  " + status, e,
387           expected);
388     }
389 
390     // check to see if we are done
391     if (handler.isFinished()) {
392       LOG.debug("Snapshot '" + ssString + "' has completed, notifying client.");
393       return true;
394     } else if (LOG.isDebugEnabled()) {
395       LOG.debug("Snapshoting '" + ssString + "' is still in progress!");
396     }
397     return false;
398   }
399 
400   /**
401    * Check to see if there is a snapshot in progress with the same name or on the same table.
402    * Currently we have a limitation only allowing a single snapshot per table at a time. Also we
403    * don't allow snapshot with the same name.
404    * @param snapshot description of the snapshot being checked.
405    * @return <tt>true</tt> if there is a snapshot in progress with the same name or on the same
406    *         table.
407    */
408   synchronized boolean isTakingSnapshot(final SnapshotDescription snapshot) {
409     TableName snapshotTable = TableName.valueOf(snapshot.getTable());
410     if (isTakingSnapshot(snapshotTable)) {
411       return true;
412     }
413     Iterator<Map.Entry<TableName, SnapshotSentinel>> it = this.snapshotHandlers.entrySet().iterator();
414     while (it.hasNext()) {
415       Map.Entry<TableName, SnapshotSentinel> entry = it.next();
416       SnapshotSentinel sentinel = entry.getValue();
417       if (snapshot.getName().equals(sentinel.getSnapshot().getName()) && !sentinel.isFinished()) {
418         return true;
419       }
420     }
421     return false;
422   }
423 
424   /**
425    * Check to see if the specified table has a snapshot in progress.  Currently we have a
426    * limitation only allowing a single snapshot per table at a time.
427    * @param tableName name of the table being snapshotted.
428    * @return <tt>true</tt> if there is a snapshot in progress on the specified table.
429    */
430   synchronized boolean isTakingSnapshot(final TableName tableName) {
431     SnapshotSentinel handler = this.snapshotHandlers.get(tableName);
432     return handler != null && !handler.isFinished();
433   }
434 
435   /**
436    * Check to make sure that we are OK to run the passed snapshot. Checks to make sure that we
437    * aren't already running a snapshot or restore on the requested table.
438    * @param snapshot description of the snapshot we want to start
439    * @throws HBaseSnapshotException if the filesystem could not be prepared to start the snapshot
440    */
441   private synchronized void prepareToTakeSnapshot(SnapshotDescription snapshot)
442       throws HBaseSnapshotException {
443     Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir,
444         master.getConfiguration());
445     TableName snapshotTable =
446         TableName.valueOf(snapshot.getTable());
447 
448     // make sure we aren't already running a snapshot
449     if (isTakingSnapshot(snapshot)) {
450       SnapshotSentinel handler = this.snapshotHandlers.get(snapshotTable);
451       throw new SnapshotCreationException("Rejected taking "
452           + ClientSnapshotDescriptionUtils.toString(snapshot)
453           + " because we are already running another snapshot "
454           + (handler != null ? ("on the same table " +
455               ClientSnapshotDescriptionUtils.toString(handler.getSnapshot()))
456               : "with the same name"), snapshot);
457     }
458 
459     // make sure we aren't running a restore on the same table
460     if (isRestoringTable(snapshotTable)) {
461       SnapshotSentinel handler = restoreHandlers.get(snapshotTable);
462       throw new SnapshotCreationException("Rejected taking "
463           + ClientSnapshotDescriptionUtils.toString(snapshot)
464           + " because we are already have a restore in progress on the same snapshot "
465           + ClientSnapshotDescriptionUtils.toString(handler.getSnapshot()), snapshot);
466     }
467 
468     try {
469       FileSystem workingDirFS = workingDir.getFileSystem(master.getConfiguration());
470       // delete the working directory, since we aren't running the snapshot. Likely leftovers
471       // from a failed attempt.
472       workingDirFS.delete(workingDir, true);
473 
474       // recreate the working directory for the snapshot
475       if (!workingDirFS.mkdirs(workingDir)) {
476         throw new SnapshotCreationException("Couldn't create working directory (" + workingDir
477             + ") for snapshot" , snapshot);
478       }
479     } catch (HBaseSnapshotException e) {
480       throw e;
481     } catch (IOException e) {
482       throw new SnapshotCreationException(
483           "Exception while checking to see if snapshot could be started.", e, snapshot);
484     }
485   }
486 
487   /**
488    * Take a snapshot of a disabled table.
489    * @param snapshot description of the snapshot to take. Modified to be {@link Type#DISABLED}.
490    * @throws IOException if the snapshot could not be started or filesystem for snapshot
491    *         temporary directory could not be determined
492    */
493   private synchronized void snapshotDisabledTable(SnapshotDescription snapshot)
494       throws IOException {
495     // setup the snapshot
496     prepareToTakeSnapshot(snapshot);
497 
498     // set the snapshot to be a disabled snapshot, since the client doesn't know about that
499     snapshot = snapshot.toBuilder().setType(Type.DISABLED).build();
500 
501     // Take the snapshot of the disabled table
502     DisabledTableSnapshotHandler handler =
503         new DisabledTableSnapshotHandler(snapshot, master, this);
504     snapshotTable(snapshot, handler);
505   }
506 
507   /**
508    * Take a snapshot of an enabled table.
509    * @param snapshot description of the snapshot to take.
510    * @throws IOException if the snapshot could not be started or filesystem for snapshot
511    *         temporary directory could not be determined
512    */
513   private synchronized void snapshotEnabledTable(SnapshotDescription snapshot)
514           throws IOException {
515     // setup the snapshot
516     prepareToTakeSnapshot(snapshot);
517 
518     // Take the snapshot of the enabled table
519     EnabledTableSnapshotHandler handler =
520         new EnabledTableSnapshotHandler(snapshot, master, this);
521     snapshotTable(snapshot, handler);
522   }
523 
524   /**
525    * Take a snapshot using the specified handler.
526    * On failure the snapshot temporary working directory is removed.
527    * NOTE: prepareToTakeSnapshot() called before this one takes care of the rejecting the
528    *       snapshot request if the table is busy with another snapshot/restore operation.
529    * @param snapshot the snapshot description
530    * @param handler the snapshot handler
531    */
532   private synchronized void snapshotTable(SnapshotDescription snapshot,
533       final TakeSnapshotHandler handler) throws IOException {
534     try {
535       handler.prepare();
536       this.executorService.submit(handler);
537       this.snapshotHandlers.put(TableName.valueOf(snapshot.getTable()), handler);
538     } catch (Exception e) {
539       // cleanup the working directory by trying to delete it from the fs.
540       Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir,
541           master.getConfiguration());
542       FileSystem workingDirFs = workingDir.getFileSystem(master.getConfiguration());
543       try {
544         if (!workingDirFs.delete(workingDir, true)) {
545           LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" +
546               ClientSnapshotDescriptionUtils.toString(snapshot));
547         }
548       } catch (IOException e1) {
549         LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" +
550             ClientSnapshotDescriptionUtils.toString(snapshot));
551       }
552       // fail the snapshot
553       throw new SnapshotCreationException("Could not build snapshot handler", e, snapshot);
554     }
555   }
556 
557   /**
558    * Take a snapshot based on the enabled/disabled state of the table.
559    * @param snapshot
560    * @throws HBaseSnapshotException when a snapshot specific exception occurs.
561    * @throws IOException when some sort of generic IO exception occurs.
562    */
563   public void takeSnapshot(SnapshotDescription snapshot) throws IOException {
564     this.takingSnapshotLock.readLock().lock();
565     try {
566       takeSnapshotInternal(snapshot);
567     } finally {
568       this.takingSnapshotLock.readLock().unlock();
569     }
570   }
571 
572   private void takeSnapshotInternal(SnapshotDescription snapshot) throws IOException {
573     // check to see if we already completed the snapshot
574     if (isSnapshotCompleted(snapshot)) {
575       throw new SnapshotExistsException("Snapshot '" + snapshot.getName()
576           + "' already stored on the filesystem.", snapshot);
577     }
578 
579     LOG.debug("No existing snapshot, attempting snapshot...");
580 
581     // stop tracking "abandoned" handlers
582     cleanupSentinels();
583 
584     // check to see if the table exists
585     HTableDescriptor desc = null;
586     try {
587       desc = master.getTableDescriptors().get(
588           TableName.valueOf(snapshot.getTable()));
589     } catch (FileNotFoundException e) {
590       String msg = "Table:" + snapshot.getTable() + " info doesn't exist!";
591       LOG.error(msg);
592       throw new SnapshotCreationException(msg, e, snapshot);
593     } catch (IOException e) {
594       throw new SnapshotCreationException("Error while geting table description for table "
595           + snapshot.getTable(), e, snapshot);
596     }
597     if (desc == null) {
598       throw new SnapshotCreationException("Table '" + snapshot.getTable()
599           + "' doesn't exist, can't take snapshot.", snapshot);
600     }
601     SnapshotDescription.Builder builder = snapshot.toBuilder();
602     // if not specified, set the snapshot format
603     if (!snapshot.hasVersion()) {
604       builder.setVersion(SnapshotDescriptionUtils.SNAPSHOT_LAYOUT_VERSION);
605     }
606     User user = RpcServer.getRequestUser();
607     if (master.getConfiguration().
608       getBoolean(User.HBASE_SECURITY_AUTHORIZATION_CONF_KEY, false) && user != null) {
609       builder.setOwner(user.getShortName());
610     }
611     snapshot = builder.build();
612 
613     // call pre coproc hook
614     MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
615     if (cpHost != null) {
616       cpHost.preSnapshot(snapshot, desc);
617     }
618 
619     // if the table is enabled, then have the RS run actually the snapshot work
620     TableName snapshotTable = TableName.valueOf(snapshot.getTable());
621     AssignmentManager assignmentMgr = master.getAssignmentManager();
622     if (assignmentMgr.getTableStateManager().isTableState(snapshotTable,
623         ZooKeeperProtos.Table.State.ENABLED)) {
624       if (LOG.isDebugEnabled()) {
625         LOG.debug("Table enabled, starting distributed snapshot for "
626             + ClientSnapshotDescriptionUtils.toString(snapshot));
627       }
628       snapshotEnabledTable(snapshot);
629       if (LOG.isDebugEnabled()) {
630         LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
631       }
632     }
633     // For disabled table, snapshot is created by the master
634     else if (assignmentMgr.getTableStateManager().isTableState(snapshotTable,
635         ZooKeeperProtos.Table.State.DISABLED)) {
636       if (LOG.isDebugEnabled()) {
637         LOG.debug("Table is disabled, running snapshot entirely on master "
638             + ClientSnapshotDescriptionUtils.toString(snapshot));
639       }
640       snapshotDisabledTable(snapshot);
641       if (LOG.isDebugEnabled()) {
642         LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
643       }
644     } else {
645       LOG.error("Can't snapshot table '" + snapshot.getTable()
646           + "', isn't open or closed, we don't know what to do!");
647       TablePartiallyOpenException tpoe = new TablePartiallyOpenException(snapshot.getTable()
648           + " isn't fully open.");
649       throw new SnapshotCreationException("Table is not entirely open or closed", tpoe, snapshot);
650     }
651 
652     // call post coproc hook
653     if (cpHost != null) {
654       cpHost.postSnapshot(snapshot, desc);
655     }
656   }
657 
658   public ReadWriteLock getTakingSnapshotLock() {
659     return this.takingSnapshotLock;
660   }
661 
662   /**
663    * The snapshot operation processing as following: <br>
664    * 1. Create a Snapshot Handler, and do some initialization; <br>
665    * 2. Put the handler into snapshotHandlers <br>
666    * So when we consider if any snapshot is taking, we should consider both the takingSnapshotLock
667    * and snapshotHandlers;
668    * @return true to indicate that there're some running snapshots.
669    */
670   public synchronized boolean isTakingAnySnapshot() {
671     return this.takingSnapshotLock.getReadHoldCount() > 0 || this.snapshotHandlers.size() > 0;
672   }
673 
674   /**
675    * Set the handler for the current snapshot
676    * <p>
677    * Exposed for TESTING
678    * @param tableName
679    * @param handler handler the master should use
680    *
681    * TODO get rid of this if possible, repackaging, modify tests.
682    */
683   public synchronized void setSnapshotHandlerForTesting(
684       final TableName tableName,
685       final SnapshotSentinel handler) {
686     if (handler != null) {
687       this.snapshotHandlers.put(tableName, handler);
688     } else {
689       this.snapshotHandlers.remove(tableName);
690     }
691   }
692 
693   /**
694    * @return distributed commit coordinator for all running snapshots
695    */
696   ProcedureCoordinator getCoordinator() {
697     return coordinator;
698   }
699 
700   /**
701    * Check to see if the snapshot is one of the currently completed snapshots
702    * Returns true if the snapshot exists in the "completed snapshots folder".
703    *
704    * @param snapshot expected snapshot to check
705    * @return <tt>true</tt> if the snapshot is stored on the {@link FileSystem}, <tt>false</tt> if is
706    *         not stored
707    * @throws IOException if the filesystem throws an unexpected exception,
708    * @throws IllegalArgumentException if snapshot name is invalid.
709    */
710   private boolean isSnapshotCompleted(SnapshotDescription snapshot) throws IOException {
711     try {
712       final Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
713       FileSystem fs = master.getMasterFileSystem().getFileSystem();
714       // check to see if the snapshot already exists
715       return fs.exists(snapshotDir);
716     } catch (IllegalArgumentException iae) {
717       throw new UnknownSnapshotException("Unexpected exception thrown", iae);
718     }
719   }
720 
721   /**
722    * Clone the specified snapshot into a new table.
723    * The operation will fail if the destination table has a snapshot or restore in progress.
724    *
725    * @param snapshot Snapshot Descriptor
726    * @param hTableDescriptor Table Descriptor of the table to create
727    */
728   synchronized void cloneSnapshot(final SnapshotDescription snapshot,
729       final HTableDescriptor hTableDescriptor, final boolean restoreAcl)
730       throws HBaseSnapshotException {
731     TableName tableName = hTableDescriptor.getTableName();
732 
733     // make sure we aren't running a snapshot on the same table
734     if (isTakingSnapshot(tableName)) {
735       throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
736     }
737 
738     // make sure we aren't running a restore on the same table
739     if (isRestoringTable(tableName)) {
740       throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
741     }
742 
743     try {
744       CloneSnapshotHandler handler =
745           new CloneSnapshotHandler(master, snapshot, hTableDescriptor, restoreAcl).prepare();
746       this.executorService.submit(handler);
747       this.restoreHandlers.put(tableName, handler);
748     } catch (Exception e) {
749       String msg = "Couldn't clone the snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
750         " on table=" + tableName;
751       LOG.error(msg, e);
752       throw new RestoreSnapshotException(msg, e);
753     }
754   }
755 
756   /**
757    * Restore the specified snapshot
758    * @param reqSnapshot the snapshot to restore
759    * @throws IOException
760    */
761   @Deprecated
762   public void restoreSnapshot(SnapshotDescription reqSnapshot) throws IOException {
763     restoreSnapshot(reqSnapshot, false);
764   }
765 
766   /**
767    * Restore the specified snapshot
768    * @param reqSnapshot the snapshot to restore
769    * @param restoreAcl whether or not to restore ACLs on the snapshot
770    * @throws IOException
771    */
772   public void restoreSnapshot(SnapshotDescription reqSnapshot, boolean restoreAcl)
773       throws IOException {
774     FileSystem fs = master.getMasterFileSystem().getFileSystem();
775     Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(reqSnapshot, rootDir);
776     MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
777 
778     // check if the snapshot exists
779     if (!fs.exists(snapshotDir)) {
780       LOG.error("A Snapshot named '" + reqSnapshot.getName() + "' does not exist.");
781       throw new SnapshotDoesNotExistException(reqSnapshot);
782     }
783 
784     // Get snapshot info from file system. The reqSnapshot is a "fake" snapshotInfo with
785     // just the snapshot "name" and table name to restore. It does not contains the "real" snapshot
786     // information.
787     SnapshotDescription snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
788     SnapshotManifest manifest = SnapshotManifest.open(master.getConfiguration(), fs,
789         snapshotDir, snapshot);
790     HTableDescriptor snapshotTableDesc = manifest.getTableDescriptor();
791     TableName tableName = TableName.valueOf(reqSnapshot.getTable());
792 
793     // stop tracking "abandoned" handlers
794     cleanupSentinels();
795 
796     // Verify snapshot validity
797     SnapshotReferenceUtil.verifySnapshot(master.getConfiguration(), fs, manifest);
798 
799     // Execute the restore/clone operation
800     if (MetaTableAccessor.tableExists(master.getConnection(), tableName)) {
801       if (master.getAssignmentManager().getTableStateManager().isTableState(
802           TableName.valueOf(snapshot.getTable()), ZooKeeperProtos.Table.State.ENABLED)) {
803         throw new UnsupportedOperationException("Table '" +
804             TableName.valueOf(snapshot.getTable()) + "' must be disabled in order to " +
805             "perform a restore operation" +
806             ".");
807       }
808 
809       // call coproc pre hook
810       if (cpHost != null) {
811         cpHost.preRestoreSnapshot(snapshot, snapshotTableDesc);
812       }
813 
814       int tableRegionCount = -1;
815       try {
816         // Table already exist. Check and update the region quota for this table namespace.
817         // The region quota may not be updated correctly if there are concurrent restore snapshot
818         // requests for the same table
819 
820         tableRegionCount = getRegionCountOfTable(tableName);
821         int snapshotRegionCount = manifest.getRegionManifestsMap().size();
822 
823         // Update region quota when snapshotRegionCount is larger. If we updated the region count
824         // to a smaller value before retoreSnapshot and the retoreSnapshot fails, we may fail to
825         // reset the region count to its original value if the region quota is consumed by other
826         // tables in the namespace
827         if (tableRegionCount > 0 && tableRegionCount < snapshotRegionCount) {
828           checkAndUpdateNamespaceRegionQuota(snapshotRegionCount, tableName);
829         }
830         restoreSnapshot(snapshot, snapshotTableDesc, restoreAcl);
831         // Update the region quota if snapshotRegionCount is smaller. This step should not fail
832         // because we have reserved enough region quota before hand
833         if (tableRegionCount > 0 && tableRegionCount > snapshotRegionCount) {
834           checkAndUpdateNamespaceRegionQuota(snapshotRegionCount, tableName);
835         }
836       } catch (QuotaExceededException e) {
837         LOG.error("Region quota exceeded while restoring the snapshot " + snapshot.getName()
838           + " as table " + tableName.getNameAsString(), e);
839         // If QEE is thrown before restoreSnapshot, quota information is not updated, so we
840         // should throw the exception directly. If QEE is thrown after restoreSnapshot, there
841         // must be unexpected reasons, we also throw the exception directly
842         throw e;
843       } catch (IOException e) {
844         if (tableRegionCount > 0) {
845           // reset the region count for table
846           checkAndUpdateNamespaceRegionQuota(tableRegionCount, tableName);
847         }
848         LOG.error("Exception occurred while restoring the snapshot " + snapshot.getName()
849             + " as table " + tableName.getNameAsString(), e);
850         throw e;
851       }
852       LOG.info("Restore snapshot=" + snapshot.getName() + " as table=" + tableName);
853 
854       if (cpHost != null) {
855         cpHost.postRestoreSnapshot(snapshot, snapshotTableDesc);
856       }
857     } else {
858       HTableDescriptor htd = new HTableDescriptor(tableName, snapshotTableDesc);
859       if (cpHost != null) {
860         cpHost.preCloneSnapshot(snapshot, htd);
861       }
862       try {
863         checkAndUpdateNamespaceQuota(manifest, tableName);
864         cloneSnapshot(snapshot, htd, restoreAcl);
865       } catch (IOException e) {
866         this.master.getMasterQuotaManager().removeTableFromNamespaceQuota(tableName);
867         LOG.error("Exception occurred while cloning the snapshot " + snapshot.getName()
868             + " as table " + tableName.getNameAsString(), e);
869         throw e;
870       }
871       LOG.info("Clone snapshot=" + snapshot.getName() + " as table=" + tableName);
872 
873       if (cpHost != null) {
874         cpHost.postCloneSnapshot(snapshot, htd);
875       }
876     }
877   }
878 
879   private void checkAndUpdateNamespaceQuota(SnapshotManifest manifest, TableName tableName)
880       throws IOException {
881     if (this.master.getMasterQuotaManager().isQuotaInitialized()) {
882       this.master.getMasterQuotaManager().checkNamespaceTableAndRegionQuota(tableName,
883         manifest.getRegionManifestsMap().size());
884     }
885   }
886 
887   private void checkAndUpdateNamespaceRegionQuota(int updatedRegionCount, TableName tableName)
888       throws IOException {
889     if (this.master.getMasterQuotaManager().isQuotaInitialized()) {
890       this.master.getMasterQuotaManager().checkAndUpdateNamespaceRegionQuota(tableName,
891         updatedRegionCount);
892     }
893   }
894 
895   /**
896    * @return cached region count, or -1 if quota manager is disabled or table status not found
897   */
898   private int getRegionCountOfTable(TableName tableName) throws IOException {
899     if (this.master.getMasterQuotaManager().isQuotaInitialized()) {
900       return this.master.getMasterQuotaManager().getRegionCountOfTable(tableName);
901     }
902     return -1;
903   }
904 
905   /**
906    * Restore the specified snapshot.
907    * The restore will fail if the destination table has a snapshot or restore in progress.
908    *
909    * @param snapshot Snapshot Descriptor
910    * @param hTableDescriptor Table Descriptor
911    */
912   private synchronized void restoreSnapshot(final SnapshotDescription snapshot,
913       final HTableDescriptor hTableDescriptor, final boolean restoreAcl)
914       throws HBaseSnapshotException {
915     TableName tableName = hTableDescriptor.getTableName();
916 
917     // make sure we aren't running a snapshot on the same table
918     if (isTakingSnapshot(tableName)) {
919       throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
920     }
921 
922     // make sure we aren't running a restore on the same table
923     if (isRestoringTable(tableName)) {
924       throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
925     }
926 
927     try {
928       RestoreSnapshotHandler handler =
929         new RestoreSnapshotHandler(master, snapshot, hTableDescriptor, restoreAcl).prepare();
930       this.executorService.submit(handler);
931       restoreHandlers.put(tableName, handler);
932     } catch (Exception e) {
933       String msg = "Couldn't restore the snapshot=" + ClientSnapshotDescriptionUtils.toString(
934           snapshot)  +
935           " on table=" + tableName;
936       LOG.error(msg, e);
937       throw new RestoreSnapshotException(msg, e);
938     }
939   }
940 
941   /**
942    * Verify if the restore of the specified table is in progress.
943    *
944    * @param tableName table under restore
945    * @return <tt>true</tt> if there is a restore in progress of the specified table.
946    */
947   private synchronized boolean isRestoringTable(final TableName tableName) {
948     SnapshotSentinel sentinel = this.restoreHandlers.get(tableName);
949     return(sentinel != null && !sentinel.isFinished());
950   }
951 
952   /**
953    * Returns the status of a restore operation.
954    * If the in-progress restore is failed throws the exception that caused the failure.
955    *
956    * @param snapshot
957    * @return false if in progress, true if restore is completed or not requested.
958    * @throws IOException if there was a failure during the restore
959    */
960   public boolean isRestoreDone(final SnapshotDescription snapshot) throws IOException {
961     // check to see if the sentinel exists,
962     // and if the task is complete removes it from the in-progress restore map.
963     SnapshotSentinel sentinel = removeSentinelIfFinished(this.restoreHandlers, snapshot);
964 
965     // stop tracking "abandoned" handlers
966     cleanupSentinels();
967 
968     if (sentinel == null) {
969       // there is no sentinel so restore is not in progress.
970       return true;
971     }
972 
973     LOG.debug("Verify snapshot=" + snapshot.getName() + " against="
974         + sentinel.getSnapshot().getName() + " table=" +
975         TableName.valueOf(snapshot.getTable()));
976 
977     // If the restore is failed, rethrow the exception
978     sentinel.rethrowExceptionIfFailed();
979 
980     // check to see if we are done
981     if (sentinel.isFinished()) {
982       LOG.debug("Restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
983           " has completed. Notifying the client.");
984       return true;
985     }
986 
987     if (LOG.isDebugEnabled()) {
988       LOG.debug("Sentinel is not yet finished with restoring snapshot=" +
989           ClientSnapshotDescriptionUtils.toString(snapshot));
990     }
991     return false;
992   }
993 
994   /**
995    * Return the handler if it is currently live and has the same snapshot target name.
996    * The handler is removed from the sentinels map if completed.
997    * @param sentinels live handlers
998    * @param snapshot snapshot description
999    * @return null if doesn't match, else a live handler.
1000    */
1001   private synchronized SnapshotSentinel removeSentinelIfFinished(
1002       final Map<TableName, SnapshotSentinel> sentinels,
1003       final SnapshotDescription snapshot) {
1004     if (!snapshot.hasTable()) {
1005       return null;
1006     }
1007 
1008     TableName snapshotTable = TableName.valueOf(snapshot.getTable());
1009     SnapshotSentinel h = sentinels.get(snapshotTable);
1010     if (h == null) {
1011       return null;
1012     }
1013 
1014     if (!h.getSnapshot().getName().equals(snapshot.getName())) {
1015       // specified snapshot is to the one currently running
1016       return null;
1017     }
1018 
1019     // Remove from the "in-progress" list once completed
1020     if (h.isFinished()) {
1021       sentinels.remove(snapshotTable);
1022     }
1023 
1024     return h;
1025   }
1026 
1027   /**
1028    * Removes "abandoned" snapshot/restore requests.
1029    * As part of the HBaseAdmin snapshot/restore API the operation status is checked until completed,
1030    * and the in-progress maps are cleaned up when the status of a completed task is requested.
1031    * To avoid having sentinels staying around for long time if something client side is failed,
1032    * each operation tries to clean up the in-progress maps sentinels finished from a long time.
1033    */
1034   private void cleanupSentinels() {
1035     cleanupSentinels(this.snapshotHandlers);
1036     cleanupSentinels(this.restoreHandlers);
1037   }
1038 
1039   /**
1040    * Remove the sentinels that are marked as finished and the completion time
1041    * has exceeded the removal timeout.
1042    * @param sentinels map of sentinels to clean
1043    */
1044   private synchronized void cleanupSentinels(final Map<TableName, SnapshotSentinel> sentinels) {
1045     long currentTime = EnvironmentEdgeManager.currentTime();
1046     long sentinelsCleanupTimeoutMillis =
1047         master.getConfiguration().getLong(HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS,
1048           SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT);
1049     Iterator<Map.Entry<TableName, SnapshotSentinel>> it = sentinels.entrySet().iterator();
1050     while (it.hasNext()) {
1051       Map.Entry<TableName, SnapshotSentinel> entry = it.next();
1052       SnapshotSentinel sentinel = entry.getValue();
1053       if (sentinel.isFinished()
1054           && (currentTime - sentinel.getCompletionTimestamp()) > sentinelsCleanupTimeoutMillis) {
1055         it.remove();
1056       }
1057     }
1058   }
1059 
1060   //
1061   // Implementing Stoppable interface
1062   //
1063 
1064   @Override
1065   public void stop(String why) {
1066     // short circuit
1067     if (this.stopped) return;
1068     // make sure we get stop
1069     this.stopped = true;
1070     // pass the stop onto take snapshot handlers
1071     for (SnapshotSentinel snapshotHandler: this.snapshotHandlers.values()) {
1072       snapshotHandler.cancel(why);
1073     }
1074     if (snapshotHandlerChoreCleanerTask != null) {
1075       snapshotHandlerChoreCleanerTask.cancel(true);
1076     }
1077     // pass the stop onto all the restore handlers
1078     for (SnapshotSentinel restoreHandler: this.restoreHandlers.values()) {
1079       restoreHandler.cancel(why);
1080     }
1081     try {
1082       if (coordinator != null) {
1083         coordinator.close();
1084       }
1085     } catch (IOException e) {
1086       LOG.error("stop ProcedureCoordinator error", e);
1087     }
1088   }
1089 
1090   @Override
1091   public boolean isStopped() {
1092     return this.stopped;
1093   }
1094 
1095   /**
1096    * Throws an exception if snapshot operations (take a snapshot, restore, clone) are not supported.
1097    * Called at the beginning of snapshot() and restoreSnapshot() methods.
1098    * @throws UnsupportedOperationException if snapshot are not supported
1099    */
1100   public void checkSnapshotSupport() throws UnsupportedOperationException {
1101     if (!this.isSnapshotSupported) {
1102       throw new UnsupportedOperationException(
1103         "To use snapshots, You must add to the hbase-site.xml of the HBase Master: '" +
1104           HBASE_SNAPSHOT_ENABLED + "' property with value 'true'.");
1105     }
1106   }
1107 
1108   /**
1109    * Called at startup, to verify if snapshot operation is supported, and to avoid
1110    * starting the master if there're snapshots present but the cleaners needed are missing.
1111    * Otherwise we can end up with snapshot data loss.
1112    * @param conf The {@link Configuration} object to use
1113    * @param mfs The MasterFileSystem to use
1114    * @throws IOException in case of file-system operation failure
1115    * @throws UnsupportedOperationException in case cleaners are missing and
1116    *         there're snapshot in the system
1117    */
1118   private void checkSnapshotSupport(final Configuration conf, final MasterFileSystem mfs)
1119       throws IOException, UnsupportedOperationException {
1120     // Verify if snapshot is disabled by the user
1121     String enabled = conf.get(HBASE_SNAPSHOT_ENABLED);
1122     boolean snapshotEnabled = conf.getBoolean(HBASE_SNAPSHOT_ENABLED, false);
1123     boolean userDisabled = (enabled != null && enabled.trim().length() > 0 && !snapshotEnabled);
1124 
1125     // Extract cleaners from conf
1126     Set<String> hfileCleaners = new HashSet<String>();
1127     String[] cleaners = conf.getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
1128     if (cleaners != null) Collections.addAll(hfileCleaners, cleaners);
1129 
1130     Set<String> logCleaners = new HashSet<String>();
1131     cleaners = conf.getStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS);
1132     if (cleaners != null) Collections.addAll(logCleaners, cleaners);
1133 
1134     // check if an older version of snapshot directory was present
1135     Path oldSnapshotDir = new Path(mfs.getRootDir(), HConstants.OLD_SNAPSHOT_DIR_NAME);
1136     FileSystem fs = mfs.getFileSystem();
1137     List<SnapshotDescription> ss = getCompletedSnapshots(new Path(rootDir, oldSnapshotDir));
1138     if (ss != null && !ss.isEmpty()) {
1139       LOG.error("Snapshots from an earlier release were found under: " + oldSnapshotDir);
1140       LOG.error("Please rename the directory as " + HConstants.SNAPSHOT_DIR_NAME);
1141     }
1142 
1143     // If the user has enabled the snapshot, we force the cleaners to be present
1144     // otherwise we still need to check if cleaners are enabled or not and verify
1145     // that there're no snapshot in the .snapshot folder.
1146     if (snapshotEnabled) {
1147       // Inject snapshot cleaners, if snapshot.enable is true
1148       hfileCleaners.add(SnapshotHFileCleaner.class.getName());
1149       hfileCleaners.add(HFileLinkCleaner.class.getName());
1150 
1151       // Set cleaners conf
1152       conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
1153         hfileCleaners.toArray(new String[hfileCleaners.size()]));
1154       conf.setStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS,
1155         logCleaners.toArray(new String[logCleaners.size()]));
1156     } else {
1157       // Verify if cleaners are present
1158       snapshotEnabled =
1159         hfileCleaners.contains(SnapshotHFileCleaner.class.getName()) &&
1160         hfileCleaners.contains(HFileLinkCleaner.class.getName());
1161 
1162       // Warn if the cleaners are enabled but the snapshot.enabled property is false/not set.
1163       if (snapshotEnabled) {
1164         LOG.warn("Snapshot log and hfile cleaners are present in the configuration, " +
1165           "but the '" + HBASE_SNAPSHOT_ENABLED + "' property " +
1166           (userDisabled ? "is set to 'false'." : "is not set."));
1167       }
1168     }
1169 
1170     // Mark snapshot feature as enabled if cleaners are present and user has not disabled it.
1171     this.isSnapshotSupported = snapshotEnabled && !userDisabled;
1172 
1173     // If cleaners are not enabled, verify that there're no snapshot in the .snapshot folder
1174     // otherwise we end up with snapshot data loss.
1175     if (!snapshotEnabled) {
1176       LOG.info("Snapshot feature is not enabled, missing log and hfile cleaners.");
1177       Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(mfs.getRootDir());
1178       if (fs.exists(snapshotDir)) {
1179         FileStatus[] snapshots = FSUtils.listStatus(fs, snapshotDir,
1180           new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
1181         if (snapshots != null) {
1182           LOG.error("Snapshots are present, but cleaners are not enabled.");
1183           checkSnapshotSupport();
1184         }
1185       }
1186     }
1187   }
1188 
1189   @Override
1190   public void initialize(MasterServices master, MetricsMaster metricsMaster) throws KeeperException,
1191       IOException, UnsupportedOperationException {
1192     this.master = master;
1193 
1194     this.rootDir = master.getMasterFileSystem().getRootDir();
1195     checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem());
1196 
1197     // get the configuration for the coordinator
1198     Configuration conf = master.getConfiguration();
1199     long wakeFrequency = conf.getInt(SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT);
1200     long timeoutMillis = Math.max(conf.getLong(SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_KEY,
1201                     SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_DEFAULT),
1202             conf.getLong(SnapshotDescriptionUtils.MASTER_SNAPSHOT_TIMEOUT_MILLIS,
1203                     SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME));
1204     int opThreads = conf.getInt(SNAPSHOT_POOL_THREADS_KEY, SNAPSHOT_POOL_THREADS_DEFAULT);
1205 
1206     // setup the default procedure coordinator
1207     String name = master.getServerName().toString();
1208     ThreadPoolExecutor tpool = ProcedureCoordinator.defaultPool(name, opThreads);
1209     ProcedureCoordinatorRpcs comms = new ZKProcedureCoordinatorRpcs(
1210         master.getZooKeeper(), SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, name);
1211 
1212     this.coordinator = new ProcedureCoordinator(comms, tpool, timeoutMillis, wakeFrequency);
1213     this.executorService = master.getExecutorService();
1214     resetTempDir();
1215     initSnapshotHandlerChoreCleanerTask(10);
1216   }
1217 
1218   @Override
1219   public String getProcedureSignature() {
1220     return ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION;
1221   }
1222 
1223   @Override
1224   public void execProcedure(ProcedureDescription desc) throws IOException {
1225     takeSnapshot(toSnapshotDescription(desc));
1226   }
1227 
1228   @Override
1229   public boolean isProcedureDone(ProcedureDescription desc) throws IOException {
1230     return isSnapshotDone(toSnapshotDescription(desc));
1231   }
1232 
1233   private SnapshotDescription toSnapshotDescription(ProcedureDescription desc)
1234       throws IOException {
1235     SnapshotDescription.Builder builder = SnapshotDescription.newBuilder();
1236     if (!desc.hasInstance()) {
1237       throw new IOException("Snapshot name is not defined: " + desc.toString());
1238     }
1239     String snapshotName = desc.getInstance();
1240     List<NameStringPair> props = desc.getConfigurationList();
1241     String table = null;
1242     for (NameStringPair prop : props) {
1243       if ("table".equalsIgnoreCase(prop.getName())) {
1244         table = prop.getValue();
1245       }
1246     }
1247     if (table == null) {
1248       throw new IOException("Snapshot table is not defined: " + desc.toString());
1249     }
1250     TableName tableName = TableName.valueOf(table);
1251     builder.setTable(tableName.getNameAsString());
1252     builder.setName(snapshotName);
1253     builder.setType(SnapshotDescription.Type.FLUSH);
1254     return builder.build();
1255   }
1256 }