1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.snapshot;
19
20 import com.google.common.util.concurrent.ThreadFactoryBuilder;
21 import java.io.FileNotFoundException;
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Collections;
25 import java.util.HashMap;
26 import java.util.HashSet;
27 import java.util.Iterator;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.Set;
31 import java.util.concurrent.Executors;
32 import java.util.concurrent.ScheduledExecutorService;
33 import java.util.concurrent.ScheduledFuture;
34 import java.util.concurrent.ThreadPoolExecutor;
35 import java.util.concurrent.TimeUnit;
36 import java.util.concurrent.locks.ReadWriteLock;
37 import java.util.concurrent.locks.ReentrantReadWriteLock;
38 import org.apache.commons.logging.Log;
39 import org.apache.commons.logging.LogFactory;
40 import org.apache.hadoop.conf.Configuration;
41 import org.apache.hadoop.fs.FSDataInputStream;
42 import org.apache.hadoop.fs.FileStatus;
43 import org.apache.hadoop.fs.FileSystem;
44 import org.apache.hadoop.fs.Path;
45 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
46 import org.apache.hadoop.hbase.HConstants;
47 import org.apache.hadoop.hbase.HTableDescriptor;
48 import org.apache.hadoop.hbase.MetaTableAccessor;
49 import org.apache.hadoop.hbase.Stoppable;
50 import org.apache.hadoop.hbase.TableName;
51 import org.apache.hadoop.hbase.classification.InterfaceAudience;
52 import org.apache.hadoop.hbase.classification.InterfaceStability;
53 import org.apache.hadoop.hbase.errorhandling.ForeignException;
54 import org.apache.hadoop.hbase.executor.ExecutorService;
55 import org.apache.hadoop.hbase.ipc.RpcServer;
56 import org.apache.hadoop.hbase.master.AssignmentManager;
57 import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
58 import org.apache.hadoop.hbase.master.MasterFileSystem;
59 import org.apache.hadoop.hbase.master.MasterServices;
60 import org.apache.hadoop.hbase.master.MetricsMaster;
61 import org.apache.hadoop.hbase.master.SnapshotSentinel;
62 import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
63 import org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner;
64 import org.apache.hadoop.hbase.procedure.MasterProcedureManager;
65 import org.apache.hadoop.hbase.procedure.Procedure;
66 import org.apache.hadoop.hbase.procedure.ProcedureCoordinator;
67 import org.apache.hadoop.hbase.procedure.ProcedureCoordinatorRpcs;
68 import org.apache.hadoop.hbase.procedure.ZKProcedureCoordinatorRpcs;
69 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.NameStringPair;
70 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ProcedureDescription;
71 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
72 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type;
73 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
74 import org.apache.hadoop.hbase.quotas.QuotaExceededException;
75 import org.apache.hadoop.hbase.security.AccessDeniedException;
76 import org.apache.hadoop.hbase.security.User;
77 import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
78 import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException;
79 import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
80 import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
81 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
82 import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException;
83 import org.apache.hadoop.hbase.snapshot.SnapshotExistsException;
84 import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
85 import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
86 import org.apache.hadoop.hbase.snapshot.TablePartiallyOpenException;
87 import org.apache.hadoop.hbase.snapshot.UnknownSnapshotException;
88 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
89 import org.apache.hadoop.hbase.util.FSUtils;
90 import org.apache.zookeeper.KeeperException;
91
92
93
94
95
96
97
98
99
100
101 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
102 @InterfaceStability.Unstable
103 public class SnapshotManager extends MasterProcedureManager implements Stoppable {
104 private static final Log LOG = LogFactory.getLog(SnapshotManager.class);
105
106
107 private static final int SNAPSHOT_WAKE_MILLIS_DEFAULT = 500;
108
109
110
111
112
113
114
115
116
117
118
119
120 public static final String HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS =
121 "hbase.snapshot.sentinels.cleanup.timeoutMillis";
122 public static final long SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT = 60 * 1000L;
123
124
125 public static final String HBASE_SNAPSHOT_ENABLED = "hbase.snapshot.enabled";
126
127
128
129
130
131 private static final String SNAPSHOT_WAKE_MILLIS_KEY = "hbase.snapshot.master.wakeMillis";
132
133
134 public static final String ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION = "online-snapshot";
135
136
137 public static final String SNAPSHOT_POOL_THREADS_KEY = "hbase.snapshot.master.threads";
138
139
140 public static final int SNAPSHOT_POOL_THREADS_DEFAULT = 1;
141
142 private boolean stopped;
143 private MasterServices master;
144 private ProcedureCoordinator coordinator;
145
146
147 private boolean isSnapshotSupported = false;
148
149
150
151
152
153 private final Map<TableName, SnapshotSentinel> snapshotHandlers =
154 new HashMap<TableName, SnapshotSentinel>();
155 private final ScheduledExecutorService scheduleThreadPool =
156 Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder()
157 .setNameFormat("SnapshotHandlerChoreCleaner").setDaemon(true).build());
158 private ScheduledFuture<?> snapshotHandlerChoreCleanerTask;
159
160
161
162
163
164 private Map<TableName, SnapshotSentinel> restoreHandlers =
165 new HashMap<TableName, SnapshotSentinel>();
166
167 private Path rootDir;
168 private ExecutorService executorService;
169
170
171
172
173
174
175
176 private ReentrantReadWriteLock takingSnapshotLock = new ReentrantReadWriteLock(true);
177
178 public SnapshotManager() {}
179
180
181
182
183
184
185
186 @InterfaceAudience.Private
187 SnapshotManager(final MasterServices master, ProcedureCoordinator coordinator,
188 ExecutorService pool, int sentinelCleanInterval)
189 throws IOException, UnsupportedOperationException {
190 this.master = master;
191
192 this.rootDir = master.getMasterFileSystem().getRootDir();
193 Configuration conf = master.getConfiguration();
194 checkSnapshotSupport(conf, master.getMasterFileSystem());
195
196 this.coordinator = coordinator;
197 this.executorService = pool;
198 resetTempDir();
199 initSnapshotHandlerChoreCleanerTask(sentinelCleanInterval);
200 }
201
202 private void initSnapshotHandlerChoreCleanerTask(long sentinelCleanInterval) {
203 snapshotHandlerChoreCleanerTask = this.scheduleThreadPool.scheduleAtFixedRate(new Runnable() {
204 @Override
205 public void run() {
206 cleanupSentinels();
207 }
208 }, sentinelCleanInterval, sentinelCleanInterval, TimeUnit.SECONDS);
209 }
210
211
212
213
214
215
216 public List<SnapshotDescription> getCompletedSnapshots() throws IOException {
217 return getCompletedSnapshots(SnapshotDescriptionUtils.getSnapshotsDir(rootDir));
218 }
219
220
221
222
223
224
225
226 private List<SnapshotDescription> getCompletedSnapshots(Path snapshotDir) throws IOException {
227 List<SnapshotDescription> snapshotDescs = new ArrayList<SnapshotDescription>();
228
229 FileSystem fs = master.getMasterFileSystem().getFileSystem();
230 if (snapshotDir == null) snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(rootDir);
231
232
233 if (!fs.exists(snapshotDir)) {
234 return snapshotDescs;
235 }
236
237
238 FileStatus[] snapshots = fs.listStatus(snapshotDir,
239 new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
240 MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
241
242 for (FileStatus snapshot : snapshots) {
243 Path info = new Path(snapshot.getPath(), SnapshotDescriptionUtils.SNAPSHOTINFO_FILE);
244
245 if (!fs.exists(info)) {
246 LOG.error("Snapshot information for " + snapshot.getPath() + " doesn't exist");
247 continue;
248 }
249 FSDataInputStream in = null;
250 try {
251 in = fs.open(info);
252 SnapshotDescription desc = SnapshotDescription.parseFrom(in);
253 if (cpHost != null) {
254 try {
255 cpHost.preListSnapshot(desc);
256 } catch (AccessDeniedException e) {
257 LOG.warn("Current user does not have access to " + desc.getName() + " snapshot. "
258 + "Either you should be owner of this snapshot or admin user.");
259
260 continue;
261 }
262 }
263 snapshotDescs.add(desc);
264
265
266 if (cpHost != null) {
267 cpHost.postListSnapshot(desc);
268 }
269 } catch (IOException e) {
270 LOG.warn("Found a corrupted snapshot " + snapshot.getPath(), e);
271 } finally {
272 if (in != null) {
273 in.close();
274 }
275 }
276 }
277 return snapshotDescs;
278 }
279
280
281
282
283
284
285
286 private void resetTempDir() throws IOException {
287
288 Path tmpdir = SnapshotDescriptionUtils.getWorkingSnapshotDir(rootDir,
289 master.getConfiguration());
290 FileSystem tmpFs = tmpdir.getFileSystem(master.getConfiguration());
291 if (!tmpFs.delete(tmpdir, true)) {
292 LOG.warn("Couldn't delete working snapshot directory: " + tmpdir);
293 }
294 }
295
296
297
298
299
300
301
302 public void deleteSnapshot(SnapshotDescription snapshot) throws IOException {
303
304 if (!isSnapshotCompleted(snapshot)) {
305 throw new SnapshotDoesNotExistException(snapshot);
306 }
307
308 String snapshotName = snapshot.getName();
309
310 FileSystem fs = master.getMasterFileSystem().getFileSystem();
311 Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
312
313
314 snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
315
316
317 MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
318 if (cpHost != null) {
319 cpHost.preDeleteSnapshot(snapshot);
320 }
321
322 LOG.debug("Deleting snapshot: " + snapshotName);
323
324 if (!fs.delete(snapshotDir, true)) {
325 throw new HBaseSnapshotException("Failed to delete snapshot directory: " + snapshotDir);
326 }
327
328
329 if (cpHost != null) {
330 cpHost.postDeleteSnapshot(snapshot);
331 }
332
333 }
334
335
336
337
338
339
340
341
342
343 public boolean isSnapshotDone(SnapshotDescription expected) throws IOException {
344
345 if (expected == null) {
346 throw new UnknownSnapshotException(
347 "No snapshot name passed in request, can't figure out which snapshot you want to check.");
348 }
349
350 String ssString = ClientSnapshotDescriptionUtils.toString(expected);
351
352
353
354 SnapshotSentinel handler = removeSentinelIfFinished(this.snapshotHandlers, expected);
355
356
357 cleanupSentinels();
358
359 if (handler == null) {
360
361
362
363
364
365
366 if (!isSnapshotCompleted(expected)) {
367 throw new UnknownSnapshotException("Snapshot " + ssString
368 + " is not currently running or one of the known completed snapshots.");
369 }
370
371 return true;
372 }
373
374
375 try {
376 handler.rethrowExceptionIfFailed();
377 } catch (ForeignException e) {
378
379 String status;
380 Procedure p = coordinator.getProcedure(expected.getName());
381 if (p != null) {
382 status = p.getStatus();
383 } else {
384 status = expected.getName() + " not found in proclist " + coordinator.getProcedureNames();
385 }
386 throw new HBaseSnapshotException("Snapshot " + ssString + " had an error. " + status, e,
387 expected);
388 }
389
390
391 if (handler.isFinished()) {
392 LOG.debug("Snapshot '" + ssString + "' has completed, notifying client.");
393 return true;
394 } else if (LOG.isDebugEnabled()) {
395 LOG.debug("Snapshoting '" + ssString + "' is still in progress!");
396 }
397 return false;
398 }
399
400
401
402
403
404
405
406
407
408 synchronized boolean isTakingSnapshot(final SnapshotDescription snapshot) {
409 TableName snapshotTable = TableName.valueOf(snapshot.getTable());
410 if (isTakingSnapshot(snapshotTable)) {
411 return true;
412 }
413 Iterator<Map.Entry<TableName, SnapshotSentinel>> it = this.snapshotHandlers.entrySet().iterator();
414 while (it.hasNext()) {
415 Map.Entry<TableName, SnapshotSentinel> entry = it.next();
416 SnapshotSentinel sentinel = entry.getValue();
417 if (snapshot.getName().equals(sentinel.getSnapshot().getName()) && !sentinel.isFinished()) {
418 return true;
419 }
420 }
421 return false;
422 }
423
424
425
426
427
428
429
430 synchronized boolean isTakingSnapshot(final TableName tableName) {
431 SnapshotSentinel handler = this.snapshotHandlers.get(tableName);
432 return handler != null && !handler.isFinished();
433 }
434
435
436
437
438
439
440
441 private synchronized void prepareToTakeSnapshot(SnapshotDescription snapshot)
442 throws HBaseSnapshotException {
443 Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir,
444 master.getConfiguration());
445 TableName snapshotTable =
446 TableName.valueOf(snapshot.getTable());
447
448
449 if (isTakingSnapshot(snapshot)) {
450 SnapshotSentinel handler = this.snapshotHandlers.get(snapshotTable);
451 throw new SnapshotCreationException("Rejected taking "
452 + ClientSnapshotDescriptionUtils.toString(snapshot)
453 + " because we are already running another snapshot "
454 + (handler != null ? ("on the same table " +
455 ClientSnapshotDescriptionUtils.toString(handler.getSnapshot()))
456 : "with the same name"), snapshot);
457 }
458
459
460 if (isRestoringTable(snapshotTable)) {
461 SnapshotSentinel handler = restoreHandlers.get(snapshotTable);
462 throw new SnapshotCreationException("Rejected taking "
463 + ClientSnapshotDescriptionUtils.toString(snapshot)
464 + " because we are already have a restore in progress on the same snapshot "
465 + ClientSnapshotDescriptionUtils.toString(handler.getSnapshot()), snapshot);
466 }
467
468 try {
469 FileSystem workingDirFS = workingDir.getFileSystem(master.getConfiguration());
470
471
472 workingDirFS.delete(workingDir, true);
473
474
475 if (!workingDirFS.mkdirs(workingDir)) {
476 throw new SnapshotCreationException("Couldn't create working directory (" + workingDir
477 + ") for snapshot" , snapshot);
478 }
479 } catch (HBaseSnapshotException e) {
480 throw e;
481 } catch (IOException e) {
482 throw new SnapshotCreationException(
483 "Exception while checking to see if snapshot could be started.", e, snapshot);
484 }
485 }
486
487
488
489
490
491
492
493 private synchronized void snapshotDisabledTable(SnapshotDescription snapshot)
494 throws IOException {
495
496 prepareToTakeSnapshot(snapshot);
497
498
499 snapshot = snapshot.toBuilder().setType(Type.DISABLED).build();
500
501
502 DisabledTableSnapshotHandler handler =
503 new DisabledTableSnapshotHandler(snapshot, master, this);
504 snapshotTable(snapshot, handler);
505 }
506
507
508
509
510
511
512
513 private synchronized void snapshotEnabledTable(SnapshotDescription snapshot)
514 throws IOException {
515
516 prepareToTakeSnapshot(snapshot);
517
518
519 EnabledTableSnapshotHandler handler =
520 new EnabledTableSnapshotHandler(snapshot, master, this);
521 snapshotTable(snapshot, handler);
522 }
523
524
525
526
527
528
529
530
531
532 private synchronized void snapshotTable(SnapshotDescription snapshot,
533 final TakeSnapshotHandler handler) throws IOException {
534 try {
535 handler.prepare();
536 this.executorService.submit(handler);
537 this.snapshotHandlers.put(TableName.valueOf(snapshot.getTable()), handler);
538 } catch (Exception e) {
539
540 Path workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir,
541 master.getConfiguration());
542 FileSystem workingDirFs = workingDir.getFileSystem(master.getConfiguration());
543 try {
544 if (!workingDirFs.delete(workingDir, true)) {
545 LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" +
546 ClientSnapshotDescriptionUtils.toString(snapshot));
547 }
548 } catch (IOException e1) {
549 LOG.error("Couldn't delete working directory (" + workingDir + " for snapshot:" +
550 ClientSnapshotDescriptionUtils.toString(snapshot));
551 }
552
553 throw new SnapshotCreationException("Could not build snapshot handler", e, snapshot);
554 }
555 }
556
557
558
559
560
561
562
563 public void takeSnapshot(SnapshotDescription snapshot) throws IOException {
564 this.takingSnapshotLock.readLock().lock();
565 try {
566 takeSnapshotInternal(snapshot);
567 } finally {
568 this.takingSnapshotLock.readLock().unlock();
569 }
570 }
571
572 private void takeSnapshotInternal(SnapshotDescription snapshot) throws IOException {
573
574 if (isSnapshotCompleted(snapshot)) {
575 throw new SnapshotExistsException("Snapshot '" + snapshot.getName()
576 + "' already stored on the filesystem.", snapshot);
577 }
578
579 LOG.debug("No existing snapshot, attempting snapshot...");
580
581
582 cleanupSentinels();
583
584
585 HTableDescriptor desc = null;
586 try {
587 desc = master.getTableDescriptors().get(
588 TableName.valueOf(snapshot.getTable()));
589 } catch (FileNotFoundException e) {
590 String msg = "Table:" + snapshot.getTable() + " info doesn't exist!";
591 LOG.error(msg);
592 throw new SnapshotCreationException(msg, e, snapshot);
593 } catch (IOException e) {
594 throw new SnapshotCreationException("Error while geting table description for table "
595 + snapshot.getTable(), e, snapshot);
596 }
597 if (desc == null) {
598 throw new SnapshotCreationException("Table '" + snapshot.getTable()
599 + "' doesn't exist, can't take snapshot.", snapshot);
600 }
601 SnapshotDescription.Builder builder = snapshot.toBuilder();
602
603 if (!snapshot.hasVersion()) {
604 builder.setVersion(SnapshotDescriptionUtils.SNAPSHOT_LAYOUT_VERSION);
605 }
606 User user = RpcServer.getRequestUser();
607 if (master.getConfiguration().
608 getBoolean(User.HBASE_SECURITY_AUTHORIZATION_CONF_KEY, false) && user != null) {
609 builder.setOwner(user.getShortName());
610 }
611 snapshot = builder.build();
612
613
614 MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
615 if (cpHost != null) {
616 cpHost.preSnapshot(snapshot, desc);
617 }
618
619
620 TableName snapshotTable = TableName.valueOf(snapshot.getTable());
621 AssignmentManager assignmentMgr = master.getAssignmentManager();
622 if (assignmentMgr.getTableStateManager().isTableState(snapshotTable,
623 ZooKeeperProtos.Table.State.ENABLED)) {
624 if (LOG.isDebugEnabled()) {
625 LOG.debug("Table enabled, starting distributed snapshot for "
626 + ClientSnapshotDescriptionUtils.toString(snapshot));
627 }
628 snapshotEnabledTable(snapshot);
629 if (LOG.isDebugEnabled()) {
630 LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
631 }
632 }
633
634 else if (assignmentMgr.getTableStateManager().isTableState(snapshotTable,
635 ZooKeeperProtos.Table.State.DISABLED)) {
636 if (LOG.isDebugEnabled()) {
637 LOG.debug("Table is disabled, running snapshot entirely on master "
638 + ClientSnapshotDescriptionUtils.toString(snapshot));
639 }
640 snapshotDisabledTable(snapshot);
641 if (LOG.isDebugEnabled()) {
642 LOG.debug("Started snapshot: " + ClientSnapshotDescriptionUtils.toString(snapshot));
643 }
644 } else {
645 LOG.error("Can't snapshot table '" + snapshot.getTable()
646 + "', isn't open or closed, we don't know what to do!");
647 TablePartiallyOpenException tpoe = new TablePartiallyOpenException(snapshot.getTable()
648 + " isn't fully open.");
649 throw new SnapshotCreationException("Table is not entirely open or closed", tpoe, snapshot);
650 }
651
652
653 if (cpHost != null) {
654 cpHost.postSnapshot(snapshot, desc);
655 }
656 }
657
658 public ReadWriteLock getTakingSnapshotLock() {
659 return this.takingSnapshotLock;
660 }
661
662
663
664
665
666
667
668
669
670 public synchronized boolean isTakingAnySnapshot() {
671 return this.takingSnapshotLock.getReadHoldCount() > 0 || this.snapshotHandlers.size() > 0;
672 }
673
674
675
676
677
678
679
680
681
682
683 public synchronized void setSnapshotHandlerForTesting(
684 final TableName tableName,
685 final SnapshotSentinel handler) {
686 if (handler != null) {
687 this.snapshotHandlers.put(tableName, handler);
688 } else {
689 this.snapshotHandlers.remove(tableName);
690 }
691 }
692
693
694
695
696 ProcedureCoordinator getCoordinator() {
697 return coordinator;
698 }
699
700
701
702
703
704
705
706
707
708
709
710 private boolean isSnapshotCompleted(SnapshotDescription snapshot) throws IOException {
711 try {
712 final Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
713 FileSystem fs = master.getMasterFileSystem().getFileSystem();
714
715 return fs.exists(snapshotDir);
716 } catch (IllegalArgumentException iae) {
717 throw new UnknownSnapshotException("Unexpected exception thrown", iae);
718 }
719 }
720
721
722
723
724
725
726
727
728 synchronized void cloneSnapshot(final SnapshotDescription snapshot,
729 final HTableDescriptor hTableDescriptor, final boolean restoreAcl)
730 throws HBaseSnapshotException {
731 TableName tableName = hTableDescriptor.getTableName();
732
733
734 if (isTakingSnapshot(tableName)) {
735 throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
736 }
737
738
739 if (isRestoringTable(tableName)) {
740 throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
741 }
742
743 try {
744 CloneSnapshotHandler handler =
745 new CloneSnapshotHandler(master, snapshot, hTableDescriptor, restoreAcl).prepare();
746 this.executorService.submit(handler);
747 this.restoreHandlers.put(tableName, handler);
748 } catch (Exception e) {
749 String msg = "Couldn't clone the snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
750 " on table=" + tableName;
751 LOG.error(msg, e);
752 throw new RestoreSnapshotException(msg, e);
753 }
754 }
755
756
757
758
759
760
761 @Deprecated
762 public void restoreSnapshot(SnapshotDescription reqSnapshot) throws IOException {
763 restoreSnapshot(reqSnapshot, false);
764 }
765
766
767
768
769
770
771
772 public void restoreSnapshot(SnapshotDescription reqSnapshot, boolean restoreAcl)
773 throws IOException {
774 FileSystem fs = master.getMasterFileSystem().getFileSystem();
775 Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(reqSnapshot, rootDir);
776 MasterCoprocessorHost cpHost = master.getMasterCoprocessorHost();
777
778
779 if (!fs.exists(snapshotDir)) {
780 LOG.error("A Snapshot named '" + reqSnapshot.getName() + "' does not exist.");
781 throw new SnapshotDoesNotExistException(reqSnapshot);
782 }
783
784
785
786
787 SnapshotDescription snapshot = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
788 SnapshotManifest manifest = SnapshotManifest.open(master.getConfiguration(), fs,
789 snapshotDir, snapshot);
790 HTableDescriptor snapshotTableDesc = manifest.getTableDescriptor();
791 TableName tableName = TableName.valueOf(reqSnapshot.getTable());
792
793
794 cleanupSentinels();
795
796
797 SnapshotReferenceUtil.verifySnapshot(master.getConfiguration(), fs, manifest);
798
799
800 if (MetaTableAccessor.tableExists(master.getConnection(), tableName)) {
801 if (master.getAssignmentManager().getTableStateManager().isTableState(
802 TableName.valueOf(snapshot.getTable()), ZooKeeperProtos.Table.State.ENABLED)) {
803 throw new UnsupportedOperationException("Table '" +
804 TableName.valueOf(snapshot.getTable()) + "' must be disabled in order to " +
805 "perform a restore operation" +
806 ".");
807 }
808
809
810 if (cpHost != null) {
811 cpHost.preRestoreSnapshot(snapshot, snapshotTableDesc);
812 }
813
814 int tableRegionCount = -1;
815 try {
816
817
818
819
820 tableRegionCount = getRegionCountOfTable(tableName);
821 int snapshotRegionCount = manifest.getRegionManifestsMap().size();
822
823
824
825
826
827 if (tableRegionCount > 0 && tableRegionCount < snapshotRegionCount) {
828 checkAndUpdateNamespaceRegionQuota(snapshotRegionCount, tableName);
829 }
830 restoreSnapshot(snapshot, snapshotTableDesc, restoreAcl);
831
832
833 if (tableRegionCount > 0 && tableRegionCount > snapshotRegionCount) {
834 checkAndUpdateNamespaceRegionQuota(snapshotRegionCount, tableName);
835 }
836 } catch (QuotaExceededException e) {
837 LOG.error("Region quota exceeded while restoring the snapshot " + snapshot.getName()
838 + " as table " + tableName.getNameAsString(), e);
839
840
841
842 throw e;
843 } catch (IOException e) {
844 if (tableRegionCount > 0) {
845
846 checkAndUpdateNamespaceRegionQuota(tableRegionCount, tableName);
847 }
848 LOG.error("Exception occurred while restoring the snapshot " + snapshot.getName()
849 + " as table " + tableName.getNameAsString(), e);
850 throw e;
851 }
852 LOG.info("Restore snapshot=" + snapshot.getName() + " as table=" + tableName);
853
854 if (cpHost != null) {
855 cpHost.postRestoreSnapshot(snapshot, snapshotTableDesc);
856 }
857 } else {
858 HTableDescriptor htd = new HTableDescriptor(tableName, snapshotTableDesc);
859 if (cpHost != null) {
860 cpHost.preCloneSnapshot(snapshot, htd);
861 }
862 try {
863 checkAndUpdateNamespaceQuota(manifest, tableName);
864 cloneSnapshot(snapshot, htd, restoreAcl);
865 } catch (IOException e) {
866 this.master.getMasterQuotaManager().removeTableFromNamespaceQuota(tableName);
867 LOG.error("Exception occurred while cloning the snapshot " + snapshot.getName()
868 + " as table " + tableName.getNameAsString(), e);
869 throw e;
870 }
871 LOG.info("Clone snapshot=" + snapshot.getName() + " as table=" + tableName);
872
873 if (cpHost != null) {
874 cpHost.postCloneSnapshot(snapshot, htd);
875 }
876 }
877 }
878
879 private void checkAndUpdateNamespaceQuota(SnapshotManifest manifest, TableName tableName)
880 throws IOException {
881 if (this.master.getMasterQuotaManager().isQuotaInitialized()) {
882 this.master.getMasterQuotaManager().checkNamespaceTableAndRegionQuota(tableName,
883 manifest.getRegionManifestsMap().size());
884 }
885 }
886
887 private void checkAndUpdateNamespaceRegionQuota(int updatedRegionCount, TableName tableName)
888 throws IOException {
889 if (this.master.getMasterQuotaManager().isQuotaInitialized()) {
890 this.master.getMasterQuotaManager().checkAndUpdateNamespaceRegionQuota(tableName,
891 updatedRegionCount);
892 }
893 }
894
895
896
897
898 private int getRegionCountOfTable(TableName tableName) throws IOException {
899 if (this.master.getMasterQuotaManager().isQuotaInitialized()) {
900 return this.master.getMasterQuotaManager().getRegionCountOfTable(tableName);
901 }
902 return -1;
903 }
904
905
906
907
908
909
910
911
912 private synchronized void restoreSnapshot(final SnapshotDescription snapshot,
913 final HTableDescriptor hTableDescriptor, final boolean restoreAcl)
914 throws HBaseSnapshotException {
915 TableName tableName = hTableDescriptor.getTableName();
916
917
918 if (isTakingSnapshot(tableName)) {
919 throw new RestoreSnapshotException("Snapshot in progress on the restore table=" + tableName);
920 }
921
922
923 if (isRestoringTable(tableName)) {
924 throw new RestoreSnapshotException("Restore already in progress on the table=" + tableName);
925 }
926
927 try {
928 RestoreSnapshotHandler handler =
929 new RestoreSnapshotHandler(master, snapshot, hTableDescriptor, restoreAcl).prepare();
930 this.executorService.submit(handler);
931 restoreHandlers.put(tableName, handler);
932 } catch (Exception e) {
933 String msg = "Couldn't restore the snapshot=" + ClientSnapshotDescriptionUtils.toString(
934 snapshot) +
935 " on table=" + tableName;
936 LOG.error(msg, e);
937 throw new RestoreSnapshotException(msg, e);
938 }
939 }
940
941
942
943
944
945
946
947 private synchronized boolean isRestoringTable(final TableName tableName) {
948 SnapshotSentinel sentinel = this.restoreHandlers.get(tableName);
949 return(sentinel != null && !sentinel.isFinished());
950 }
951
952
953
954
955
956
957
958
959
960 public boolean isRestoreDone(final SnapshotDescription snapshot) throws IOException {
961
962
963 SnapshotSentinel sentinel = removeSentinelIfFinished(this.restoreHandlers, snapshot);
964
965
966 cleanupSentinels();
967
968 if (sentinel == null) {
969
970 return true;
971 }
972
973 LOG.debug("Verify snapshot=" + snapshot.getName() + " against="
974 + sentinel.getSnapshot().getName() + " table=" +
975 TableName.valueOf(snapshot.getTable()));
976
977
978 sentinel.rethrowExceptionIfFailed();
979
980
981 if (sentinel.isFinished()) {
982 LOG.debug("Restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
983 " has completed. Notifying the client.");
984 return true;
985 }
986
987 if (LOG.isDebugEnabled()) {
988 LOG.debug("Sentinel is not yet finished with restoring snapshot=" +
989 ClientSnapshotDescriptionUtils.toString(snapshot));
990 }
991 return false;
992 }
993
994
995
996
997
998
999
1000
1001 private synchronized SnapshotSentinel removeSentinelIfFinished(
1002 final Map<TableName, SnapshotSentinel> sentinels,
1003 final SnapshotDescription snapshot) {
1004 if (!snapshot.hasTable()) {
1005 return null;
1006 }
1007
1008 TableName snapshotTable = TableName.valueOf(snapshot.getTable());
1009 SnapshotSentinel h = sentinels.get(snapshotTable);
1010 if (h == null) {
1011 return null;
1012 }
1013
1014 if (!h.getSnapshot().getName().equals(snapshot.getName())) {
1015
1016 return null;
1017 }
1018
1019
1020 if (h.isFinished()) {
1021 sentinels.remove(snapshotTable);
1022 }
1023
1024 return h;
1025 }
1026
1027
1028
1029
1030
1031
1032
1033
1034 private void cleanupSentinels() {
1035 cleanupSentinels(this.snapshotHandlers);
1036 cleanupSentinels(this.restoreHandlers);
1037 }
1038
1039
1040
1041
1042
1043
1044 private synchronized void cleanupSentinels(final Map<TableName, SnapshotSentinel> sentinels) {
1045 long currentTime = EnvironmentEdgeManager.currentTime();
1046 long sentinelsCleanupTimeoutMillis =
1047 master.getConfiguration().getLong(HBASE_SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLIS,
1048 SNAPSHOT_SENTINELS_CLEANUP_TIMEOUT_MILLS_DEFAULT);
1049 Iterator<Map.Entry<TableName, SnapshotSentinel>> it = sentinels.entrySet().iterator();
1050 while (it.hasNext()) {
1051 Map.Entry<TableName, SnapshotSentinel> entry = it.next();
1052 SnapshotSentinel sentinel = entry.getValue();
1053 if (sentinel.isFinished()
1054 && (currentTime - sentinel.getCompletionTimestamp()) > sentinelsCleanupTimeoutMillis) {
1055 it.remove();
1056 }
1057 }
1058 }
1059
1060
1061
1062
1063
1064 @Override
1065 public void stop(String why) {
1066
1067 if (this.stopped) return;
1068
1069 this.stopped = true;
1070
1071 for (SnapshotSentinel snapshotHandler: this.snapshotHandlers.values()) {
1072 snapshotHandler.cancel(why);
1073 }
1074 if (snapshotHandlerChoreCleanerTask != null) {
1075 snapshotHandlerChoreCleanerTask.cancel(true);
1076 }
1077
1078 for (SnapshotSentinel restoreHandler: this.restoreHandlers.values()) {
1079 restoreHandler.cancel(why);
1080 }
1081 try {
1082 if (coordinator != null) {
1083 coordinator.close();
1084 }
1085 } catch (IOException e) {
1086 LOG.error("stop ProcedureCoordinator error", e);
1087 }
1088 }
1089
1090 @Override
1091 public boolean isStopped() {
1092 return this.stopped;
1093 }
1094
1095
1096
1097
1098
1099
1100 public void checkSnapshotSupport() throws UnsupportedOperationException {
1101 if (!this.isSnapshotSupported) {
1102 throw new UnsupportedOperationException(
1103 "To use snapshots, You must add to the hbase-site.xml of the HBase Master: '" +
1104 HBASE_SNAPSHOT_ENABLED + "' property with value 'true'.");
1105 }
1106 }
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118 private void checkSnapshotSupport(final Configuration conf, final MasterFileSystem mfs)
1119 throws IOException, UnsupportedOperationException {
1120
1121 String enabled = conf.get(HBASE_SNAPSHOT_ENABLED);
1122 boolean snapshotEnabled = conf.getBoolean(HBASE_SNAPSHOT_ENABLED, false);
1123 boolean userDisabled = (enabled != null && enabled.trim().length() > 0 && !snapshotEnabled);
1124
1125
1126 Set<String> hfileCleaners = new HashSet<String>();
1127 String[] cleaners = conf.getStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
1128 if (cleaners != null) Collections.addAll(hfileCleaners, cleaners);
1129
1130 Set<String> logCleaners = new HashSet<String>();
1131 cleaners = conf.getStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS);
1132 if (cleaners != null) Collections.addAll(logCleaners, cleaners);
1133
1134
1135 Path oldSnapshotDir = new Path(mfs.getRootDir(), HConstants.OLD_SNAPSHOT_DIR_NAME);
1136 FileSystem fs = mfs.getFileSystem();
1137 List<SnapshotDescription> ss = getCompletedSnapshots(new Path(rootDir, oldSnapshotDir));
1138 if (ss != null && !ss.isEmpty()) {
1139 LOG.error("Snapshots from an earlier release were found under: " + oldSnapshotDir);
1140 LOG.error("Please rename the directory as " + HConstants.SNAPSHOT_DIR_NAME);
1141 }
1142
1143
1144
1145
1146 if (snapshotEnabled) {
1147
1148 hfileCleaners.add(SnapshotHFileCleaner.class.getName());
1149 hfileCleaners.add(HFileLinkCleaner.class.getName());
1150
1151
1152 conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
1153 hfileCleaners.toArray(new String[hfileCleaners.size()]));
1154 conf.setStrings(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS,
1155 logCleaners.toArray(new String[logCleaners.size()]));
1156 } else {
1157
1158 snapshotEnabled =
1159 hfileCleaners.contains(SnapshotHFileCleaner.class.getName()) &&
1160 hfileCleaners.contains(HFileLinkCleaner.class.getName());
1161
1162
1163 if (snapshotEnabled) {
1164 LOG.warn("Snapshot log and hfile cleaners are present in the configuration, " +
1165 "but the '" + HBASE_SNAPSHOT_ENABLED + "' property " +
1166 (userDisabled ? "is set to 'false'." : "is not set."));
1167 }
1168 }
1169
1170
1171 this.isSnapshotSupported = snapshotEnabled && !userDisabled;
1172
1173
1174
1175 if (!snapshotEnabled) {
1176 LOG.info("Snapshot feature is not enabled, missing log and hfile cleaners.");
1177 Path snapshotDir = SnapshotDescriptionUtils.getSnapshotsDir(mfs.getRootDir());
1178 if (fs.exists(snapshotDir)) {
1179 FileStatus[] snapshots = FSUtils.listStatus(fs, snapshotDir,
1180 new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
1181 if (snapshots != null) {
1182 LOG.error("Snapshots are present, but cleaners are not enabled.");
1183 checkSnapshotSupport();
1184 }
1185 }
1186 }
1187 }
1188
1189 @Override
1190 public void initialize(MasterServices master, MetricsMaster metricsMaster) throws KeeperException,
1191 IOException, UnsupportedOperationException {
1192 this.master = master;
1193
1194 this.rootDir = master.getMasterFileSystem().getRootDir();
1195 checkSnapshotSupport(master.getConfiguration(), master.getMasterFileSystem());
1196
1197
1198 Configuration conf = master.getConfiguration();
1199 long wakeFrequency = conf.getInt(SNAPSHOT_WAKE_MILLIS_KEY, SNAPSHOT_WAKE_MILLIS_DEFAULT);
1200 long timeoutMillis = Math.max(conf.getLong(SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_KEY,
1201 SnapshotDescriptionUtils.SNAPSHOT_TIMEOUT_MILLIS_DEFAULT),
1202 conf.getLong(SnapshotDescriptionUtils.MASTER_SNAPSHOT_TIMEOUT_MILLIS,
1203 SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME));
1204 int opThreads = conf.getInt(SNAPSHOT_POOL_THREADS_KEY, SNAPSHOT_POOL_THREADS_DEFAULT);
1205
1206
1207 String name = master.getServerName().toString();
1208 ThreadPoolExecutor tpool = ProcedureCoordinator.defaultPool(name, opThreads);
1209 ProcedureCoordinatorRpcs comms = new ZKProcedureCoordinatorRpcs(
1210 master.getZooKeeper(), SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, name);
1211
1212 this.coordinator = new ProcedureCoordinator(comms, tpool, timeoutMillis, wakeFrequency);
1213 this.executorService = master.getExecutorService();
1214 resetTempDir();
1215 initSnapshotHandlerChoreCleanerTask(10);
1216 }
1217
1218 @Override
1219 public String getProcedureSignature() {
1220 return ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION;
1221 }
1222
1223 @Override
1224 public void execProcedure(ProcedureDescription desc) throws IOException {
1225 takeSnapshot(toSnapshotDescription(desc));
1226 }
1227
1228 @Override
1229 public boolean isProcedureDone(ProcedureDescription desc) throws IOException {
1230 return isSnapshotDone(toSnapshotDescription(desc));
1231 }
1232
1233 private SnapshotDescription toSnapshotDescription(ProcedureDescription desc)
1234 throws IOException {
1235 SnapshotDescription.Builder builder = SnapshotDescription.newBuilder();
1236 if (!desc.hasInstance()) {
1237 throw new IOException("Snapshot name is not defined: " + desc.toString());
1238 }
1239 String snapshotName = desc.getInstance();
1240 List<NameStringPair> props = desc.getConfigurationList();
1241 String table = null;
1242 for (NameStringPair prop : props) {
1243 if ("table".equalsIgnoreCase(prop.getName())) {
1244 table = prop.getValue();
1245 }
1246 }
1247 if (table == null) {
1248 throw new IOException("Snapshot table is not defined: " + desc.toString());
1249 }
1250 TableName tableName = TableName.valueOf(table);
1251 builder.setTable(tableName.getNameAsString());
1252 builder.setName(snapshotName);
1253 builder.setType(SnapshotDescription.Type.FLUSH);
1254 return builder.build();
1255 }
1256 }