1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.snapshot;
19
20 import com.google.common.base.Preconditions;
21 import java.io.FileNotFoundException;
22 import java.io.IOException;
23 import java.util.HashSet;
24 import java.util.List;
25 import java.util.Set;
26 import java.util.concurrent.CancellationException;
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.conf.Configuration;
30 import org.apache.hadoop.fs.FileSystem;
31 import org.apache.hadoop.fs.Path;
32 import org.apache.hadoop.hbase.HRegionInfo;
33 import org.apache.hadoop.hbase.HTableDescriptor;
34 import org.apache.hadoop.hbase.MetaTableAccessor;
35 import org.apache.hadoop.hbase.ServerName;
36 import org.apache.hadoop.hbase.TableName;
37 import org.apache.hadoop.hbase.classification.InterfaceAudience;
38 import org.apache.hadoop.hbase.errorhandling.ForeignException;
39 import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
40 import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
41 import org.apache.hadoop.hbase.executor.EventHandler;
42 import org.apache.hadoop.hbase.executor.EventType;
43 import org.apache.hadoop.hbase.master.MasterServices;
44 import org.apache.hadoop.hbase.master.MetricsSnapshot;
45 import org.apache.hadoop.hbase.master.SnapshotSentinel;
46 import org.apache.hadoop.hbase.master.TableLockManager;
47 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
48 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
49 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
50 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
51 import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
52 import org.apache.hadoop.hbase.snapshot.SnapshotCreationException;
53 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
54 import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
55 import org.apache.hadoop.hbase.util.FSUtils;
56 import org.apache.hadoop.hbase.util.Pair;
57 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
58 import org.apache.zookeeper.KeeperException;
59
60
61
62
63
64
65
66
67 @InterfaceAudience.Private
68 public abstract class TakeSnapshotHandler extends EventHandler implements SnapshotSentinel,
69 ForeignExceptionSnare {
70 private static final Log LOG = LogFactory.getLog(TakeSnapshotHandler.class);
71
72 private volatile boolean finished;
73
74
75 protected final MasterServices master;
76 protected final MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
77 protected final SnapshotDescription snapshot;
78 protected final Configuration conf;
79 protected final FileSystem rootFs;
80 protected final FileSystem workingDirFs;
81 protected final Path rootDir;
82 private final Path snapshotDir;
83 protected final Path workingDir;
84 private final MasterSnapshotVerifier verifier;
85 protected final ForeignExceptionDispatcher monitor;
86 protected final TableLockManager tableLockManager;
87 protected final TableLock tableLock;
88 protected final MonitoredTask status;
89 protected final TableName snapshotTable;
90 protected final SnapshotManifest snapshotManifest;
91 protected final SnapshotManager snapshotManager;
92
93 protected HTableDescriptor htd;
94
95
96
97
98
99
100
101
102
103 public TakeSnapshotHandler(SnapshotDescription snapshot, final MasterServices masterServices,
104 final SnapshotManager snapshotManager) throws IOException {
105 super(masterServices, EventType.C_M_SNAPSHOT_TABLE);
106 assert snapshot != null : "SnapshotDescription must not be nul1";
107 assert masterServices != null : "MasterServices must not be nul1";
108 this.master = masterServices;
109 this.conf = this.master.getConfiguration();
110 this.rootDir = this.master.getMasterFileSystem().getRootDir();
111 this.workingDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(snapshot, rootDir, conf);
112 Preconditions.checkArgument(!SnapshotDescriptionUtils.isSubDirectoryOf(workingDir, rootDir) ||
113 SnapshotDescriptionUtils.isWithinDefaultWorkingDir(workingDir, conf),
114 "The working directory " + workingDir + " cannot be in the root directory unless it is "
115 + "within the default working directory");
116
117 this.snapshot = snapshot;
118 this.snapshotManager = snapshotManager;
119 this.snapshotTable = TableName.valueOf(snapshot.getTable());
120 this.rootFs = this.master.getMasterFileSystem().getFileSystem();
121 this.snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
122 this.workingDirFs = this.workingDir.getFileSystem(this.conf);
123 this.monitor = new ForeignExceptionDispatcher(snapshot.getName());
124
125 this.tableLockManager = master.getTableLockManager();
126 this.tableLock = this.tableLockManager.writeLock(
127 snapshotTable,
128 EventType.C_M_SNAPSHOT_TABLE.toString());
129
130
131 this.verifier = new MasterSnapshotVerifier(masterServices, snapshot, workingDirFs);
132
133 this.status = TaskMonitor.get().createStatus(
134 "Taking " + snapshot.getType() + " snapshot on table: " + snapshotTable);
135 this.status.enableStatusJournal(true);
136
137 this.snapshotManifest =
138 SnapshotManifest.create(conf, rootFs, workingDir, snapshot, monitor, status);
139 }
140
141 private HTableDescriptor loadTableDescriptor()
142 throws FileNotFoundException, IOException {
143 HTableDescriptor htd =
144 this.master.getTableDescriptors().get(snapshotTable);
145 if (htd == null) {
146 throw new IOException("HTableDescriptor missing for " + snapshotTable);
147 }
148 return htd;
149 }
150
151 @Override
152 public TakeSnapshotHandler prepare() throws Exception {
153 super.prepare();
154 this.tableLock.acquire();
155
156 boolean success = false;
157 try {
158 this.htd = loadTableDescriptor();
159 success = true;
160 } finally {
161 if (!success) {
162 releaseTableLock();
163 }
164 }
165
166 return this;
167 }
168
169
170
171
172
173 @Override
174 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
175 justification="Intentional")
176 public void process() {
177 String msg = "Running " + snapshot.getType() + " table snapshot " + snapshot.getName() + " "
178 + eventType + " on table " + snapshotTable;
179 LOG.info(msg);
180 status.setStatus(msg);
181 try {
182
183
184
185
186 SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, workingDirFs);
187 snapshotManifest.addTableDescriptor(this.htd);
188 monitor.rethrowException();
189
190 List<Pair<HRegionInfo, ServerName>> regionsAndLocations;
191 if (TableName.META_TABLE_NAME.equals(snapshotTable)) {
192 regionsAndLocations = new MetaTableLocator().getMetaRegionsAndLocations(
193 server.getZooKeeper());
194 } else {
195 regionsAndLocations = MetaTableAccessor.getTableRegionsAndLocations(
196 server.getZooKeeper(), server.getConnection(), snapshotTable, false);
197 }
198
199
200 snapshotRegions(regionsAndLocations);
201 monitor.rethrowException();
202
203
204 Set<String> serverNames = new HashSet<String>();
205 for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
206 if (p != null && p.getFirst() != null && p.getSecond() != null) {
207 HRegionInfo hri = p.getFirst();
208 if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue;
209 serverNames.add(p.getSecond().toString());
210 }
211 }
212
213
214 status.setStatus("Consolidate snapshot: " + snapshot.getName());
215 snapshotManifest.consolidate();
216
217
218 status.setStatus("Verifying snapshot: " + snapshot.getName());
219 verifier.verifySnapshot(this.workingDir, serverNames);
220
221
222 completeSnapshot(this.snapshotDir, this.workingDir, this.rootFs, this.workingDirFs);
223 msg = "Snapshot " + snapshot.getName() + " of table " + snapshotTable + " completed";
224 status.markComplete(msg);
225 LOG.info(msg);
226 metricsSnapshot.addSnapshot(status.getCompletionTimestamp() - status.getStartTime());
227 } catch (Exception e) {
228 status.abort("Failed to complete snapshot " + snapshot.getName() + " on table " +
229 snapshotTable + " because " + e.getMessage());
230 String reason = "Failed taking snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot)
231 + " due to exception:" + e.getMessage();
232 LOG.error(reason, e);
233 ForeignException ee = new ForeignException(reason, e);
234 monitor.receive(ee);
235
236 cancel(reason);
237 } finally {
238 LOG.debug("Launching cleanup of working dir:" + workingDir);
239 try {
240
241
242 if (!workingDirFs.delete(workingDir, true)) {
243 LOG.error("Couldn't delete snapshot working directory:" + workingDir);
244 }
245 } catch (IOException e) {
246 LOG.error("Couldn't delete snapshot working directory:" + workingDir);
247 }
248 if (LOG.isDebugEnabled()) {
249 LOG.debug("Table snapshot journal : \n" + status.prettyPrintJournal());
250 }
251 releaseTableLock();
252 }
253 }
254
255 protected void releaseTableLock() {
256 if (this.tableLock != null) {
257 try {
258 this.tableLock.release();
259 } catch (IOException ex) {
260 LOG.warn("Could not release the table lock", ex);
261 }
262 }
263 }
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278 public void completeSnapshot(Path snapshotDir, Path workingDir, FileSystem fs,
279 FileSystem workingDirFs) throws SnapshotCreationException, IOException {
280 SnapshotDescriptionUtils.completeSnapshot(snapshotDir, workingDir, fs, workingDirFs, conf);
281 finished = true;
282 }
283
284
285
286
287 protected abstract void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regions)
288 throws IOException, KeeperException;
289
290
291
292
293 protected void snapshotDisabledRegion(final HRegionInfo regionInfo)
294 throws IOException {
295 snapshotManifest.addRegion(FSUtils.getTableDir(rootDir, snapshotTable), regionInfo);
296 monitor.rethrowException();
297 status.setStatus("Completed referencing HFiles for offline region " + regionInfo.toString() +
298 " of table: " + snapshotTable);
299 }
300
301 @Override
302 public void cancel(String why) {
303 if (finished) return;
304
305 this.finished = true;
306 LOG.info("Stop taking snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
307 " because: " + why);
308 CancellationException ce = new CancellationException(why);
309 monitor.receive(new ForeignException(master.getServerName().toString(), ce));
310 }
311
312 @Override
313 public boolean isFinished() {
314 return finished;
315 }
316
317 @Override
318 public long getCompletionTimestamp() {
319 return this.status.getCompletionTimestamp();
320 }
321
322 @Override
323 public SnapshotDescription getSnapshot() {
324 return snapshot;
325 }
326
327 @Override
328 public ForeignException getExceptionIfFailed() {
329 return monitor.getException();
330 }
331
332 @Override
333 public void rethrowExceptionIfFailed() throws ForeignException {
334 monitor.rethrowException();
335 }
336
337 @Override
338 public void rethrowException() throws ForeignException {
339 monitor.rethrowException();
340 }
341
342 @Override
343 public boolean hasException() {
344 return monitor.hasException();
345 }
346
347 @Override
348 public ForeignException getException() {
349 return monitor.getException();
350 }
351
352 }