View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.master.snapshot;
21  
22  import java.io.IOException;
23  import java.util.LinkedList;
24  import java.util.List;
25  import java.util.concurrent.CancellationException;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.fs.FileSystem;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.HTableDescriptor;
34  import org.apache.hadoop.hbase.TableName;
35  import org.apache.hadoop.hbase.MetaTableAccessor;
36  import org.apache.hadoop.hbase.client.Connection;
37  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
38  import org.apache.hadoop.hbase.errorhandling.ForeignException;
39  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
40  import org.apache.hadoop.hbase.executor.EventType;
41  import org.apache.hadoop.hbase.master.AssignmentManager;
42  import org.apache.hadoop.hbase.master.MasterFileSystem;
43  import org.apache.hadoop.hbase.master.MasterServices;
44  import org.apache.hadoop.hbase.master.MetricsSnapshot;
45  import org.apache.hadoop.hbase.master.RegionStates;
46  import org.apache.hadoop.hbase.master.SnapshotSentinel;
47  import org.apache.hadoop.hbase.master.handler.TableEventHandler;
48  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
49  import org.apache.hadoop.hbase.monitoring.TaskMonitor;
50  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
51  import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
52  import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
53  import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
54  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
55  import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
56  
57  /**
58   * Handler to Restore a snapshot.
59   *
60   * <p>Uses {@link RestoreSnapshotHelper} to replace the table content with the
61   * data available in the snapshot.
62   */
63  @InterfaceAudience.Private
64  public class RestoreSnapshotHandler extends TableEventHandler implements SnapshotSentinel {
65    private static final Log LOG = LogFactory.getLog(RestoreSnapshotHandler.class);
66  
67    private final HTableDescriptor hTableDescriptor;
68    private final SnapshotDescription snapshot;
69    private final boolean restoreAcl;
70  
71    private final ForeignExceptionDispatcher monitor;
72    private final MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
73    private final MonitoredTask status;
74  
75    private volatile boolean stopped = false;
76  
77    public RestoreSnapshotHandler(final MasterServices masterServices,
78        final SnapshotDescription snapshot, final HTableDescriptor htd, final boolean restoreAcl)
79        throws IOException {
80      super(EventType.C_M_RESTORE_SNAPSHOT, htd.getTableName(), masterServices, masterServices);
81  
82      // Snapshot information
83      this.snapshot = snapshot;
84      this.restoreAcl = restoreAcl;
85  
86      // Monitor
87      this.monitor = new ForeignExceptionDispatcher();
88  
89      // Check table exists.
90      getTableDescriptor();
91  
92      // This is the new schema we are going to write out as this modification.
93      this.hTableDescriptor = htd;
94  
95      this.status = TaskMonitor.get().createStatus(
96        "Restoring  snapshot '" + snapshot.getName() + "' to table "
97            + hTableDescriptor.getTableName());
98    }
99  
100   @Override
101   public RestoreSnapshotHandler prepare() throws IOException {
102     return (RestoreSnapshotHandler) super.prepare();
103   }
104 
105   /**
106    * The restore table is executed in place.
107    *  - The on-disk data will be restored - reference files are put in place without moving data
108    *  -  [if something fail here: you need to delete the table and re-run the restore]
109    *  - hbase:meta will be updated
110    *  -  [if something fail here: you need to run hbck to fix hbase:meta entries]
111    * The passed in list gets changed in this method
112    */
113   @Override
114   protected void handleTableOperation(List<HRegionInfo> hris) throws IOException {
115     MasterFileSystem fileSystemManager = masterServices.getMasterFileSystem();
116     Connection conn = masterServices.getConnection();
117     FileSystem fs = fileSystemManager.getFileSystem();
118     Path rootDir = fileSystemManager.getRootDir();
119     TableName tableName = hTableDescriptor.getTableName();
120 
121     try {
122       // 1. Update descriptor
123       this.masterServices.getTableDescriptors().add(hTableDescriptor);
124 
125       // 2. Execute the on-disk Restore
126       LOG.debug("Starting restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot));
127       Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
128       SnapshotManifest manifest = SnapshotManifest.open(masterServices.getConfiguration(), fs,
129                                                         snapshotDir, snapshot);
130       RestoreSnapshotHelper restoreHelper = new RestoreSnapshotHelper(
131           masterServices.getConfiguration(), fs, manifest,
132           this.hTableDescriptor, rootDir, monitor, status);
133       RestoreSnapshotHelper.RestoreMetaChanges metaChanges = restoreHelper.restoreHdfsRegions();
134 
135       // 3. Forces all the RegionStates to be offline
136       //
137       // The AssignmentManager keeps all the region states around
138       // with no possibility to remove them, until the master is restarted.
139       // This means that a region marked as SPLIT before the restore will never be assigned again.
140       // To avoid having all states around all the regions are switched to the OFFLINE state,
141       // which is the same state that the regions will be after a delete table.
142       forceRegionsOffline(metaChanges);
143 
144       // 4. Applies changes to hbase:meta
145       status.setStatus("Preparing to restore each region");
146 
147       // 4.1 Removes the current set of regions from META
148       //
149       // By removing also the regions to restore (the ones present both in the snapshot
150       // and in the current state) we ensure that no extra fields are present in META
151       // e.g. with a simple add addRegionToMeta() the splitA and splitB attributes
152       // not overwritten/removed, so you end up with old informations
153       // that are not correct after the restore.
154       List<HRegionInfo> hrisToRemove = new LinkedList<HRegionInfo>();
155       if (metaChanges.hasRegionsToRemove()) hrisToRemove.addAll(metaChanges.getRegionsToRemove());
156       MetaTableAccessor.deleteRegions(conn, hrisToRemove);
157 
158       // We also need to remove the current set of regions from in memory states
159       deleteRegionsFromInMemoryStates(hrisToRemove, hTableDescriptor.getRegionReplication());
160 
161       // 4.2 Add the new set of regions to META
162       //
163       // At this point the old regions are no longer present in META.
164       // and the set of regions present in the snapshot will be written to META.
165       // All the information in hbase:meta are coming from the .regioninfo of each region present
166       // in the snapshot folder.
167       hris.clear();
168       if (metaChanges.hasRegionsToAdd()) hris.addAll(metaChanges.getRegionsToAdd());
169       MetaTableAccessor.addRegionsToMeta(conn, hris, hTableDescriptor.getRegionReplication());
170       if (metaChanges.hasRegionsToRestore()) {
171         MetaTableAccessor.overwriteRegions(conn, metaChanges.getRegionsToRestore(),
172           hTableDescriptor.getRegionReplication());
173       }
174       metaChanges.updateMetaParentRegions(this.server.getConnection(), hris);
175 
176       // 5. restore acl of snapshot into the table.
177       if (restoreAcl && snapshot.hasUsersAndPermissions()
178           && SnapshotDescriptionUtils.isSecurityAvailable(server.getConfiguration())) {
179         RestoreSnapshotHelper.restoreSnapshotACL(snapshot, tableName, server.getConfiguration());
180       }
181 
182       // At this point the restore is complete. Next step is enabling the table.
183       LOG.info("Restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
184         " on table=" + tableName + " completed!");
185     } catch (IOException e) {
186       String msg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)
187           + " failed. Try re-running the restore command.";
188       LOG.error(msg, e);
189       IOException rse = new RestoreSnapshotException(msg, e);
190       monitor.receive(new ForeignException(masterServices.getServerName().toString(), rse));
191       throw rse;
192     }
193   }
194 
195   private void forceRegionsOffline(final RestoreSnapshotHelper.RestoreMetaChanges metaChanges) {
196     forceRegionsOffline(metaChanges.getRegionsToAdd());
197     forceRegionsOffline(metaChanges.getRegionsToRestore());
198     forceRegionsOffline(metaChanges.getRegionsToRemove());
199   }
200 
201   private void forceRegionsOffline(final List<HRegionInfo> hris) {
202     AssignmentManager am = this.masterServices.getAssignmentManager();
203     RegionStates states = am.getRegionStates();
204     if (hris != null) {
205       for (HRegionInfo hri: hris) {
206         states.regionOffline(hri);
207       }
208     }
209   }
210 
211   /**
212    * Delete regions from in-memory states
213    * @param regionInfos regions to delete
214    * @param regionReplication the number of region replications
215    */
216   private void deleteRegionsFromInMemoryStates(List<HRegionInfo> regionInfos,
217     int regionReplication) {
218     // Delete the regions from AssignmentManager
219     for (HRegionInfo hri : regionInfos) {
220       masterServices.getAssignmentManager().getRegionStates().deleteRegion(hri);
221     }
222     // Delete the regions from ServerManager
223     masterServices.getServerManager().removeRegions(regionInfos);
224 
225     // For region replicas
226     if (regionReplication > 1) {
227       for (HRegionInfo regionInfo : regionInfos) {
228         for (int i = 1; i < regionReplication; i++) {
229           HRegionInfo regionInfoForReplica =
230             RegionReplicaUtil.getRegionInfoForReplica(regionInfo, i);
231           // Delete the regions from AssignmentManager
232           masterServices.getAssignmentManager().getRegionStates()
233             .deleteRegion(regionInfoForReplica);
234           // Delete the regions from ServerManager
235           masterServices.getServerManager().removeRegion(regionInfoForReplica);
236         }
237       }
238     }
239   }
240 
241   @Override
242   protected void completed(final Throwable exception) {
243     this.stopped = true;
244     if (exception != null) {
245       status.abort("Restore snapshot '" + snapshot.getName() + "' failed because " +
246           exception.getMessage());
247     } else {
248       status.markComplete("Restore snapshot '"+ snapshot.getName() +"'!");
249     }
250     metricsSnapshot.addSnapshotRestore(status.getCompletionTimestamp() - status.getStartTime());
251     super.completed(exception);
252   }
253 
254   @Override
255   public boolean isFinished() {
256     return this.stopped;
257   }
258 
259   @Override
260   public long getCompletionTimestamp() {
261     return this.status.getCompletionTimestamp();
262   }
263 
264   @Override
265   public SnapshotDescription getSnapshot() {
266     return snapshot;
267   }
268 
269   @Override
270   public void cancel(String why) {
271     if (this.stopped) return;
272     this.stopped = true;
273     String msg = "Stopping restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)
274         + " because: " + why;
275     LOG.info(msg);
276     CancellationException ce = new CancellationException(why);
277     this.monitor.receive(new ForeignException(masterServices.getServerName().toString(), ce));
278   }
279 
280   @Override
281   public ForeignException getExceptionIfFailed() {
282     return this.monitor.getException();
283   }
284 
285   @Override
286   public void rethrowExceptionIfFailed() throws ForeignException {
287     monitor.rethrowException();
288   }
289 }