View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.procedure;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.OutputStream;
24  import java.util.List;
25  import java.util.concurrent.ExecutorService;
26  import java.util.concurrent.atomic.AtomicBoolean;
27  
28  import org.apache.commons.logging.Log;
29  import org.apache.commons.logging.LogFactory;
30  import org.apache.hadoop.hbase.HRegionInfo;
31  import org.apache.hadoop.hbase.MetaTableAccessor;
32  import org.apache.hadoop.hbase.TableName;
33  import org.apache.hadoop.hbase.TableNotEnabledException;
34  import org.apache.hadoop.hbase.TableNotFoundException;
35  import org.apache.hadoop.hbase.TableStateManager;
36  import org.apache.hadoop.hbase.classification.InterfaceAudience;
37  import org.apache.hadoop.hbase.constraint.ConstraintException;
38  import org.apache.hadoop.hbase.exceptions.HBaseException;
39  import org.apache.hadoop.hbase.master.AssignmentManager;
40  import org.apache.hadoop.hbase.master.BulkAssigner;
41  import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
42  import org.apache.hadoop.hbase.master.RegionState;
43  import org.apache.hadoop.hbase.master.RegionStates;
44  import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
45  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
46  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
47  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState;
48  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
49  import org.apache.hadoop.hbase.security.User;
50  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
51  import org.apache.htrace.Trace;
52  
53  @InterfaceAudience.Private
54  public class DisableTableProcedure
55      extends StateMachineProcedure<MasterProcedureEnv, DisableTableState>
56      implements TableProcedureInterface {
57    private static final Log LOG = LogFactory.getLog(DisableTableProcedure.class);
58  
59    private final AtomicBoolean aborted = new AtomicBoolean(false);
60  
61    // This is for back compatible with 1.0 asynchronized operations.
62    private final ProcedurePrepareLatch syncLatch;
63  
64    private TableName tableName;
65    private boolean skipTableStateCheck;
66    private User user;
67  
68    private Boolean traceEnabled = null;
69  
70    enum MarkRegionOfflineOpResult {
71      MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL,
72      BULK_ASSIGN_REGIONS_FAILED,
73      MARK_ALL_REGIONS_OFFLINE_INTERRUPTED,
74    }
75  
76    public DisableTableProcedure() {
77      syncLatch = null;
78    }
79  
80    /**
81     * Constructor
82     * @param env MasterProcedureEnv
83     * @param tableName the table to operate on
84     * @param skipTableStateCheck whether to check table state
85     */
86    public DisableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
87        final boolean skipTableStateCheck) {
88      this(env, tableName, skipTableStateCheck, null);
89    }
90  
91    /**
92     * Constructor
93     * @param env MasterProcedureEnv
94     * @param tableName the table to operate on
95     * @param skipTableStateCheck whether to check table state
96     */
97    public DisableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
98        final boolean skipTableStateCheck, final ProcedurePrepareLatch syncLatch) {
99      this.tableName = tableName;
100     this.skipTableStateCheck = skipTableStateCheck;
101     this.user = env.getRequestUser();
102     this.setOwner(this.user.getShortName());
103 
104     // Compatible with 1.0: We use latch to make sure that this procedure implementation is
105     // compatible with 1.0 asynchronized operations. We need to lock the table and check
106     // whether the Disable operation could be performed (table exists and online; table state
107     // is ENABLED). Once it is done, we are good to release the latch and the client can
108     // start asynchronously wait for the operation.
109     //
110     // Note: the member syncLatch could be null if we are in failover or recovery scenario.
111     // This is ok for backward compatible, as 1.0 client would not able to peek at procedure.
112     this.syncLatch = syncLatch;
113   }
114 
115   @Override
116   protected Flow executeFromState(final MasterProcedureEnv env, final DisableTableState state)
117       throws InterruptedException {
118     if (isTraceEnabled()) {
119       LOG.trace(this + " execute state=" + state);
120     }
121 
122     try {
123       switch (state) {
124       case DISABLE_TABLE_PREPARE:
125         if (prepareDisable(env)) {
126           setNextState(DisableTableState.DISABLE_TABLE_PRE_OPERATION);
127         } else {
128           assert isFailed() : "disable should have an exception here";
129           return Flow.NO_MORE_STATE;
130         }
131         break;
132       case DISABLE_TABLE_PRE_OPERATION:
133         preDisable(env, state);
134         setNextState(DisableTableState.DISABLE_TABLE_SET_DISABLING_TABLE_STATE);
135         break;
136       case DISABLE_TABLE_SET_DISABLING_TABLE_STATE:
137         setTableStateToDisabling(env, tableName);
138         setNextState(DisableTableState.DISABLE_TABLE_MARK_REGIONS_OFFLINE);
139         break;
140       case DISABLE_TABLE_MARK_REGIONS_OFFLINE:
141         if (markRegionsOffline(env, tableName, true) ==
142             MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
143           setNextState(DisableTableState.DISABLE_TABLE_SET_DISABLED_TABLE_STATE);
144         } else {
145           LOG.trace("Retrying later to disable the missing regions");
146         }
147         break;
148       case DISABLE_TABLE_SET_DISABLED_TABLE_STATE:
149         setTableStateToDisabled(env, tableName);
150         setNextState(DisableTableState.DISABLE_TABLE_POST_OPERATION);
151         break;
152       case DISABLE_TABLE_POST_OPERATION:
153         postDisable(env, state);
154         return Flow.NO_MORE_STATE;
155       default:
156         throw new UnsupportedOperationException("unhandled state=" + state);
157       }
158     } catch (HBaseException|IOException e) {
159       LOG.warn("Retriable error trying to disable table=" + tableName + " state=" + state, e);
160     }
161     return Flow.HAS_MORE_STATE;
162   }
163 
164   @Override
165   protected void rollbackState(final MasterProcedureEnv env, final DisableTableState state)
166       throws IOException {
167     if (state == DisableTableState.DISABLE_TABLE_PREPARE) {
168       undoTableStateChange(env);
169       ProcedurePrepareLatch.releaseLatch(syncLatch, this);
170       return;
171     }
172 
173     // The delete doesn't have a rollback. The execution will succeed, at some point.
174     throw new UnsupportedOperationException("unhandled state=" + state);
175   }
176 
177   @Override
178   protected DisableTableState getState(final int stateId) {
179     return DisableTableState.valueOf(stateId);
180   }
181 
182   @Override
183   protected int getStateId(final DisableTableState state) {
184     return state.getNumber();
185   }
186 
187   @Override
188   protected DisableTableState getInitialState() {
189     return DisableTableState.DISABLE_TABLE_PREPARE;
190   }
191 
192   @Override
193   protected void setNextState(final DisableTableState state) {
194     if (aborted.get()) {
195       setAbortFailure("disable-table", "abort requested");
196     } else {
197       super.setNextState(state);
198     }
199   }
200 
201   @Override
202   public boolean abort(final MasterProcedureEnv env) {
203     aborted.set(true);
204     return true;
205   }
206 
207   @Override
208   protected boolean acquireLock(final MasterProcedureEnv env) {
209     if (env.waitInitialized(this)) return false;
210     return env.getProcedureQueue().tryAcquireTableExclusiveLock(this, tableName);
211   }
212 
213   @Override
214   protected void releaseLock(final MasterProcedureEnv env) {
215     env.getProcedureQueue().releaseTableExclusiveLock(this, tableName);
216   }
217 
218   @Override
219   public void serializeStateData(final OutputStream stream) throws IOException {
220     super.serializeStateData(stream);
221 
222     MasterProcedureProtos.DisableTableStateData.Builder disableTableMsg =
223         MasterProcedureProtos.DisableTableStateData.newBuilder()
224             .setUserInfo(MasterProcedureUtil.toProtoUserInfo(user))
225             .setTableName(ProtobufUtil.toProtoTableName(tableName))
226             .setSkipTableStateCheck(skipTableStateCheck);
227 
228     disableTableMsg.build().writeDelimitedTo(stream);
229   }
230 
231   @Override
232   public void deserializeStateData(final InputStream stream) throws IOException {
233     super.deserializeStateData(stream);
234 
235     MasterProcedureProtos.DisableTableStateData disableTableMsg =
236         MasterProcedureProtos.DisableTableStateData.parseDelimitedFrom(stream);
237     user = MasterProcedureUtil.toUserInfo(disableTableMsg.getUserInfo());
238     tableName = ProtobufUtil.toTableName(disableTableMsg.getTableName());
239     skipTableStateCheck = disableTableMsg.getSkipTableStateCheck();
240   }
241 
242   @Override
243   public void toStringClassDetails(StringBuilder sb) {
244     sb.append(getClass().getSimpleName());
245     sb.append(" (table=");
246     sb.append(tableName);
247     sb.append(")");
248   }
249 
250   @Override
251   public TableName getTableName() {
252     return tableName;
253   }
254 
255   @Override
256   public TableOperationType getTableOperationType() {
257     return TableOperationType.DISABLE;
258   }
259 
260   /**
261    * Action before any real action of disabling table. Set the exception in the procedure instead
262    * of throwing it.  This approach is to deal with backward compatible with 1.0.
263    * @param env MasterProcedureEnv
264    * @throws HBaseException
265    * @throws IOException
266    */
267   private boolean prepareDisable(final MasterProcedureEnv env) throws HBaseException, IOException {
268     boolean canTableBeDisabled = true;
269     if (tableName.equals(TableName.META_TABLE_NAME)) {
270       setFailure("master-disable-table", new ConstraintException("Cannot disable catalog table"));
271       canTableBeDisabled = false;
272     } else if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) {
273       setFailure("master-disable-table", new TableNotFoundException(tableName));
274       canTableBeDisabled = false;
275     } else if (!skipTableStateCheck) {
276       // There could be multiple client requests trying to disable or enable
277       // the table at the same time. Ensure only the first request is honored
278       // After that, no other requests can be accepted until the table reaches
279       // DISABLED or ENABLED.
280       //
281       // Note: A quick state check should be enough for us to move forward. However, instead of
282       // calling TableStateManager.isTableState() to just check the state, we called
283       // TableStateManager.setTableStateIfInStates() to set the state to DISABLING from ENABLED.
284       // This is because we treat empty state as enabled from 0.92-clusters. See
285       // ZKTableStateManager.setTableStateIfInStates() that has a hack solution to work around
286       // this issue.
287       TableStateManager tsm =
288         env.getMasterServices().getAssignmentManager().getTableStateManager();
289       if (!tsm.setTableStateIfInStates(tableName, ZooKeeperProtos.Table.State.DISABLING,
290             ZooKeeperProtos.Table.State.DISABLING, ZooKeeperProtos.Table.State.ENABLED)) {
291         LOG.info("Table " + tableName + " isn't enabled; skipping disable");
292         setFailure("master-disable-table", new TableNotEnabledException(tableName));
293         canTableBeDisabled = false;
294       }
295     }
296 
297     // We are done the check. Future actions in this procedure could be done asynchronously.
298     ProcedurePrepareLatch.releaseLatch(syncLatch, this);
299 
300     return canTableBeDisabled;
301   }
302 
303   /**
304    * Rollback of table state change in prepareDisable()
305    * @param env MasterProcedureEnv
306    */
307   @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
308       justification="Intended")
309   private void undoTableStateChange(final MasterProcedureEnv env) {
310     if (!skipTableStateCheck) {
311       try {
312         // If the state was changed, undo it.
313         if (env.getMasterServices().getAssignmentManager().getTableStateManager().isTableState(
314             tableName, ZooKeeperProtos.Table.State.DISABLING)) {
315           EnableTableProcedure.setTableStateToEnabled(env, tableName);
316         }
317       } catch (Exception e) {
318         // Ignore exception.
319         LOG.trace(e.getMessage());
320       }
321     }
322   }
323 
324   /**
325    * Action before disabling table.
326    * @param env MasterProcedureEnv
327    * @param state the procedure state
328    * @throws IOException
329    * @throws InterruptedException
330    */
331   protected void preDisable(final MasterProcedureEnv env, final DisableTableState state)
332       throws IOException, InterruptedException {
333     runCoprocessorAction(env, state);
334   }
335 
336   /**
337    * Mark table state to Disabling
338    * @param env MasterProcedureEnv
339    * @throws IOException
340    */
341   protected static void setTableStateToDisabling(
342       final MasterProcedureEnv env,
343       final TableName tableName) throws HBaseException, IOException {
344     // Set table disabling flag up in zk.
345     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
346       tableName,
347       ZooKeeperProtos.Table.State.DISABLING);
348   }
349 
350   /**
351    * Mark regions of the table offline with retries
352    * @param env MasterProcedureEnv
353    * @param tableName the target table
354    * @param retryRequired whether to retry if the first run failed
355    * @return whether the operation is fully completed or being interrupted.
356    * @throws IOException
357    */
358   protected static MarkRegionOfflineOpResult markRegionsOffline(
359       final MasterProcedureEnv env,
360       final TableName tableName,
361       final Boolean retryRequired) throws IOException {
362     // Dev consideration: add a config to control max number of retry. For now, it is hard coded.
363     int maxTry = (retryRequired ? 10 : 1);
364     MarkRegionOfflineOpResult operationResult =
365         MarkRegionOfflineOpResult.BULK_ASSIGN_REGIONS_FAILED;
366     do {
367       try {
368         operationResult = markRegionsOffline(env, tableName);
369         if (operationResult == MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
370           break;
371         }
372         maxTry--;
373       } catch (Exception e) {
374         LOG.warn("Received exception while marking regions online. tries left: " + maxTry, e);
375         maxTry--;
376         if (maxTry > 0) {
377           continue; // we still have some retry left, try again.
378         }
379         throw e;
380       }
381     } while (maxTry > 0);
382 
383     if (operationResult != MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
384       LOG.warn("Some or all regions of the Table '" + tableName + "' were still online");
385     }
386 
387     return operationResult;
388   }
389 
390   /**
391    * Mark regions of the table offline
392    * @param env MasterProcedureEnv
393    * @param tableName the target table
394    * @return whether the operation is fully completed or being interrupted.
395    * @throws IOException
396    */
397   private static MarkRegionOfflineOpResult markRegionsOffline(
398       final MasterProcedureEnv env,
399       final TableName tableName) throws IOException {
400     // Get list of online regions that are of this table.  Regions that are
401     // already closed will not be included in this list; i.e. the returned
402     // list is not ALL regions in a table, its all online regions according
403     // to the in-memory state on this master.
404     MarkRegionOfflineOpResult operationResult =
405         MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL;
406     final List<HRegionInfo> regions =
407         env.getMasterServices().getAssignmentManager().getRegionStates()
408             .getRegionsOfTable(tableName);
409     if (regions.size() > 0) {
410       LOG.info("Offlining " + regions.size() + " regions.");
411 
412       BulkDisabler bd = new BulkDisabler(env, tableName, regions);
413       try {
414         if (!bd.bulkAssign()) {
415           operationResult = MarkRegionOfflineOpResult.BULK_ASSIGN_REGIONS_FAILED;
416         }
417       } catch (InterruptedException e) {
418         LOG.warn("Disable was interrupted");
419         // Preserve the interrupt.
420         Thread.currentThread().interrupt();
421         operationResult = MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_INTERRUPTED;
422       }
423     }
424     return operationResult;
425   }
426 
427   /**
428    * Mark table state to Disabled
429    * @param env MasterProcedureEnv
430    * @throws IOException
431    */
432   protected static void setTableStateToDisabled(
433       final MasterProcedureEnv env,
434       final TableName tableName) throws HBaseException, IOException {
435     // Flip the table to disabled
436     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
437       tableName,
438       ZooKeeperProtos.Table.State.DISABLED);
439     LOG.info("Disabled table, " + tableName + ", is completed.");
440   }
441 
442   /**
443    * Action after disabling table.
444    * @param env MasterProcedureEnv
445    * @param state the procedure state
446    * @throws IOException
447    * @throws InterruptedException
448    */
449   protected void postDisable(final MasterProcedureEnv env, final DisableTableState state)
450       throws IOException, InterruptedException {
451     runCoprocessorAction(env, state);
452   }
453 
454   /**
455    * The procedure could be restarted from a different machine. If the variable is null, we need to
456    * retrieve it.
457    * @return traceEnabled
458    */
459   private Boolean isTraceEnabled() {
460     if (traceEnabled == null) {
461       traceEnabled = LOG.isTraceEnabled();
462     }
463     return traceEnabled;
464   }
465 
466   /**
467    * Coprocessor Action.
468    * @param env MasterProcedureEnv
469    * @param state the procedure state
470    * @throws IOException
471    * @throws InterruptedException
472    */
473   private void runCoprocessorAction(final MasterProcedureEnv env, final DisableTableState state)
474       throws IOException, InterruptedException {
475     final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
476     if (cpHost != null) {
477       switch (state) {
478         case DISABLE_TABLE_PRE_OPERATION:
479           cpHost.preDisableTableHandler(tableName, user);
480           break;
481         case DISABLE_TABLE_POST_OPERATION:
482           cpHost.postDisableTableHandler(tableName, user);
483           break;
484         default:
485           throw new UnsupportedOperationException(this + " unhandled state=" + state);
486       }
487     }
488   }
489 
490   /**
491    * Run bulk disable.
492    */
493   private static class BulkDisabler extends BulkAssigner {
494     private final AssignmentManager assignmentManager;
495     private final List<HRegionInfo> regions;
496     private final TableName tableName;
497     private final int waitingTimeForEvents;
498 
499     public BulkDisabler(final MasterProcedureEnv env, final TableName tableName,
500         final List<HRegionInfo> regions) {
501       super(env.getMasterServices());
502       this.assignmentManager = env.getMasterServices().getAssignmentManager();
503       this.tableName = tableName;
504       this.regions = regions;
505       this.waitingTimeForEvents =
506           env.getMasterServices().getConfiguration()
507               .getInt("hbase.master.event.waiting.time", 1000);
508     }
509 
510     @Override
511     protected void populatePool(ExecutorService pool) {
512       RegionStates regionStates = assignmentManager.getRegionStates();
513       for (final HRegionInfo region : regions) {
514         if (regionStates.isRegionInTransition(region)
515             && !regionStates.isRegionInState(region, RegionState.State.FAILED_CLOSE)) {
516           continue;
517         }
518         pool.execute(Trace.wrap("DisableTableHandler.BulkDisabler", new Runnable() {
519           @Override
520           public void run() {
521             assignmentManager.unassign(region);
522           }
523         }));
524       }
525     }
526 
527     @Override
528     protected boolean waitUntilDone(long timeout) throws InterruptedException {
529       long startTime = EnvironmentEdgeManager.currentTime();
530       long remaining = timeout;
531       List<HRegionInfo> regions = null;
532       long lastLogTime = startTime;
533       while (!server.isStopped() && remaining > 0) {
534         Thread.sleep(waitingTimeForEvents);
535         regions = assignmentManager.getRegionStates().getRegionsOfTable(tableName);
536         long now = EnvironmentEdgeManager.currentTime();
537         // Don't log more than once every ten seconds. Its obnoxious. And only log table regions
538         // if we are waiting a while for them to go down...
539         if (LOG.isDebugEnabled() && ((now - lastLogTime) > 10000)) {
540           lastLogTime = now;
541           LOG.debug("Disable waiting until done; " + remaining + " ms remaining; " + regions);
542         }
543         if (regions.isEmpty()) break;
544         remaining = timeout - (now - startTime);
545       }
546       return regions != null && regions.isEmpty();
547     }
548   }
549 }