View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.procedure;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.OutputStream;
24  import java.util.HashMap;
25  import java.util.HashSet;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.concurrent.atomic.AtomicBoolean;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.MetaTableAccessor;
34  import org.apache.hadoop.hbase.ServerName;
35  import org.apache.hadoop.hbase.TableName;
36  import org.apache.hadoop.hbase.TableNotDisabledException;
37  import org.apache.hadoop.hbase.TableNotFoundException;
38  import org.apache.hadoop.hbase.TableStateManager;
39  import org.apache.hadoop.hbase.classification.InterfaceAudience;
40  import org.apache.hadoop.hbase.exceptions.HBaseException;
41  import org.apache.hadoop.hbase.master.AssignmentManager;
42  import org.apache.hadoop.hbase.master.BulkAssigner;
43  import org.apache.hadoop.hbase.master.GeneralBulkAssigner;
44  import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
45  import org.apache.hadoop.hbase.master.MasterServices;
46  import org.apache.hadoop.hbase.master.RegionStates;
47  import org.apache.hadoop.hbase.master.ServerManager;
48  import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
49  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
50  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
51  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.EnableTableState;
52  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
53  import org.apache.hadoop.hbase.security.User;
54  import org.apache.hadoop.hbase.util.Pair;
55  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
56  
57  @InterfaceAudience.Private
58  public class EnableTableProcedure
59      extends StateMachineProcedure<MasterProcedureEnv, EnableTableState>
60      implements TableProcedureInterface {
61    private static final Log LOG = LogFactory.getLog(EnableTableProcedure.class);
62  
63    private final AtomicBoolean aborted = new AtomicBoolean(false);
64  
65    // This is for back compatible with 1.0 asynchronized operations.
66    private final ProcedurePrepareLatch syncLatch;
67  
68    private TableName tableName;
69    private boolean skipTableStateCheck;
70    private User user;
71  
72    private Boolean traceEnabled = null;
73  
74    public EnableTableProcedure() {
75      syncLatch = null;
76    }
77  
78    /**
79     * Constructor
80     * @param env MasterProcedureEnv
81     * @param tableName the table to operate on
82     * @param skipTableStateCheck whether to check table state
83     */
84    public EnableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
85        final boolean skipTableStateCheck) {
86      this(env, tableName, skipTableStateCheck, null);
87    }
88  
89    /**
90     * Constructor
91     * @param env MasterProcedureEnv
92     * @param tableName the table to operate on
93     * @param skipTableStateCheck whether to check table state
94     */
95    public EnableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
96        final boolean skipTableStateCheck, final ProcedurePrepareLatch syncLatch) {
97      this.tableName = tableName;
98      this.skipTableStateCheck = skipTableStateCheck;
99      this.user = env.getRequestUser();
100     this.setOwner(this.user.getShortName());
101 
102     // Compatible with 1.0: We use latch to make sure that this procedure implementation is
103     // compatible with 1.0 asynchronized operations. We need to lock the table and check
104     // whether the Enable operation could be performed (table exists and offline; table state
105     // is DISABLED). Once it is done, we are good to release the latch and the client can
106     // start asynchronously wait for the operation.
107     //
108     // Note: the member syncLatch could be null if we are in failover or recovery scenario.
109     // This is ok for backward compatible, as 1.0 client would not able to peek at procedure.
110     this.syncLatch = syncLatch;
111   }
112 
113   @Override
114   protected Flow executeFromState(final MasterProcedureEnv env, final EnableTableState state)
115       throws InterruptedException {
116     if (isTraceEnabled()) {
117       LOG.trace(this + " execute state=" + state);
118     }
119 
120     try {
121       switch (state) {
122       case ENABLE_TABLE_PREPARE:
123         if (prepareEnable(env)) {
124           setNextState(EnableTableState.ENABLE_TABLE_PRE_OPERATION);
125         } else {
126           assert isFailed() : "enable should have an exception here";
127           return Flow.NO_MORE_STATE;
128         }
129         break;
130       case ENABLE_TABLE_PRE_OPERATION:
131         preEnable(env, state);
132         setNextState(EnableTableState.ENABLE_TABLE_SET_ENABLING_TABLE_STATE);
133         break;
134       case ENABLE_TABLE_SET_ENABLING_TABLE_STATE:
135         setTableStateToEnabling(env, tableName);
136         setNextState(EnableTableState.ENABLE_TABLE_MARK_REGIONS_ONLINE);
137         break;
138       case ENABLE_TABLE_MARK_REGIONS_ONLINE:
139         markRegionsOnline(env, tableName, true);
140         setNextState(EnableTableState.ENABLE_TABLE_SET_ENABLED_TABLE_STATE);
141         break;
142       case ENABLE_TABLE_SET_ENABLED_TABLE_STATE:
143         setTableStateToEnabled(env, tableName);
144         setNextState(EnableTableState.ENABLE_TABLE_POST_OPERATION);
145         break;
146       case ENABLE_TABLE_POST_OPERATION:
147         postEnable(env, state);
148         return Flow.NO_MORE_STATE;
149       default:
150         throw new UnsupportedOperationException("unhandled state=" + state);
151       }
152     } catch (HBaseException|IOException e) {
153       LOG.error("Error trying to enable table=" + tableName + " state=" + state, e);
154       setFailure("master-enable-table", e);
155     }
156     return Flow.HAS_MORE_STATE;
157   }
158 
159   @Override
160   protected void rollbackState(final MasterProcedureEnv env, final EnableTableState state)
161       throws IOException {
162     if (isTraceEnabled()) {
163       LOG.trace(this + " rollback state=" + state);
164     }
165     try {
166       switch (state) {
167       case ENABLE_TABLE_POST_OPERATION:
168         // TODO-MAYBE: call the coprocessor event to undo (eg. DisableTableProcedure.preDisable())?
169         break;
170       case ENABLE_TABLE_SET_ENABLED_TABLE_STATE:
171         DisableTableProcedure.setTableStateToDisabling(env, tableName);
172         break;
173       case ENABLE_TABLE_MARK_REGIONS_ONLINE:
174         markRegionsOfflineDuringRecovery(env);
175         break;
176       case ENABLE_TABLE_SET_ENABLING_TABLE_STATE:
177         DisableTableProcedure.setTableStateToDisabled(env, tableName);
178         break;
179       case ENABLE_TABLE_PRE_OPERATION:
180         // TODO-MAYBE: call the coprocessor event to undo (eg. DisableTableProcedure.postDisable())?
181         break;
182       case ENABLE_TABLE_PREPARE:
183         // Nothing to undo for this state.
184         // We do need to count down the latch count so that we don't stuck.
185         ProcedurePrepareLatch.releaseLatch(syncLatch, this);
186         break;
187       default:
188         throw new UnsupportedOperationException("unhandled state=" + state);
189       }
190     } catch (HBaseException e) {
191       LOG.warn("Failed enable table rollback attempt step=" + state + " table=" + tableName, e);
192       throw new IOException(e);
193     } catch (IOException e) {
194       // This will be retried. Unless there is a bug in the code,
195       // this should be just a "temporary error" (e.g. network down)
196       LOG.warn("Failed enable table rollback attempt step=" + state + " table=" + tableName, e);
197       throw e;
198     }
199   }
200 
201   @Override
202   protected EnableTableState getState(final int stateId) {
203     return EnableTableState.valueOf(stateId);
204   }
205 
206   @Override
207   protected int getStateId(final EnableTableState state) {
208     return state.getNumber();
209   }
210 
211   @Override
212   protected EnableTableState getInitialState() {
213     return EnableTableState.ENABLE_TABLE_PREPARE;
214   }
215 
216   @Override
217   protected void setNextState(final EnableTableState state) {
218     if (aborted.get()) {
219       setAbortFailure("Enable-table", "abort requested");
220     } else {
221       super.setNextState(state);
222     }
223   }
224 
225   @Override
226   public boolean abort(final MasterProcedureEnv env) {
227     aborted.set(true);
228     return true;
229   }
230 
231   @Override
232   protected boolean acquireLock(final MasterProcedureEnv env) {
233     if (env.waitInitialized(this)) return false;
234     return env.getProcedureQueue().tryAcquireTableExclusiveLock(this, tableName);
235   }
236 
237   @Override
238   protected void releaseLock(final MasterProcedureEnv env) {
239     env.getProcedureQueue().releaseTableExclusiveLock(this, tableName);
240   }
241 
242   @Override
243   public void serializeStateData(final OutputStream stream) throws IOException {
244     super.serializeStateData(stream);
245 
246     MasterProcedureProtos.EnableTableStateData.Builder enableTableMsg =
247         MasterProcedureProtos.EnableTableStateData.newBuilder()
248             .setUserInfo(MasterProcedureUtil.toProtoUserInfo(user))
249             .setTableName(ProtobufUtil.toProtoTableName(tableName))
250             .setSkipTableStateCheck(skipTableStateCheck);
251 
252     enableTableMsg.build().writeDelimitedTo(stream);
253   }
254 
255   @Override
256   public void deserializeStateData(final InputStream stream) throws IOException {
257     super.deserializeStateData(stream);
258 
259     MasterProcedureProtos.EnableTableStateData enableTableMsg =
260         MasterProcedureProtos.EnableTableStateData.parseDelimitedFrom(stream);
261     user = MasterProcedureUtil.toUserInfo(enableTableMsg.getUserInfo());
262     tableName = ProtobufUtil.toTableName(enableTableMsg.getTableName());
263     skipTableStateCheck = enableTableMsg.getSkipTableStateCheck();
264   }
265 
266   @Override
267   public void toStringClassDetails(StringBuilder sb) {
268     sb.append(getClass().getSimpleName());
269     sb.append(" (table=");
270     sb.append(tableName);
271     sb.append(")");
272   }
273 
274   @Override
275   public TableName getTableName() {
276     return tableName;
277   }
278 
279   @Override
280   public TableOperationType getTableOperationType() {
281     return TableOperationType.ENABLE;
282   }
283 
284 
285   /**
286    * Action before any real action of enabling table. Set the exception in the procedure instead
287    * of throwing it.  This approach is to deal with backward compatible with 1.0.
288    * @param env MasterProcedureEnv
289    * @return whether the table passes the necessary checks
290    * @throws IOException
291    */
292   private boolean prepareEnable(final MasterProcedureEnv env) throws IOException {
293     boolean canTableBeEnabled = true;
294 
295     // Check whether table exists
296     if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) {
297       setFailure("master-enable-table", new TableNotFoundException(tableName));
298       canTableBeEnabled = false;
299     } else if (!skipTableStateCheck) {
300       // There could be multiple client requests trying to disable or enable
301       // the table at the same time. Ensure only the first request is honored
302       // After that, no other requests can be accepted until the table reaches
303       // DISABLED or ENABLED.
304       //
305       // Note: in 1.0 release, we called TableStateManager.setTableStateIfInStates() to set
306       // the state to ENABLING from DISABLED. The implementation was done before table lock
307       // was implemented. With table lock, there is no need to set the state here (it will
308       // set the state later on). A quick state check should be enough for us to move forward.
309       TableStateManager tsm = env.getMasterServices().getAssignmentManager().getTableStateManager();
310       if (!tsm.isTableState(tableName, ZooKeeperProtos.Table.State.DISABLED)) {
311         LOG.info("Table " + tableName + " isn't disabled; skipping enable");
312         setFailure("master-enable-table", new TableNotDisabledException(this.tableName));
313         canTableBeEnabled = false;
314       }
315     }
316 
317     // We are done the check. Future actions in this procedure could be done asynchronously.
318     ProcedurePrepareLatch.releaseLatch(syncLatch, this);
319 
320     return canTableBeEnabled;
321   }
322 
323   /**
324    * Action before enabling table.
325    * @param env MasterProcedureEnv
326    * @param state the procedure state
327    * @throws IOException
328    * @throws InterruptedException
329    */
330   private void preEnable(final MasterProcedureEnv env, final EnableTableState state)
331       throws IOException, InterruptedException {
332     runCoprocessorAction(env, state);
333   }
334 
335   /**
336    * Mark table state to Enabling
337    * @param env MasterProcedureEnv
338    * @param tableName the target table
339    * @throws IOException
340    */
341   protected static void setTableStateToEnabling(
342       final MasterProcedureEnv env,
343       final TableName tableName) throws HBaseException, IOException {
344     // Set table disabling flag up in zk.
345     LOG.info("Attempting to enable the table " + tableName);
346     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
347       tableName,
348       ZooKeeperProtos.Table.State.ENABLING);
349   }
350 
351   /**
352    * Mark offline regions of the table online with retry
353    * @param env MasterProcedureEnv
354    * @param tableName the target table
355    * @param retryRequired whether to retry if the first run failed
356    * @throws IOException
357    */
358   protected static void markRegionsOnline(
359       final MasterProcedureEnv env,
360       final TableName tableName,
361       final Boolean retryRequired) throws IOException {
362     // This is best effort approach to make all regions of a table online.  If we fail to do
363     // that, it is ok that the table has some offline regions; user can fix it manually.
364 
365     // Dev consideration: add a config to control max number of retry. For now, it is hard coded.
366     int maxTry = (retryRequired ? 10 : 1);
367     boolean done = false;
368 
369     do {
370       try {
371         done = markRegionsOnline(env, tableName);
372         if (done) {
373           break;
374         }
375         maxTry--;
376       } catch (Exception e) {
377         LOG.warn("Received exception while marking regions online. tries left: " + maxTry, e);
378         maxTry--;
379         if (maxTry > 0) {
380           continue; // we still have some retry left, try again.
381         }
382         throw e;
383       }
384     } while (maxTry > 0);
385 
386     if (!done) {
387       LOG.warn("Some or all regions of the Table '" + tableName + "' were offline");
388     }
389   }
390 
391   /**
392    * Mark offline regions of the table online
393    * @param env MasterProcedureEnv
394    * @param tableName the target table
395    * @return whether the operation is fully completed or being interrupted.
396    * @throws IOException
397    */
398   private static boolean markRegionsOnline(final MasterProcedureEnv env, final TableName tableName)
399       throws IOException {
400     final AssignmentManager assignmentManager = env.getMasterServices().getAssignmentManager();
401     final MasterServices masterServices = env.getMasterServices();
402     final ServerManager serverManager = masterServices.getServerManager();
403     boolean done = false;
404     // Get the regions of this table. We're done when all listed
405     // tables are onlined.
406     List<Pair<HRegionInfo, ServerName>> tableRegionsAndLocations;
407 
408     if (TableName.META_TABLE_NAME.equals(tableName)) {
409       tableRegionsAndLocations =
410           new MetaTableLocator().getMetaRegionsAndLocations(masterServices.getZooKeeper());
411     } else {
412       tableRegionsAndLocations =
413           MetaTableAccessor.getTableRegionsAndLocations(
414             masterServices.getZooKeeper(), masterServices.getConnection(), tableName, true);
415     }
416 
417     int countOfRegionsInTable = tableRegionsAndLocations.size();
418     Map<HRegionInfo, ServerName> regionsToAssign =
419         regionsToAssignWithServerName(env, tableRegionsAndLocations);
420 
421     // need to potentially create some regions for the replicas
422     List<HRegionInfo> unrecordedReplicas =
423         AssignmentManager.replicaRegionsNotRecordedInMeta(new HashSet<HRegionInfo>(
424             regionsToAssign.keySet()), masterServices);
425     Map<ServerName, List<HRegionInfo>> srvToUnassignedRegs =
426         assignmentManager.getBalancer().roundRobinAssignment(unrecordedReplicas,
427           serverManager.getOnlineServersList());
428     if (srvToUnassignedRegs != null) {
429       for (Map.Entry<ServerName, List<HRegionInfo>> entry : srvToUnassignedRegs.entrySet()) {
430         for (HRegionInfo h : entry.getValue()) {
431           regionsToAssign.put(h, entry.getKey());
432         }
433       }
434     }
435 
436     int offlineRegionsCount = regionsToAssign.size();
437 
438     LOG.info("Table '" + tableName + "' has " + countOfRegionsInTable + " regions, of which "
439         + offlineRegionsCount + " are offline.");
440     if (offlineRegionsCount == 0) {
441       return true;
442     }
443 
444     List<ServerName> onlineServers = serverManager.createDestinationServersList();
445     Map<ServerName, List<HRegionInfo>> bulkPlan =
446         env.getMasterServices().getAssignmentManager().getBalancer()
447             .retainAssignment(regionsToAssign, onlineServers);
448     if (bulkPlan != null) {
449       LOG.info("Bulk assigning " + offlineRegionsCount + " region(s) across " + bulkPlan.size()
450           + " server(s), retainAssignment=true");
451 
452       BulkAssigner ba = new GeneralBulkAssigner(masterServices, bulkPlan, assignmentManager, true);
453       try {
454         if (ba.bulkAssign()) {
455           done = true;
456         }
457       } catch (InterruptedException e) {
458         LOG.warn("Enable operation was interrupted when enabling table '" + tableName + "'");
459         // Preserve the interrupt.
460         Thread.currentThread().interrupt();
461       }
462     } else {
463       LOG.info("Balancer was unable to find suitable servers for table " + tableName
464           + ", leaving unassigned");
465     }
466     return done;
467   }
468 
469   /**
470    * Mark regions of the table offline during recovery
471    * @param env MasterProcedureEnv
472    */
473   private void markRegionsOfflineDuringRecovery(final MasterProcedureEnv env) {
474     try {
475       // This is a best effort attempt. We will move on even it does not succeed. We will retry
476       // several times until we giving up.
477       DisableTableProcedure.markRegionsOffline(env, tableName, true);
478     } catch (Exception e) {
479       LOG.debug("Failed to offline all regions of table " + tableName + ". Ignoring", e);
480     }
481   }
482 
483   /**
484    * Mark table state to Enabled
485    * @param env MasterProcedureEnv
486    * @throws IOException
487    */
488   protected static void setTableStateToEnabled(
489       final MasterProcedureEnv env,
490       final TableName tableName) throws HBaseException, IOException {
491     // Flip the table to Enabled
492     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
493       tableName,
494       ZooKeeperProtos.Table.State.ENABLED);
495     LOG.info("Table '" + tableName + "' was successfully enabled.");
496   }
497 
498   /**
499    * Action after enabling table.
500    * @param env MasterProcedureEnv
501    * @param state the procedure state
502    * @throws IOException
503    * @throws InterruptedException
504    */
505   private void postEnable(final MasterProcedureEnv env, final EnableTableState state)
506       throws IOException, InterruptedException {
507     runCoprocessorAction(env, state);
508   }
509 
510   /**
511    * The procedure could be restarted from a different machine. If the variable is null, we need to
512    * retrieve it.
513    * @return traceEnabled
514    */
515   private Boolean isTraceEnabled() {
516     if (traceEnabled == null) {
517       traceEnabled = LOG.isTraceEnabled();
518     }
519     return traceEnabled;
520   }
521 
522   /**
523    * @param regionsInMeta
524    * @return List of regions neither in transition nor assigned.
525    * @throws IOException
526    */
527   private static Map<HRegionInfo, ServerName> regionsToAssignWithServerName(
528       final MasterProcedureEnv env,
529       final List<Pair<HRegionInfo, ServerName>> regionsInMeta) throws IOException {
530     Map<HRegionInfo, ServerName> regionsToAssign =
531         new HashMap<HRegionInfo, ServerName>(regionsInMeta.size());
532     RegionStates regionStates = env.getMasterServices().getAssignmentManager().getRegionStates();
533     for (Pair<HRegionInfo, ServerName> regionLocation : regionsInMeta) {
534       HRegionInfo hri = regionLocation.getFirst();
535       ServerName sn = regionLocation.getSecond();
536       if (regionStates.isRegionOffline(hri)) {
537         regionsToAssign.put(hri, sn);
538       } else {
539         if (LOG.isDebugEnabled()) {
540           LOG.debug("Skipping assign for the region " + hri + " during enable table "
541               + hri.getTable() + " because its already in tranition or assigned.");
542         }
543       }
544     }
545     return regionsToAssign;
546   }
547 
548   /**
549    * Coprocessor Action.
550    * @param env MasterProcedureEnv
551    * @param state the procedure state
552    * @throws IOException
553    * @throws InterruptedException
554    */
555   private void runCoprocessorAction(final MasterProcedureEnv env, final EnableTableState state)
556       throws IOException, InterruptedException {
557     final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
558     if (cpHost != null) {
559       switch (state) {
560         case ENABLE_TABLE_PRE_OPERATION:
561           cpHost.preEnableTableHandler(getTableName(), user);
562           break;
563         case ENABLE_TABLE_POST_OPERATION:
564           cpHost.postEnableTableHandler(getTableName(), user);
565           break;
566         default:
567           throw new UnsupportedOperationException(this + " unhandled state=" + state);
568       }
569     }
570   }
571 }