View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.procedure;
19  
20  import com.google.common.collect.MapMaker;
21  import java.io.IOException;
22  import java.util.Collection;
23  import java.util.HashSet;
24  import java.util.List;
25  import java.util.Set;
26  import java.util.concurrent.ConcurrentMap;
27  import java.util.concurrent.ExecutorService;
28  import java.util.concurrent.RejectedExecutionException;
29  import java.util.concurrent.SynchronousQueue;
30  import java.util.concurrent.ThreadPoolExecutor;
31  import java.util.concurrent.TimeUnit;
32  import org.apache.commons.logging.Log;
33  import org.apache.commons.logging.LogFactory;
34  import org.apache.hadoop.hbase.DaemonThreadFactory;
35  import org.apache.hadoop.hbase.classification.InterfaceAudience;
36  import org.apache.hadoop.hbase.errorhandling.ForeignException;
37  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
38  
39  /**
40   * This is the master side of a distributed complex procedure execution.
41   * <p>
42   * The {@link Procedure} is generic and subclassing or customization shouldn't be
43   * necessary -- any customization should happen just in {@link Subprocedure}s.
44   */
45  @InterfaceAudience.Private
46  public class ProcedureCoordinator {
47    private static final Log LOG = LogFactory.getLog(ProcedureCoordinator.class);
48  
49    final static long KEEP_ALIVE_MILLIS_DEFAULT = 5000;
50    final static long TIMEOUT_MILLIS_DEFAULT = 60000;
51    final static long WAKE_MILLIS_DEFAULT = 500;
52  
53    private final ProcedureCoordinatorRpcs rpcs;
54    private final ExecutorService pool;
55    private final long wakeTimeMillis;
56    private final long timeoutMillis;
57  
58    // Running procedure table.  Maps procedure name to running procedure reference
59    private final ConcurrentMap<String, Procedure> procedures =
60        new MapMaker().concurrencyLevel(4).weakValues().makeMap();
61  
62    /**
63     * Create and start a ProcedureCoordinator.
64     *
65     * The rpc object registers the ProcedureCoordinator and starts any threads in this
66     * constructor.
67     *
68     * @param rpcs
69     * @param pool Used for executing procedures.
70     */
71    public ProcedureCoordinator(ProcedureCoordinatorRpcs rpcs, ThreadPoolExecutor pool) {
72      this(rpcs, pool, TIMEOUT_MILLIS_DEFAULT, WAKE_MILLIS_DEFAULT);
73    }
74  
75    /**
76     * Create and start a ProcedureCoordinator.
77     *
78     * The rpc object registers the ProcedureCoordinator and starts any threads in
79     * this constructor.
80     *
81     * @param rpcs
82     * @param pool Used for executing procedures.
83     * @param timeoutMillis
84     */
85    public ProcedureCoordinator(ProcedureCoordinatorRpcs rpcs, ThreadPoolExecutor pool,
86        long timeoutMillis, long wakeTimeMillis) {
87      this.timeoutMillis = timeoutMillis;
88      this.wakeTimeMillis = wakeTimeMillis;
89      this.rpcs = rpcs;
90      this.pool = pool;
91      this.rpcs.start(this);
92    }
93  
94    /**
95     * Default thread pool for the procedure
96     *
97     * @param coordName
98     * @param opThreads the maximum number of threads to allow in the pool
99     */
100   public static ThreadPoolExecutor defaultPool(String coordName, int opThreads) {
101     return defaultPool(coordName, opThreads, KEEP_ALIVE_MILLIS_DEFAULT);
102   }
103 
104   /**
105    * Default thread pool for the procedure
106    *
107    * @param coordName
108    * @param opThreads the maximum number of threads to allow in the pool
109    * @param keepAliveMillis the maximum time (ms) that excess idle threads will wait for new tasks
110    */
111   public static ThreadPoolExecutor defaultPool(String coordName, int opThreads,
112       long keepAliveMillis) {
113     return new ThreadPoolExecutor(1, opThreads, keepAliveMillis, TimeUnit.MILLISECONDS,
114         new SynchronousQueue<Runnable>(),
115         new DaemonThreadFactory("(" + coordName + ")-proc-coordinator-pool"));
116   }
117 
118   /**
119    * Shutdown the thread pools and release rpc resources
120    * @throws IOException
121    */
122   public void close() throws IOException {
123     // have to use shutdown now to break any latch waiting
124     pool.shutdownNow();
125     rpcs.close();
126   }
127 
128   /**
129    * Submit an procedure to kick off its dependent subprocedures.
130    * @param proc Procedure to execute
131    * @return <tt>true</tt> if the procedure was started correctly, <tt>false</tt> if the
132    *         procedure or any subprocedures could not be started.  Failure could be due to
133    *         submitting a procedure multiple times (or one with the same name), or some sort
134    *         of IO problem.  On errors, the procedure's monitor holds a reference to the exception
135    *         that caused the failure.
136    */
137   boolean submitProcedure(Procedure proc) {
138     // if the submitted procedure was null, then we don't want to run it
139     if (proc == null) {
140       return false;
141     }
142     String procName = proc.getName();
143 
144     // make sure we aren't already running a procedure of that name
145     Procedure oldProc = procedures.get(procName);
146     if (oldProc != null) {
147       // procedures are always eventually completed on both successful and failed execution
148       try {
149         if (!oldProc.isCompleted()) {
150           LOG.warn("Procedure " + procName + " currently running.  Rejecting new request");
151           return false;
152         } else {
153           LOG.debug("Procedure " + procName
154               + " was in running list but was completed.  Accepting new attempt.");
155           if (!procedures.remove(procName, oldProc)) {
156             LOG.warn("Procedure " + procName
157                 + " has been resubmitted by another thread. Rejecting this request.");
158             return false;
159           }
160         }
161       } catch (ForeignException e) {
162         LOG.debug("Procedure " + procName
163             + " was in running list but has exception.  Accepting new attempt.");
164         if (!procedures.remove(procName, oldProc)) {
165           LOG.warn("Procedure " + procName
166               + " has been resubmitted by another thread. Rejecting this request.");
167           return false;
168         }
169       }
170     }
171 
172     // kick off the procedure's execution in a separate thread
173     try {
174       if (this.procedures.putIfAbsent(procName, proc) == null) {
175         LOG.debug("Submitting procedure " + procName);
176         this.pool.submit(proc);
177         return true;
178       } else {
179         LOG.error("Another thread has submitted procedure '" + procName + "'. Ignoring this attempt.");
180         return false;
181       }
182     } catch (RejectedExecutionException e) {
183       LOG.warn("Procedure " + procName + " rejected by execution pool.  Propagating error.", e);
184       // Remove the procedure from the list since is not started
185       this.procedures.remove(procName, proc);
186       // the thread pool is full and we can't run the procedure
187       proc.receive(new ForeignException(procName, e));
188     }
189     return false;
190   }
191 
192   /**
193    * The connection to the rest of the procedure group (members and coordinator) has been
194    * broken/lost/failed. This should fail any interested procedures, but not attempt to notify other
195    * members since we cannot reach them anymore.
196    * @param message description of the error
197    * @param cause the actual cause of the failure
198    */
199   void rpcConnectionFailure(final String message, final IOException cause) {
200     Collection<Procedure> toNotify = procedures.values();
201 
202     boolean isTraceEnabled = LOG.isTraceEnabled();
203     LOG.debug("received connection failure: " + message, cause);
204     for (Procedure proc : toNotify) {
205       if (proc == null) {
206         continue;
207       }
208       // notify the elements, if they aren't null
209       if (isTraceEnabled) {
210         LOG.trace("connection failure - notify procedure: " + proc.getName());
211       }
212       proc.receive(new ForeignException(proc.getName(), cause));
213     }
214   }
215 
216   /**
217    * Abort the procedure with the given name
218    * @param procName name of the procedure to abort
219    * @param reason serialized information about the abort
220    */
221   public void abortProcedure(String procName, ForeignException reason) {
222     LOG.debug("abort procedure " + procName, reason);
223     // if we know about the Procedure, notify it
224     Procedure proc = procedures.get(procName);
225     if (proc == null) {
226       return;
227     }
228     proc.receive(reason);
229   }
230 
231   /**
232    * Exposed for hooking with unit tests.
233    * @param procName
234    * @param procArgs
235    * @param expectedMembers
236    * @return the newly created procedure
237    */
238   Procedure createProcedure(ForeignExceptionDispatcher fed, String procName, byte[] procArgs,
239       List<String> expectedMembers) {
240     // build the procedure
241     return new Procedure(this, fed, wakeTimeMillis, timeoutMillis,
242         procName, procArgs, expectedMembers);
243   }
244 
245   /**
246    * Kick off the named procedure
247    * Currently only one procedure with the same type and name is allowed to run at a time.
248    * @param procName name of the procedure to start
249    * @param procArgs arguments for the procedure
250    * @param expectedMembers expected members to start
251    * @return handle to the running procedure, if it was started correctly,
252    *         <tt>null</tt> otherwise.
253    *         Null could be due to submitting a procedure multiple times
254    *         (or one with the same name), or runtime exception.
255    *         Check the procedure's monitor that holds a reference to the exception
256    *         that caused the failure.
257    */
258   public Procedure startProcedure(ForeignExceptionDispatcher fed, String procName, byte[] procArgs,
259       List<String> expectedMembers) {
260     Procedure proc = createProcedure(fed, procName, procArgs, expectedMembers);
261     if (!this.submitProcedure(proc)) {
262       LOG.error("Failed to submit procedure '" + procName + "'");
263       return null;
264     }
265     return proc;
266   }
267 
268   /**
269    * Notification that the procedure had the specified member acquired its part of the barrier
270    * via {@link Subprocedure#acquireBarrier()}.
271    * @param procName name of the procedure that acquired
272    * @param member name of the member that acquired
273    */
274   void memberAcquiredBarrier(String procName, final String member) {
275     Procedure proc = procedures.get(procName);
276     if (proc == null) {
277       LOG.warn("Member '"+ member +"' is trying to acquire an unknown procedure '"+ procName +"'");
278       return;
279     }
280     if (LOG.isTraceEnabled()) {
281       LOG.trace("Member '"+ member +"' acquired procedure '"+ procName +"'");
282     }
283     proc.barrierAcquiredByMember(member);
284   }
285 
286   /**
287    * Notification that the procedure had another member finished executing its in-barrier subproc
288    * via {@link Subprocedure#insideBarrier()}.
289    * @param procName name of the subprocedure that finished
290    * @param member name of the member that executed and released its barrier
291    * @param dataFromMember the data that the member returned along with the notification
292    */
293   void memberFinishedBarrier(String procName, final String member, byte[] dataFromMember) {
294     Procedure proc = procedures.get(procName);
295     if (proc == null) {
296       LOG.warn("Member '"+ member +"' is trying to release an unknown procedure '"+ procName +"'");
297       return;
298     }
299     if (LOG.isTraceEnabled()) {
300       LOG.trace("Member '"+ member +"' released procedure '"+ procName +"'");
301     }
302     proc.barrierReleasedByMember(member, dataFromMember);
303   }
304 
305   /**
306    * @return the rpcs implementation for all current procedures
307    */
308   ProcedureCoordinatorRpcs getRpcs() {
309     return rpcs;
310   }
311 
312   /**
313    * Returns the procedure.  This Procedure is a live instance so should not be modified but can
314    * be inspected.
315    * @param name Name of the procedure
316    * @return Procedure or null if not present any more
317    */
318   public Procedure getProcedure(String name) {
319     return procedures.get(name);
320   }
321 
322   /**
323    * @return Return set of all procedure names.
324    */
325   public Set<String> getProcedureNames() {
326     return new HashSet<String>(procedures.keySet());
327   }
328 }