View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.client;
21  
22  import java.io.IOException;
23  import java.io.InterruptedIOException;
24  import java.lang.reflect.UndeclaredThrowableException;
25  import java.net.SocketTimeoutException;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.concurrent.atomic.AtomicBoolean;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.classification.InterfaceAudience;
33  import org.apache.hadoop.hbase.CallQueueTooBigException;
34  import org.apache.hadoop.hbase.DoNotRetryIOException;
35  import org.apache.hadoop.hbase.exceptions.PreemptiveFastFailException;
36  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
37  import org.apache.hadoop.hbase.util.ExceptionUtil;
38  import org.apache.hadoop.ipc.RemoteException;
39  import org.apache.hadoop.util.StringUtils;
40  
41  import com.google.protobuf.ServiceException;
42  
43  /**
44   * Runs an rpc'ing {@link RetryingCallable}. Sets into rpc client
45   * threadlocal outstanding timeouts as so we don't persist too much.
46   * Dynamic rather than static so can set the generic appropriately.
47   *
48   * This object has a state. It should not be used by in parallel by different threads.
49   * Reusing it is possible however, even between multiple threads. However, the user will
50   *  have to manage the synchronization on its side: there is no synchronization inside the class.
51   */
52  @InterfaceAudience.Private
53  public class RpcRetryingCaller<T> {
54    public static final Log LOG = LogFactory.getLog(RpcRetryingCaller.class);
55    /**
56     * When we started making calls.
57     */
58    private long globalStartTime;
59    /**
60     * Start and end times for a single call.
61     */
62    private final static int MIN_RPC_TIMEOUT = 1;
63    /** How many retries are allowed before we start to log */
64    private final int startLogErrorsCnt;
65  
66    private final long pause;
67    private final long pauseForCQTBE;
68    private final int retries;
69    private final int rpcTimeout;// timeout for each rpc request
70    private final Object lock = new Object();
71    private final AtomicBoolean cancelled = new AtomicBoolean(false);
72    private final RetryingCallerInterceptor interceptor;
73    private final RetryingCallerInterceptorContext context;
74  
75    public RpcRetryingCaller(long pause, long pauseForCQTBE, int retries, int startLogErrorsCnt) {
76      this(pause, pauseForCQTBE, retries, RetryingCallerInterceptorFactory.NO_OP_INTERCEPTOR,
77          startLogErrorsCnt, 0);
78    }
79  
80    public RpcRetryingCaller(long pause, long pauseForCQTBE, int retries,
81        RetryingCallerInterceptor interceptor, int startLogErrorsCnt, int rpcTimeout) {
82      this.pause = pause;
83      this.pauseForCQTBE = pauseForCQTBE;
84      this.retries = retries;
85      this.interceptor = interceptor;
86      context = interceptor.createEmptyContext();
87      this.startLogErrorsCnt = startLogErrorsCnt;
88      this.rpcTimeout = rpcTimeout;
89    }
90  
91    private int getRemainingTime(int callTimeout) {
92      if (callTimeout <= 0) {
93        return 0;
94      } else {
95        if (callTimeout == Integer.MAX_VALUE) return Integer.MAX_VALUE;
96        int remainingTime = (int) (callTimeout -
97            (EnvironmentEdgeManager.currentTime() - this.globalStartTime));
98        if (remainingTime < MIN_RPC_TIMEOUT) {
99          // If there is no time left, we're trying anyway. It's too late.
100         // 0 means no timeout, and it's not the intent here. So we secure both cases by
101         // resetting to the minimum.
102         remainingTime = MIN_RPC_TIMEOUT;
103       }
104       return remainingTime;
105     }
106   }
107 
108   private int getTimeout(int callTimeout){
109     int timeout = getRemainingTime(callTimeout);
110     if (timeout <= 0 || (rpcTimeout > 0 && rpcTimeout < timeout)){
111       timeout = rpcTimeout;
112     }
113     return timeout;
114   }
115 
116   public void cancel(){
117     synchronized (lock){
118       cancelled.set(true);
119       lock.notifyAll();
120     }
121   }
122 
123   /**
124    * Retries if invocation fails.
125    * @param callTimeout Timeout for this call
126    * @param callable The {@link RetryingCallable} to run.
127    * @return an object of type T
128    * @throws IOException if a remote or network exception occurs
129    * @throws RuntimeException other unspecified error
130    */
131   public T callWithRetries(RetryingCallable<T> callable, int callTimeout)
132   throws IOException, RuntimeException {
133     List<RetriesExhaustedException.ThrowableWithExtraContext> exceptions =
134       new ArrayList<RetriesExhaustedException.ThrowableWithExtraContext>();
135     this.globalStartTime = EnvironmentEdgeManager.currentTime();
136     context.clear();
137     for (int tries = 0;; tries++) {
138       long expectedSleep;
139       try {
140         // bad cache entries are cleared in the call to RetryingCallable#throwable() in catch block
141         callable.prepare(tries != 0); // if called with false, check table status on ZK
142         interceptor.intercept(context.prepare(callable, tries));
143         return callable.call(getTimeout(callTimeout));
144       } catch (PreemptiveFastFailException e) {
145         throw e;
146       } catch (Throwable t) {
147         ExceptionUtil.rethrowIfInterrupt(t);
148 
149         // translateException throws exception when should not retry: i.e. when request is bad.
150         interceptor.handleFailure(context, t);
151         t = translateException(t);
152         if (tries > startLogErrorsCnt) {
153           if (LOG.isInfoEnabled()) {
154             StringBuilder builder = new StringBuilder("Call exception, tries=").append(tries)
155                 .append(", retries=").append(retries).append(", started=")
156                 .append(EnvironmentEdgeManager.currentTime() - this.globalStartTime)
157                 .append(" ms ago, ").append("cancelled=").append(cancelled.get())
158                 .append(", msg=").append(t.getMessage())
159                 .append(", details=").append(callable.getExceptionMessageAdditionalDetail())
160                 .append(", see https://s.apache.org/timeout");
161             if (LOG.isDebugEnabled()) {
162               builder.append(", exception=").append(StringUtils.stringifyException(t));
163               LOG.debug(builder.toString());
164             } else {
165               LOG.info(builder.toString());
166             }
167           }
168         }
169 
170         callable.throwable(t, retries != 1);
171         RetriesExhaustedException.ThrowableWithExtraContext qt =
172             new RetriesExhaustedException.ThrowableWithExtraContext(t,
173                 EnvironmentEdgeManager.currentTime(), toString());
174         exceptions.add(qt);
175         if (tries >= retries - 1) {
176           throw new RetriesExhaustedException(tries, exceptions);
177         }
178         // If the server is dead, we need to wait a little before retrying, to give
179         // a chance to the regions to be moved
180         // get right pause time, start by RETRY_BACKOFF[0] * pauseBase, where pauseBase might be
181         // special when encountering CallQueueTooBigException, see #HBASE-17114
182         long pauseBase = (t instanceof CallQueueTooBigException) ? pauseForCQTBE : pause;
183         expectedSleep = callable.sleep(pauseBase, tries);
184 
185         // If, after the planned sleep, there won't be enough time left, we stop now.
186         long duration = singleCallDuration(expectedSleep);
187         if (duration > callTimeout) {
188           String msg = "callTimeout=" + callTimeout + ", callDuration=" + duration +
189               ": " + t.getMessage() + " " + callable.getExceptionMessageAdditionalDetail();
190           throw (SocketTimeoutException)(new SocketTimeoutException(msg).initCause(t));
191         }
192       } finally {
193         interceptor.updateFailureInfo(context);
194       }
195       try {
196         if (expectedSleep > 0) {
197           synchronized (lock) {
198             if (cancelled.get()) return null;
199             lock.wait(expectedSleep);
200           }
201         }
202         if (cancelled.get()) return null;
203       } catch (InterruptedException e) {
204         throw new InterruptedIOException("Interrupted after " + tries + " tries  on " + retries);
205       }
206     }
207   }
208 
209   /**
210    * @return Calculate how long a single call took
211    */
212   private long singleCallDuration(final long expectedSleep) {
213     return (EnvironmentEdgeManager.currentTime() - this.globalStartTime) + expectedSleep;
214   }
215 
216   /**
217    * Call the server once only.
218    * {@link RetryingCallable} has a strange shape so we can do retrys.  Use this invocation if you
219    * want to do a single call only (A call to {@link RetryingCallable#call(int)} will not likely
220    * succeed).
221    * @return an object of type T
222    * @throws IOException if a remote or network exception occurs
223    * @throws RuntimeException other unspecified error
224    */
225   public T callWithoutRetries(RetryingCallable<T> callable, int callTimeout)
226   throws IOException, RuntimeException {
227     // The code of this method should be shared with withRetries.
228     this.globalStartTime = EnvironmentEdgeManager.currentTime();
229     try {
230       callable.prepare(false);
231       return callable.call(callTimeout);
232     } catch (Throwable t) {
233       Throwable t2 = translateException(t);
234       ExceptionUtil.rethrowIfInterrupt(t2);
235       // It would be nice to clear the location cache here.
236       if (t2 instanceof IOException) {
237         throw (IOException)t2;
238       } else {
239         throw new RuntimeException(t2);
240       }
241     }
242   }
243 
244   /**
245    * Get the good or the remote exception if any, throws the DoNotRetryIOException.
246    * @param t the throwable to analyze
247    * @return the translated exception, if it's not a DoNotRetryIOException
248    * @throws DoNotRetryIOException - if we find it, we throw it instead of translating.
249    */
250   static Throwable translateException(Throwable t) throws DoNotRetryIOException {
251     if (t instanceof UndeclaredThrowableException) {
252       if (t.getCause() != null) {
253         t = t.getCause();
254       }
255     }
256     if (t instanceof RemoteException) {
257       t = ((RemoteException)t).unwrapRemoteException();
258     }
259     if (t instanceof LinkageError) {
260       throw new DoNotRetryIOException(t);
261     }
262     if (t instanceof ServiceException) {
263       ServiceException se = (ServiceException)t;
264       Throwable cause = se.getCause();
265       if (cause != null) {
266         if (cause instanceof DoNotRetryIOException) {
267           throw (DoNotRetryIOException)cause;
268         } else if (cause instanceof NeedUnmanagedConnectionException) {
269           throw new DoNotRetryIOException(cause);
270         }
271       }
272       // Don't let ServiceException out; its rpc specific.
273       t = cause;
274       // t could be a RemoteException so go aaround again.
275       translateException(t);
276     } else if (t instanceof DoNotRetryIOException) {
277       throw (DoNotRetryIOException)t;
278     } else if (t instanceof NeedUnmanagedConnectionException) {
279       throw new DoNotRetryIOException(t);
280     }
281     return t;
282   }
283 
284   @Override
285   public String toString() {
286     return "RpcRetryingCaller{" + "globalStartTime=" + globalStartTime +
287         ", pause=" + pause + ", retries=" + retries + '}';
288   }
289 }