View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.text.SimpleDateFormat;
22  import java.util.Date;
23  import java.util.Map;
24  import java.util.concurrent.ConcurrentHashMap;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.hbase.HConstants;
30  import org.apache.hadoop.hbase.ScheduledChore;
31  import org.apache.hadoop.hbase.Stoppable;
32  import org.apache.hadoop.hbase.classification.InterfaceAudience;
33  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
34  import org.apache.hadoop.hbase.util.NonceKey;
35  
36  /**
37   * Implementation of nonce manager that stores nonces in a hash map and cleans them up after
38   * some time; if nonce group/client ID is supplied, nonces are stored by client ID.
39   */
40  @InterfaceAudience.Private
41  public class ServerNonceManager {
42    public static final String HASH_NONCE_GRACE_PERIOD_KEY = "hbase.server.hashNonce.gracePeriod";
43    private static final Log LOG = LogFactory.getLog(ServerNonceManager.class);
44  
45    /** The time to wait in an extremely unlikely case of a conflict with a running op.
46     * Only here so that tests could override it and not wait. */
47    private int conflictWaitIterationMs = 30000;
48  
49    private static final SimpleDateFormat tsFormat = new SimpleDateFormat("HH:mm:ss.SSS");
50  
51    // This object is used to synchronize on in case of collisions, and for cleanup.
52    private static class OperationContext {
53      static final int DONT_PROCEED = 0;
54      static final int PROCEED = 1;
55      static final int WAIT = 2;
56  
57      // 0..1 - state, 2..2 - whether anyone is waiting, 3.. - ts of last activity
58      private long data = 0;
59      private static final long STATE_BITS = 3;
60      private static final long WAITING_BIT = 4;
61      private static final long ALL_FLAG_BITS = WAITING_BIT | STATE_BITS;
62  
63      private volatile long mvcc;
64  
65      @Override
66      public String toString() {
67        return "[state " + getState() + ", hasWait " + hasWait() + ", activity "
68            + tsFormat.format(new Date(getActivityTime())) + "]";
69      }
70  
71      public OperationContext() {
72        setState(WAIT);
73        reportActivity();
74      }
75  
76      public void setState(int state) {
77        this.data = (this.data & ~STATE_BITS) | state;
78      }
79  
80      public int getState() {
81        return (int)(this.data & STATE_BITS);
82      }
83  
84      public void setHasWait() {
85        this.data = this.data | WAITING_BIT;
86      }
87  
88      public boolean hasWait() {
89        return (this.data & WAITING_BIT) == WAITING_BIT;
90      }
91  
92      public void reportActivity() {
93        long now = EnvironmentEdgeManager.currentTime();
94        this.data = (this.data & ALL_FLAG_BITS) | (now << 3);
95      }
96  
97      public boolean isExpired(long minRelevantTime) {
98        return getActivityTime() < (minRelevantTime & (~0l >>> 3));
99      }
100 
101     private long getActivityTime() {
102       return this.data >>> 3;
103     }
104 
105     public void setMvcc(long mvcc) {
106       this.mvcc = mvcc;
107     }
108 
109     public long getMvcc() {
110       return this.mvcc;
111     }
112   }
113 
114   /**
115    * Nonces.
116    * Approximate overhead per nonce: 64 bytes from hashmap, 32 from two objects (k/v),
117    * NK: 16 bytes (2 longs), OC: 8 bytes (1 long) - so, 120 bytes.
118    * With 30min expiration time, 5k increments/appends per sec., we'd use approximately 1Gb,
119    * which is a realistic worst case. If it's much worse, we could use some sort of memory
120    * limit and cleanup.
121    */
122   private ConcurrentHashMap<NonceKey, OperationContext> nonces =
123       new ConcurrentHashMap<NonceKey, OperationContext>();
124 
125   private int deleteNonceGracePeriod;
126 
127   public ServerNonceManager(Configuration conf) {
128     // Default - 30 minutes.
129     deleteNonceGracePeriod = conf.getInt(HASH_NONCE_GRACE_PERIOD_KEY, 30 * 60 * 1000);
130     if (deleteNonceGracePeriod < 60 * 1000) {
131       LOG.warn("Nonce grace period " + deleteNonceGracePeriod
132           + " is less than a minute; might be too small to be useful");
133     }
134   }
135 
136   public void setConflictWaitIterationMs(int conflictWaitIterationMs) {
137     this.conflictWaitIterationMs = conflictWaitIterationMs;
138   }
139 
140   /**
141    * Starts the operation if operation with such nonce has not already succeeded. If the
142    * operation is in progress, waits for it to end and checks whether it has succeeded.
143    * @param group Nonce group.
144    * @param nonce Nonce.
145    * @param stoppable Stoppable that terminates waiting (if any) when the server is stopped.
146    * @return true if the operation has not already succeeded and can proceed; false otherwise.
147    */
148   public boolean startOperation(long group, long nonce, Stoppable stoppable)
149       throws InterruptedException {
150     if (nonce == HConstants.NO_NONCE) return true;
151     NonceKey nk = new NonceKey(group, nonce);
152     OperationContext ctx = new OperationContext();
153     while (true) {
154       OperationContext oldResult = nonces.putIfAbsent(nk, ctx);
155       if (oldResult == null) return true;
156 
157       // Collision with some operation - should be extremely rare.
158       synchronized (oldResult) {
159         int oldState = oldResult.getState();
160         LOG.debug("Conflict detected by nonce: " + nk + ", " + oldResult);
161         if (oldState != OperationContext.WAIT) {
162           return oldState == OperationContext.PROCEED; // operation ended
163         }
164         oldResult.setHasWait();
165         oldResult.wait(this.conflictWaitIterationMs); // operation is still active... wait and loop
166         if (stoppable.isStopped()) {
167           throw new InterruptedException("Server stopped");
168         }
169       }
170     }
171   }
172 
173   /**
174    * Ends the operation started by startOperation.
175    * @param group Nonce group.
176    * @param nonce Nonce.
177    * @param success Whether the operation has succeeded.
178    */
179   public void endOperation(long group, long nonce, boolean success) {
180     if (nonce == HConstants.NO_NONCE) return;
181     NonceKey nk = new NonceKey(group, nonce);
182     OperationContext newResult = nonces.get(nk);
183     assert newResult != null;
184     synchronized (newResult) {
185       assert newResult.getState() == OperationContext.WAIT;
186       // If we failed, other retries can proceed.
187       newResult.setState(success ? OperationContext.DONT_PROCEED : OperationContext.PROCEED);
188       if (success) {
189         newResult.reportActivity(); // Set time to use for cleanup.
190       } else {
191         OperationContext val = nonces.remove(nk);
192         assert val == newResult;
193       }
194       if (newResult.hasWait()) {
195         LOG.debug("Conflict with running op ended: " + nk + ", " + newResult);
196         newResult.notifyAll();
197       }
198     }
199   }
200 
201   /**
202    * Store the write point in OperationContext when the operation succeed.
203    * @param group Nonce group.
204    * @param nonce Nonce.
205    * @param mvcc Write point of the succeed operation.
206    */
207   public void addMvccToOperationContext(long group, long nonce, long mvcc) {
208     if (nonce == HConstants.NO_NONCE) {
209       return;
210     }
211     NonceKey nk = new NonceKey(group, nonce);
212     OperationContext result = nonces.get(nk);
213     assert result != null;
214     synchronized (result) {
215       result.setMvcc(mvcc);
216     }
217   }
218 
219   /**
220    * Return the write point of the previous succeed operation.
221    * @param group Nonce group.
222    * @param nonce Nonce.
223    * @return write point of the previous succeed operation.
224    */
225   public long getMvccFromOperationContext(long group, long nonce) {
226     if (nonce == HConstants.NO_NONCE) {
227       return Long.MAX_VALUE;
228     }
229     NonceKey nk = new NonceKey(group, nonce);
230     OperationContext result = nonces.get(nk);
231     return result == null ? Long.MAX_VALUE : result.getMvcc();
232   }
233 
234   /**
235    * Reports the operation from WAL during replay.
236    * @param group Nonce group.
237    * @param nonce Nonce.
238    * @param writeTime Entry write time, used to ignore entries that are too old.
239    */
240   public void reportOperationFromWal(long group, long nonce, long writeTime) {
241     if (nonce == HConstants.NO_NONCE) return;
242     // Give the write time some slack in case the clocks are not synchronized.
243     long now = EnvironmentEdgeManager.currentTime();
244     if (now > writeTime + (deleteNonceGracePeriod * 1.5)) return;
245     OperationContext newResult = new OperationContext();
246     newResult.setState(OperationContext.DONT_PROCEED);
247     NonceKey nk = new NonceKey(group, nonce);
248     OperationContext oldResult = nonces.putIfAbsent(nk, newResult);
249     if (oldResult != null) {
250       // Some schemes can have collisions (for example, expiring hashes), so just log it.
251       // We have no idea about the semantics here, so this is the least of many evils.
252       LOG.warn("Nonce collision during WAL recovery: " + nk
253           + ", " + oldResult + " with " + newResult);
254     }
255   }
256 
257   /**
258    * Creates a scheduled chore that is used to clean up old nonces.
259    * @param stoppable Stoppable for the chore.
260    * @return ScheduledChore; the scheduled chore is not started.
261    */
262   public ScheduledChore createCleanupScheduledChore(Stoppable stoppable) {
263     // By default, it will run every 6 minutes (30 / 5).
264     return new ScheduledChore("nonceCleaner", stoppable, deleteNonceGracePeriod / 5) {
265       @Override
266       protected void chore() {
267         cleanUpOldNonces();
268       }
269     };
270   }
271 
272   private void cleanUpOldNonces() {
273     long cutoff = EnvironmentEdgeManager.currentTime() - deleteNonceGracePeriod;
274     for (Map.Entry<NonceKey, OperationContext> entry : nonces.entrySet()) {
275       OperationContext oc = entry.getValue();
276       if (!oc.isExpired(cutoff)) continue;
277       synchronized (oc) {
278         if (oc.getState() == OperationContext.WAIT || !oc.isExpired(cutoff)) continue;
279         nonces.remove(entry.getKey());
280       }
281     }
282   }
283 }