View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   * http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  /*
21   * Written by Doug Lea with assistance from members of JCP JSR-166
22   * Expert Group and released to the public domain, as explained at
23   * http://creativecommons.org/publicdomain/zero/1.0/
24   */
25  
26  package org.apache.hadoop.hbase.util;
27  
28  import java.util.Random;
29  
30  import org.apache.hadoop.hbase.classification.InterfaceAudience;
31  
/**
 * A package-private class holding the common representation and mechanics
 * for classes supporting dynamic striping on 64-bit values. The class
 * extends Number so that concrete subclasses must publicly do so.
 */
37  @InterfaceAudience.Private
abstract class Striped64 extends Number {
    /*
     * This class maintains a lazily-initialized table of atomically
     * updated variables, plus an extra "base" field. The table size
     * is a power of two. Indexing uses masked per-thread hash codes.
     * Nearly all declarations in this class are package-private,
     * accessed directly by subclasses.
     *
     * Table entries are of class Cell; a variant of AtomicLong padded
     * to reduce cache contention on most processors. Padding is
     * overkill for most Atomics because they are usually irregularly
     * scattered in memory and thus don't interfere much with each
     * other. But Atomic objects residing in arrays will tend to be
     * placed adjacent to each other, and so will most often share
     * cache lines (with a huge negative performance impact) without
     * this precaution.
     *
     * In part because Cells are relatively large, we avoid creating
     * them until they are needed.  When there is no contention, all
     * updates are made to the base field.  Upon first contention (a
     * failed CAS on base update), the table is initialized to size 2.
     * The table size is doubled upon further contention until
     * reaching the nearest power of two greater than or equal to the
     * number of CPUs. Table slots remain empty (null) until they are
     * needed.
     *
     * A single spinlock ("busy") is used for initializing and
     * resizing the table, as well as populating slots with new Cells.
     * There is no need for a blocking lock; when the lock is not
     * available, threads try other slots (or the base).  During these
     * retries, there is increased contention and reduced locality,
     * which is still better than alternatives.
     *
     * Per-thread hash codes are initialized to random values.
     * Contention and/or table collisions are indicated by failed
     * CASes when performing an update operation (see method
     * retryUpdate). Upon a collision, if the table size is less than
     * the capacity, it is doubled in size unless some other thread
     * holds the lock. If a hashed slot is empty, and lock is
     * available, a new Cell is created. Otherwise, if the slot
     * exists, a CAS is tried.  Retries proceed by "double hashing",
     * using a secondary hash (Marsaglia XorShift) to try to find a
     * free slot.
     *
     * The table size is capped because, when there are more threads
     * than CPUs, supposing that each thread were bound to a CPU,
     * there would exist a perfect hash function mapping threads to
     * slots that eliminates collisions. When we reach capacity, we
     * search for this mapping by randomly varying the hash codes of
     * colliding threads.  Because search is random, and collisions
     * only become known via CAS failures, convergence can be slow,
     * and because threads are typically not bound to CPUs forever,
     * may not occur at all. However, despite these limitations,
     * observed contention rates are typically low in these cases.
     *
     * It is possible for a Cell to become unused when threads that
     * once hashed to it terminate, as well as in the case where
     * doubling the table causes no thread to hash to it under
     * expanded mask.  We do not try to detect or remove such cells,
     * under the assumption that for long-running instances, observed
     * contention levels will recur, so the cells will eventually be
     * needed again; and for short-lived ones, it does not matter.
     */

    /**
     * Padded variant of AtomicLong supporting only raw accesses plus CAS.
     * The value field is placed between pads, hoping that the JVM doesn't
     * reorder them.
     *
     * JVM intrinsics note: It would be possible to use a release-only
     * form of CAS here, if it were provided.
     */
    static final class Cell {
        // Padding fields are never read; they only surround "value".
        volatile long p0, p1, p2, p3, p4, p5, p6;
        volatile long value;
        volatile long q0, q1, q2, q3, q4, q5, q6;

        /**
         * Creates a cell whose value starts at {@code x}.
         *
         * @param x the initial value
         */
        Cell(long x) { value = x; }

        /**
         * Atomically sets {@code value} to {@code val} if it currently
         * holds {@code cmp}.
         *
         * @param cmp the expected current value
         * @param val the replacement value
         * @return true if the swap succeeded
         */
        final boolean cas(long cmp, long val) {
            return UNSAFE.compareAndSwapLong(this, valueOffset, cmp, val);
        }

        // Unsafe mechanics
        private static final sun.misc.Unsafe UNSAFE;
        private static final long valueOffset;
        static {
            try {
                UNSAFE = getUnsafe();
                Class<?> ak = Cell.class;
                valueOffset = UNSAFE.objectFieldOffset
                    (ak.getDeclaredField("value"));
            } catch (Exception e) {
                throw new Error(e);
            }
        }

    }

    /**
     * ThreadLocal holding a single-slot int array holding hash code.
     * Unlike the JDK8 version of this class, we use a suboptimal
     * int[] representation to avoid introducing a new type that can
     * impede class-unloading when ThreadLocals are not removed.
     */
    static final ThreadLocal<int[]> threadHashCode = new ThreadLocal<int[]>();

    /**
     * Generator of new random hash codes
     */
    static final Random rng = new Random();

    /** Number of CPUs, to place bound on table size */
    static final int NCPU = Runtime.getRuntime().availableProcessors();

    /**
     * Table of cells. When non-null, size is a power of 2.
     */
    transient volatile Cell[] cells;

    /**
     * Base value, used mainly when there is no contention, but also as
     * a fallback during table initialization races. Updated via CAS.
     */
    transient volatile long base;

    /**
     * Spinlock (locked via CAS) used when resizing and/or creating Cells.
     */
    transient volatile int busy;

    /**
     * Package-private default constructor
     */
    Striped64() {
    }

    /**
     * CASes the base field.
     *
     * @param cmp the expected current value of base
     * @param val the replacement value
     * @return true if the swap succeeded
     */
    final boolean casBase(long cmp, long val) {
        return UNSAFE.compareAndSwapLong(this, baseOffset, cmp, val);
    }

    /**
     * CASes the busy field from 0 to 1 to acquire lock.
     *
     * @return true if the lock was acquired by this call
     */
    final boolean casBusy() {
        return UNSAFE.compareAndSwapInt(this, busyOffset, 0, 1);
    }

    /**
     * Computes the function of current and new value. Subclasses
     * should open-code this update function for most uses, but the
     * virtualized form is needed within retryUpdate.
     *
     * @param currentValue the current value (of either base or a cell)
     * @param newValue the argument from a user update call
     * @return result of the update function
     */
    abstract long fn(long currentValue, long newValue);

    /**
     * Handles cases of updates involving initialization, resizing,
     * creating new Cells, and/or contention. See above for
     * explanation. This method suffers the usual non-modularity
     * problems of optimistic retry code, relying on rechecked sets of
     * reads.
     *
     * @param x the value
     * @param hc the hash code holder; when null, a new holder is created,
     *        seeded with a nonzero random value and stored in
     *        {@link #threadHashCode}
     * @param wasUncontended false if CAS failed before call
     */
    final void retryUpdate(long x, int[] hc, boolean wasUncontended) {
        int h;
        if (hc == null) {
            threadHashCode.set(hc = new int[1]); // Initialize randomly
            int r = rng.nextInt(); // Avoid zero to allow xorShift rehash
            h = hc[0] = (r == 0) ? 1 : r;
        }
        else
            h = hc[0];
        boolean collide = false;                // True if last slot nonempty
        for (;;) {
            Cell[] as; Cell a; int n; long v;
            if ((as = cells) != null && (n = as.length) > 0) {
                if ((a = as[(n - 1) & h]) == null) {
                    if (busy == 0) {            // Try to attach new Cell
                        Cell r = new Cell(x);   // Optimistically create
                        if (busy == 0 && casBusy()) {
                            boolean created = false;
                            try {               // Recheck under lock
                                Cell[] rs; int m, j;
                                if ((rs = cells) != null &&
                                    (m = rs.length) > 0 &&
                                    rs[j = (m - 1) & h] == null) {
                                    rs[j] = r;
                                    created = true;
                                }
                            } finally {
                                busy = 0;       // Always release the lock
                            }
                            if (created)
                                break;
                            continue;           // Slot is now non-empty
                        }
                    }
                    collide = false;
                }
                else if (!wasUncontended)       // CAS already known to fail
                    wasUncontended = true;      // Continue after rehash
                else if (a.cas(v = a.value, fn(v, x)))
                    break;
                else if (n >= NCPU || cells != as)
                    collide = false;            // At max size or stale
                else if (!collide)
                    collide = true;             // Expand only on a second miss
                else if (busy == 0 && casBusy()) {
                    try {
                        if (cells == as) {      // Expand table unless stale
                            Cell[] rs = new Cell[n << 1];
                            // Existing Cell objects are carried over as-is
                            System.arraycopy(as, 0, rs, 0, n);
                            cells = rs;
                        }
                    } finally {
                        busy = 0;               // Always release the lock
                    }
                    collide = false;
                    continue;                   // Retry with expanded table
                }
                h ^= h << 13;                   // Rehash
                h ^= h >>> 17;
                h ^= h << 5;
                hc[0] = h;                      // Record index for next time
            }
            else if (busy == 0 && cells == as && casBusy()) {
                boolean init = false;
                try {                           // Initialize table
                    if (cells == as) {
                        Cell[] rs = new Cell[2];
                        rs[h & 1] = new Cell(x);
                        cells = rs;
                        init = true;
                    }
                } finally {
                    busy = 0;                   // Always release the lock
                }
                if (init)
                    break;
            }
            else if (casBase(v = base, fn(v, x)))
                break;                          // Fall back on using base
        }
    }


    /**
     * Sets base and all cells to the given value.
     *
     * @param initialValue the value written to base and to every
     *        non-null cell of the table read at entry
     */
    final void internalReset(long initialValue) {
        Cell[] as = cells;
        base = initialValue;
        if (as != null) {
            int n = as.length;
            for (int i = 0; i < n; ++i) {
                Cell a = as[i];
                if (a != null)
                    a.value = initialValue;
            }
        }
    }

    // Unsafe mechanics
    private static final sun.misc.Unsafe UNSAFE;
    private static final long baseOffset;
    private static final long busyOffset;
    static {
        try {
            UNSAFE = getUnsafe();
            Class<?> sk = Striped64.class;
            baseOffset = UNSAFE.objectFieldOffset
                (sk.getDeclaredField("base"));
            busyOffset = UNSAFE.objectFieldOffset
                (sk.getDeclaredField("busy"));
        } catch (Exception e) {
            throw new Error(e);
        }
    }

    /**
     * Returns a sun.misc.Unsafe.  Suitable for use in a 3rd party package.
     * Replace with a simple call to Unsafe.getUnsafe when integrating
     * into a jdk.
     *
     * @return a sun.misc.Unsafe
     * @throws NoSuchFieldError if no static field of sun.misc.Unsafe
     *         holds an Unsafe instance
     * @throws RuntimeException if the reflective lookup fails with a
     *         checked exception
     */
    private static sun.misc.Unsafe getUnsafe() {
        try {
            return sun.misc.Unsafe.getUnsafe();
        } catch (SecurityException tryReflectionInstead) {
            // Deliberately ignored: fall through to the reflective lookup.
        }
        try {
            return java.security.AccessController.doPrivileged
            (new java.security.PrivilegedExceptionAction<sun.misc.Unsafe>() {
                @Override
                public sun.misc.Unsafe run() throws Exception {
                    Class<sun.misc.Unsafe> k = sun.misc.Unsafe.class;
                    for (java.lang.reflect.Field f : k.getDeclaredFields()) {
                        // Only static fields can be read with a null receiver
                        if (!java.lang.reflect.Modifier.isStatic(f.getModifiers()))
                            continue;
                        f.setAccessible(true);
                        Object x = f.get(null);
                        if (k.isInstance(x))
                            return k.cast(x);
                    }
                    throw new NoSuchFieldError("the Unsafe");
                }});
        } catch (java.security.PrivilegedActionException e) {
            throw new RuntimeException("Could not initialize intrinsics",
                                       e.getCause());
        }
    }
}