1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 /*
21 * Written by Doug Lea with assistance from members of JCP JSR-166
22 * Expert Group and released to the public domain, as explained at
23 * http://creativecommons.org/publicdomain/zero/1.0/
24 */
25
26 package org.apache.hadoop.hbase.util;
27
28 import java.util.Random;
29
30 import org.apache.hadoop.hbase.classification.InterfaceAudience;
31
32 /**
33 * A package-local class holding common representation and mechanics
34 * for classes supporting dynamic striping on 64bit values. The class
35 * extends Number so that concrete subclasses must publicly do so.
36 */
37 @InterfaceAudience.Private
abstract class Striped64 extends Number {
    /*
     * Design overview.
     *
     * State is a "base" field plus a lazily created table of
     * atomically updated variables. The table length is always a
     * power of two and a slot is chosen by masking a per-thread hash
     * code. Almost every declaration is package-private so that
     * subclasses can use it directly.
     *
     * Each table entry is a Cell: an AtomicLong-like holder that is
     * padded to cut down cache contention on most processors.
     * Padding would be excessive for ordinary Atomics, which tend to
     * be scattered irregularly through memory and so rarely disturb
     * one another. Atomic objects stored in an array, however, are
     * likely to be laid out next to each other and would usually
     * share cache lines (at a large performance cost) without it.
     *
     * Cells are comparatively large, so none are created until they
     * are required. While there is no contention every update goes
     * to the base field. The first contended update (a failed CAS on
     * base) creates a table of size 2. Further contention doubles
     * the table until it reaches the nearest power of two greater
     * than or equal to the number of CPUs. Slots stay empty (null)
     * until they are needed.
     *
     * One spinlock ("busy") guards table initialization, table
     * resizing and the population of slots with new Cells. A
     * blocking lock is unnecessary: a thread that cannot get the
     * lock simply tries other slots (or the base). Those retries
     * raise contention and hurt locality, but that is still better
     * than the alternatives.
     *
     * Per-thread hash codes start out as random values. Contention
     * and/or table collisions show up as failed CASes during an
     * update (see retryUpdate). On a collision the table is doubled
     * if it is below capacity, unless another thread holds the lock.
     * If the hashed slot is empty and the lock is free, a new Cell
     * is created; otherwise, if the slot exists, a CAS is attempted.
     * Retries use "double hashing": a secondary hash (Marsaglia
     * XorShift) is applied to look for a free slot.
     *
     * The table size is capped because, when there are more threads
     * than CPUs, and supposing every thread were bound to a CPU,
     * some perfect hash function mapping threads to slots would
     * eliminate collisions. At capacity we search for that mapping
     * by randomly varying the hash codes of colliding threads. The
     * search is random and collisions are only discovered through
     * CAS failures, so convergence can be slow, and since threads
     * are typically not bound to CPUs forever it may never happen.
     * Even so, observed contention rates are typically low in these
     * cases.
     *
     * A Cell can fall out of use when the threads that hashed to it
     * terminate, or when doubling the table leaves no thread hashing
     * to it under the wider mask. Such cells are neither detected
     * nor removed: for long-running instances the contention level
     * is assumed to recur so the cells will be wanted again, and for
     * short-lived instances it does not matter.
     */

    /**
     * AtomicLong-like holder that offers only raw access to its value
     * plus a CAS. The value sits between two runs of padding fields,
     * in the hope that the JVM keeps the declared field order.
     *
     * JVM intrinsics note: a release-only form of CAS could be used
     * here if one were available.
     */
    static final class Cell {
        // Leading padding; declaration order is deliberate.
        volatile long p0, p1, p2, p3, p4, p5, p6;
        // The payload, accessed directly by the enclosing class and its subclasses.
        volatile long value;
        // Trailing padding.
        volatile long q0, q1, q2, q3, q4, q5, q6;

        /**
         * Creates a cell holding the given initial value.
         *
         * @param x the initial value
         */
        Cell(long x) {
            this.value = x;
        }

        /**
         * Atomically replaces the value with {@code val} if it currently
         * equals {@code cmp}.
         *
         * @param cmp the expected current value
         * @param val the replacement value
         * @return true if the swap took place
         */
        final boolean cas(long cmp, long val) {
            return UNSAFE.compareAndSwapLong(this, valueOffset, cmp, val);
        }

        // Unsafe mechanics: handle plus the offset of the value field.
        private static final sun.misc.Unsafe UNSAFE;
        private static final long valueOffset;

        static {
            try {
                UNSAFE = getUnsafe();
                valueOffset = UNSAFE.objectFieldOffset(
                    Cell.class.getDeclaredField("value"));
            } catch (Exception e) {
                throw new Error(e);
            }
        }
    }

    /**
     * Per-thread holder of the hash code, stored as a one-element int
     * array. Unlike the JDK8 version of this class, the suboptimal
     * int[] form is used so that no new type is introduced, since such
     * a type can impede class-unloading when ThreadLocals are not
     * removed.
     */
    static final ThreadLocal<int[]> threadHashCode = new ThreadLocal<int[]>();

    /**
     * Source of fresh random hash codes.
     */
    static final Random rng = new Random();

    /** CPU count; used as the bound on table size. */
    static final int NCPU = Runtime.getRuntime().availableProcessors();

    /**
     * The cell table. Its length is a power of 2 whenever it is non-null.
     */
    transient volatile Cell[] cells;

    /**
     * The base value. Used mostly when there is no contention, and also
     * as the fallback while table initialization is being raced.
     * Updated via CAS.
     */
    transient volatile long base;

    /**
     * Spinlock, acquired via CAS, held while resizing the table and/or
     * creating Cells.
     */
    transient volatile int busy;

    /**
     * Package-private no-argument constructor.
     */
    Striped64() {
    }

    /**
     * Atomically replaces the base field with {@code val} if it
     * currently equals {@code cmp}.
     *
     * @param cmp the expected current base value
     * @param val the replacement base value
     * @return true if the swap took place
     */
    final boolean casBase(long cmp, long val) {
        return UNSAFE.compareAndSwapLong(this, baseOffset, cmp, val);
    }

    /**
     * Tries to take the spinlock by moving the busy field from 0 to 1.
     *
     * @return true if the lock was acquired by this call
     */
    final boolean casBusy() {
        return UNSAFE.compareAndSwapInt(this, busyOffset, 0, 1);
    }

    /**
     * Combines a current value with an update argument. Subclasses are
     * expected to open-code this function for most uses; the virtual
     * form exists because retryUpdate needs it.
     *
     * @param currentValue the current value (of either base or a cell)
     * @param newValue the argument from a user update call
     * @return result of the update function
     */
    abstract long fn(long currentValue, long newValue);

    /**
     * Slow path for updates: covers table initialization, resizing,
     * creation of new Cells, and/or contention. The class-level
     * implementation comment explains the strategy. As with optimistic
     * retry code in general, the method is not modular and depends on
     * sets of reads being rechecked.
     *
     * @param x the value
     * @param hc the hash code holder
     * @param wasUncontended false if CAS failed before call
     */
    final void retryUpdate(long x, int[] hc, boolean wasUncontended) {
        int probe;
        if (hc != null) {
            probe = hc[0];
        } else {
            // First use by this thread: install a holder with a random hash.
            hc = new int[1];
            threadHashCode.set(hc);
            int seed = rng.nextInt();
            // Zero is avoided because the xorShift rehash could never leave it.
            probe = (seed != 0) ? seed : 1;
            hc[0] = probe;
        }

        boolean collided = false;               // True if last slot nonempty
        while (true) {
            Cell[] table = cells;
            int size = (table == null) ? 0 : table.length;

            if (size > 0) {
                Cell slot = table[(size - 1) & probe];
                if (slot == null) {
                    if (busy == 0) {            // Try to attach a new Cell
                        Cell fresh = new Cell(x);   // Optimistically create
                        if (busy == 0 && casBusy()) {
                            boolean attached = false;
                            try {               // Recheck under lock
                                Cell[] current = cells;
                                if (current != null) {
                                    int len = current.length;
                                    if (len > 0) {
                                        int idx = (len - 1) & probe;
                                        if (current[idx] == null) {
                                            current[idx] = fresh;
                                            attached = true;
                                        }
                                    }
                                }
                            } finally {
                                busy = 0;
                            }
                            if (attached) {
                                return;
                            }
                            continue;           // Slot is now non-empty
                        }
                    }
                    collided = false;
                } else if (!wasUncontended) {   // CAS already known to fail
                    wasUncontended = true;      // Continue after rehash
                } else {
                    long seen = slot.value;
                    if (slot.cas(seen, fn(seen, x))) {
                        return;
                    }
                    if (size >= NCPU || cells != table) {
                        collided = false;       // At max size or stale
                    } else if (!collided) {
                        collided = true;
                    } else if (busy == 0 && casBusy()) {
                        try {
                            if (cells == table) {   // Expand table unless stale
                                Cell[] grown = new Cell[size << 1];
                                System.arraycopy(table, 0, grown, 0, size);
                                cells = grown;
                            }
                        } finally {
                            busy = 0;
                        }
                        collided = false;
                        continue;               // Retry with expanded table
                    }
                }
                probe = xorShift(probe);        // Rehash
                hc[0] = probe;                  // Record index for next time
            } else if (busy == 0 && cells == table && casBusy()) {
                boolean initialized = false;
                try {                           // Initialize table
                    if (cells == table) {
                        Cell[] initial = new Cell[2];
                        initial[probe & 1] = new Cell(x);
                        cells = initial;
                        initialized = true;
                    }
                } finally {
                    busy = 0;
                }
                if (initialized) {
                    return;
                }
            } else {
                long seen = base;
                if (casBase(seen, fn(seen, x))) {
                    return;                     // Fall back on using base
                }
            }
        }
    }

    /**
     * Applies one round of the Marsaglia XorShift secondary hash used
     * when retrying after a collision.
     *
     * @param h the current hash code
     * @return the next hash code
     */
    private static int xorShift(int h) {
        h ^= h << 13;
        h ^= h >>> 17;
        h ^= h << 5;
        return h;
    }

    /**
     * Assigns the given value to base and to every existing cell.
     *
     * @param initialValue the value to store in base and in each cell
     */
    final void internalReset(long initialValue) {
        Cell[] snapshot = cells;
        base = initialValue;
        if (snapshot == null) {
            return;
        }
        for (Cell cell : snapshot) {
            if (cell != null) {
                cell.value = initialValue;
            }
        }
    }

    // Unsafe mechanics: handle plus the offsets of the base and busy fields.
    private static final sun.misc.Unsafe UNSAFE;
    private static final long baseOffset;
    private static final long busyOffset;

    static {
        try {
            UNSAFE = getUnsafe();
            Class<?> self = Striped64.class;
            baseOffset = UNSAFE.objectFieldOffset(self.getDeclaredField("base"));
            busyOffset = UNSAFE.objectFieldOffset(self.getDeclaredField("busy"));
        } catch (Exception e) {
            throw new Error(e);
        }
    }

    /**
     * Obtains a sun.misc.Unsafe in a way that is suitable for a 3rd
     * party package. When integrating into a jdk, replace this with a
     * plain call to Unsafe.getUnsafe.
     *
     * @return a sun.misc.Unsafe
     */
    private static sun.misc.Unsafe getUnsafe() {
        try {
            return sun.misc.Unsafe.getUnsafe();
        } catch (SecurityException tryReflectionInstead) {
            // Direct access was refused; fall through to the reflective lookup.
        }

        // Scans the declared fields of Unsafe for one holding an Unsafe instance.
        java.security.PrivilegedExceptionAction<sun.misc.Unsafe> lookup =
            new java.security.PrivilegedExceptionAction<sun.misc.Unsafe>() {
                @Override
                public sun.misc.Unsafe run() throws Exception {
                    Class<sun.misc.Unsafe> type = sun.misc.Unsafe.class;
                    for (java.lang.reflect.Field field : type.getDeclaredFields()) {
                        field.setAccessible(true);
                        Object candidate = field.get(null);
                        if (type.isInstance(candidate)) {
                            return type.cast(candidate);
                        }
                    }
                    throw new NoSuchFieldError("the Unsafe");
                }
            };

        try {
            return java.security.AccessController.doPrivileged(lookup);
        } catch (java.security.PrivilegedActionException e) {
            throw new RuntimeException("Could not initialize intrinsics",
                                       e.getCause());
        }
    }
}