1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.regionserver;
20
21 import com.google.common.base.Preconditions;
22 import java.util.concurrent.BlockingQueue;
23 import java.util.concurrent.LinkedBlockingQueue;
24 import java.util.concurrent.atomic.AtomicBoolean;
25 import java.util.concurrent.atomic.AtomicInteger;
26 import java.util.concurrent.atomic.AtomicReference;
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.hbase.classification.InterfaceAudience;
30 import org.apache.hadoop.conf.Configuration;
31 import org.apache.hadoop.hbase.util.ByteRange;
32 import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
33
34 /**
35 * A memstore-local allocation buffer.
36 * <p>
 * The MemStoreLAB is basically a bump-the-pointer allocator that allocates
 * big (2MB) byte[] chunks and then doles out slices of those chunks to
 * threads that request memory.
40 * <p>
41 * The purpose of this class is to combat heap fragmentation in the
42 * regionserver. By ensuring that all KeyValues in a given memstore refer
43 * only to large chunks of contiguous memory, we ensure that large blocks
44 * get freed up when the memstore is flushed.
45 * <p>
 * Without the MSLAB, the byte arrays allocated during insertion end up
47 * interleaved throughout the heap, and the old generation gets progressively
48 * more fragmented until a stop-the-world compacting collection occurs.
49 * <p>
50 * TODO: we should probably benchmark whether word-aligning the allocations
51 * would provide a performance improvement - probably would speed up the
52 * Bytes.toLong/Bytes.toInt calls in KeyValue, but some of those are cached
53 * anyway
54 */
55 @InterfaceAudience.Private
56 public class HeapMemStoreLAB implements MemStoreLAB {
57
58 static final String CHUNK_SIZE_KEY = "hbase.hregion.memstore.mslab.chunksize";
59 static final int CHUNK_SIZE_DEFAULT = 2048 * 1024;
60 static final String MAX_ALLOC_KEY = "hbase.hregion.memstore.mslab.max.allocation";
61 static final int MAX_ALLOC_DEFAULT = 256 * 1024; // allocs bigger than this don't go through
62 // allocator
63
64 static final Log LOG = LogFactory.getLog(HeapMemStoreLAB.class);
65
66 private AtomicReference<Chunk> curChunk = new AtomicReference<Chunk>();
67 // A queue of chunks contained by this memstore, used with chunk pool
68 private BlockingQueue<Chunk> chunkQueue = null;
69 final int chunkSize;
70 final int maxAlloc;
71 private final MemStoreChunkPool chunkPool;
72
73 // This flag is for closing this instance, its set when clearing snapshot of
74 // memstore
75 private volatile boolean closed = false;
76 // This flag is for reclaiming chunks. Its set when putting chunks back to
77 // pool
78 private AtomicBoolean reclaimed = new AtomicBoolean(false);
79 // Current count of open scanners which reading data from this MemStoreLAB
80 private final AtomicInteger openScannerCount = new AtomicInteger();
81
82 // Used in testing
83 public HeapMemStoreLAB() {
84 this(new Configuration());
85 }
86
87 public HeapMemStoreLAB(Configuration conf) {
88 chunkSize = conf.getInt(CHUNK_SIZE_KEY, CHUNK_SIZE_DEFAULT);
89 maxAlloc = conf.getInt(MAX_ALLOC_KEY, MAX_ALLOC_DEFAULT);
90 this.chunkPool = MemStoreChunkPool.getPool(conf);
91 // currently chunkQueue is only used for chunkPool
92 if (this.chunkPool != null) {
93 // set queue length to chunk pool max count to avoid keeping reference of
94 // too many non-reclaimable chunks
95 chunkQueue = new LinkedBlockingQueue<Chunk>(chunkPool.getMaxCount());
96 }
97
98 // if we don't exclude allocations >CHUNK_SIZE, we'd infiniteloop on one!
99 Preconditions.checkArgument(
100 maxAlloc <= chunkSize,
101 MAX_ALLOC_KEY + " must be less than " + CHUNK_SIZE_KEY);
102 }
103
104 /**
105 * Allocate a slice of the given length.
106 *
107 * If the size is larger than the maximum size specified for this
108 * allocator, returns null.
109 */
110 @Override
111 public ByteRange allocateBytes(int size) {
112 Preconditions.checkArgument(size >= 0, "negative size");
113
114 // Callers should satisfy large allocations directly from JVM since they
115 // don't cause fragmentation as badly.
116 if (size > maxAlloc) {
117 return null;
118 }
119
120 while (true) {
121 Chunk c = getOrMakeChunk();
122
123 // Try to allocate from this chunk
124 int allocOffset = c.alloc(size);
125 if (allocOffset != -1) {
126 // We succeeded - this is the common case - small alloc
127 // from a big buffer
128 return new SimpleMutableByteRange(c.data, allocOffset, size);
129 }
130
131 // not enough space!
132 // try to retire this chunk
133 tryRetireChunk(c);
134 }
135 }
136
137 /**
138 * Close this instance since it won't be used any more, try to put the chunks
139 * back to pool
140 */
141 @Override
142 public void close() {
143 this.closed = true;
144 // We could put back the chunks to pool for reusing only when there is no
145 // opening scanner which will read their data
146 if (chunkPool != null && openScannerCount.get() == 0
147 && reclaimed.compareAndSet(false, true)) {
148 chunkPool.putbackChunks(this.chunkQueue);
149 }
150 }
151
152 /**
153 * Called when opening a scanner on the data of this MemStoreLAB
154 */
155 @Override
156 public void incScannerCount() {
157 this.openScannerCount.incrementAndGet();
158 }
159
160 /**
161 * Called when closing a scanner on the data of this MemStoreLAB
162 */
163 @Override
164 public void decScannerCount() {
165 int count = this.openScannerCount.decrementAndGet();
166 if (chunkPool != null && count == 0 && this.closed
167 && reclaimed.compareAndSet(false, true)) {
168 chunkPool.putbackChunks(this.chunkQueue);
169 }
170 }
171
172 /**
173 * Try to retire the current chunk if it is still
174 * <code>c</code>. Postcondition is that curChunk.get()
175 * != c
176 * @param c the chunk to retire
177 * @return true if we won the race to retire the chunk
178 */
179 private void tryRetireChunk(Chunk c) {
180 curChunk.compareAndSet(c, null);
181 // If the CAS succeeds, that means that we won the race
182 // to retire the chunk. We could use this opportunity to
183 // update metrics on external fragmentation.
184 //
185 // If the CAS fails, that means that someone else already
186 // retired the chunk for us.
187 }
188
189 /**
190 * Get the current chunk, or, if there is no current chunk,
191 * allocate a new one from the JVM.
192 */
193 private Chunk getOrMakeChunk() {
194 while (true) {
195 // Try to get the chunk
196 Chunk c = curChunk.get();
197 if (c != null) {
198 return c;
199 }
200
201 // No current chunk, so we want to allocate one. We race
202 // against other allocators to CAS in an uninitialized chunk
203 // (which is cheap to allocate)
204 c = (chunkPool != null) ? chunkPool.getChunk() : new Chunk(chunkSize);
205 if (curChunk.compareAndSet(null, c)) {
206 // we won race - now we need to actually do the expensive
207 // allocation step
208 c.init();
209 if (chunkQueue != null && !this.closed && !this.chunkQueue.offer(c)) {
210 if (LOG.isTraceEnabled()) {
211 LOG.trace("Chunk queue is full, won't reuse this new chunk. Current queue size: "
212 + chunkQueue.size());
213 }
214 }
215 return c;
216 } else if (chunkPool != null) {
217 chunkPool.putbackChunk(c);
218 }
219 // someone else won race - that's fine, we'll try to grab theirs
220 // in the next iteration of the loop.
221 }
222 }
223
224 Chunk getCurrentChunk() {
225 return this.curChunk.get();
226 }
227
228 BlockingQueue<Chunk> getChunkQueue() {
229 return this.chunkQueue;
230 }
231
232 /**
233 * A chunk of memory out of which allocations are sliced.
234 */
235 static class Chunk {
236 /** Actual underlying data */
237 private byte[] data;
238
239 private static final int UNINITIALIZED = -1;
240 private static final int OOM = -2;
241 /**
242 * Offset for the next allocation, or the sentinel value -1
243 * which implies that the chunk is still uninitialized.
244 * */
245 private AtomicInteger nextFreeOffset = new AtomicInteger(UNINITIALIZED);
246
247 /** Total number of allocations satisfied from this buffer */
248 private AtomicInteger allocCount = new AtomicInteger();
249
250 /** Size of chunk in bytes */
251 private final int size;
252
253 /**
254 * Create an uninitialized chunk. Note that memory is not allocated yet, so
255 * this is cheap.
256 * @param size in bytes
257 */
258 Chunk(int size) {
259 this.size = size;
260 }
261
262 /**
263 * Actually claim the memory for this chunk. This should only be called from
264 * the thread that constructed the chunk. It is thread-safe against other
265 * threads calling alloc(), who will block until the allocation is complete.
266 */
267 public void init() {
268 assert nextFreeOffset.get() == UNINITIALIZED;
269 try {
270 if (data == null) {
271 data = new byte[size];
272 }
273 } catch (OutOfMemoryError e) {
274 boolean failInit = nextFreeOffset.compareAndSet(UNINITIALIZED, OOM);
275 assert failInit; // should be true.
276 throw e;
277 }
278 // Mark that it's ready for use
279 boolean initted = nextFreeOffset.compareAndSet(
280 UNINITIALIZED, 0);
281 // We should always succeed the above CAS since only one thread
282 // calls init()!
283 Preconditions.checkState(initted,
284 "Multiple threads tried to init same chunk");
285 }
286
287 /**
288 * Reset the offset to UNINITIALIZED before before reusing an old chunk
289 */
290 void reset() {
291 if (nextFreeOffset.get() != UNINITIALIZED) {
292 nextFreeOffset.set(UNINITIALIZED);
293 allocCount.set(0);
294 }
295 }
296
297 /**
298 * Try to allocate <code>size</code> bytes from the chunk.
299 * @return the offset of the successful allocation, or -1 to indicate not-enough-space
300 */
301 public int alloc(int size) {
302 while (true) {
303 int oldOffset = nextFreeOffset.get();
304 if (oldOffset == UNINITIALIZED) {
305 // The chunk doesn't have its data allocated yet.
306 // Since we found this in curChunk, we know that whoever
307 // CAS-ed it there is allocating it right now. So spin-loop
308 // shouldn't spin long!
309 Thread.yield();
310 continue;
311 }
312 if (oldOffset == OOM) {
313 // doh we ran out of ram. return -1 to chuck this away.
314 return -1;
315 }
316
317 if (oldOffset + size > data.length) {
318 return -1; // alloc doesn't fit
319 }
320
321 // Try to atomically claim this chunk
322 if (nextFreeOffset.compareAndSet(oldOffset, oldOffset + size)) {
323 // we got the alloc
324 allocCount.incrementAndGet();
325 return oldOffset;
326 }
327 // we raced and lost alloc, try again
328 }
329 }
330
331 @Override
332 public String toString() {
333 return "Chunk@" + System.identityHashCode(this) +
334 " allocs=" + allocCount.get() + "waste=" +
335 (data.length - nextFreeOffset.get());
336 }
337
338 int getNextFreeOffset() {
339 return this.nextFreeOffset.get();
340 }
341 }
342 }