// "View Javadoc" page header from the source-viewer export; not part of the original file.

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import com.google.common.base.Preconditions;
22  import java.util.concurrent.BlockingQueue;
23  import java.util.concurrent.LinkedBlockingQueue;
24  import java.util.concurrent.atomic.AtomicBoolean;
25  import java.util.concurrent.atomic.AtomicInteger;
26  import java.util.concurrent.atomic.AtomicReference;
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.hbase.util.ByteRange;
32  import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
33  
34  /**
35   * A memstore-local allocation buffer.
36   * <p>
37   * The MemStoreLAB is basically a bump-the-pointer allocator that allocates
38   * big (2MB) byte[] chunks from and then doles it out to threads that request
39   * slices into the array.
40   * <p>
41   * The purpose of this class is to combat heap fragmentation in the
42   * regionserver. By ensuring that all KeyValues in a given memstore refer
43   * only to large chunks of contiguous memory, we ensure that large blocks
44   * get freed up when the memstore is flushed.
45   * <p>
46   * Without the MSLAB, the byte array allocated during insertion end up
47   * interleaved throughout the heap, and the old generation gets progressively
48   * more fragmented until a stop-the-world compacting collection occurs.
49   * <p>
50   * TODO: we should probably benchmark whether word-aligning the allocations
51   * would provide a performance improvement - probably would speed up the
52   * Bytes.toLong/Bytes.toInt calls in KeyValue, but some of those are cached
53   * anyway
54   */
55  @InterfaceAudience.Private
56  public class HeapMemStoreLAB implements MemStoreLAB {
57  
58    static final String CHUNK_SIZE_KEY = "hbase.hregion.memstore.mslab.chunksize";
59    static final int CHUNK_SIZE_DEFAULT = 2048 * 1024;
60    static final String MAX_ALLOC_KEY = "hbase.hregion.memstore.mslab.max.allocation";
61    static final int MAX_ALLOC_DEFAULT = 256 * 1024; // allocs bigger than this don't go through
62                                                     // allocator
63  
64    static final Log LOG = LogFactory.getLog(HeapMemStoreLAB.class);
65  
66    private AtomicReference<Chunk> curChunk = new AtomicReference<Chunk>();
67    // A queue of chunks contained by this memstore, used with chunk pool
68    private BlockingQueue<Chunk> chunkQueue = null;
69    final int chunkSize;
70    final int maxAlloc;
71    private final MemStoreChunkPool chunkPool;
72  
73    // This flag is for closing this instance, its set when clearing snapshot of
74    // memstore
75    private volatile boolean closed = false;
76    // This flag is for reclaiming chunks. Its set when putting chunks back to
77    // pool
78    private AtomicBoolean reclaimed = new AtomicBoolean(false);
79    // Current count of open scanners which reading data from this MemStoreLAB
80    private final AtomicInteger openScannerCount = new AtomicInteger();
81  
82    // Used in testing
83    public HeapMemStoreLAB() {
84      this(new Configuration());
85    }
86  
87    public HeapMemStoreLAB(Configuration conf) {
88      chunkSize = conf.getInt(CHUNK_SIZE_KEY, CHUNK_SIZE_DEFAULT);
89      maxAlloc = conf.getInt(MAX_ALLOC_KEY, MAX_ALLOC_DEFAULT);
90      this.chunkPool = MemStoreChunkPool.getPool(conf);
91      // currently chunkQueue is only used for chunkPool
92      if (this.chunkPool != null) {
93        // set queue length to chunk pool max count to avoid keeping reference of
94        // too many non-reclaimable chunks
95        chunkQueue = new LinkedBlockingQueue<Chunk>(chunkPool.getMaxCount());
96      }
97  
98      // if we don't exclude allocations >CHUNK_SIZE, we'd infiniteloop on one!
99      Preconditions.checkArgument(
100       maxAlloc <= chunkSize,
101       MAX_ALLOC_KEY + " must be less than " + CHUNK_SIZE_KEY);
102   }
103 
104   /**
105    * Allocate a slice of the given length.
106    *
107    * If the size is larger than the maximum size specified for this
108    * allocator, returns null.
109    */
110   @Override
111   public ByteRange allocateBytes(int size) {
112     Preconditions.checkArgument(size >= 0, "negative size");
113 
114     // Callers should satisfy large allocations directly from JVM since they
115     // don't cause fragmentation as badly.
116     if (size > maxAlloc) {
117       return null;
118     }
119 
120     while (true) {
121       Chunk c = getOrMakeChunk();
122 
123       // Try to allocate from this chunk
124       int allocOffset = c.alloc(size);
125       if (allocOffset != -1) {
126         // We succeeded - this is the common case - small alloc
127         // from a big buffer
128         return new SimpleMutableByteRange(c.data, allocOffset, size);
129       }
130 
131       // not enough space!
132       // try to retire this chunk
133       tryRetireChunk(c);
134     }
135   }
136 
137   /**
138    * Close this instance since it won't be used any more, try to put the chunks
139    * back to pool
140    */
141   @Override
142   public void close() {
143     this.closed = true;
144     // We could put back the chunks to pool for reusing only when there is no
145     // opening scanner which will read their data
146     if (chunkPool != null && openScannerCount.get() == 0
147         && reclaimed.compareAndSet(false, true)) {
148       chunkPool.putbackChunks(this.chunkQueue);
149     }
150   }
151 
152   /**
153    * Called when opening a scanner on the data of this MemStoreLAB
154    */
155   @Override
156   public void incScannerCount() {
157     this.openScannerCount.incrementAndGet();
158   }
159 
160   /**
161    * Called when closing a scanner on the data of this MemStoreLAB
162    */
163   @Override
164   public void decScannerCount() {
165     int count = this.openScannerCount.decrementAndGet();
166     if (chunkPool != null && count == 0 && this.closed
167         && reclaimed.compareAndSet(false, true)) {
168       chunkPool.putbackChunks(this.chunkQueue);
169     }
170   }
171 
172   /**
173    * Try to retire the current chunk if it is still
174    * <code>c</code>. Postcondition is that curChunk.get()
175    * != c
176    * @param c the chunk to retire
177    * @return true if we won the race to retire the chunk
178    */
179   private void tryRetireChunk(Chunk c) {
180     curChunk.compareAndSet(c, null);
181     // If the CAS succeeds, that means that we won the race
182     // to retire the chunk. We could use this opportunity to
183     // update metrics on external fragmentation.
184     //
185     // If the CAS fails, that means that someone else already
186     // retired the chunk for us.
187   }
188 
189   /**
190    * Get the current chunk, or, if there is no current chunk,
191    * allocate a new one from the JVM.
192    */
193   private Chunk getOrMakeChunk() {
194     while (true) {
195       // Try to get the chunk
196       Chunk c = curChunk.get();
197       if (c != null) {
198         return c;
199       }
200 
201       // No current chunk, so we want to allocate one. We race
202       // against other allocators to CAS in an uninitialized chunk
203       // (which is cheap to allocate)
204       c = (chunkPool != null) ? chunkPool.getChunk() : new Chunk(chunkSize);
205       if (curChunk.compareAndSet(null, c)) {
206         // we won race - now we need to actually do the expensive
207         // allocation step
208         c.init();
209         if (chunkQueue != null && !this.closed && !this.chunkQueue.offer(c)) {
210           if (LOG.isTraceEnabled()) {
211             LOG.trace("Chunk queue is full, won't reuse this new chunk. Current queue size: "
212                 + chunkQueue.size());
213           }
214         }
215         return c;
216       } else if (chunkPool != null) {
217         chunkPool.putbackChunk(c);
218       }
219       // someone else won race - that's fine, we'll try to grab theirs
220       // in the next iteration of the loop.
221     }
222   }
223 
224   Chunk getCurrentChunk() {
225     return this.curChunk.get();
226   }
227 
228   BlockingQueue<Chunk> getChunkQueue() {
229     return this.chunkQueue;
230   }
231 
232   /**
233    * A chunk of memory out of which allocations are sliced.
234    */
235   static class Chunk {
236     /** Actual underlying data */
237     private byte[] data;
238 
239     private static final int UNINITIALIZED = -1;
240     private static final int OOM = -2;
241     /**
242      * Offset for the next allocation, or the sentinel value -1
243      * which implies that the chunk is still uninitialized.
244      * */
245     private AtomicInteger nextFreeOffset = new AtomicInteger(UNINITIALIZED);
246 
247     /** Total number of allocations satisfied from this buffer */
248     private AtomicInteger allocCount = new AtomicInteger();
249 
250     /** Size of chunk in bytes */
251     private final int size;
252 
253     /**
254      * Create an uninitialized chunk. Note that memory is not allocated yet, so
255      * this is cheap.
256      * @param size in bytes
257      */
258     Chunk(int size) {
259       this.size = size;
260     }
261 
262     /**
263      * Actually claim the memory for this chunk. This should only be called from
264      * the thread that constructed the chunk. It is thread-safe against other
265      * threads calling alloc(), who will block until the allocation is complete.
266      */
267     public void init() {
268       assert nextFreeOffset.get() == UNINITIALIZED;
269       try {
270         if (data == null) {
271           data = new byte[size];
272         }
273       } catch (OutOfMemoryError e) {
274         boolean failInit = nextFreeOffset.compareAndSet(UNINITIALIZED, OOM);
275         assert failInit; // should be true.
276         throw e;
277       }
278       // Mark that it's ready for use
279       boolean initted = nextFreeOffset.compareAndSet(
280           UNINITIALIZED, 0);
281       // We should always succeed the above CAS since only one thread
282       // calls init()!
283       Preconditions.checkState(initted,
284           "Multiple threads tried to init same chunk");
285     }
286 
287     /**
288      * Reset the offset to UNINITIALIZED before before reusing an old chunk
289      */
290     void reset() {
291       if (nextFreeOffset.get() != UNINITIALIZED) {
292         nextFreeOffset.set(UNINITIALIZED);
293         allocCount.set(0);
294       }
295     }
296 
297     /**
298      * Try to allocate <code>size</code> bytes from the chunk.
299      * @return the offset of the successful allocation, or -1 to indicate not-enough-space
300      */
301     public int alloc(int size) {
302       while (true) {
303         int oldOffset = nextFreeOffset.get();
304         if (oldOffset == UNINITIALIZED) {
305           // The chunk doesn't have its data allocated yet.
306           // Since we found this in curChunk, we know that whoever
307           // CAS-ed it there is allocating it right now. So spin-loop
308           // shouldn't spin long!
309           Thread.yield();
310           continue;
311         }
312         if (oldOffset == OOM) {
313           // doh we ran out of ram. return -1 to chuck this away.
314           return -1;
315         }
316 
317         if (oldOffset + size > data.length) {
318           return -1; // alloc doesn't fit
319         }
320 
321         // Try to atomically claim this chunk
322         if (nextFreeOffset.compareAndSet(oldOffset, oldOffset + size)) {
323           // we got the alloc
324           allocCount.incrementAndGet();
325           return oldOffset;
326         }
327         // we raced and lost alloc, try again
328       }
329     }
330 
331     @Override
332     public String toString() {
333       return "Chunk@" + System.identityHashCode(this) +
334         " allocs=" + allocCount.get() + "waste=" +
335         (data.length - nextFreeOffset.get());
336     }
337 
338     int getNextFreeOffset() {
339       return this.nextFreeOffset.get();
340     }
341   }
342 }