View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import com.google.common.util.concurrent.ThreadFactoryBuilder;
22  import java.lang.ref.WeakReference;
23  import java.util.EnumMap;
24  import java.util.Iterator;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.PriorityQueue;
28  import java.util.SortedSet;
29  import java.util.TreeSet;
30  import java.util.concurrent.ConcurrentHashMap;
31  import java.util.concurrent.Executors;
32  import java.util.concurrent.ScheduledExecutorService;
33  import java.util.concurrent.TimeUnit;
34  import java.util.concurrent.atomic.AtomicLong;
35  import java.util.concurrent.locks.ReentrantLock;
36  import com.google.common.base.Objects;
37  import org.apache.commons.logging.Log;
38  import org.apache.commons.logging.LogFactory;
39  import org.apache.hadoop.hbase.classification.InterfaceAudience;
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.hbase.io.HeapSize;
42  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
43  import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
44  import org.apache.hadoop.hbase.util.Bytes;
45  import org.apache.hadoop.hbase.util.ClassSize;
46  import org.apache.hadoop.hbase.util.HasThread;
47  import org.apache.hadoop.util.StringUtils;
48  
49  /**
50   * A block cache implementation that is memory-aware using {@link HeapSize},
51   * memory-bound using an LRU eviction algorithm, and concurrent: backed by a
52   * {@link ConcurrentHashMap} and with a non-blocking eviction thread giving
53   * constant-time {@link #cacheBlock} and {@link #getBlock} operations.<p>
54   *
55   * Contains three levels of block priority to allow for
56   * scan-resistance and in-memory families 
57   * {@link org.apache.hadoop.hbase.HColumnDescriptor#setInMemory(boolean)} (An
58   * in-memory column family is a column family that should be served from memory if possible):
59   * single-access, multiple-accesses, and in-memory priority.
60   * A block is added with an in-memory priority flag if
61   * {@link org.apache.hadoop.hbase.HColumnDescriptor#isInMemory()}, 
62   * otherwise a block becomes a single access
63   * priority the first time it is read into this block cache.  If a block is accessed again while
64   * in cache, it is marked as a multiple access priority block.  This delineation of blocks is used
65   * to prevent scans from thrashing the cache adding a least-frequently-used
66   * element to the eviction algorithm.<p>
67   *
68   * Each priority is given its own chunk of the total cache to ensure
69   * fairness during eviction.  Each priority will retain close to its maximum
70   * size, however, if any priority is not using its entire chunk the others
71   * are able to grow beyond their chunk size.<p>
72   *
73   * Instantiated at a minimum with the total size and average block size.
74   * All sizes are in bytes.  The block size is not especially important as this
75   * cache is fully dynamic in its sizing of blocks.  It is only used for
76   * pre-allocating data structures and in initial heap estimation of the map.<p>
77   *
78   * The detailed constructor defines the sizes for the three priorities (they
79   * should total to the <code>maximum size</code> defined).  It also sets the levels that
80   * trigger and control the eviction thread.<p>
81   *
82   * The <code>acceptable size</code> is the cache size level which triggers the eviction
83   * process to start.  It evicts enough blocks to get the size below the
84   * minimum size specified.<p>
85   *
86   * Eviction happens in a separate thread and involves a single full-scan
87   * of the map.  It determines how many bytes must be freed to reach the minimum
88   * size, and then while scanning determines the fewest least-recently-used
89   * blocks necessary from each of the three priorities (would be 3 times bytes
90   * to free).  It then uses the priority chunk sizes to evict fairly according
91   * to the relative sizes and usage.
92   */
93  @InterfaceAudience.Private
94  public class LruBlockCache implements ResizableBlockCache, HeapSize {
95  
  private static final Log LOG = LogFactory.getLog(LruBlockCache.class);

  /**
   * Percentage of total size that eviction will evict until; e.g. if set to .8, then we will keep
   * evicting during an eviction run till the cache size is down to 80% of the total.
   */
  static final String LRU_MIN_FACTOR_CONFIG_NAME = "hbase.lru.blockcache.min.factor";

  /**
   * Acceptable size of cache (no evictions if size < acceptable)
   */
  static final String LRU_ACCEPTABLE_FACTOR_CONFIG_NAME = "hbase.lru.blockcache.acceptable.factor";

  /**
   * Hard capacity limit of cache, will reject any put if size > this * acceptable
   */
  static final String LRU_HARD_CAPACITY_LIMIT_FACTOR_CONFIG_NAME = "hbase.lru.blockcache.hard.capacity.limit.factor";
  /** Fraction of the cache reserved for single-access (seen once) blocks. */
  static final String LRU_SINGLE_PERCENTAGE_CONFIG_NAME = "hbase.lru.blockcache.single.percentage";
  /** Fraction of the cache reserved for multiple-access blocks. */
  static final String LRU_MULTI_PERCENTAGE_CONFIG_NAME = "hbase.lru.blockcache.multi.percentage";
  /** Fraction of the cache reserved for in-memory column family blocks. */
  static final String LRU_MEMORY_PERCENTAGE_CONFIG_NAME = "hbase.lru.blockcache.memory.percentage";

  /**
   * Configuration key to force data-block always (except in-memory are too much)
   * cached in memory for in-memory hfile, unlike inMemory, which is a column-family
   * configuration, inMemoryForceMode is a cluster-wide configuration
   */
  static final String LRU_IN_MEMORY_FORCE_MODE_CONFIG_NAME = "hbase.lru.rs.inmemoryforcemode";

  /** Default Configuration Parameters*/

  /** Backing Concurrent Map Configuration */
  static final float DEFAULT_LOAD_FACTOR = 0.75f;
  static final int DEFAULT_CONCURRENCY_LEVEL = 16;

  /** Eviction thresholds */
  static final float DEFAULT_MIN_FACTOR = 0.95f;
  static final float DEFAULT_ACCEPTABLE_FACTOR = 0.99f;

  /** Priority buckets */
  static final float DEFAULT_SINGLE_FACTOR = 0.25f;
  static final float DEFAULT_MULTI_FACTOR = 0.50f;
  static final float DEFAULT_MEMORY_FACTOR = 0.25f;

  /** default hard capacity limit */
  static final float DEFAULT_HARD_CAPACITY_LIMIT_FACTOR = 1.2f;

  static final boolean DEFAULT_IN_MEMORY_FORCE_MODE = false;

  /** Statistics thread period, in seconds (logs stats every 5 minutes). */
  static final int statThreadPeriod = 60 * 5;
  /** Configuration key for the largest single block (by heap size) this cache accepts. */
  private static final String LRU_MAX_BLOCK_SIZE = "hbase.lru.max.block.size";
  /** Default cap on a single cacheable block: 16 MB. */
  private static final long DEFAULT_MAX_BLOCK_SIZE = 16L * 1024L * 1024L;

  /** Concurrent map (the cache) */
  private transient final Map<BlockCacheKey,LruCachedBlock> map;

  /** Eviction lock (locked when eviction in process) */
  private transient final ReentrantLock evictionLock = new ReentrantLock(true);
  /** Largest block heap size, in bytes, accepted by {@link #cacheBlock}. */
  private final long maxBlockSize;

  /** Volatile boolean to track if we are in an eviction process or not */
  private volatile boolean evictionInProgress = false;

  /** Eviction thread */
  private transient final EvictionThread evictionThread;

  /** Statistics thread schedule pool (for heavy debugging, could remove) */
  private transient final ScheduledExecutorService scheduleThreadPool =
    Executors.newScheduledThreadPool(1,
      new ThreadFactoryBuilder().setNameFormat("LruBlockCacheStatsExecutor")
        .setDaemon(true).build());

  /** Current size of cache */
  private final AtomicLong size;

  /** Current size of data blocks */
  private final AtomicLong dataBlockSize;

  /** Current number of cached elements */
  private final AtomicLong elements;

  /** Current number of cached data block elements */
  private final AtomicLong dataBlockElements;

  /** Cache access count (sequential ID) */
  private final AtomicLong count;

  /** hard capacity limit; puts are rejected once size >= this * acceptable size */
  private float hardCapacityLimitFactor;

  /** Cache statistics */
  private final CacheStats stats;

  /** Maximum allowable size of cache (block put if size > max, evict) */
  private long maxSize;

  /** Approximate block size */
  private long blockSize;

  /** Acceptable size of cache (no evictions if size < acceptable) */
  private float acceptableFactor;

  /** Minimum threshold of cache (when evicting, evict until size < min) */
  private float minFactor;

  /** Single access bucket size */
  private float singleFactor;

  /** Multiple access bucket size */
  private float multiFactor;

  /** In-memory bucket size */
  private float memoryFactor;

  /** Overhead of the structure itself */
  private long overhead;

  /** Whether in-memory hfile's data block has higher priority when evicting */
  private boolean forceInMemory;

  /** Where to send victims (blocks evicted/missing from the cache) */
  private transient BlockCache victimHandler = null;
  /**
   * Default constructor.  Specify maximum size and expected average block
   * size (approximation is fine).
   *
   * <p>All other factors will be calculated based on defaults specified in
   * this class.  Delegates to the three-argument constructor with the
   * eviction thread enabled.
   * @param maxSize maximum size of cache, in bytes
   * @param blockSize approximate size of each block, in bytes
   */
  public LruBlockCache(long maxSize, long blockSize) {
    this(maxSize, blockSize, true);
  }
231 
  /**
   * Constructor used for testing.  Allows disabling of the eviction thread.
   *
   * <p>Delegates to the fully configurable constructor using the class
   * defaults for all factors.  The initial map size is sized at 1.2x the
   * expected block count (maxSize / blockSize) to limit rehashing.
   * @param maxSize maximum size of cache, in bytes
   * @param blockSize approximate size of each block, in bytes
   * @param evictionThread whether to run evictions in a background thread
   */
  public LruBlockCache(long maxSize, long blockSize, boolean evictionThread) {
    this(maxSize, blockSize, evictionThread,
        (int)Math.ceil(1.2*maxSize/blockSize),
        DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL,
        DEFAULT_MIN_FACTOR, DEFAULT_ACCEPTABLE_FACTOR,
        DEFAULT_SINGLE_FACTOR,
        DEFAULT_MULTI_FACTOR,
        DEFAULT_MEMORY_FACTOR,
        DEFAULT_HARD_CAPACITY_LIMIT_FACTOR,
        false,
        DEFAULT_MAX_BLOCK_SIZE
        );
  }
248 
  /**
   * Constructor that reads all tunable factors from the given
   * {@link Configuration}, falling back to the class defaults.
   * @param maxSize maximum size of cache, in bytes
   * @param blockSize approximate size of each block, in bytes
   * @param evictionThread whether to run evictions in a background thread
   * @param conf configuration supplying the lru.* tuning keys
   */
  public LruBlockCache(long maxSize, long blockSize, boolean evictionThread, Configuration conf) {
    this(maxSize, blockSize, evictionThread,
        (int)Math.ceil(1.2*maxSize/blockSize),
        DEFAULT_LOAD_FACTOR,
        DEFAULT_CONCURRENCY_LEVEL,
        conf.getFloat(LRU_MIN_FACTOR_CONFIG_NAME, DEFAULT_MIN_FACTOR),
        conf.getFloat(LRU_ACCEPTABLE_FACTOR_CONFIG_NAME, DEFAULT_ACCEPTABLE_FACTOR),
        conf.getFloat(LRU_SINGLE_PERCENTAGE_CONFIG_NAME, DEFAULT_SINGLE_FACTOR),
        conf.getFloat(LRU_MULTI_PERCENTAGE_CONFIG_NAME, DEFAULT_MULTI_FACTOR),
        conf.getFloat(LRU_MEMORY_PERCENTAGE_CONFIG_NAME, DEFAULT_MEMORY_FACTOR),
        conf.getFloat(LRU_HARD_CAPACITY_LIMIT_FACTOR_CONFIG_NAME, DEFAULT_HARD_CAPACITY_LIMIT_FACTOR),
        conf.getBoolean(LRU_IN_MEMORY_FORCE_MODE_CONFIG_NAME, DEFAULT_IN_MEMORY_FORCE_MODE),
        conf.getLong(LRU_MAX_BLOCK_SIZE, DEFAULT_MAX_BLOCK_SIZE)
        );
  }
264 
  /**
   * Configuration-driven constructor with the eviction thread enabled.
   * @param maxSize maximum size of cache, in bytes
   * @param blockSize approximate size of each block, in bytes
   * @param conf configuration supplying the lru.* tuning keys
   */
  public LruBlockCache(long maxSize, long blockSize, Configuration conf) {
    this(maxSize, blockSize, true, conf);
  }
268 
269   /**
270    * Configurable constructor.  Use this constructor if not using defaults.
271    * @param maxSize maximum size of this cache, in bytes
272    * @param blockSize expected average size of blocks, in bytes
273    * @param evictionThread whether to run evictions in a bg thread or not
274    * @param mapInitialSize initial size of backing ConcurrentHashMap
275    * @param mapLoadFactor initial load factor of backing ConcurrentHashMap
276    * @param mapConcurrencyLevel initial concurrency factor for backing CHM
277    * @param minFactor percentage of total size that eviction will evict until
278    * @param acceptableFactor percentage of total size that triggers eviction
279    * @param singleFactor percentage of total size for single-access blocks
280    * @param multiFactor percentage of total size for multiple-access blocks
281    * @param memoryFactor percentage of total size for in-memory blocks
282    */
283   public LruBlockCache(long maxSize, long blockSize, boolean evictionThread,
284       int mapInitialSize, float mapLoadFactor, int mapConcurrencyLevel,
285       float minFactor, float acceptableFactor, float singleFactor,
286       float multiFactor, float memoryFactor, float hardLimitFactor,
287       boolean forceInMemory, long maxBlockSize) {
288     this.maxBlockSize = maxBlockSize;
289     if(singleFactor + multiFactor + memoryFactor != 1 ||
290         singleFactor < 0 || multiFactor < 0 || memoryFactor < 0) {
291       throw new IllegalArgumentException("Single, multi, and memory factors " +
292           " should be non-negative and total 1.0");
293     }
294     if(minFactor >= acceptableFactor) {
295       throw new IllegalArgumentException("minFactor must be smaller than acceptableFactor");
296     }
297     if(minFactor >= 1.0f || acceptableFactor >= 1.0f) {
298       throw new IllegalArgumentException("all factors must be < 1");
299     }
300     this.maxSize = maxSize;
301     this.blockSize = blockSize;
302     this.forceInMemory = forceInMemory;
303     map = new ConcurrentHashMap<BlockCacheKey,LruCachedBlock>(mapInitialSize,
304         mapLoadFactor, mapConcurrencyLevel);
305     this.minFactor = minFactor;
306     this.acceptableFactor = acceptableFactor;
307     this.singleFactor = singleFactor;
308     this.multiFactor = multiFactor;
309     this.memoryFactor = memoryFactor;
310     this.stats = new CacheStats(this.getClass().getSimpleName());
311     this.count = new AtomicLong(0);
312     this.elements = new AtomicLong(0);
313     this.dataBlockElements = new AtomicLong(0);
314     this.dataBlockSize = new AtomicLong(0);
315     this.overhead = calculateOverhead(maxSize, blockSize, mapConcurrencyLevel);
316     this.size = new AtomicLong(this.overhead);
317     this.hardCapacityLimitFactor = hardLimitFactor;
318     if(evictionThread) {
319       this.evictionThread = new EvictionThread(this);
320       this.evictionThread.start(); // FindBugs SC_START_IN_CTOR
321     } else {
322       this.evictionThread = null;
323     }
324     // TODO: Add means of turning this off.  Bit obnoxious running thread just to make a log
325     // every five minutes.
326     this.scheduleThreadPool.scheduleAtFixedRate(new StatisticsThread(this),
327         statThreadPeriod, statThreadPeriod, TimeUnit.SECONDS);
328   }
329 
330   @Override
331   public void setMaxSize(long maxSize) {
332     this.maxSize = maxSize;
333     if(this.size.get() > acceptableSize() && !evictionInProgress) {
334       runEviction();
335     }
336   }
337 
338   // BlockCache implementation
339 
  /**
   * Cache the block with the specified name and buffer.
   * <p>
   * It is assumed this will NOT be called on an already cached block. In rare cases (HBASE-8547)
   * this can happen, for which we compare the buffer contents.
   * @param cacheKey block's cache key
   * @param buf block buffer
   * @param inMemory if block is in-memory
   * @param cacheDataInL1 when true, the existing-entry content comparison below is
   *          skipped — presumably because the caller is deliberately promoting the
   *          block into this (L1) cache; TODO confirm intent
   */
  @Override
  public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory,
      final boolean cacheDataInL1) {

    // Refuse oversized blocks outright; caching them would let a single block
    // dominate the LRU.
    if (buf.heapSize() > maxBlockSize) {
      // If there are a lot of blocks that are too
      // big this can make the logs way too noisy.
      // So we log 2%
      if (stats.failInsert() % 50 == 0) {
        LOG.warn("Trying to cache too large a block "
            + cacheKey.getHfileName() + " @ "
            + cacheKey.getOffset()
            + " is " + buf.heapSize()
            + " which is larger than " + maxBlockSize);
      }
      return;
    }

    LruCachedBlock cb = map.get(cacheKey);
    if (!cacheDataInL1 && cb != null
        && !BlockCacheUtil.shouldReplaceExistingCacheBlock(this, cacheKey, buf)) {
      return;
    }
    long currentSize = size.get();
    long currentAcceptableSize = acceptableSize();
    long hardLimitSize = (long) (hardCapacityLimitFactor * currentAcceptableSize);
    // Past the hard limit: reject the put entirely, record the failure, and
    // kick eviction so subsequent puts can succeed.
    if (currentSize >= hardLimitSize) {
      stats.failInsert();
      if (LOG.isTraceEnabled()) {
        LOG.trace("LruBlockCache current size " + StringUtils.byteDesc(currentSize)
          + " has exceeded acceptable size " + StringUtils.byteDesc(currentAcceptableSize) + "  too many."
          + " the hard limit size is " + StringUtils.byteDesc(hardLimitSize) + ", failed to put cacheKey:"
          + cacheKey + " into LruBlockCache.");
      }
      if (!evictionInProgress) {
        runEviction();
      }
      return;
    }
    cb = new LruCachedBlock(cacheKey, buf, count.incrementAndGet(), inMemory);
    // NOTE(review): size metrics are updated before map.put, so a concurrent
    // reader may briefly observe the new size without the entry — confirm
    // this ordering is intended.
    long newSize = updateSizeMetrics(cb, false);
    map.put(cacheKey, cb);
    long val = elements.incrementAndGet();
    if (buf.getBlockType().isData()) {
      dataBlockElements.incrementAndGet();
    }
    if (LOG.isTraceEnabled()) {
      long size = map.size();
      assertCounterSanity(size, val);
    }
    // Crossing the acceptable watermark triggers an (asynchronous) eviction pass.
    if (newSize > currentAcceptableSize && !evictionInProgress) {
      runEviction();
    }
  }
404 
405   /**
406    * Sanity-checking for parity between actual block cache content and metrics.
407    * Intended only for use with TRACE level logging and -ea JVM.
408    */
409   private static void assertCounterSanity(long mapSize, long counterVal) {
410     if (counterVal < 0) {
411       LOG.trace("counterVal overflow. Assertions unreliable. counterVal=" + counterVal +
412         ", mapSize=" + mapSize);
413       return;
414     }
415     if (mapSize < Integer.MAX_VALUE) {
416       double pct_diff = Math.abs((((double) counterVal) / ((double) mapSize)) - 1.);
417       if (pct_diff > 0.05) {
418         LOG.trace("delta between reported and actual size > 5%. counterVal=" + counterVal +
419           ", mapSize=" + mapSize);
420       }
421     }
422   }
423 
  /**
   * Cache the block with the specified name and buffer.
   * <p>
   * Convenience overload: caches with in-memory priority disabled and without
   * forcing the block into L1.
   * @param cacheKey block's cache key
   * @param buf block buffer
   */
  @Override
  public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf) {
    cacheBlock(cacheKey, buf, false, false);
  }
434 
435   /**
436    * Helper function that updates the local size counter and also updates any
437    * per-cf or per-blocktype metrics it can discern from given
438    * {@link LruCachedBlock}
439    *
440    * @param cb
441    * @param evict
442    */
443   protected long updateSizeMetrics(LruCachedBlock cb, boolean evict) {
444     long heapsize = cb.heapSize();
445     BlockType bt = cb.getBuffer().getBlockType();
446     if (evict) {
447       heapsize *= -1;
448     }
449     if (bt != null && bt.isData()) {
450       dataBlockSize.addAndGet(heapsize);
451     }
452     return size.addAndGet(heapsize);
453   }
454 
  /**
   * Get the buffer of the block with the specified name.
   * @param cacheKey block's cache key
   * @param caching true if the caller caches blocks on cache misses
   * @param repeat Whether this is a repeat lookup for the same block
   *        (used to avoid double counting cache misses when doing double-check locking)
   * @param updateCacheMetrics Whether to update cache metrics or not
   * @return buffer of specified cache key, or null if not in cache
   */
  @Override
  public Cacheable getBlock(BlockCacheKey cacheKey, boolean caching, boolean repeat,
      boolean updateCacheMetrics) {
    LruCachedBlock cb = map.get(cacheKey);
    if (cb == null) {
      if (!repeat && updateCacheMetrics) {
        stats.miss(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
      }
      // If there is another block cache then try and read there.
      // However if this is a retry ( second time in double checked locking )
      // And it's already a miss then the l2 will also be a miss.
      if (victimHandler != null && !repeat) {
        Cacheable result = victimHandler.getBlock(cacheKey, caching, repeat, updateCacheMetrics);

        // Promote this to L1.
        if (result != null && caching) {
          cacheBlock(cacheKey, result, false, true);
        }
        return result;
      }
      return null;
    }
    // Hit: record the stat and stamp the block with a fresh access sequence
    // number so the eviction ordering sees it as recently used.
    if (updateCacheMetrics) stats.hit(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
    cb.access(count.incrementAndGet());
    return cb.getBuffer();
  }
490 
  /**
   * Whether the cache contains block with specified cacheKey
   * @param cacheKey the key to look up
   * @return true if contains the block
   */
  public boolean containsBlock(BlockCacheKey cacheKey) {
    return map.containsKey(cacheKey);
  }
499 
500   @Override
501   public boolean evictBlock(BlockCacheKey cacheKey) {
502     LruCachedBlock cb = map.get(cacheKey);
503     if (cb == null) return false;
504     return evictBlock(cb, false) > 0;
505   }
506 
507   /**
508    * Evicts all blocks for a specific HFile. This is an
509    * expensive operation implemented as a linear-time search through all blocks
510    * in the cache. Ideally this should be a search in a log-access-time map.
511    *
512    * <p>
513    * This is used for evict-on-close to remove all blocks of a specific HFile.
514    *
515    * @return the number of blocks evicted
516    */
517   @Override
518   public int evictBlocksByHfileName(String hfileName) {
519     int numEvicted = 0;
520     for (BlockCacheKey key : map.keySet()) {
521       if (key.getHfileName().equals(hfileName)) {
522         if (evictBlock(key))
523           ++numEvicted;
524       }
525     }
526     if (victimHandler != null) {
527       numEvicted += victimHandler.evictBlocksByHfileName(hfileName);
528     }
529     return numEvicted;
530   }
531 
  /**
   * Evict the block, and it will be cached by the victim handler if exists &amp;&amp;
   * block may be read again later
   * @param block the block to evict
   * @param evictedByEvictionProcess true if the given block is evicted by
   *          EvictionThread
   * @return the heap size of evicted block
   */
  protected long evictBlock(LruCachedBlock block, boolean evictedByEvictionProcess) {
    // Removal from the map is the linearization point: only the thread whose
    // remove() actually took the entry out updates the counters below.
    boolean found = map.remove(block.getCacheKey()) != null;
    if (!found) {
      return 0;
    }
    updateSizeMetrics(block, true);
    long val = elements.decrementAndGet();
    if (LOG.isTraceEnabled()) {
      long size = map.size();
      assertCounterSanity(size, val);
    }
    if (block.getBuffer().getBlockType().isData()) {
      dataBlockElements.decrementAndGet();
    }
    stats.evicted(block.getCachedTime(), block.getCacheKey().isPrimary());
    // Only blocks pushed out by the eviction process are handed to the victim
    // (L2) cache; explicit evictions (e.g. evict-on-close) are simply dropped.
    if (evictedByEvictionProcess && victimHandler != null) {
      if (victimHandler instanceof BucketCache) {
        // Only wait on the BucketCache write while we still have headroom;
        // when over the acceptable size, hand off without blocking.
        boolean wait = getCurrentSize() < acceptableSize();
        boolean inMemory = block.getPriority() == BlockPriority.MEMORY;
        ((BucketCache)victimHandler).cacheBlockWithWait(block.getCacheKey(), block.getBuffer(),
            inMemory, true, wait);
      } else {
        victimHandler.cacheBlock(block.getCacheKey(), block.getBuffer());
      }
    }
    return block.heapSize();
  }
567 
568   /**
569    * Multi-threaded call to run the eviction process.
570    */
571   private void runEviction() {
572     if(evictionThread == null) {
573       evict();
574     } else {
575       evictionThread.evict();
576     }
577   }
578 
  /** @return true while an eviction pass is currently running. */
  boolean isEvictionInProgress() {
    return evictionInProgress;
  }
582 
  /** @return the fixed bookkeeping overhead of this cache structure, in bytes. */
  long getOverhead() {
    return overhead;
  }
586 
  /**
   * Eviction method.
   *
   * <p>Single pass, single-threaded: guarded by {@code evictionLock} via
   * tryLock, so a concurrent caller returns immediately while a pass is
   * already running.  Frees enough bytes to bring the cache back down to
   * {@code minSize()}, dividing the work across the three priority buckets.
   */
  void evict() {

    // Ensure only one eviction at a time
    if(!evictionLock.tryLock()) return;

    try {
      evictionInProgress = true;
      long currentSize = this.size.get();
      long bytesToFree = currentSize - minSize();

      if (LOG.isTraceEnabled()) {
        LOG.trace("Block cache LRU eviction started; Attempting to free " +
          StringUtils.byteDesc(bytesToFree) + " of total=" +
          StringUtils.byteDesc(currentSize));
      }

      // Nothing to do if we are already at or below the minimum watermark.
      if(bytesToFree <= 0) return;

      // Instantiate priority buckets
      BlockBucket bucketSingle = new BlockBucket("single", bytesToFree, blockSize,
          singleSize());
      BlockBucket bucketMulti = new BlockBucket("multi", bytesToFree, blockSize,
          multiSize());
      BlockBucket bucketMemory = new BlockBucket("memory", bytesToFree, blockSize,
          memorySize());

      // Scan entire map putting into appropriate buckets
      for(LruCachedBlock cachedBlock : map.values()) {
        switch(cachedBlock.getPriority()) {
          case SINGLE: {
            bucketSingle.add(cachedBlock);
            break;
          }
          case MULTI: {
            bucketMulti.add(cachedBlock);
            break;
          }
          case MEMORY: {
            bucketMemory.add(cachedBlock);
            break;
          }
        }
      }

      long bytesFreed = 0;
      // In-memory force mode (or a memory bucket configured to own nearly the
      // whole cache): spare the memory bucket unless emptying single+multi
      // still cannot free enough bytes.
      if (forceInMemory || memoryFactor > 0.999f) {
        long s = bucketSingle.totalSize();
        long m = bucketMulti.totalSize();
        if (bytesToFree > (s + m)) {
          // this means we need to evict blocks in memory bucket to make room,
          // so the single and multi buckets will be emptied
          bytesFreed = bucketSingle.free(s);
          bytesFreed += bucketMulti.free(m);
          if (LOG.isTraceEnabled()) {
            LOG.trace("freed " + StringUtils.byteDesc(bytesFreed) +
              " from single and multi buckets");
          }
          bytesFreed += bucketMemory.free(bytesToFree - bytesFreed);
          if (LOG.isTraceEnabled()) {
            LOG.trace("freed " + StringUtils.byteDesc(bytesFreed) +
              " total from all three buckets ");
          }
        } else {
          // this means no need to evict block in memory bucket,
          // and we try best to make the ratio between single-bucket and
          // multi-bucket is 1:2
          long bytesRemain = s + m - bytesToFree;
          if (3 * s <= bytesRemain) {
            // single-bucket is small enough that no eviction happens for it
            // hence all eviction goes from multi-bucket
            bytesFreed = bucketMulti.free(bytesToFree);
          } else if (3 * m <= 2 * bytesRemain) {
            // multi-bucket is small enough that no eviction happens for it
            // hence all eviction goes from single-bucket
            bytesFreed = bucketSingle.free(bytesToFree);
          } else {
            // both buckets need to evict some blocks
            bytesFreed = bucketSingle.free(s - bytesRemain / 3);
            if (bytesFreed < bytesToFree) {
              bytesFreed += bucketMulti.free(bytesToFree - bytesFreed);
            }
          }
        }
      } else {
        // Default mode: drain buckets smallest-overflow first, splitting the
        // remaining debt evenly among the buckets still in the queue.
        PriorityQueue<BlockBucket> bucketQueue =
          new PriorityQueue<BlockBucket>(3);

        bucketQueue.add(bucketSingle);
        bucketQueue.add(bucketMulti);
        bucketQueue.add(bucketMemory);

        int remainingBuckets = bucketQueue.size();

        BlockBucket bucket;
        while((bucket = bucketQueue.poll()) != null) {
          long overflow = bucket.overflow();
          if(overflow > 0) {
            long bucketBytesToFree = Math.min(overflow,
                (bytesToFree - bytesFreed) / remainingBuckets);
            bytesFreed += bucket.free(bucketBytesToFree);
          }
          remainingBuckets--;
        }
      }
      if (LOG.isTraceEnabled()) {
        long single = bucketSingle.totalSize();
        long multi = bucketMulti.totalSize();
        long memory = bucketMemory.totalSize();
        LOG.trace("Block cache LRU eviction completed; " +
          "freed=" + StringUtils.byteDesc(bytesFreed) + ", " +
          "total=" + StringUtils.byteDesc(this.size.get()) + ", " +
          "single=" + StringUtils.byteDesc(single) + ", " +
          "multi=" + StringUtils.byteDesc(multi) + ", " +
          "memory=" + StringUtils.byteDesc(memory));
      }
    } finally {
      stats.evict();
      evictionInProgress = false;
      evictionLock.unlock();
    }
  }
711 
712   @Override
713   public String toString() {
714     return Objects.toStringHelper(this)
715       .add("blockCount", getBlockCount())
716       .add("currentSize", getCurrentSize())
717       .add("freeSize", getFreeSize())
718       .add("maxSize", getMaxSize())
719       .add("heapSize", heapSize())
720       .add("minSize", minSize())
721       .add("minFactor", minFactor)
722       .add("multiSize", multiSize())
723       .add("multiFactor", multiFactor)
724       .add("singleSize", singleSize())
725       .add("singleFactor", singleFactor)
726       .toString();
727   }
728 
729   /**
730    * Used to group blocks into priority buckets.  There will be a BlockBucket
731    * for each priority (single, multi, memory).  Once bucketed, the eviction
732    * algorithm takes the appropriate number of elements out of each according
733    * to configuration parameters and their relatives sizes.
734    */
735   private class BlockBucket implements Comparable<BlockBucket> {
736 
737     private final String name;
738     private LruCachedBlockQueue queue;
739     private long totalSize = 0;
740     private long bucketSize;
741 
742     public BlockBucket(String name, long bytesToFree, long blockSize, long bucketSize) {
743       this.name = name;
744       this.bucketSize = bucketSize;
745       queue = new LruCachedBlockQueue(bytesToFree, blockSize);
746       totalSize = 0;
747     }
748 
749     public void add(LruCachedBlock block) {
750       totalSize += block.heapSize();
751       queue.add(block);
752     }
753 
754     public long free(long toFree) {
755       if (LOG.isTraceEnabled()) {
756         LOG.trace("freeing " + StringUtils.byteDesc(toFree) + " from " + this);
757       }
758       LruCachedBlock cb;
759       long freedBytes = 0;
760       while ((cb = queue.pollLast()) != null) {
761         freedBytes += evictBlock(cb, true);
762         if (freedBytes >= toFree) {
763           return freedBytes;
764         }
765       }
766       if (LOG.isTraceEnabled()) {
767         LOG.trace("freed " + StringUtils.byteDesc(freedBytes) + " from " + this);
768       }
769       return freedBytes;
770     }
771 
772     public long overflow() {
773       return totalSize - bucketSize;
774     }
775 
776     public long totalSize() {
777       return totalSize;
778     }
779 
780     @Override
781     public int compareTo(BlockBucket that) {
782       return Long.compare(this.overflow(), that.overflow());
783     }
784 
785     @Override
786     public boolean equals(Object that) {
787       if (that == null || !(that instanceof BlockBucket)){
788         return false;
789       }
790       return compareTo((BlockBucket)that) == 0;
791     }
792 
793     @Override
794     public int hashCode() {
795       return Objects.hashCode(name, bucketSize, queue, totalSize);
796     }
797 
798     @Override
799     public String toString() {
800       return Objects.toStringHelper(this)
801         .add("name", name)
802         .add("totalSize", StringUtils.byteDesc(totalSize))
803         .add("bucketSize", StringUtils.byteDesc(bucketSize))
804         .toString();
805     }
806   }
807 
808   /**
809    * Get the maximum size of this cache.
810    * @return max size in bytes
811    */
812 
813   @Override
814   public long getMaxSize() {
815     return this.maxSize;
816   }
817 
  /**
   * @return the heap size currently consumed by cached blocks, in bytes
   *     (all block types; tracked by the {@code size} counter)
   */
  @Override
  public long getCurrentSize() {
    return this.size.get();
  }
822 
  /**
   * @return the heap size currently consumed by cached DATA blocks only,
   *     in bytes (tracked by the {@code dataBlockSize} counter)
   */
  @Override
  public long getCurrentDataSize() {
    return this.dataBlockSize.get();
  }
827 
  /**
   * @return remaining capacity in bytes: max size minus currently used size.
   *     May be negative while the cache is over capacity awaiting eviction.
   */
  @Override
  public long getFreeSize() {
    return getMaxSize() - getCurrentSize();
  }
832 
  /**
   * @return the configured capacity of the cache in bytes — the same value
   *     as {@link #getMaxSize()}, NOT the currently occupied size
   */
  @Override
  public long size() {
    return getMaxSize();
  }
837 
  /** @return the number of blocks currently held in the cache (all types) */
  @Override
  public long getBlockCount() {
    return this.elements.get();
  }
842 
  /** @return the number of DATA blocks currently held in the cache */
  @Override
  public long getDataBlockCount() {
    return this.dataBlockElements.get();
  }
847 
  /** @return the background eviction thread; package-private for tests */
  EvictionThread getEvictionThread() {
    return this.evictionThread;
  }
851 
852   /*
853    * Eviction thread.  Sits in waiting state until an eviction is triggered
854    * when the cache size grows above the acceptable level.<p>
855    *
856    * Thread is triggered into action by {@link LruBlockCache#runEviction()}
857    */
858   static class EvictionThread extends HasThread {
859     private WeakReference<LruBlockCache> cache;
860     private volatile boolean go = true;
861     // flag set after enter the run method, used for test
862     private boolean enteringRun = false;
863 
864     public EvictionThread(LruBlockCache cache) {
865       super(Thread.currentThread().getName() + ".LruBlockCache.EvictionThread");
866       setDaemon(true);
867       this.cache = new WeakReference<LruBlockCache>(cache);
868     }
869 
870     @Override
871     public void run() {
872       enteringRun = true;
873       while (this.go) {
874         synchronized(this) {
875           try {
876             this.wait(1000 * 10/*Don't wait for ever*/);
877           } catch(InterruptedException e) {
878             LOG.warn("Interrupted eviction thread ", e);
879             Thread.currentThread().interrupt();
880           }
881         }
882         LruBlockCache cache = this.cache.get();
883         if (cache == null) break;
884         cache.evict();
885       }
886     }
887 
888     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NN_NAKED_NOTIFY",
889         justification="This is what we want")
890     public void evict() {
891       synchronized(this) {
892         this.notifyAll();
893       }
894     }
895 
896     synchronized void shutdown() {
897       this.go = false;
898       this.notifyAll();
899     }
900 
901     /**
902      * Used for the test.
903      */
904     boolean isEnteringRun() {
905       return this.enteringRun;
906     }
907   }
908 
909   /*
910    * Statistics thread.  Periodically prints the cache statistics to the log.
911    */
912   static class StatisticsThread extends Thread {
913     private final LruBlockCache lru;
914 
915     public StatisticsThread(LruBlockCache lru) {
916       super("LruBlockCacheStats");
917       setDaemon(true);
918       this.lru = lru;
919     }
920 
921     @Override
922     public void run() {
923       lru.logStats();
924     }
925   }
926 
927   public void logStats() {
928     // Log size
929     long totalSize = heapSize();
930     long freeSize = maxSize - totalSize;
931     LruBlockCache.LOG.info("totalSize=" + StringUtils.byteDesc(totalSize) + ", " +
932         "freeSize=" + StringUtils.byteDesc(freeSize) + ", " +
933         "max=" + StringUtils.byteDesc(this.maxSize) + ", " +
934         "blockCount=" + getBlockCount() + ", " +
935         "accesses=" + stats.getRequestCount() + ", " +
936         "hits=" + stats.getHitCount() + ", " +
937         "hitRatio=" + (stats.getHitCount() == 0 ?
938           "0" : (StringUtils.formatPercent(stats.getHitRatio(), 2)+ ", ")) + ", " +
939         "cachingAccesses=" + stats.getRequestCachingCount() + ", " +
940         "cachingHits=" + stats.getHitCachingCount() + ", " +
941         "cachingHitsRatio=" + (stats.getHitCachingCount() == 0 ?
942           "0,": (StringUtils.formatPercent(stats.getHitCachingRatio(), 2) + ", ")) +
943         "evictions=" + stats.getEvictionCount() + ", " +
944         "evicted=" + stats.getEvictedCount() + ", " +
945         "evictedPerRun=" + stats.evictedPerEviction());
946   }
947 
948   /**
949    * Get counter statistics for this cache.
950    *
951    * <p>Includes: total accesses, hits, misses, evicted blocks, and runs
952    * of the eviction processes.
953    */
954   @Override
955   public CacheStats getStats() {
956     return this.stats;
957   }
958 
  /**
   * Fixed heap overhead of an LruBlockCache instance: object header plus
   * 4 longs, 11 references, 6 floats and 2 booleans, size-aligned.
   * NOTE(review): this must be kept in sync with the fields declared on the
   * class — re-verify whenever fields are added or removed.
   */
  public final static long CACHE_FIXED_OVERHEAD = ClassSize.align(
      (4 * Bytes.SIZEOF_LONG) + (11 * ClassSize.REFERENCE) +
      (6 * Bytes.SIZEOF_FLOAT) + (2 * Bytes.SIZEOF_BOOLEAN)
      + ClassSize.OBJECT);
963 
  /**
   * @return heap consumed by cached blocks — the same counter exposed by
   *     {@link #getCurrentSize()}
   */
  @Override
  public long heapSize() {
    return getCurrentSize();
  }
968 
969   public static long calculateOverhead(long maxSize, long blockSize, int concurrency){
970     // FindBugs ICAST_INTEGER_MULTIPLY_CAST_TO_LONG
971     return CACHE_FIXED_OVERHEAD + ClassSize.CONCURRENT_HASHMAP +
972         ((long)Math.ceil(maxSize*1.2/blockSize)
973             * ClassSize.CONCURRENT_HASHMAP_ENTRY) +
974         ((long)concurrency * ClassSize.CONCURRENT_HASHMAP_SEGMENT);
975   }
976 
  /**
   * Iterate the cache contents as read-only {@link CachedBlock} views.
   * The iteration is weakly consistent: it walks the live ConcurrentHashMap,
   * so entries may be added or evicted while iterating.
   */
  @Override
  public Iterator<CachedBlock> iterator() {
    final Iterator<LruCachedBlock> iterator = map.values().iterator();

    return new Iterator<CachedBlock>() {
      // Captured once so every block's age is reported against one instant.
      private final long now = System.nanoTime();

      @Override
      public boolean hasNext() {
        return iterator.hasNext();
      }

      @Override
      public CachedBlock next() {
        final LruCachedBlock b = iterator.next();
        // Adapt the internal LruCachedBlock to the public CachedBlock API.
        return new CachedBlock() {
          @Override
          public String toString() {
            return BlockCacheUtil.toString(this, now);
          }

          @Override
          public BlockPriority getBlockPriority() {
            return b.getPriority();
          }

          @Override
          public BlockType getBlockType() {
            return b.getBuffer().getBlockType();
          }

          @Override
          public long getOffset() {
            return b.getCacheKey().getOffset();
          }

          @Override
          public long getSize() {
            return b.getBuffer().heapSize();
          }

          @Override
          public long getCachedTime() {
            return b.getCachedTime();
          }

          @Override
          public String getFilename() {
            return b.getCacheKey().getHfileName();
          }

          // Ordering: filename ascending, then offset ascending, then
          // cachedTime DESCENDING (most recently cached first).
          @Override
          public int compareTo(CachedBlock other) {
            int diff = this.getFilename().compareTo(other.getFilename());
            if (diff != 0) return diff;
            diff = Long.compare(this.getOffset(), other.getOffset());
            if (diff != 0) return diff;
            // Negative cached times are never expected; fail loudly.
            if (other.getCachedTime() < 0 || this.getCachedTime() < 0) {
              throw new IllegalStateException("" + this.getCachedTime() + ", " +
                other.getCachedTime());
            }
            return Long.compare(other.getCachedTime(), this.getCachedTime());
          }

          // NOTE(review): hashCode delegates to the wrapped block while
          // equals is defined via compareTo above — equal adapters are not
          // guaranteed equal hash codes; confirm these views are never used
          // as hash-map keys.
          @Override
          public int hashCode() {
            return b.hashCode();
          }

          @Override
          public boolean equals(Object obj) {
            if (obj instanceof CachedBlock) {
              CachedBlock cb = (CachedBlock)obj;
              return compareTo(cb) == 0;
            } else {
              return false;
            }
          }
        };
      }

      @Override
      public void remove() {
        // Read-only view; removal must go through the cache's eviction APIs.
        throw new UnsupportedOperationException();
      }
    };
  }
1064 
  // Simple calculators of sizes given factors and maxSize

  /** @return size in bytes above which eviction should run (maxSize scaled by acceptableFactor) */
  long acceptableSize() {
    return (long)Math.floor(this.maxSize * this.acceptableFactor);
  }
1070 
  /** @return the low-water mark in bytes: maxSize scaled by minFactor */
  private long minSize() {
    return (long)Math.floor(this.maxSize * this.minFactor);
  }
1074 
  /** @return share of the low-water mark budgeted to single-access blocks (singleFactor * minFactor) */
  private long singleSize() {
    return (long)Math.floor(this.maxSize * this.singleFactor * this.minFactor);
  }
1078 
  /** @return share of the low-water mark budgeted to multi-access blocks (multiFactor * minFactor) */
  private long multiSize() {
    return (long)Math.floor(this.maxSize * this.multiFactor * this.minFactor);
  }
1082 
  /** @return share of the low-water mark budgeted to in-memory blocks (memoryFactor * minFactor) */
  private long memorySize() {
    return (long)Math.floor(this.maxSize * this.memoryFactor * this.minFactor);
  }
1086 
1087   @Override
1088   public void shutdown() {
1089     if (victimHandler != null)
1090       victimHandler.shutdown();
1091     this.scheduleThreadPool.shutdown();
1092     for (int i = 0; i < 10; i++) {
1093       if (!this.scheduleThreadPool.isShutdown()) {
1094         try {
1095           Thread.sleep(10);
1096         } catch (InterruptedException e) {
1097           LOG.warn("Interrupted while sleeping");
1098           Thread.currentThread().interrupt();
1099           break;
1100         }
1101       }
1102     }
1103 
1104     if (!this.scheduleThreadPool.isShutdown()) {
1105       List<Runnable> runnables = this.scheduleThreadPool.shutdownNow();
1106       LOG.debug("Still running " + runnables);
1107     }
1108     this.evictionThread.shutdown();
1109   }
1110 
  /**
   * Clears the cache. Used in tests.
   * <p>NOTE(review): only the map and the element count are reset here; the
   * size, dataBlockSize and dataBlockElements counters are left untouched —
   * confirm callers do not read those after a clear.
   */
  public void clearCache() {
    this.map.clear();
    this.elements.set(0);
  }
1116 
1117   /**
1118    * Used in testing. May be very inefficient.
1119    * @return the set of cached file names
1120    */
1121   SortedSet<String> getCachedFileNamesForTest() {
1122     SortedSet<String> fileNames = new TreeSet<String>();
1123     for (BlockCacheKey cacheKey : map.keySet()) {
1124       fileNames.add(cacheKey.getHfileName());
1125     }
1126     return fileNames;
1127   }
1128 
1129   Map<BlockType, Integer> getBlockTypeCountsForTest() {
1130     Map<BlockType, Integer> counts =
1131         new EnumMap<BlockType, Integer>(BlockType.class);
1132     for (LruCachedBlock cb : map.values()) {
1133       BlockType blockType = ((Cacheable)cb.getBuffer()).getBlockType();
1134       Integer count = counts.get(blockType);
1135       counts.put(blockType, (count == null ? 0 : count) + 1);
1136     }
1137     return counts;
1138   }
1139 
1140   public Map<DataBlockEncoding, Integer> getEncodingCountsForTest() {
1141     Map<DataBlockEncoding, Integer> counts =
1142         new EnumMap<DataBlockEncoding, Integer>(DataBlockEncoding.class);
1143     for (LruCachedBlock block : map.values()) {
1144       DataBlockEncoding encoding =
1145               ((HFileBlock) block.getBuffer()).getDataBlockEncoding();
1146       Integer count = counts.get(encoding);
1147       counts.put(encoding, (count == null ? 0 : count) + 1);
1148     }
1149     return counts;
1150   }
1151 
1152   public void setVictimCache(BlockCache handler) {
1153     assert victimHandler == null;
1154     victimHandler = handler;
1155   }
1156 
  /** @return the live backing map (not a copy); package-private, tests only */
  Map<BlockCacheKey, LruCachedBlock> getMapForTests() {
    return map;
  }
1160 
  /** @return the victim cache set via {@link #setVictimCache}, or null if none */
  BlockCache getVictimHandler() {
    return this.victimHandler;
  }
1164 
1165   @Override
1166   public BlockCache[] getBlockCaches() {
1167     if (victimHandler != null) {
1168       return new BlockCache[]{this, this.victimHandler};
1169     }
1170     return null;
1171   }
1172 }