View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  import java.util.NavigableMap;
23  import java.util.NavigableSet;
24  import java.util.concurrent.ConcurrentSkipListMap;
25  import java.util.concurrent.ConcurrentSkipListSet;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.hbase.metrics.impl.FastLongHistogram;
32  import org.apache.hadoop.hbase.util.Bytes;
33  import org.apache.hadoop.hbase.util.GsonUtil;
34  
35  
36  import org.apache.hbase.thirdparty.com.google.gson.Gson;
37  import org.apache.hbase.thirdparty.com.google.gson.TypeAdapter;
38  import org.apache.hbase.thirdparty.com.google.gson.stream.JsonReader;
39  import org.apache.hbase.thirdparty.com.google.gson.stream.JsonWriter;
40  
41  /**
42   * Utility for aggregating counts in CachedBlocks and toString/toJSON CachedBlocks and BlockCaches.
43   * No attempt has been made at making this thread safe.
44   */
45  @InterfaceAudience.Private
46  public class BlockCacheUtil {
47  
48    private static final Log LOG = LogFactory.getLog(BlockCacheUtil.class);
49  
50    public static final long NANOS_PER_SECOND = 1000000000;
51  
52    /**
53     * Needed generating JSON.
54     */
55    private static final Gson GSON = GsonUtil.createGson()
56      .registerTypeAdapter(FastLongHistogram.class, new TypeAdapter<FastLongHistogram>() {
57  
58        @Override
59        public void write(JsonWriter out, FastLongHistogram value) throws IOException {
60          AgeSnapshot snapshot = new AgeSnapshot(value);
61          out.beginObject();
62          out.name("mean").value(snapshot.getMean());
63          out.name("min").value(snapshot.getMin());
64          out.name("max").value(snapshot.getMax());
65          out.name("75thPercentile").value(snapshot.get75thPercentile());
66          out.name("95thPercentile").value(snapshot.get95thPercentile());
67          out.name("98thPercentile").value(snapshot.get98thPercentile());
68          out.name("99thPercentile").value(snapshot.get99thPercentile());
69          out.name("999thPercentile").value(snapshot.get999thPercentile());
70          out.endObject();
71        }
72  
73        @Override
74        public FastLongHistogram read(JsonReader in) throws IOException {
75          throw new UnsupportedOperationException();
76        }
77      }).setPrettyPrinting().create();
78  
79    /**
80     * @param cb
81     * @return The block content as String.
82     */
83    public static String toString(final CachedBlock cb, final long now) {
84      return "filename=" + cb.getFilename() + ", " + toStringMinusFileName(cb, now);
85    }
86  
87    /**
88     * Little data structure to hold counts for a file.
89     * Used doing a toJSON.
90     */
91    static class CachedBlockCountsPerFile {
92      private int count = 0;
93      private long size = 0;
94      private int countData = 0;
95      private long sizeData = 0;
96      private final String filename;
97  
98      CachedBlockCountsPerFile(final String filename) {
99        this.filename = filename;
100     }
101 
102     public int getCount() {
103       return count;
104     }
105 
106     public long getSize() {
107       return size;
108     }
109 
110     public int getCountData() {
111       return countData;
112     }
113 
114     public long getSizeData() {
115       return sizeData;
116     }
117 
118     public String getFilename() {
119       return filename;
120     }
121   }
122 
123   /**
124    * @return A JSON String of <code>filename</code> and counts of <code>blocks</code>
125    */
126   public static String toJSON(final String filename, final NavigableSet<CachedBlock> blocks)
127       throws IOException {
128     CachedBlockCountsPerFile counts = new CachedBlockCountsPerFile(filename);
129     for (CachedBlock cb: blocks) {
130       counts.count++;
131       counts.size += cb.getSize();
132       BlockType bt = cb.getBlockType();
133       if (bt != null && bt.isData()) {
134         counts.countData++;
135         counts.sizeData += cb.getSize();
136       }
137     }
138     return GSON.toJson(counts);
139   }
140 
141   /**
142    * @return JSON string of <code>cbsf</code> aggregated
143    */
144   public static String toJSON(final CachedBlocksByFile cbsbf) throws IOException {
145     return GSON.toJson(cbsbf);
146   }
147 
148   /**
149    * @return JSON string of <code>bc</code> content.
150    */
151   public static String toJSON(final BlockCache bc) throws IOException {
152     return GSON.toJson(bc);
153   }
154 
155   /**
156    * @param cb
157    * @return The block content of <code>bc</code> as a String minus the filename.
158    */
159   public static String toStringMinusFileName(final CachedBlock cb, final long now) {
160     return "offset=" + cb.getOffset() +
161       ", size=" + cb.getSize() +
162       ", age=" + (now - cb.getCachedTime()) +
163       ", type=" + cb.getBlockType() +
164       ", priority=" + cb.getBlockPriority();
165   }
166 
167   /**
168    * Get a {@link CachedBlocksByFile} instance and load it up by iterating content in
169    * {@link BlockCache}.
170    * @param conf Used to read configurations
171    * @param bc Block Cache to iterate.
172    * @return Laoded up instance of CachedBlocksByFile
173    */
174   public static CachedBlocksByFile getLoadedCachedBlocksByFile(final Configuration conf,
175       final BlockCache bc) {
176     CachedBlocksByFile cbsbf = new CachedBlocksByFile(conf);
177     for (CachedBlock cb: bc) {
178       if (cbsbf.update(cb)) break;
179     }
180     return cbsbf;
181   }
182 
183   private static int compareCacheBlock(Cacheable left, Cacheable right,
184                                        boolean includeNextBlockOnDiskSize) {
185     ByteBuffer l = ByteBuffer.allocate(left.getSerializedLength());
186     left.serialize(l, includeNextBlockOnDiskSize);
187     ByteBuffer r = ByteBuffer.allocate(right.getSerializedLength());
188     right.serialize(r, includeNextBlockOnDiskSize);
189     return Bytes.compareTo(l.array(), l.arrayOffset(), l.limit(),
190              r.array(), r.arrayOffset(), r.limit());
191   }
192 
193   /**
194    * Validate that the existing and newBlock are the same without including the nextBlockMetadata,
195    * if not, throw an exception. If they are the same without the nextBlockMetadata, return the
196    * comparison.
197    * @param existing block that is existing in the cache.
198    * @param newBlock block that is trying to be cached.
199    * @param cacheKey the cache key of the blocks.
200    * @return comparison of the existing block to the newBlock.
201    */
202   public static int validateBlockAddition(Cacheable existing, Cacheable newBlock,
203       BlockCacheKey cacheKey) {
204     int comparison = compareCacheBlock(existing, newBlock, false);
205     if (comparison != 0) {
206       throw new RuntimeException(
207           "Cached block contents differ, which should not have happened." + "cacheKey:" + cacheKey);
208     }
209     if ((existing instanceof HFileBlock) && (newBlock instanceof HFileBlock)) {
210       comparison = ((HFileBlock) existing).getNextBlockOnDiskSize()
211           - ((HFileBlock) newBlock).getNextBlockOnDiskSize();
212     }
213     return comparison;
214   }
215 
216   /**
217    * Because of the region splitting, it's possible that the split key locate in the middle of a
218    * block. So it's possible that both the daughter regions load the same block from their parent
219    * HFile. When pread, we don't force the read to read all of the next block header. So when two
220    * threads try to cache the same block, it's possible that one thread read all of the next block
221    * header but the other one didn't. if the already cached block hasn't next block header but the
222    * new block to cache has, then we can replace the existing block with the new block for better
223    * performance.(HBASE-20447)
224    * @param blockCache BlockCache to check
225    * @param cacheKey the block cache key
226    * @param newBlock the new block which try to put into the block cache.
227    * @return true means need to replace existing block with new block for the same block cache key.
228    *         false means just keep the existing block.
229    */
230   public static boolean shouldReplaceExistingCacheBlock(BlockCache blockCache,
231       BlockCacheKey cacheKey, Cacheable newBlock) {
232     Cacheable existingBlock = blockCache.getBlock(cacheKey, false, false, false);
233     if (existingBlock == null) {
234       return true;
235     }
236     int comparison = BlockCacheUtil.validateBlockAddition(existingBlock, newBlock, cacheKey);
237     if (comparison < 0) {
238       LOG.warn("Cached block contents differ by nextBlockOnDiskSize, the new block has "
239           + "nextBlockOnDiskSize set. Caching new block.");
240       return true;
241     } else if (comparison > 0) {
242       LOG.warn("Cached block contents differ by nextBlockOnDiskSize, the existing block has "
243           + "nextBlockOnDiskSize set, Keeping cached block.");
244       return false;
245     } else {
246       LOG.warn("Caching an already cached block: " + cacheKey
247           + ". This is harmless and can happen in rare " + "cases (see HBASE-8547)");
248       return false;
249     }
250   }
251 
252   /**
253    * Use one of these to keep a running account of cached blocks by file.  Throw it away when done.
254    * This is different than metrics in that it is stats on current state of a cache.
255    * See getLoadedCachedBlocksByFile
256    */
257   public static class CachedBlocksByFile {
258     private int count;
259     private int dataBlockCount;
260     private long size;
261     private long dataSize;
262     private final long now = System.nanoTime();
263     /**
264      * How many blocks to look at before we give up.
265      * There could be many millions of blocks. We don't want the
266      * ui to freeze while we run through 1B blocks... users will
267      * think hbase dead. UI displays warning in red when stats
268      * are incomplete.
269      */
270     private final int max;
271     public static final int DEFAULT_MAX = 1000000;
272 
273     CachedBlocksByFile() {
274       this(null);
275     }
276 
277     CachedBlocksByFile(final Configuration c) {
278       this.max = c == null? DEFAULT_MAX: c.getInt("hbase.ui.blockcache.by.file.max", DEFAULT_MAX);
279     }
280 
281     /**
282      * Map by filename. use concurent utils because we want our Map and contained blocks sorted.
283      */
284     private transient NavigableMap<String, NavigableSet<CachedBlock>> cachedBlockByFile =
285       new ConcurrentSkipListMap<String, NavigableSet<CachedBlock>>();
286     FastLongHistogram hist = new FastLongHistogram();
287 
288     /**
289      * @param cb
290      * @return True if full.... if we won't be adding any more.
291      */
292     public boolean update(final CachedBlock cb) {
293       if (isFull()) return true;
294       NavigableSet<CachedBlock> set = this.cachedBlockByFile.get(cb.getFilename());
295       if (set == null) {
296         set = new ConcurrentSkipListSet<CachedBlock>();
297         this.cachedBlockByFile.put(cb.getFilename(), set);
298       }
299       set.add(cb);
300       this.size += cb.getSize();
301       this.count++;
302       BlockType bt = cb.getBlockType();
303       if (bt != null && bt.isData()) {
304         this.dataBlockCount++;
305         this.dataSize += cb.getSize();
306       }
307       long age = (this.now - cb.getCachedTime())/NANOS_PER_SECOND;
308       this.hist.add(age, 1);
309       return false;
310     }
311 
312     /**
313      * @return True if full; i.e. there are more items in the cache but we only loaded up
314      * the maximum set in configuration <code>hbase.ui.blockcache.by.file.max</code>
315      * (Default: DEFAULT_MAX).
316      */
317     public boolean isFull() {
318       return this.count >= this.max;
319     }
320 
321     public NavigableMap<String, NavigableSet<CachedBlock>> getCachedBlockStatsByFile() {
322       return this.cachedBlockByFile;
323     }
324 
325     /**
326      * @return count of blocks in the cache
327      */
328     public int getCount() {
329       return count;
330     }
331 
332     public int getDataCount() {
333       return dataBlockCount;
334     }
335 
336     /**
337      * @return size of blocks in the cache
338      */
339     public long getSize() {
340       return size;
341     }
342 
343     /**
344      * @return Size of data.
345      */
346     public long getDataSize() {
347       return dataSize;
348     }
349 
350     public AgeSnapshot getAgeInCacheSnapshot() {
351       return new AgeSnapshot(this.hist);
352     }
353 
354     @Override
355     public String toString() {
356       AgeSnapshot snapshot = getAgeInCacheSnapshot();
357       return "count=" + count + ", dataBlockCount=" + dataBlockCount + ", size=" + size +
358           ", dataSize=" + getDataSize() +
359           ", mean age=" + snapshot.getMean() +
360           ", min age=" + snapshot.getMin() +
361           ", max age=" + snapshot.getMax() +
362           ", 75th percentile age="   + snapshot.get75thPercentile() +
363           ", 95th percentile age="   + snapshot.get95thPercentile() +
364           ", 98th percentile age="   + snapshot.get98thPercentile() +
365           ", 99th percentile age="   + snapshot.get99thPercentile() +
366           ", 99.9th percentile age=" + snapshot.get99thPercentile();
367     }
368   }
369 }