View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.DataInput;
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.util.ArrayList;
24  import java.util.List;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.hbase.Cell;
31  import org.apache.hadoop.hbase.CellUtil;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.KeyValue;
34  import org.apache.hadoop.hbase.KeyValue.KVComparator;
35  import org.apache.hadoop.hbase.NoTagsKeyValue;
36  import org.apache.hadoop.hbase.classification.InterfaceAudience;
37  import org.apache.hadoop.hbase.fs.HFileSystem;
38  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
39  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
40  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
41  import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
42  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
43  import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
44  import org.apache.hadoop.hbase.util.Bytes;
45  import org.apache.hadoop.hbase.util.IdLock;
46  import org.apache.hadoop.io.WritableUtils;
47  import org.apache.htrace.Trace;
48  import org.apache.htrace.TraceScope;
49  
50  /**
51   * {@link HFile} reader for version 2.
52   */
53  @InterfaceAudience.Private
54  public class HFileReaderV2 extends AbstractHFileReader {
55  
56    private static final Log LOG = LogFactory.getLog(HFileReaderV2.class);
57  
  /** Minor versions in HFile V2 starting with this number have hbase checksums. */
  public static final int MINOR_VERSION_WITH_CHECKSUM = 1;
  /** The HFile V2 minor version that does not support checksums. */
  public static final int MINOR_VERSION_NO_CHECKSUM = 0;

  /** HFile minor version that introduced the protobuf file trailer. */
  public static final int PBUF_TRAILER_MINOR_VERSION = 2;

  /**
   * The size of a (key length, value length) tuple that prefixes each entry in
   * a data block: two 4-byte ints.
   */
  public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;

  // Whether entries in this file carry memstore timestamps (MVCC versions).
  // Set from the KEY_VALUE_VERSION entry of the file info on open.
  private boolean includesMemstoreTS = false;
  // Whether memstore timestamps should actually be decoded; only true when
  // the file's recorded max memstore TS is > 0 (see constructor).
  protected boolean decodeMemstoreTS = false;

  /** @return true if entries in this file include memstore timestamps. */
  protected boolean shouldIncludeMemstoreTS() {
    return includesMemstoreTS;
  }

  /** Filesystem-level block reader. */
  private HFileBlock.FSReader fsBlockReader;

  /**
   * A "sparse lock" implementation allowing to lock on a particular block
   * identified by offset. The purpose of this is to avoid two clients loading
   * the same block, and have all but one client wait to get the block from the
   * cache.
   */
  private IdLock offsetLock = new IdLock();

  /**
   * Blocks read from the load-on-open section, excluding data root index, meta
   * index, and file info. Retained so callers can consume them later
   * (e.g. bloom filter metadata).
   */
  private List<HFileBlock> loadOnOpenBlocks = new ArrayList<HFileBlock>();

  /** Minimum minor version supported by this HFile format. */
  static final int MIN_MINOR_VERSION = 0;

  /** Maximum minor version supported by this HFile format. */
  // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
  // the file. This version can read Writables version 1.
  static final int MAX_MINOR_VERSION = 3;

  /** Minor versions starting with this number have faked index key. */
  static final int MINOR_VERSION_WITH_FAKED_KEY = 3;

  // Read-path context (compression, checksum type, MVCC flag, ...) built from
  // the trailer in createHFileContext().
  HFileContext hfileContext;
108 
  /**
   * Opens a HFile. You must load the index before you can use it by calling
   * {@link #loadFileInfo()}.
   *
   * <p>On success the data and meta block index roots and the file info are
   * parsed from the load-on-open section; any remaining load-on-open blocks
   * are stashed in {@code loadOnOpenBlocks}. On any failure the underlying
   * streams are closed and a {@link CorruptHFileException} is thrown.
   *
   * @param path Path to HFile.
   * @param trailer File trailer.
   * @param fsdis input stream.
   * @param size Length of the stream.
   * @param cacheConf Cache configuration.
   * @param hfs filesystem the file lives on.
   * @param conf configuration to use.
   * @throws IOException via CorruptHFileException if the load-on-open section
   *           cannot be read.
   */
  public HFileReaderV2(final Path path, final FixedFileTrailer trailer,
      final FSDataInputStreamWrapper fsdis, final long size, final CacheConfig cacheConf,
      final HFileSystem hfs, final Configuration conf) throws IOException {
    super(path, trailer, size, cacheConf, hfs, conf);
    this.conf = conf;
    trailer.expectMajorVersion(getMajorVersion());
    validateMinorVersion(path, trailer.getMinorVersion());
    // NOTE(review): fileSize here presumably equals the 'size' param after the
    // super() call — confirm against AbstractHFileReader.
    this.hfileContext = createHFileContext(fsdis, fileSize, hfs, path, trailer);
    HFileBlock.FSReaderImpl fsBlockReaderV2 =
      new HFileBlock.FSReaderImpl(fsdis, fileSize, hfs, path, hfileContext);
    this.fsBlockReader = fsBlockReaderV2; // upcast

    try {
      // Comparator class name is stored in the trailer in version 2.
      comparator = trailer.createComparator();
      dataBlockIndexReader = new HFileBlockIndex.BlockIndexReader(comparator,
        trailer.getNumDataIndexLevels(), this);
      metaBlockIndexReader = new HFileBlockIndex.BlockIndexReader(
        KeyValue.RAW_COMPARATOR, 1);

      // Parse load-on-open data.

      HFileBlock.BlockIterator blockIter = fsBlockReaderV2.blockRange(
        trailer.getLoadOnOpenDataOffset(),
        fileSize - trailer.getTrailerSize());

      // Data index. We also read statistics about the block index written after
      // the root level.
      dataBlockIndexReader.readMultiLevelIndexRoot(
        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
        trailer.getDataIndexCount());

      // Meta index.
      metaBlockIndexReader.readRootIndex(
        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
        trailer.getMetaIndexCount());

      // File info: creation time, last key, average key/value lengths.
      fileInfo = new FileInfo();
      fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
      byte[] creationTimeBytes = fileInfo.get(FileInfo.CREATE_TIME_TS);
      // Older files may not record a creation time; default to 0 then.
      this.hfileContext.setFileCreateTime(creationTimeBytes == null? 0:
        Bytes.toLong(creationTimeBytes));
      lastKey = fileInfo.get(FileInfo.LASTKEY);
      avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN));
      avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN));
      // Memstore-timestamp (MVCC) support is signalled by the key/value
      // format version recorded by the writer.
      byte [] keyValueFormatVersion =
        fileInfo.get(HFileWriterV2.KEY_VALUE_VERSION);
      includesMemstoreTS = keyValueFormatVersion != null &&
        Bytes.toInt(keyValueFormatVersion) ==
          HFileWriterV2.KEY_VALUE_VER_WITH_MEMSTORE;
      fsBlockReaderV2.setIncludesMemstoreTS(includesMemstoreTS);
      if (includesMemstoreTS) {
        // Only bother decoding timestamps when at least one non-zero TS exists.
        decodeMemstoreTS = Bytes.toLong(fileInfo.get(HFileWriterV2.MAX_MEMSTORE_TS_KEY)) > 0;
      }

      // Read data block encoding algorithm name from file info.
      dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo);
      fsBlockReaderV2.setDataBlockEncoder(dataBlockEncoder);

      // Store all other load-on-open blocks for further consumption.
      HFileBlock b;
      while ((b = blockIter.nextBlock()) != null) {
        loadOnOpenBlocks.add(b);
      }
      // Done with the open-time reads; release socket/buffer resources.
      fsdis.unbuffer();
    } catch (Throwable t) {
      fsBlockReaderV2.closeStreams();
      throw new CorruptHFileException("Problem reading data index and meta index from file "
        + path, t);
    }

    // Prefetch file blocks upon open if requested
    if (cacheConf.shouldPrefetchOnOpen()) {
      PrefetchExecutor.request(path, new Runnable() {
        public void run() {
          long offset = 0;
          long end = 0;
          try {
            // Walk every block up to the load-on-open section, pulling each
            // into the block cache via readBlock(..., cacheBlock=true, ...).
            end = getTrailer().getLoadOnOpenDataOffset();
            HFileBlock prevBlock = null;
            if (LOG.isTraceEnabled()) {
              LOG.trace("Prefetch start " + getPathOffsetEndStr(path, offset, end));
            }
            while (offset < end) {
              if (Thread.interrupted()) {
                break;
              }
              // Perhaps we got our block from cache? Unlikely as this may be, if it happens, then
              // the internal-to-hfileblock thread local which holds the overread that gets the
              // next header, will not have happened...so, pass in the onDiskSize gotten from the
              // cached block. This 'optimization' triggers extremely rarely I'd say.
              long onDiskSize = prevBlock != null? prevBlock.getNextBlockOnDiskSize(): -1;
              HFileBlock block = readBlock(offset, onDiskSize, true, false, false, false,
                null, null);
              prevBlock = block;
              offset += block.getOnDiskSizeWithHeader();
            }
          } catch (IOException e) {
            // IOExceptions are probably due to region closes (relocation, etc.)
            if (LOG.isTraceEnabled()) {
              LOG.trace("Prefetch " + getPathOffsetEndStr(path, offset, end), e);
            }
          } catch (NullPointerException e) {
            LOG.warn("Stream moved/closed or prefetch cancelled?" +
                getPathOffsetEndStr(path, offset, end), e);
          } catch (Exception e) {
            // Other exceptions are interesting
            LOG.warn("Prefetch " + getPathOffsetEndStr(path, offset, end), e);
          } finally {
            // Always mark the prefetch done so later cancel/close calls
            // don't wait on it.
            PrefetchExecutor.complete(path);
          }
        }
      });
    }
  }
238 
239   protected HFileContext createHFileContext(FSDataInputStreamWrapper fsdis, long fileSize,
240       HFileSystem hfs, Path path, FixedFileTrailer trailer) throws IOException {
241     return new HFileContextBuilder()
242       .withIncludesMvcc(this.includesMemstoreTS)
243       .withCompression(this.compressAlgo)
244       .withHBaseCheckSum(trailer.getMinorVersion() >= MINOR_VERSION_WITH_CHECKSUM)
245       .build();
246   }
247 
248   private static String getPathOffsetEndStr(final Path path, final long offset, final long end) {
249     return "path=" + path.toString() + ", offset=" + offset + ", end=" + end;
250   }
251 
252   /**
253    * Create a Scanner on this file. No seeks or reads are done on creation. Call
254    * {@link HFileScanner#seekTo(byte[])} to position an start the read. There is
255    * nothing to clean up in a Scanner. Letting go of your references to the
256    * scanner is sufficient.
257    *
258    * @param cacheBlocks True if we should cache blocks read in by this scanner.
259    * @param pread Use positional read rather than seek+read if true (pread is
260    *          better for random reads, seek+read is better scanning).
261    * @param isCompaction is scanner being used for a compaction?
262    * @return Scanner on this file.
263    */
264    @Override
265    public HFileScanner getScanner(boolean cacheBlocks, final boolean pread,
266       final boolean isCompaction) {
267     if (dataBlockEncoder.useEncodedScanner()) {
268       return new EncodedScannerV2(this, cacheBlocks, pread, isCompaction,
269           hfileContext);
270     }
271 
272     return new ScannerV2(this, cacheBlocks, pread, isCompaction);
273   }
274 
  /**
   * Retrieve block from cache. Validates the retrieved block's type vs {@code expectedBlockType}
   * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary.
   *
   * @param cacheKey key identifying the block in the cache
   * @param cacheBlock whether the caller intends to cache (passed to the cache lookup)
   * @param useLock true on the second (locked) pass so the lookup is not
   *          double-counted as a miss
   * @param isCompaction whether this read serves a compaction
   * @param updateCacheMetrics whether to update hit/miss metrics
   * @param expectedBlockType expected type, or null to skip the type check
   * @param expectedDataBlockEncoding expected encoding, or null to skip the
   *          encoding check
   * @return the cached (unpacked) block, or null on a miss or encoding mismatch
   * @throws IOException if the cached block fails type validation or unpacking
   */
   private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
       boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
       DataBlockEncoding expectedDataBlockEncoding) throws IOException {
     // Check cache for block. If found return.
     if (cacheConf.isBlockCacheEnabled()) {
       BlockCache cache = cacheConf.getBlockCache();
       HFileBlock cachedBlock = (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock,
         updateCacheMetrics);
       if (cachedBlock != null) {
         // Blocks cached in compressed form must be decompressed before use.
         if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
           cachedBlock = cachedBlock.unpack(hfileContext, fsBlockReader);
         }
         validateBlockType(cachedBlock, expectedBlockType);

         if (expectedDataBlockEncoding == null) {
           return cachedBlock;
         }
         DataBlockEncoding actualDataBlockEncoding =
                 cachedBlock.getDataBlockEncoding();
         // Block types other than data blocks always have
         // DataBlockEncoding.NONE. To avoid false negative cache misses, only
         // perform this check if cached block is a data block.
         if (cachedBlock.getBlockType().isData() &&
                 !actualDataBlockEncoding.equals(expectedDataBlockEncoding)) {
           // This mismatch may happen if a ScannerV2, which is used for say a
           // compaction, tries to read an encoded block from the block cache.
           // The reverse might happen when an EncodedScannerV2 tries to read
           // un-encoded blocks which were cached earlier.
           //
           // Returning a data block whose encoding differs from what the
           // requesting scanner expects would make that scanner throw, so we
           // force a disk read (return null) here instead. This can cause a
           // significant number of cache misses, so it may be worth tracking
           // as possible justification for a future CompoundScannerV2.
           if (!expectedDataBlockEncoding.equals(DataBlockEncoding.NONE) &&
                   !actualDataBlockEncoding.equals(DataBlockEncoding.NONE)) {
             // If the block is encoded but the encoding does not match the
             // expected encoding it is likely the encoding was changed but the
             // block was not yet evicted. Evictions on file close happen async
             // so blocks with the old encoding still linger in cache for some
             // period of time. This event should be rare as it only happens on
             // schema definition change.
             LOG.info("Evicting cached block with key " + cacheKey +
                     " because of a data block encoding mismatch" +
                     "; expected: " + expectedDataBlockEncoding +
                     ", actual: " + actualDataBlockEncoding);
             cache.evictBlock(cacheKey);
           }
           return null;
         }
         return cachedBlock;
       }
     }
     return null;
   }
  /**
   * Returns the named meta block's payload, reading from the block cache or
   * disk as needed.
   *
   * @param metaBlockName name of the meta block to look up in the meta index
   * @param cacheBlock Add block to cache, if found
   * @return block wrapped in a ByteBuffer, with header skipped; null if the
   *         file has no meta blocks or the name is not present
   * @throws IOException if the meta index is not loaded or the read fails
   */
  @Override
  public ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock)
      throws IOException {
    if (trailer.getMetaIndexCount() == 0) {
      return null; // there are no meta blocks
    }
    if (metaBlockIndexReader == null) {
      throw new IOException("Meta index not loaded");
    }

    byte[] mbname = Bytes.toBytes(metaBlockName);
    int block = metaBlockIndexReader.rootBlockContainingKey(mbname,
        0, mbname.length);
    if (block == -1)
      return null;
    long blockSize = metaBlockIndexReader.getRootBlockDataSize(block);

    // Per meta key from any given file, synchronize reads for said block. This
    // is OK to do for meta blocks because the meta block index is always
    // single-level.
    // NOTE(review): this relies on getRootBlockKey(block) returning the SAME
    // array instance on every call so the monitor is per-key — confirm in
    // BlockIndexReader.
    synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
      // Check cache for block. If found return.
      long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
      BlockCacheKey cacheKey = new BlockCacheKey(name, metaBlockOffset,
        this.isPrimaryReplicaReader(), BlockType.META);

      // Only cache if data-on-read caching is enabled as well.
      cacheBlock &= cacheConf.shouldCacheDataOnRead();
      if (cacheConf.isBlockCacheEnabled()) {
        HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, false, true, true,
          BlockType.META, null);
        if (cachedBlock != null) {
          assert cachedBlock.isUnpacked() : "Packed block leak.";
          // Return a distinct 'shallow copy' of the block,
          // so pos does not get messed by the scanner
          return cachedBlock.getBufferWithoutHeader();
        }
        // Cache Miss, please load.
      }

      // Read from disk and decompress/decrypt as needed.
      HFileBlock metaBlock = fsBlockReader.readBlockData(metaBlockOffset,
          blockSize, true, false).unpack(hfileContext, fsBlockReader);

      // Cache the block
      if (cacheBlock) {
        cacheConf.getBlockCache().cacheBlock(cacheKey, metaBlock,
            cacheConf.isInMemory(), this.cacheConf.isCacheDataInL1());
      }

      return metaBlock.getBufferWithoutHeader();
    }
  }
391 
  /**
   * Reads the block at {@code dataBlockOffset}, serving it from the block
   * cache when possible. On a cache miss the read may be retried once while
   * holding a per-offset lock so only one caller loads a given block from
   * disk. The returned block is always unpacked (decompressed/decrypted).
   *
   * @param dataBlockOffset file offset of the block; must lie before the
   *          load-on-open section
   * @param onDiskBlockSize on-disk size if known, or -1 to have the reader
   *          discover it from the block header
   * @param cacheBlock whether to cache the block after reading
   * @param pread use positional read rather than seek+read
   * @param isCompaction whether this read serves a compaction
   * @param updateCacheMetrics whether to update hit/miss metrics
   * @param expectedBlockType expected block type, or null to skip validation
   * @param expectedDataBlockEncoding expected data block encoding, or null
   * @return the unpacked block
   * @throws IOException if the index is not loaded, the offset is out of
   *           range, or the read/validation fails
   */
  @Override
  public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize,
      final boolean cacheBlock, boolean pread, final boolean isCompaction,
      boolean updateCacheMetrics, BlockType expectedBlockType,
      DataBlockEncoding expectedDataBlockEncoding)
      throws IOException {
    if (dataBlockIndexReader == null) {
      throw new IOException("Block index not loaded");
    }
    long trailerOffset = trailer.getLoadOnOpenDataOffset();
    if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) {
      throw new IOException("Requested block is out of range: " + dataBlockOffset +
        ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset() +
        ", trailer.getLoadOnOpenDataOffset: " + trailerOffset);
    }

    // For any given block from any given file, synchronize reads for said block.
    // Without a cache, this synchronizing is needless overhead, but really
    // the other choice is to duplicate work (which the cache would prevent you
    // from doing).
    BlockCacheKey cacheKey =
        new BlockCacheKey(name, dataBlockOffset, this.isPrimaryReplicaReader(), expectedBlockType);
    boolean useLock = false;
    IdLock.Entry lockEntry = null;
    TraceScope traceScope = Trace.startSpan("HFileReaderV2.readBlock");
    try {
      // At most two iterations: an unlocked cache probe, then (optionally) a
      // locked probe + disk load.
      while (true) {
        // Check cache for block. If found return.
        if (cacheConf.shouldReadBlockFromCache(expectedBlockType)) {
          if (useLock) {
            lockEntry = offsetLock.getLockEntry(dataBlockOffset);
          }
          // Try and get the block from the block cache. If the useLock variable is true then this
          // is the second time through the loop and it should not be counted as a block cache miss.
          HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, useLock, isCompaction,
            updateCacheMetrics, expectedBlockType, expectedDataBlockEncoding);
          if (cachedBlock != null) {
            if (Trace.isTracing()) {
              traceScope.getSpan().addTimelineAnnotation("blockCacheHit");
            }
            assert cachedBlock.isUnpacked() : "Packed block leak.";
            if (cachedBlock.getBlockType().isData()) {
              if (updateCacheMetrics) {
                HFile.DATABLOCK_READ_COUNT.increment();
              }
              // Validate encoding type for data blocks. We include encoding
              // type in the cache key, and we expect it to match on a cache hit.
              if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) {
                throw new IOException("Cached block under key " + cacheKey + " "
                  + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
                  + dataBlockEncoder.getDataBlockEncoding() + ")");
              }
            }
            // Cache-hit. Return!
            return cachedBlock;
          }
          if (!useLock && cacheBlock && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) {
            // check cache again with lock
            useLock = true;
            continue;
          }
          // Carry on, please load.
        }

        if (Trace.isTracing()) {
          traceScope.getSpan().addTimelineAnnotation("blockCacheMiss");
        }
        // Load block from filesystem.
        HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize,
            pread, !isCompaction);
        validateBlockType(hfileBlock, expectedBlockType);
        HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader);
        BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();

        // Cache the block if necessary. Depending on config, cache either the
        // packed (on-disk) or the unpacked form.
        if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
          cacheConf.getBlockCache().cacheBlock(cacheKey,
            cacheConf.shouldCacheCompressed(category) ? hfileBlock : unpacked,
            cacheConf.isInMemory(), this.cacheConf.isCacheDataInL1());
        }

        if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
          HFile.DATABLOCK_READ_COUNT.increment();
        }

        return unpacked;
      }
    } finally {
      traceScope.close();
      if (lockEntry != null) {
        offsetLock.releaseLockEntry(lockEntry);
      }
    }
  }
486 
487   @Override
488   public boolean hasMVCCInfo() {
489     return includesMemstoreTS && decodeMemstoreTS;
490   }
491 
492   /**
493    * Compares the actual type of a block retrieved from cache or disk with its
494    * expected type and throws an exception in case of a mismatch. Expected
495    * block type of {@link BlockType#DATA} is considered to match the actual
496    * block type [@link {@link BlockType#ENCODED_DATA} as well.
497    * @param block a block retrieved from cache or disk
498    * @param expectedBlockType the expected block type, or null to skip the
499    *          check
500    */
501   private void validateBlockType(HFileBlock block,
502       BlockType expectedBlockType) throws IOException {
503     if (expectedBlockType == null) {
504       return;
505     }
506     BlockType actualBlockType = block.getBlockType();
507     if (expectedBlockType.isData() && actualBlockType.isData()) {
508       // We consider DATA to match ENCODED_DATA for the purpose of this
509       // verification.
510       return;
511     }
512     if (actualBlockType != expectedBlockType) {
513       throw new IOException("Expected block type " + expectedBlockType + ", " +
514           "but got " + actualBlockType + ": " + block);
515     }
516   }
517 
518   /**
519    * @return Last key in the file. May be null if file has no entries. Note that
520    *         this is not the last row key, but rather the byte form of the last
521    *         KeyValue.
522    */
523   @Override
524   public byte[] getLastKey() {
525     return dataBlockIndexReader.isEmpty() ? null : lastKey;
526   }
527 
  /**
   * @return Midkey for this file. We work with block boundaries only so
   *         returned midkey is an approximation only.
   * @throws IOException if the mid-level index block cannot be read
   */
  @Override
  public byte[] midkey() throws IOException {
    return dataBlockIndexReader.midkey();
  }
537 
  /** Closes this reader, evicting cached blocks per the cache configuration. */
  @Override
  public void close() throws IOException {
    close(cacheConf.shouldEvictOnClose());
  }
542 
543   public void close(boolean evictOnClose) throws IOException {
544     PrefetchExecutor.cancel(path);
545     if (evictOnClose && cacheConf.isBlockCacheEnabled()) {
546       int numEvicted = cacheConf.getBlockCache().evictBlocksByHfileName(name);
547       if (LOG.isTraceEnabled()) {
548         LOG.trace("On close, file=" + name + " evicted=" + numEvicted
549           + " block(s)");
550       }
551     }
552     fsBlockReader.closeStreams();
553   }
554 
  /**
   * @param isCompaction whether the caller is a compaction scanner
   * @return the data block encoding effective for blocks of this file held in
   *         the block cache, as decided by the configured encoder
   */
  public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) {
    return dataBlockEncoder.getEffectiveEncodingInCache(isCompaction);
  }
558 
  /** For testing: exposes the raw filesystem-level block reader (bypasses cache). */
  @Override
  HFileBlock.FSReader getUncachedBlockReader() {
    return fsBlockReader;
  }
564 
565 
566   protected abstract static class AbstractScannerV2
567       extends AbstractHFileReader.Scanner {
    // The data block this scanner is currently positioned in; null before the
    // first successful seek.
    protected HFileBlock block;

    @Override
    public Cell getNextIndexedKey() {
      return nextIndexedKey;
    }
    /**
     * The next indexed key is to keep track of the indexed key of the next data block.
     * If the nextIndexedKey is KeyValueScanner.NO_NEXT_INDEXED_KEY (see
     * {@link #reseekTo(Cell)}), it means that the current data block is the
     * last data block.
     *
     * If the nextIndexedKey is null, it means the nextIndexedKey has not been loaded yet.
     */
    protected Cell nextIndexedKey;
582 
    /**
     * @param r the reader this scanner iterates over
     * @param cacheBlocks whether blocks read by this scanner should be cached
     * @param pread use positional read rather than seek+read
     * @param isCompaction whether this scanner serves a compaction
     */
    public AbstractScannerV2(HFileReaderV2 r, boolean cacheBlocks,
        final boolean pread, final boolean isCompaction) {
      super(r, cacheBlocks, pread, isCompaction);
    }
587 
    /** @return the first key of the given data block, wrapped in a ByteBuffer. */
    protected abstract ByteBuffer getFirstKeyInBlock(HFileBlock curBlock);

    /**
     * Positions this scanner within {@code seekToBlock} relative to {@code key}.
     * Result codes follow the {@link #seekTo(Cell, boolean)} contract.
     */
    protected abstract int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
        boolean rewind, Cell key, boolean seekBefore) throws IOException;
592 
593     @Override
594     public int seekTo(byte[] key, int offset, int length) throws IOException {
595       // Always rewind to the first key of the block, because the given key
596       // might be before or after the current key.
597       return seekTo(new KeyValue.KeyOnlyKeyValue(key, offset, length));
598     }
599 
600     @Override
601     public int reseekTo(byte[] key, int offset, int length) throws IOException {
602       return reseekTo(new KeyValue.KeyOnlyKeyValue(key, offset, length));
603     }
604 
    /**
     * Seeks to the given key, rewinding to the first key of the target block
     * since the requested key may lie before the current position.
     */
    @Override
    public int seekTo(Cell key) throws IOException {
      return seekTo(key, true);
    }
609 
    /**
     * Forward-only reseek: assumes the target key is at or after the current
     * position. Stays inside the current block when the index proves the key
     * cannot be in a later block; otherwise falls back to a non-rewinding seek.
     *
     * @param key the key to position at
     * @return negative if positioned before {@code key} is impossible, 0 if at
     *         the key, positive if past it (per compareKey semantics)
     * @throws IOException on read failure
     */
    @Override
    public int reseekTo(Cell key) throws IOException {
      int compared;
      if (isSeeked()) {
        compared = compareKey(reader.getComparator(), key);
        if (compared < 1) {
          // If the required key is less than or equal to current key, then
          // don't do anything.
          return compared;
        } else {
          // Stay within the current data block if the target key is strictly
          // smaller than the next indexed key, or if this is the last data
          // block (nextIndexedKey == NO_NEXT_INDEXED_KEY sentinel).
          if (this.nextIndexedKey != null &&
              (this.nextIndexedKey == KeyValueScanner.NO_NEXT_INDEXED_KEY || reader
              .getComparator()
                  .compareOnlyKeyPortion(key, nextIndexedKey) < 0)) {
            // The reader shall continue to scan the current data block instead
            // of querying the block index, as long as it knows the target key
            // is strictly smaller than the next indexed key or the current
            // data block is the last data block.
            return loadBlockAndSeekToKey(this.block, nextIndexedKey, false, key, false);
          }
        }
      }
      // Don't rewind on a reseek operation, because reseek implies that we are
      // always going forward in the file.
      return seekTo(key, false);
    }
639 
640 
641     /**
642      * An internal API function. Seek to the given key, optionally rewinding to
643      * the first key of the block before doing the seek.
644      *
645      * @param key - a cell representing the key that we need to fetch
646      * @param rewind whether to rewind to the first key of the block before
647      *        doing the seek. If this is false, we are assuming we never go
648      *        back, otherwise the result is undefined.
649      * @return -1 if the key is earlier than the first key of the file,
650      *         0 if we are at the given key, 1 if we are past the given key
651      *         -2 if the key is earlier than the first key of the file while
652      *         using a faked index key
653      * @throws IOException
654      */
655     public int seekTo(Cell key, boolean rewind) throws IOException {
656       HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader();
657       BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, block,
658           cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding());
659       if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
660         // This happens if the key e.g. falls before the beginning of the file.
661         return -1;
662       }
663       return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
664           blockWithScanInfo.getNextIndexedKey(), rewind, key, false);
665     }
666 
667     @Override
668     public boolean seekBefore(byte[] key, int offset, int length) throws IOException {
669       return seekBefore(new KeyValue.KeyOnlyKeyValue(key, offset, length));
670     }
671 
    /**
     * Positions the scanner on the last entry with a key strictly before
     * {@code key}. May need to step back one block when the index points at a
     * block whose first key is not before {@code key}.
     *
     * @param key the key to seek before
     * @return false if there is no entry before {@code key} (key is at or
     *         before the first key of the file); true otherwise
     * @throws IOException on read failure
     */
    @Override
    public boolean seekBefore(Cell key) throws IOException {
      HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, block,
          cacheBlocks, pread, isCompaction,
          ((HFileReaderV2) reader).getEffectiveEncodingInCache(isCompaction));
      if (seekToBlock == null) {
        // No block can contain a key before the target.
        return false;
      }
      ByteBuffer firstKey = getFirstKeyInBlock(seekToBlock);

      if (reader.getComparator()
          .compareOnlyKeyPortion(
              new KeyValue.KeyOnlyKeyValue(firstKey.array(), firstKey.arrayOffset(),
                  firstKey.limit()), key) >= 0) {
        // The block's first key is not before the target, so the entry we
        // want lives in the previous block.
        long previousBlockOffset = seekToBlock.getPrevBlockOffset();
        if (previousBlockOffset == -1) {
          // we have a 'problem', the key we want is the first of the file.
          return false;
        }

        // It is important that we compute and pass onDiskSize to the block
        // reader so that it does not have to read the header separately to
        // figure out the size.  Currently, we do not have a way to do this
        // correctly in the general case however.
        // TODO: See https://issues.apache.org/jira/browse/HBASE-14576
        int prevBlockSize = -1;
        seekToBlock = reader.readBlock(previousBlockOffset,
            prevBlockSize, cacheBlocks,
            pread, isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
        // TODO shortcut: seek forward in this block to the last key of the
        // block.
      }
      Cell firstKeyInCurrentBlock = new KeyValue.KeyOnlyKeyValue(Bytes.getBytes(firstKey));
      loadBlockAndSeekToKey(seekToBlock, firstKeyInCurrentBlock, true, key, true);
      return true;
    }
709 
710     /**
711      * Scans blocks in the "scanned" section of the {@link HFile} until the next
712      * data block is found.
713      *
714      * @return the next block, or null if there are no more data blocks
715      * @throws IOException
716      */
717     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
718         justification="Yeah, unnecessary null check; could do w/ clean up")
719     protected HFileBlock readNextDataBlock() throws IOException {
720       long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
721       if (block == null)
722         return null;
723 
724       HFileBlock curBlock = block;
725 
726       do {
727         if (curBlock.getOffset() >= lastDataBlockOffset) {
728           return null;
729         }
730 
731         if (curBlock.getOffset() < 0) {
732           throw new IOException("Invalid block file offset: " + block);
733         }
734 
735         // We are reading the next block without block type validation, because
736         // it might turn out to be a non-data block.
737         curBlock = reader.readBlock(curBlock.getOffset()
738             + curBlock.getOnDiskSizeWithHeader(),
739             curBlock.getNextBlockOnDiskSize(), cacheBlocks, pread,
740             isCompaction, true, null, getEffectiveDataBlockEncoding());
741       } while (!curBlock.getBlockType().isData());
742 
743       return curBlock;
744     }
745 
746     public DataBlockEncoding getEffectiveDataBlockEncoding() {
747       return ((HFileReaderV2)reader).getEffectiveEncodingInCache(isCompaction);
748     }
    /**
     * Compare the given key against the current key
     * @param comparator comparator to use for raw (flat) key bytes
     * @param key buffer containing the serialized key to compare
     * @param offset offset of the serialized key within {@code key}
     * @param length length of the serialized key
     * @return -1 if the passed key is smaller than the current key, 0 if equal and 1 if greater
     */
    public abstract int compareKey(KVComparator comparator, byte[] key, int offset,
        int length);
759 
760     public abstract int compareKey(KVComparator comparator, Cell kv);
761   }
762 
  /**
   * Implementation of {@link HFileScanner} interface. Operates on plain
   * (unencoded) data blocks: key/value lengths and bytes are read directly
   * out of the block's backing array.
   */
  protected static class ScannerV2 extends AbstractScannerV2 {
    // Re-declared with the concrete v2 type so v2-specific methods are
    // reachable without casting.
    private HFileReaderV2 reader;

    public ScannerV2(HFileReaderV2 r, boolean cacheBlocks,
        final boolean pread, final boolean isCompaction) {
      super(r, cacheBlocks, pread, isCompaction);
      this.reader = r;
    }

    /**
     * @return the cell at the current position, or null if the scanner has
     *         not been seeked.
     */
    @Override
    public Cell getKeyValue() {
      if (!isSeeked())
        return null;

      return formNoTagsKeyValue();
    }

    /**
     * Materializes the current cell as a tag-less KeyValue backed directly by
     * the block's array (no byte copy). Sets the sequence id if this file
     * carries memstore timestamps.
     */
    protected Cell formNoTagsKeyValue() {
      NoTagsKeyValue ret = new NoTagsKeyValue(blockBuffer.array(), blockBuffer.arrayOffset()
          + blockBuffer.position(), getCellBufSize());
      if (this.reader.shouldIncludeMemstoreTS()) {
        ret.setSequenceId(currMemstoreTS);
      }
      return ret;
    }

    /**
     * @return serialized size of the current cell: the two length fields plus
     *         key and value bytes (excludes any trailing mvcc vint).
     */
    protected int getCellBufSize() {
      return KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen;
    }

    /**
     * @return a slice over the current key bytes; shares the block's array.
     */
    @Override
    public ByteBuffer getKey() {
      assertSeeked();
      return ByteBuffer.wrap(
          blockBuffer.array(),
          blockBuffer.arrayOffset() + blockBuffer.position()
              + KEY_VALUE_LEN_SIZE, currKeyLen).slice();
    }

    @Override
    public int compareKey(KVComparator comparator, byte[] key, int offset, int length) {
      return comparator.compareFlatKey(key, offset, length, blockBuffer.array(),
          blockBuffer.arrayOffset() + blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen);
    }

    /**
     * @return a slice over the current value bytes; shares the block's array.
     */
    @Override
    public ByteBuffer getValue() {
      assertSeeked();
      return ByteBuffer.wrap(
          blockBuffer.array(),
          blockBuffer.arrayOffset() + blockBuffer.position()
              + KEY_VALUE_LEN_SIZE + currKeyLen, currValueLen).slice();
    }

    /**
     * Clears all per-position state so {@link #isSeeked()} reports false.
     */
    protected void setNonSeekedState() {
      block = null;
      blockBuffer = null;
      currKeyLen = 0;
      currValueLen = 0;
      currMemstoreTS = 0;
      currMemstoreTSLen = 0;
    }

    /**
     * Set the position on current backing blockBuffer.
     */
    private void positionThisBlockBuffer() {
      try {
        blockBuffer.position(getNextCellStartPosition());
      } catch (IllegalArgumentException e) {
        // Log full positioning state before rethrowing; an invalid position
        // here usually indicates a corrupt block or stale lengths.
        LOG.error("Current pos = " + blockBuffer.position()
            + "; currKeyLen = " + currKeyLen + "; currValLen = "
            + currValueLen + "; block limit = " + blockBuffer.limit()
            + "; HFile name = " + reader.getName()
            + "; currBlock currBlockOffset = " + block.getOffset());
        throw e;
      }
    }

    /**
     * Set our selves up for the next 'next' invocation, set up next block.
     * @return True is more to read else false if at the end.
     * @throws IOException
     */
    private boolean positionForNextBlock() throws IOException {
      // Methods are small so they get inlined because they are 'hot'.
      long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
      if (block.getOffset() >= lastDataBlockOffset) {
        setNonSeekedState();
        return false;
      }
      return isNextBlock();
    }

    /**
     * Loads the next data block and makes it current.
     * @return false (and un-seeks the scanner) if no data block follows.
     */
    private boolean isNextBlock() throws IOException {
      // Methods are small so they get inlined because they are 'hot'.
      HFileBlock nextBlock = readNextDataBlock();
      if (nextBlock == null) {
        setNonSeekedState();
        return false;
      }
      updateCurrBlock(nextBlock);
      return true;
    }

    private final boolean _next() throws IOException {
      // Small method so can be inlined. It is a hot one.
      if (blockBuffer.remaining() <= 0) {
        return positionForNextBlock();
      }
      // We are still in the same block.
      readKeyValueLen();
      return true;
    }

    /**
     * Go to the next key/value in the block section. Loads the next block if
     * necessary. If successful, {@link #getKey()} and {@link #getValue()} can
     * be called.
     *
     * @return true if successfully navigated to the next key/value
     */
    @Override
    public boolean next() throws IOException {
      // This is a hot method so extreme measures taken to ensure it is small and inlineable.
      // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation
      assertSeeked();
      positionThisBlockBuffer();
      return _next();
    }

    /**
     * @return buffer position of the cell following the current one:
     *         length fields + key + value + mvcc vint length.
     */
    protected int getNextCellStartPosition() {
      return blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen
          + currMemstoreTSLen;
    }

    /**
     * Positions this scanner at the start of the file.
     *
     * @return false if empty file; i.e. a call to next would return false and
     *         the current key and value are undefined.
     * @throws IOException
     */
    @Override
    public boolean seekTo() throws IOException {
      if (reader == null) {
        return false;
      }

      if (reader.getTrailer().getEntryCount() == 0) {
        // No data blocks.
        return false;
      }

      long firstDataBlockOffset =
          reader.getTrailer().getFirstDataBlockOffset();
      if (block != null && block.getOffset() == firstDataBlockOffset) {
        // Already on the first block; just rewind within it.
        blockBuffer.rewind();
        readKeyValueLen();
        return true;
      }

      block = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
          isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
      if (block.getOffset() < 0) {
        throw new IOException("Invalid block offset: " + block.getOffset());
      }
      updateCurrBlock(block);
      return true;
    }

    /**
     * Makes the given block current (unless it already is) and seeks to the
     * key within it.
     *
     * @param rewind if true and the block is already current, restart the
     *        in-block seek from the beginning of the block
     * @return the result of {@link #blockSeek(Cell, boolean)}
     */
    @Override
    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
        boolean rewind, Cell key, boolean seekBefore) throws IOException {
      if (block == null || block.getOffset() != seekToBlock.getOffset()) {
        updateCurrBlock(seekToBlock);
      } else if (rewind) {
        blockBuffer.rewind();
      }

      // Update the nextIndexedKey
      this.nextIndexedKey = nextIndexedKey;
      return blockSeek(key, seekBefore);
    }

    /**
     * Updates the current block to be the given {@link HFileBlock}. Seeks to
     * the first key/value pair.
     *
     * @param newBlock the block to make current
     */
    protected void updateCurrBlock(HFileBlock newBlock) {
      block = newBlock;

      // sanity check
      if (block.getBlockType() != BlockType.DATA) {
        throw new IllegalStateException("ScannerV2 works only on data " +
            "blocks, got " + block.getBlockType() + "; " +
            "fileName=" + reader.name + ", " +
            "dataBlockEncoder=" + reader.dataBlockEncoder + ", " +
            "isCompaction=" + isCompaction);
      }

      blockBuffer = block.getBufferWithoutHeader();
      readKeyValueLen();
      blockFetches.incrementAndGet();

      // Reset the next indexed key
      this.nextIndexedKey = null;
    }

    /**
     * @param v
     * @return True if v &lt;= 0 or v &gt; current block buffer limit.
     */
    protected final boolean checkKeyLen(final int v) {
      return v <= 0 || v > this.blockBuffer.limit();
    }

    /**
     * @param v
     * @return True if v &lt; 0 or v &gt; current block buffer limit.
     */
    protected final boolean checkLen(final int v) {
      return v < 0 || v > this.blockBuffer.limit();
    }

    /**
     * Check key and value lengths are wholesome.
     */
    protected final void checkKeyValueLen() {
      if (checkKeyLen(this.currKeyLen) || checkLen(this.currValueLen)) {
        throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen +
          " or currValueLen " + this.currValueLen + ". Block offset: " + block.getOffset() +
          ", block length: " + this.blockBuffer.limit() + ", position: " +
           this.blockBuffer.position() + " (without header).");
      }
    }

    /**
     * Reads the key/value lengths at the buffer's current position into
     * currKeyLen/currValueLen and then decodes the mvcc vint that follows
     * the cell bytes. Does NOT advance blockBuffer's position.
     */
    protected void readKeyValueLen() {
      // TODO: METHOD (mostly) DUPLICATED IN V3!!!!! FIXED in master branch by collapsing v3 and v2.
      // This is a hot method. We go out of our way to make this method short so it can be
      // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
      // because it is faster than going via range-checked ByteBuffer methods or going through a
      // byte buffer array a byte at a time.
      int p = blockBuffer.position() + blockBuffer.arrayOffset();
      // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
      // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
      long ll = Bytes.toLong(blockBuffer.array(), p);
      // Read top half as an int of key length and bottom int as value length
      this.currKeyLen = (int)(ll >> Integer.SIZE);
      this.currValueLen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
      checkKeyValueLen();
      // Move position past the key and value lengths and then beyond the key and value
      p += (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
      readMvccVersion(p);
    }

    /**
     * Read mvcc. Does checks to see if we even need to read the mvcc at all.
     * @param position array index (arrayOffset already applied) of the vint
     */
    protected void readMvccVersion(final int position) {
      // See if we even need to decode mvcc.
      if (!this.reader.shouldIncludeMemstoreTS()) return;
      if (!this.reader.decodeMemstoreTS) {
        // File has mvcc bytes but all are known to be zero: a 1-byte vint.
        currMemstoreTS = 0;
        currMemstoreTSLen = 1;
        return;
      }
      _readMvccVersion(position);
    }

    /**
     * Actually do the mvcc read. Does no checks.
     * @param position
     */
    private void _readMvccVersion(final int position) {
      // This is Bytes#bytesToVint inlined so can save a few instructions in this hot method; i.e.
      // previous if one-byte vint, we'd redo the vint call to find int size.
      // Also the method is kept small so can be inlined.
      byte firstByte = blockBuffer.array()[position];
      int len = WritableUtils.decodeVIntSize(firstByte);
      if (len == 1) {
        this.currMemstoreTS = firstByte;
      } else {
        long i = 0;
        for (int idx = 0; idx < len - 1; idx++) {
          byte b = blockBuffer.array()[position + 1 + idx];
          i = i << 8;
          i = i | (b & 0xFF);
        }
        currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
      }
      this.currMemstoreTSLen = len;
    }

    protected void readMvccVersion() {
      // TODO CLEANUP!!!
      readMvccVersion(blockBuffer.arrayOffset() + blockBuffer.position());
    }

    /**
     * Within a loaded block, seek looking for the last key that is smaller than
     * (or equal to?) the key we are interested in.
     *
     * A note on the seekBefore: if you have seekBefore = true, AND the first
     * key in the block = key, then you'll get thrown exceptions. The caller has
     * to check for that case and load the previous block as appropriate.
     *
     * @param key
     *          the key to find
     * @param seekBefore
     *          find the key before the given key in case of exact match.
     * @return 0 in case of an exact key match, 1 in case of an inexact match,
     *         -2 in case of an inexact match and furthermore, the input key
     *         less than the first key of current block(e.g. using a faked index
     *         key)
     */
    protected int blockSeek(Cell key, boolean seekBefore) {
      int klen, vlen;
      long memstoreTS = 0;
      int memstoreTSLen = 0;
      // Size of the previously visited cell; -1 means we are on the block's
      // first cell and cannot step back.
      int lastKeyValueSize = -1;
      KeyValue.KeyOnlyKeyValue keyOnlykv = new KeyValue.KeyOnlyKeyValue();
      do {
        blockBuffer.mark();
        klen = blockBuffer.getInt();
        vlen = blockBuffer.getInt();
        if (checkKeyLen(klen) || checkLen(vlen)) {
          throw new IllegalStateException("Invalid klen " + klen + " or vlen "
              + vlen + ". Block offset: "
              + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
              + blockBuffer.position() + " (without header).");
        }
        // Restore position to the start of the cell (before the length ints).
        blockBuffer.reset();
        if (this.reader.shouldIncludeMemstoreTS()) {
          if (this.reader.decodeMemstoreTS) {
            int memstoreTSOffset = blockBuffer.arrayOffset() + blockBuffer.position()
                + KEY_VALUE_LEN_SIZE + klen + vlen;
            memstoreTS = Bytes.readAsVLong(blockBuffer.array(), memstoreTSOffset);
            memstoreTSLen = WritableUtils.getVIntSize(memstoreTS);
          } else {
            memstoreTS = 0;
            memstoreTSLen = 1;
          }
        }

        int keyOffset = blockBuffer.arrayOffset() + blockBuffer.position() + KEY_VALUE_LEN_SIZE;
        keyOnlykv.setKey(blockBuffer.array(), keyOffset, klen);
        int comp = reader.getComparator().compareOnlyKeyPortion(key, keyOnlykv);

        if (comp == 0) {
          if (seekBefore) {
            if (lastKeyValueSize < 0) {
              throw new IllegalStateException("blockSeek with seekBefore "
                  + "at the first key of the block: key="
                  + CellUtil.getCellKeyAsString(key)
                  + ", blockOffset=" + block.getOffset() + ", onDiskSize="
                  + block.getOnDiskSizeWithHeader());
            }
            blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
            readKeyValueLen();
            return 1; // non exact match.
          }
          currKeyLen = klen;
          currValueLen = vlen;
          if (this.reader.shouldIncludeMemstoreTS()) {
            currMemstoreTS = memstoreTS;
            currMemstoreTSLen = memstoreTSLen;
          }
          return 0; // indicate exact match
        } else if (comp < 0) {
          // Target key sorts before the current cell: land on the previous
          // cell (or stay on the first if there is no previous).
          if (lastKeyValueSize > 0)
            blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
          readKeyValueLen();
          if (lastKeyValueSize == -1 && blockBuffer.position() == 0
              && this.reader.trailer.getMinorVersion() >= MINOR_VERSION_WITH_FAKED_KEY) {
            // Key precedes the first cell of the block (faked index key case).
            return HConstants.INDEX_KEY_MAGIC;
          }
          return 1;
        }

        // The size of this key/value tuple, including key/value length fields.
        lastKeyValueSize = klen + vlen + memstoreTSLen + KEY_VALUE_LEN_SIZE;
        blockBuffer.position(blockBuffer.position() + lastKeyValueSize);
      } while (blockBuffer.remaining() > 0);

      // Seek to the last key we successfully read. This will happen if this is
      // the last key/value pair in the file, in which case the following call
      // to next() has to return false.
      blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
      readKeyValueLen();
      return 1; // didn't exactly find it.
    }

    /**
     * @return a buffer over the first key of the given block (key bytes only).
     */
    @Override
    protected ByteBuffer getFirstKeyInBlock(HFileBlock curBlock) {
      ByteBuffer buffer = curBlock.getBufferWithoutHeader();
      // It is safe to manipulate this buffer because we own the buffer object.
      buffer.rewind();
      int klen = buffer.getInt();
      buffer.getInt();  // skip value length
      ByteBuffer keyBuff = buffer.slice();
      keyBuff.limit(klen);
      keyBuff.rewind();
      return keyBuff;
    }

    @Override
    public String getKeyString() {
      return Bytes.toStringBinary(blockBuffer.array(),
          blockBuffer.arrayOffset() + blockBuffer.position()
              + KEY_VALUE_LEN_SIZE, currKeyLen);
    }

    @Override
    public String getValueString() {
      return Bytes.toString(blockBuffer.array(), blockBuffer.arrayOffset()
          + blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen,
          currValueLen);
    }

    @Override
    public int compareKey(KVComparator comparator, Cell key) {
      return comparator.compareOnlyKeyPortion(
          key,
          new KeyValue.KeyOnlyKeyValue(blockBuffer.array(), blockBuffer.arrayOffset()
              + blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen));
    }
  }
1197 
  /**
   * ScannerV2 that operates on encoded data blocks. All key/value access is
   * delegated to a {@link DataBlockEncoder.EncodedSeeker} positioned on the
   * current block's encoded payload.
   */
  protected static class EncodedScannerV2 extends AbstractScannerV2 {
    private final HFileBlockDecodingContext decodingCtx;
    private final DataBlockEncoder.EncodedSeeker seeker;
    private final DataBlockEncoder dataBlockEncoder;
    protected final HFileContext meta;

    public EncodedScannerV2(HFileReaderV2 reader, boolean cacheBlocks,
        boolean pread, boolean isCompaction, HFileContext meta) {
      super(reader, cacheBlocks, pread, isCompaction);
      DataBlockEncoding encoding = reader.dataBlockEncoder.getDataBlockEncoding();
      dataBlockEncoder = encoding.getEncoder();
      decodingCtx = dataBlockEncoder.newDataBlockDecodingContext(meta);
      seeker = dataBlockEncoder.createSeeker(
        reader.getComparator(), decodingCtx);
      this.meta = meta;
    }

    @Override
    public boolean isSeeked(){
      // The seeker holds the position; having a current block implies seeked.
      return this.block != null;
    }

    /**
     * Updates the current block to be the given {@link HFileBlock}. Seeks to
     * the first key/value pair.
     *
     * @param newBlock the block to make current
     * @throws CorruptHFileException if the block is not encoded data, or was
     *         encoded with a different encoder than this scanner's
     */
    private void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException {
      block = newBlock;

      // sanity checks
      if (block.getBlockType() != BlockType.ENCODED_DATA) {
        throw new IllegalStateException(
            "EncodedScanner works only on encoded data blocks");
      }
      short dataBlockEncoderId = block.getDataBlockEncodingId();
      if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) {
        String encoderCls = dataBlockEncoder.getClass().getName();
        throw new CorruptHFileException("Encoder " + encoderCls
          + " doesn't support data block encoding "
          + DataBlockEncoding.getNameFromId(dataBlockEncoderId));
      }

      seeker.setCurrentBuffer(getEncodedBuffer(newBlock));
      blockFetches.incrementAndGet();

      // Reset the next indexed key
      this.nextIndexedKey = null;
    }

    /**
     * @return a slice over the block's encoded payload, excluding the block
     *         header and the 2-byte encoding id.
     */
    private ByteBuffer getEncodedBuffer(HFileBlock newBlock) {
      ByteBuffer origBlock = newBlock.getBufferReadOnly();
      ByteBuffer encodedBlock = ByteBuffer.wrap(origBlock.array(),
          origBlock.arrayOffset() + newBlock.headerSize() +
          DataBlockEncoding.ID_SIZE,
          newBlock.getUncompressedSizeWithoutHeader() -
          DataBlockEncoding.ID_SIZE).slice();
      return encodedBlock;
    }

    /**
     * Positions this scanner at the start of the file.
     *
     * @return false if the file has no entries; true otherwise.
     */
    @Override
    public boolean seekTo() throws IOException {
      if (reader == null) {
        return false;
      }

      if (reader.getTrailer().getEntryCount() == 0) {
        // No data blocks.
        return false;
      }

      long firstDataBlockOffset =
          reader.getTrailer().getFirstDataBlockOffset();
      if (block != null && block.getOffset() == firstDataBlockOffset) {
        // Already on the first block; just rewind the seeker.
        seeker.rewind();
        return true;
      }

      block = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
          isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
      if (block.getOffset() < 0) {
        throw new IOException("Invalid block offset: " + block.getOffset());
      }
      updateCurrentBlock(block);
      return true;
    }

    @Override
    public boolean next() throws IOException {
      boolean isValid = seeker.next();
      if (!isValid) {
        // Exhausted the current block; move to the next data block if any.
        block = readNextDataBlock();
        isValid = block != null;
        if (isValid) {
          updateCurrentBlock(block);
        }
      }
      return isValid;
    }

    @Override
    public ByteBuffer getKey() {
      assertValidSeek();
      return seeker.getKeyDeepCopy();
    }

    @Override
    public int compareKey(KVComparator comparator, byte[] key, int offset, int length) {
      return seeker.compareKey(comparator, key, offset, length);
    }

    @Override
    public ByteBuffer getValue() {
      assertValidSeek();
      return seeker.getValueShallowCopy();
    }

    @Override
    public Cell getKeyValue() {
      if (block == null) {
        return null;
      }
      return seeker.getKeyValue();
    }

    @Override
    public String getKeyString() {
      ByteBuffer keyBuffer = getKey();
      return Bytes.toStringBinary(keyBuffer.array(),
          keyBuffer.arrayOffset(), keyBuffer.limit());
    }

    @Override
    public String getValueString() {
      ByteBuffer valueBuffer = getValue();
      return Bytes.toStringBinary(valueBuffer.array(),
          valueBuffer.arrayOffset(), valueBuffer.limit());
    }

    // Throws if no block is current (i.e. the scanner is not seeked).
    private void assertValidSeek() {
      if (block == null) {
        throw new NotSeekedException();
      }
    }

    @Override
    protected ByteBuffer getFirstKeyInBlock(HFileBlock curBlock) {
      return dataBlockEncoder.getFirstKeyInBlock(getEncodedBuffer(curBlock));
    }

    /**
     * Makes the given block current (unless it already is) and delegates the
     * in-block seek to the encoded seeker.
     */
    @Override
    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
        boolean rewind, Cell key, boolean seekBefore) throws IOException {
      if (block == null || block.getOffset() != seekToBlock.getOffset()) {
        updateCurrentBlock(seekToBlock);
      } else if (rewind) {
        seeker.rewind();
      }
      this.nextIndexedKey = nextIndexedKey;
      return seeker.seekToKeyInBlock(key, seekBefore);
    }

    @Override
    public int compareKey(KVComparator comparator, Cell key) {
      return seeker.compareKey(comparator, key);
    }
  }
1370 
  /**
   * Returns a buffer with the Bloom filter metadata. The caller takes
   * ownership of the buffer.
   *
   * @return stream over the general Bloom filter metadata block, or null if
   *         this file has none
   */
  @Override
  public DataInput getGeneralBloomFilterMetadata() throws IOException {
    return this.getBloomFilterMetadata(BlockType.GENERAL_BLOOM_META);
  }
1379 
  /**
   * Returns a buffer with the delete-family Bloom filter metadata. The caller
   * takes ownership of the buffer.
   *
   * @return stream over the delete-family Bloom filter metadata block, or
   *         null if this file has none
   */
  @Override
  public DataInput getDeleteBloomFilterMetadata() throws IOException {
    return this.getBloomFilterMetadata(BlockType.DELETE_FAMILY_BLOOM_META);
  }
1384 
1385   private DataInput getBloomFilterMetadata(BlockType blockType)
1386   throws IOException {
1387     if (blockType != BlockType.GENERAL_BLOOM_META &&
1388         blockType != BlockType.DELETE_FAMILY_BLOOM_META) {
1389       throw new RuntimeException("Block Type: " + blockType.toString() +
1390           " is not supported") ;
1391     }
1392 
1393     for (HFileBlock b : loadOnOpenBlocks)
1394       if (b.getBlockType() == blockType)
1395         return b.getByteStream();
1396     return null;
1397   }
1398 
  /**
   * @return always true; file info is loaded in the constructor in version 2.
   */
  @Override
  public boolean isFileInfoLoaded() {
    return true; // We load file info in constructor in version 2.
  }
1403 
1404   /**
1405    * Validates that the minor version is within acceptable limits.
1406    * Otherwise throws an Runtime exception
1407    */
1408   private void validateMinorVersion(Path path, int minorVersion) {
1409     if (minorVersion < MIN_MINOR_VERSION ||
1410         minorVersion > MAX_MINOR_VERSION) {
1411       String msg = "Minor version for path " + path +
1412                    " is expected to be between " +
1413                    MIN_MINOR_VERSION + " and " + MAX_MINOR_VERSION +
1414                    " but is found to be " + minorVersion;
1415       LOG.error(msg);
1416       throw new RuntimeException(msg);
1417     }
1418   }
1419 
  /**
   * @return the HFile major format version this reader handles (always 2).
   */
  @Override
  public int getMajorVersion() {
    return 2;
  }
1424 
  /**
   * @return the {@link HFileContext} describing this file.
   */
  @Override
  public HFileContext getFileContext() {
    return hfileContext;
  }
1429 
  /**
   * Returns false if block prefetching was requested for this file and has
   * not completed, true otherwise
   */
  boolean prefetchComplete() {
    // Delegates to the shared prefetch executor keyed by file path.
    return PrefetchExecutor.isCompleted(path);
  }
1437 
  /**
   * Releases socket/stream buffers held by the underlying block reader's
   * input stream.
   */
  @Override
  public void unbufferStream() {
    fsBlockReader.unbufferStream();
  }
1442 }