View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.io.hfile;
21  
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.assertFalse;
24  import static org.junit.Assert.assertTrue;
25  
26  import java.io.ByteArrayOutputStream;
27  import java.io.DataOutputStream;
28  import java.io.IOException;
29  import java.nio.ByteBuffer;
30  import java.util.ArrayList;
31  import java.util.Arrays;
32  import java.util.Collection;
33  import java.util.HashSet;
34  import java.util.List;
35  import java.util.Random;
36  import java.util.Set;
37  
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.fs.FSDataInputStream;
42  import org.apache.hadoop.fs.FSDataOutputStream;
43  import org.apache.hadoop.fs.FileSystem;
44  import org.apache.hadoop.fs.Path;
45  import org.apache.hadoop.hbase.CellUtil;
46  import org.apache.hadoop.hbase.HBaseTestingUtility;
47  import org.apache.hadoop.hbase.KeyValue;
48  import org.apache.hadoop.hbase.fs.HFileSystem;
49  import org.apache.hadoop.hbase.io.compress.Compression;
50  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
51  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
52  import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk;
53  import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
54  import org.apache.hadoop.hbase.testclassification.MediumTests;
55  import org.apache.hadoop.hbase.util.Bytes;
56  import org.apache.hadoop.hbase.util.ClassSize;
57  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
58  import org.junit.Before;
59  import org.junit.Test;
60  import org.junit.experimental.categories.Category;
61  import org.junit.runner.RunWith;
62  import org.junit.runners.Parameterized;
63  import org.junit.runners.Parameterized.Parameters;
64  
65  @RunWith(Parameterized.class)
66  @Category(MediumTests.class)
67  public class TestHFileBlockIndex {
68  
69    @Parameters
70    public static Collection<Object[]> compressionAlgorithms() {
71      return HBaseTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED;
72    }
73  
  /**
   * @param compr compression algorithm this parameterized test instance uses
   *          for all written HFiles
   */
  public TestHFileBlockIndex(Compression.Algorithm compr) {
    this.compr = compr;
  }
77  
  private static final Log LOG = LogFactory.getLog(TestHFileBlockIndex.class);

  // Number of data blocks written by writeWholeIndex().
  private static final int NUM_DATA_BLOCKS = 1000;
  private static final HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();

  // Small block size so files contain many blocks and produce deep indices.
  private static final int SMALL_BLOCK_SIZE = 4096;
  // Number of key/values written by testHFileWriterAndReader().
  private static final int NUM_KV = 10000;

  // Initialized in setUp()/clear().
  private static FileSystem fs;
  private Path path;
  private Random rand;
  // Produced by writeWholeIndex() and consumed by readIndex().
  private long rootIndexOffset;
  private int numRootEntries;
  private int numLevels;
  // NOTE(review): static but mutated per test; relies on setUp()/clear()
  // invoking keys.clear() before every run.
  private static final List<byte[]> keys = new ArrayList<byte[]>();
  // Compression algorithm for this parameterized run.
  private final Compression.Algorithm compr;
  private byte[] firstKeyInFile;
  private Configuration conf;

  // Parallel arrays: index chunk size, the index depth it is expected to
  // produce, and the matching expected uncompressed index size.
  private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
  private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
  private static final int[] UNCOMPRESSED_INDEX_SIZES =
      { 19187, 21813, 23086 };

  private static final boolean includesMemstoreTS = true;

  // Sanity-check the parallel arrays above stay in sync (effective only when
  // assertions are enabled, as in test runs).
  static {
    assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
    assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
  }
109 
110   @Before
111   public void setUp() throws IOException {
112     keys.clear();
113     rand = new Random(2389757);
114     firstKeyInFile = null;
115     conf = TEST_UTIL.getConfiguration();
116 
117     // This test requires at least HFile format version 2.
118     conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
119 
120     fs = HFileSystem.get(conf);
121   }
122 
123   @Test
124   public void testBlockIndex() throws IOException {
125     testBlockIndexInternals(false);
126     clear();
127     testBlockIndexInternals(true);
128   }
129 
130   private void clear() throws IOException {
131     keys.clear();
132     rand = new Random(2389757);
133     firstKeyInFile = null;
134     conf = TEST_UTIL.getConfiguration();
135 
136     // This test requires at least HFile format version 2.
137     conf.setInt(HFile.FORMAT_VERSION_KEY, 3);
138 
139     fs = HFileSystem.get(conf);
140   }
141 
142   private void testBlockIndexInternals(boolean useTags) throws IOException {
143     path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags);
144     writeWholeIndex(useTags);
145     readIndex(useTags);
146   }
147 
148   /**
149    * A wrapper around a block reader which only caches the results of the last
150    * operation. Not thread-safe.
151    */
152   private static class BlockReaderWrapper implements HFile.CachingBlockReader {
153 
154     private HFileBlock.FSReader realReader;
155     private long prevOffset;
156     private long prevOnDiskSize;
157     private boolean prevPread;
158     private HFileBlock prevBlock;
159 
160     public int hitCount = 0;
161     public int missCount = 0;
162 
163     public BlockReaderWrapper(HFileBlock.FSReader realReader) {
164       this.realReader = realReader;
165     }
166 
167     @Override
168     public HFileBlock readBlock(long offset, long onDiskSize,
169         boolean cacheBlock, boolean pread, boolean isCompaction,
170         boolean updateCacheMetrics, BlockType expectedBlockType,
171         DataBlockEncoding expectedDataBlockEncoding)
172         throws IOException {
173       if (offset == prevOffset && onDiskSize == prevOnDiskSize &&
174           pread == prevPread) {
175         hitCount += 1;
176         return prevBlock;
177       }
178 
179       missCount += 1;
180       prevBlock = realReader.readBlockData(offset, onDiskSize, pread, false);
181       prevOffset = offset;
182       prevOnDiskSize = onDiskSize;
183       prevPread = pread;
184 
185       return prevBlock;
186     }
187   }
188 
189   private void readIndex(boolean useTags) throws IOException {
190     long fileSize = fs.getFileStatus(path).getLen();
191     LOG.info("Size of " + path + ": " + fileSize + ", compression=" + compr);
192 
193     FSDataInputStream istream = fs.open(path);
194     HFileContext meta = new HFileContextBuilder()
195                         .withHBaseCheckSum(true)
196                         .withIncludesMvcc(includesMemstoreTS)
197                         .withIncludesTags(useTags)
198                         .withCompression(compr)
199                         .build();
200     HFileBlock.FSReader blockReader = new HFileBlock.FSReaderImpl(istream, fs.getFileStatus(path)
201         .getLen(), meta);
202 
203     BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
204     HFileBlockIndex.BlockIndexReader indexReader =
205         new HFileBlockIndex.BlockIndexReader(
206             KeyValue.RAW_COMPARATOR, numLevels, brw);
207 
208     indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
209         fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);
210 
211     long prevOffset = -1;
212     int i = 0;
213     int expectedHitCount = 0;
214     int expectedMissCount = 0;
215     LOG.info("Total number of keys: " + keys.size());
216     for (byte[] key : keys) {
217       assertTrue(key != null);
218       assertTrue(indexReader != null);
219       HFileBlock b =
220           indexReader.seekToDataBlock(new KeyValue.KeyOnlyKeyValue(key, 0, key.length), null, true,
221             true, false, null);
222       if (KeyValue.COMPARATOR.compareFlatKey(key, firstKeyInFile) < 0) {
223         assertTrue(b == null);
224         ++i;
225         continue;
226       }
227 
228       String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);
229 
230       assertTrue("seekToDataBlock failed for " + keyStr, b != null);
231 
232       if (prevOffset == b.getOffset()) {
233         assertEquals(++expectedHitCount, brw.hitCount);
234       } else {
235         LOG.info("First key in a new block: " + keyStr + ", block offset: "
236             + b.getOffset() + ")");
237         assertTrue(b.getOffset() > prevOffset);
238         assertEquals(++expectedMissCount, brw.missCount);
239         prevOffset = b.getOffset();
240       }
241       ++i;
242     }
243 
244     istream.close();
245   }
246 
247   private void writeWholeIndex(boolean useTags) throws IOException {
248     assertEquals(0, keys.size());
249     HFileContext meta = new HFileContextBuilder()
250                         .withHBaseCheckSum(true)
251                         .withIncludesMvcc(includesMemstoreTS)
252                         .withIncludesTags(useTags)
253                         .withCompression(compr)
254                         .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM)
255                         .build();
256     HFileBlock.Writer hbw = new HFileBlock.Writer(null,
257         meta);
258     FSDataOutputStream outputStream = fs.create(path);
259     HFileBlockIndex.BlockIndexWriter biw =
260         new HFileBlockIndex.BlockIndexWriter(hbw, null, null);
261 
262     for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
263       hbw.startWriting(BlockType.DATA).write(String.valueOf(rand.nextInt(1000)).getBytes());
264       long blockOffset = outputStream.getPos();
265       hbw.writeHeaderAndData(outputStream);
266 
267       byte[] firstKey = null;
268       byte[] family = Bytes.toBytes("f");
269       byte[] qualifier = Bytes.toBytes("q");
270       for (int j = 0; j < 16; ++j) {
271         byte[] k =
272             new KeyValue(TestHFileWriterV2.randomOrderedKey(rand, i * 16 + j), family, qualifier,
273                 EnvironmentEdgeManager.currentTime(), KeyValue.Type.Put).getKey();
274         keys.add(k);
275         if (j == 8) {
276           firstKey = k;
277         }
278       }
279       assertTrue(firstKey != null);
280       if (firstKeyInFile == null) {
281         firstKeyInFile = firstKey;
282       }
283       biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());
284 
285       writeInlineBlocks(hbw, outputStream, biw, false);
286     }
287     writeInlineBlocks(hbw, outputStream, biw, true);
288     rootIndexOffset = biw.writeIndexBlocks(outputStream);
289     outputStream.close();
290 
291     numLevels = biw.getNumLevels();
292     numRootEntries = biw.getNumRootEntries();
293 
294     LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" +
295         numRootEntries + ", rootIndexOffset=" + rootIndexOffset);
296   }
297 
298   private void writeInlineBlocks(HFileBlock.Writer hbw,
299       FSDataOutputStream outputStream, HFileBlockIndex.BlockIndexWriter biw,
300       boolean isClosing) throws IOException {
301     while (biw.shouldWriteBlock(isClosing)) {
302       long offset = outputStream.getPos();
303       biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType()));
304       hbw.writeHeaderAndData(outputStream);
305       biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
306           hbw.getUncompressedSizeWithoutHeader());
307       LOG.info("Wrote an inline index block at " + offset + ", size " +
308           hbw.getOnDiskSizeWithHeader());
309     }
310   }
311 
312   private static final long getDummyFileOffset(int i) {
313     return i * 185 + 379;
314   }
315 
316   private static final int getDummyOnDiskSize(int i) {
317     return i * i * 37 + i * 19 + 13;
318   }
319 
  /**
   * Builds a non-root ("secondary") index block in memory containing every
   * odd-indexed generated key, then verifies that
   * {@link BlockIndexReader#binarySearchNonRootIndex} and
   * {@link BlockIndexReader#locateNonRootIndexEntry} return the correct entry
   * (or correctly report "before the first key") for both present and absent
   * keys.
   */
  @Test
  public void testSecondaryIndexBinarySearch() throws IOException {
    int numTotalKeys = 99;
    assertTrue(numTotalKeys % 2 == 1); // Ensure no one made this even.

    // We only add odd-index keys into the array that we will binary-search.
    int numSearchedKeys = (numTotalKeys - 1) / 2;

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);

    dos.writeInt(numSearchedKeys);
    int curAllEntriesSize = 0;
    int numEntriesAdded = 0;

    // Only odd-index elements of this array are used to keep the secondary
    // index entries of the corresponding keys.
    int secondaryIndexEntries[] = new int[numTotalKeys];

    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i * 2);
      KeyValue cell = new KeyValue(k, Bytes.toBytes("f"), Bytes.toBytes("q"),
          Bytes.toBytes("val"));
      //KeyValue cell = new KeyValue.KeyOnlyKeyValue(k, 0, k.length);
      keys.add(cell.getKey());
      String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
      // Pad the log prefix to a fixed width so log lines align.
      StringBuilder padding = new StringBuilder();
      while (msgPrefix.length() + padding.length() < 70)
        padding.append(' ');
      msgPrefix += padding;
      if (i % 2 != 0) {
        // Odd-indexed keys go into the searched block; record the byte offset
        // of each key within the entry area.
        dos.writeInt(curAllEntriesSize);
        secondaryIndexEntries[i] = curAllEntriesSize;
        LOG.info(msgPrefix + "secondary index entry #" + ((i - 1) / 2) +
            ", offset " + curAllEntriesSize);
        curAllEntriesSize += cell.getKey().length
            + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
        ++numEntriesAdded;
      } else {
        secondaryIndexEntries[i] = -1;
        LOG.info(msgPrefix + "not in the searched array");
      }
    }

    // Make sure the keys are increasing.
    for (int i = 0; i < keys.size() - 1; ++i)
      assertTrue(KeyValue.COMPARATOR.compare(
          new KeyValue.KeyOnlyKeyValue(keys.get(i), 0, keys.get(i).length),
          new KeyValue.KeyOnlyKeyValue(keys.get(i + 1), 0, keys.get(i + 1).length)) < 0);

    dos.writeInt(curAllEntriesSize);
    assertEquals(numSearchedKeys, numEntriesAdded);
    int secondaryIndexOffset = dos.size();
    // Header so far: entry count plus (numSearchedKeys + 1) offset ints.
    assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2),
        secondaryIndexOffset);

    // Write the entries themselves: (fileOffset, onDiskSize, key) per entry.
    for (int i = 1; i <= numTotalKeys - 1; i += 2) {
      assertEquals(dos.size(),
          secondaryIndexOffset + secondaryIndexEntries[i]);
      long dummyFileOffset = getDummyFileOffset(i);
      int dummyOnDiskSize = getDummyOnDiskSize(i);
      LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" +
          dummyOnDiskSize + " at offset " + dos.size());
      dos.writeLong(dummyFileOffset);
      dos.writeInt(dummyOnDiskSize);
      LOG.debug("Stored key " + ((i - 1) / 2) +" at offset " + dos.size());
      dos.write(keys.get(i));
    }

    dos.writeInt(curAllEntriesSize);

    ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] searchKey = keys.get(i);
      byte[] arrayHoldingKey = new byte[searchKey.length +
                                        searchKey.length / 2];

      // To make things a bit more interesting, store the key we are looking
      // for at a non-zero offset in a new array.
      System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2,
            searchKey.length);

      KeyValue.KeyOnlyKeyValue cell = new KeyValue.KeyOnlyKeyValue(
          arrayHoldingKey, searchKey.length / 2, searchKey.length);
      int searchResult = BlockIndexReader.binarySearchNonRootIndex(cell,
          nonRootIndex, KeyValue.COMPARATOR);
      String lookupFailureMsg = "Failed to look up key #" + i + " ("
          + Bytes.toStringBinary(searchKey) + ")";

      int expectedResult;
      int referenceItem;

      if (i % 2 != 0) {
        // This key is in the array we search as the element (i - 1) / 2. Make
        // sure we find it.
        expectedResult = (i - 1) / 2;
        referenceItem = i;
      } else {
        // This key is not in the array but between two elements on the array,
        // in the beginning, or in the end. The result should be the previous
        // key in the searched array, or -1 for i = 0.
        expectedResult = i / 2 - 1;
        referenceItem = i - 1;
      }

      assertEquals(lookupFailureMsg, expectedResult, searchResult);

      // Now test we can get the offset and the on-disk-size using a
      // higher-level API function.
      boolean locateBlockResult =
          (BlockIndexReader.locateNonRootIndexEntry(nonRootIndex, cell,
          KeyValue.COMPARATOR) != -1);

      if (i == 0) {
        // The smallest key sorts before every indexed entry; no block covers it.
        assertFalse(locateBlockResult);
      } else {
        assertTrue(locateBlockResult);
        String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
        assertEquals(errorMsg, getDummyFileOffset(referenceItem),
            nonRootIndex.getLong());
        assertEquals(errorMsg, getDummyOnDiskSize(referenceItem),
            nonRootIndex.getInt());
      }
    }

  }
446 
447   @Test
448   public void testBlockIndexChunk() throws IOException {
449     BlockIndexChunk c = new BlockIndexChunk();
450     ByteArrayOutputStream baos = new ByteArrayOutputStream();
451     int N = 1000;
452     int[] numSubEntriesAt = new int[N];
453     int numSubEntries = 0;
454     for (int i = 0; i < N; ++i) {
455       baos.reset();
456       DataOutputStream dos = new DataOutputStream(baos);
457       c.writeNonRoot(dos);
458       assertEquals(c.getNonRootSize(), dos.size());
459 
460       baos.reset();
461       dos = new DataOutputStream(baos);
462       c.writeRoot(dos);
463       assertEquals(c.getRootSize(), dos.size());
464 
465       byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
466       numSubEntries += rand.nextInt(5) + 1;
467       keys.add(k);
468       c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
469     }
470 
471     // Test the ability to look up the entry that contains a particular
472     // deeper-level index block's entry ("sub-entry"), assuming a global
473     // 0-based ordering of sub-entries. This is needed for mid-key calculation.
474     for (int i = 0; i < N; ++i) {
475       for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1];
476            j < numSubEntriesAt[i];
477            ++j) {
478         assertEquals(i, c.getEntryBySubEntry(j));
479       }
480     }
481   }
482 
483   /** Checks if the HeapSize calculator is within reason */
484   @Test
485   public void testHeapSizeForBlockIndex() throws IOException {
486     Class<HFileBlockIndex.BlockIndexReader> cl =
487         HFileBlockIndex.BlockIndexReader.class;
488     long expected = ClassSize.estimateBase(cl, false);
489 
490     HFileBlockIndex.BlockIndexReader bi =
491         new HFileBlockIndex.BlockIndexReader(KeyValue.RAW_COMPARATOR, 1);
492     long actual = bi.heapSize();
493 
494     // Since the arrays in BlockIndex(byte [][] blockKeys, long [] blockOffsets,
495     // int [] blockDataSizes) are all null they are not going to show up in the
496     // HeapSize calculation, so need to remove those array costs from expected.
497     expected -= ClassSize.align(3 * ClassSize.ARRAY);
498 
499     if (expected != actual) {
500       ClassSize.estimateBase(cl, true);
501       assertEquals(expected, actual);
502     }
503   }
504 
  /**
   * Verifies that {@code reader.midkey()} does not throw
   * {@code ArrayIndexOutOfBoundsException} when the mid-key lies exactly on a
   * leaf-index block boundary.
   * @throws IOException
   */
  @Test
  public void testMidKeyOnLeafIndexBlockBoundary() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
        "hfile_for_midkey");
    int maxChunkSize = 512;
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    // should open hfile.block.index.cacheonwrite
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, true);

    CacheConfig cacheConf = new CacheConfig(conf);
    BlockCache blockCache = cacheConf.getBlockCache();
    // Evict all blocks that were cached-on-write by the previous invocation.
    blockCache.evictBlocksByHfileName(hfilePath.getName());
    // Write the HFile
    {
      HFileContext meta = new HFileContextBuilder()
                          .withBlockSize(SMALL_BLOCK_SIZE)
                          .withCompression(Algorithm.NONE)
                          .withDataBlockEncoding(DataBlockEncoding.NONE)
                          .build();
      HFile.Writer writer =
            HFile.getWriterFactory(conf, cacheConf)
                .withPath(fs, hfilePath)
                .withFileContext(meta)
                .create();
      Random rand = new Random(19231737);
      byte[] family = Bytes.toBytes("f");
      byte[] qualifier = Bytes.toBytes("q");
      int kvNumberToBeWritten = 16;
      // the new generated hfile will contain 2 leaf-index blocks and 16 data blocks,
      // midkey is just on the boundary of the first leaf-index block
      for (int i = 0; i < kvNumberToBeWritten; ++i) {
        byte[] row = TestHFileWriterV2.randomOrderedFixedLengthKey(rand, i, 30);

        // Key will be interpreted by KeyValue.KEY_COMPARATOR
        KeyValue kv =
              new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
                  TestHFileWriterV2.randomFixedLengthValue(rand, SMALL_BLOCK_SIZE));
        writer.append(kv);
      }
      writer.close();
    }

    // close hfile.block.index.cacheonwrite
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, false);

    // Read the HFile
    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, conf);

    boolean hasArrayIndexOutOfBoundsException = false;
    try {
      // get the mid-key.
      reader.midkey();
    } catch (ArrayIndexOutOfBoundsException e) {
      hasArrayIndexOutOfBoundsException = true;
    } finally {
      reader.close();
    }

    // to check if ArrayIndexOutOfBoundsException occurred
    assertFalse(hasArrayIndexOutOfBoundsException);
  }
571 
  /**
   * Testing block index through the HFile writer/reader APIs. Allows to test
   * setting index block size through configuration, intermediate-level index
   * blocks, and caching index blocks on write.
   *
   * @throws IOException
   */
  @Test
  public void testHFileWriterAndReader() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
        "hfile_for_block_index");
    CacheConfig cacheConf = new CacheConfig(conf);
    BlockCache blockCache = cacheConf.getBlockCache();

    // One iteration per configured index chunk size / expected depth pair.
    for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
      int indexBlockSize = INDEX_CHUNK_SIZES[testI];
      int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
      LOG.info("Index block size: " + indexBlockSize + ", compression: "
          + compr);
      // Evict all blocks that were cached-on-write by the previous invocation.
      blockCache.evictBlocksByHfileName(hfilePath.getName());

      conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
      Set<String> keyStrSet = new HashSet<String>();
      byte[][] keys = new byte[NUM_KV][];
      byte[][] values = new byte[NUM_KV][];

      // Write the HFile
      {
        HFileContext meta = new HFileContextBuilder()
                            .withBlockSize(SMALL_BLOCK_SIZE)
                            .withCompression(compr)
                            .build();
        HFile.Writer writer =
            HFile.getWriterFactory(conf, cacheConf)
                .withPath(fs, hfilePath)
                .withFileContext(meta)
                .create();
        Random rand = new Random(19231737);
        byte[] family = Bytes.toBytes("f");
        byte[] qualifier = Bytes.toBytes("q");
        for (int i = 0; i < NUM_KV; ++i) {
          byte[] row = TestHFileWriterV2.randomOrderedKey(rand, i);

          // Key will be interpreted by KeyValue.KEY_COMPARATOR
          KeyValue kv =
              new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
                  TestHFileWriterV2.randomValue(rand));
          byte[] k = kv.getKey();
          writer.append(kv);
          keys[i] = k;
          values[i] = CellUtil.cloneValue(kv);
          keyStrSet.add(Bytes.toStringBinary(k));
          if (i > 0) {
            // Generated keys must be strictly increasing.
            assertTrue(KeyValue.COMPARATOR.compareFlatKey(keys[i - 1],
                keys[i]) < 0);
          }
        }

        writer.close();
      }

      // Read the HFile
      HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, conf);
      assertEquals(expectedNumLevels,
          reader.getTrailer().getNumDataIndexLevels());

      assertTrue(Bytes.equals(keys[0], reader.getFirstKey()));
      assertTrue(Bytes.equals(keys[NUM_KV - 1], reader.getLastKey()));
      LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));

      // Seek to every key forwards, then backwards, with and without pread.
      for (boolean pread : new boolean[] { false, true }) {
        HFileScanner scanner = reader.getScanner(true, pread);
        for (int i = 0; i < NUM_KV; ++i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
              scanner.getValue());
        }
        assertTrue(scanner.seekTo());
        for (int i = NUM_KV - 1; i >= 0; --i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i], scanner.getKey(),
              scanner.getValue());
        }
      }

      // Manually compute the mid-key and validate it.
      HFileReaderV2 reader2 = (HFileReaderV2) reader;
      HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();

      HFileBlock.BlockIterator iter = fsReader.blockRange(0,
          reader.getTrailer().getLoadOnOpenDataOffset());
      HFileBlock block;
      List<byte[]> blockKeys = new ArrayList<byte[]>();
      while ((block = iter.nextBlock()) != null) {
        // NOTE(review): this 'return' exits the entire test method on the
        // first non-leaf-index block. If data blocks precede leaf-index
        // blocks in this range, the mid-key and index-size validation below
        // never runs and 'reader' is leaked — this looks like it should be
        // 'continue'. Confirm the expected UNCOMPRESSED_INDEX_SIZES still
        // hold before changing it.
        if (block.getBlockType() != BlockType.LEAF_INDEX)
          return;
        ByteBuffer b = block.getBufferReadOnly();
        int n = b.getInt();
        // One int for the number of items, and n + 1 for the secondary index.
        int entriesOffset = Bytes.SIZEOF_INT * (n + 2);

        // Get all the keys from the leaf index block.
        for (int i = 0; i < n; ++i) {
          int keyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 1));
          int nextKeyRelOffset = b.getInt(Bytes.SIZEOF_INT * (i + 2));
          int keyLen = nextKeyRelOffset - keyRelOffset;
          int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset +
              HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
          byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset
              + keyLen);
          String blockKeyStr = Bytes.toString(blockKey);
          blockKeys.add(blockKey);

          // If the first key of the block is not among the keys written, we
          // are not parsing the non-root index block format correctly.
          assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
              keyStrSet.contains(blockKeyStr));
        }
      }

      // Validate the mid-key.
      assertEquals(
          Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
          Bytes.toStringBinary(reader.midkey()));

      assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
          reader.getTrailer().getUncompressedDataIndexSize());

      reader.close();
      // NOTE(review): reader2 is the same object as reader, so this closes
      // the reader a second time — presumably harmless, but redundant.
      reader2.close();
    }
  }
705 
706   private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i)
707       throws IOException {
708     assertEquals("Failed to seek to key #" + i + " (" + Bytes.toStringBinary(keys[i]) + ")", 0,
709         scanner.seekTo(KeyValue.createKeyValueFromKey(keys[i])));
710   }
711 
712   private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr,
713       ByteBuffer buf) {
714     assertEquals(msgPrefix + ": expected " + Bytes.toStringBinary(arr)
715         + ", actual " + Bytes.toStringBinary(buf), 0, Bytes.compareTo(arr, 0,
716         arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
717   }
718 
719   /** Check a key/value pair after it was read by the reader */
720   private void checkKeyValue(String msgPrefix, byte[] expectedKey,
721       byte[] expectedValue, ByteBuffer keyRead, ByteBuffer valueRead) {
722     if (!msgPrefix.isEmpty())
723       msgPrefix += ". ";
724 
725     assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
726     assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue,
727         valueRead);
728   }
729 
  /** Large-key index test with the default minimum of 16 entries per index block. */
  @Test(timeout=10000)
  public void testIntermediateLevelIndicesWithLargeKeys() throws IOException {
    testIntermediateLevelIndicesWithLargeKeys(16);
  }
734 
  /** Large-key index test with the minimum entries-per-block lowered to 2. */
  @Test(timeout=10000)
  public void testIntermediateLevelIndicesWithLargeKeysWithMinNumEntries() throws IOException {
    // because of the large rowKeys, we will end up with a 50-level block index without sanity check
    testIntermediateLevelIndicesWithLargeKeys(2);
  }
740 
741   public void testIntermediateLevelIndicesWithLargeKeys(int minNumEntries) throws IOException {
742     Path hfPath = new Path(TEST_UTIL.getDataTestDir(),
743       "testIntermediateLevelIndicesWithLargeKeys.hfile");
744     int maxChunkSize = 1024;
745     FileSystem fs = FileSystem.get(conf);
746     CacheConfig cacheConf = new CacheConfig(conf);
747     conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
748     conf.setInt(HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY, minNumEntries);
749     HFileContext context = new HFileContextBuilder().withBlockSize(16).build();
750     HFileWriterV2 hfw =
751         (HFileWriterV2) new HFileWriterV2.WriterFactoryV2(conf, cacheConf)
752         .withFileContext(context)
753         .withPath(fs, hfPath).create();
754     List<byte[]> keys = new ArrayList<byte[]>();
755 
756     // This should result in leaf-level indices and a root level index
757     for (int i=0; i < 100; i++) {
758       byte[] rowkey = new byte[maxChunkSize + 1];
759       byte[] b = Bytes.toBytes(i);
760       System.arraycopy(b, 0, rowkey, rowkey.length - b.length, b.length);
761       keys.add(rowkey);
762       hfw.append(CellUtil.createCell(rowkey));
763     }
764     hfw.close();
765 
766     HFile.Reader reader = HFile.createReader(fs, hfPath, cacheConf, conf);
767     // Scanner doesn't do Cells yet.  Fix.
768     HFileScanner scanner = reader.getScanner(true, true);
769     for (int i = 0; i < keys.size(); ++i) {
770       scanner.seekTo(CellUtil.createCell(keys.get(i)));
771     }
772     reader.close();
773   }
774 }
775