View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.util.ArrayList;
24  import java.util.Arrays;
25  import java.util.Collection;
26  import java.util.List;
27  import java.util.Random;
28  
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FSDataInputStream;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.Cell;
34  import org.apache.hadoop.hbase.HBaseTestingUtility;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.KeyValue;
37  import org.apache.hadoop.hbase.Tag;
38  import org.apache.hadoop.hbase.io.compress.Compression;
39  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
40  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
41  import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
42  import org.apache.hadoop.hbase.testclassification.IOTests;
43  import org.apache.hadoop.hbase.testclassification.SmallTests;
44  import org.apache.hadoop.hbase.util.Bytes;
45  import org.apache.hadoop.hbase.util.Writables;
46  import org.apache.hadoop.io.Text;
47  import org.junit.Assert;
48  import org.junit.Before;
49  import org.junit.Test;
50  import org.junit.experimental.categories.Category;
51  import org.junit.runner.RunWith;
52  import org.junit.runners.Parameterized;
53  import org.slf4j.Logger;
54  import org.slf4j.LoggerFactory;
55  
56  /**
57   * Testing writing a version 3 {@link HFile} for all encoded blocks
58   */
59  @RunWith(Parameterized.class)
60  @Category({IOTests.class, SmallTests.class})
61  public class TestHFileWriterV3WithDataEncoders {
62  
63    private static final Logger LOG =
64      LoggerFactory.getLogger(TestHFileWriterV3WithDataEncoders.class);
65  
66    private static final HBaseTestingUtility TEST_UTIL =
67      new HBaseTestingUtility();
68  
69    private Configuration conf;
70    private FileSystem fs;
71    private boolean useTags;
72    private DataBlockEncoding dataBlockEncoding;
73  
74    public TestHFileWriterV3WithDataEncoders(boolean useTags,
75        DataBlockEncoding dataBlockEncoding) {
76      this.useTags = useTags;
77      this.dataBlockEncoding = dataBlockEncoding;
78    }
79  
80    @Parameterized.Parameters
81    public static Collection<Object[]> parameters() {
82      DataBlockEncoding[] dataBlockEncodings = DataBlockEncoding.values();
83      Object[][] params = new Object[dataBlockEncodings.length * 2 - 2][];
84      int i = 0;
85      for (DataBlockEncoding dataBlockEncoding : dataBlockEncodings) {
86        if (dataBlockEncoding == DataBlockEncoding.NONE) {
87          continue;
88        }
89        params[i++] = new Object[]{false, dataBlockEncoding};
90        params[i++] = new Object[]{true, dataBlockEncoding};
91      }
92      return Arrays.asList(params);
93    }
94  
95    @Before
96    public void setUp() throws IOException {
97      conf = TEST_UTIL.getConfiguration();
98      fs = FileSystem.get(conf);
99    }
100 
101   @Test
102   public void testHFileFormatV3() throws IOException {
103     testHFileFormatV3Internals(useTags);
104   }
105 
106   private void testHFileFormatV3Internals(boolean useTags) throws IOException {
107     Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "testHFileFormatV3");
108     final Compression.Algorithm compressAlgo = Compression.Algorithm.GZ;
109     final int entryCount = 10000;
110     writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, false, useTags);
111   }
112 
113   @Test
114   public void testMidKeyInHFile() throws IOException{
115     testMidKeyInHFileInternals(useTags);
116   }
117 
118   private void testMidKeyInHFileInternals(boolean useTags) throws IOException {
119     Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
120       "testMidKeyInHFile");
121     Compression.Algorithm compressAlgo = Compression.Algorithm.NONE;
122     int entryCount = 50000;
123     writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, true, useTags);
124   }
125 
126   private void writeDataAndReadFromHFile(Path hfilePath,
127       Compression.Algorithm compressAlgo, int entryCount, boolean findMidKey, boolean useTags)
128       throws IOException {
129 
130     HFileContext context = new HFileContextBuilder()
131       .withBlockSize(4096)
132       .withIncludesTags(useTags)
133       .withDataBlockEncoding(dataBlockEncoding)
134       .withCompression(compressAlgo).build();
135     CacheConfig cacheConfig = new CacheConfig(conf);
136     HFileWriterV3 writer = (HFileWriterV3)
137       new HFileWriterV3.WriterFactoryV3(conf, new CacheConfig(conf))
138         .withPath(fs, hfilePath)
139         .withFileContext(context)
140         .create();
141 
142     Random rand = new Random(9713312); // Just a fixed seed.
143     List<KeyValue> keyValues = new ArrayList<>(entryCount);
144 
145     writeKeyValues(entryCount, useTags, writer, rand, keyValues);
146 
147 
148     FSDataInputStream fsdis = fs.open(hfilePath);
149 
150     long fileSize = fs.getFileStatus(hfilePath).getLen();
151     FixedFileTrailer trailer =
152       FixedFileTrailer.readFromStream(fsdis, fileSize);
153 
154     Assert.assertEquals(3, trailer.getMajorVersion());
155     Assert.assertEquals(entryCount, trailer.getEntryCount());
156     HFileContext meta = new HFileContextBuilder()
157       .withCompression(compressAlgo)
158       .withIncludesMvcc(true)
159       .withIncludesTags(useTags)
160       .withDataBlockEncoding(dataBlockEncoding)
161       .withHBaseCheckSum(true).build();
162     HFileBlock.FSReader blockReader =
163       new HFileBlock.FSReaderImpl(fsdis, fileSize, meta);
164     // Comparator class name is stored in the trailer in version 2.
165     KeyValue.KVComparator comparator = trailer.createComparator();
166     HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
167       new HFileBlockIndex.BlockIndexReader(comparator,
168         trailer.getNumDataIndexLevels());
169     HFileBlockIndex.BlockIndexReader metaBlockIndexReader =
170       new HFileBlockIndex.BlockIndexReader(
171         KeyValue.RAW_COMPARATOR, 1);
172 
173     HFileBlock.BlockIterator blockIter = blockReader.blockRange(
174       trailer.getLoadOnOpenDataOffset(),
175       fileSize - trailer.getTrailerSize());
176     // Data index. We also read statistics about the block index written after
177     // the root level.
178     dataBlockIndexReader.readMultiLevelIndexRoot(
179       blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX), trailer.getDataIndexCount());
180 
181     if (findMidKey) {
182       byte[] midkey = dataBlockIndexReader.midkey();
183       Assert.assertNotNull("Midkey should not be null", midkey);
184     }
185 
186     // Meta index.
187     metaBlockIndexReader.readRootIndex(
188       blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX)
189         .getByteStream(), trailer.getMetaIndexCount());
190     // File info
191     HFile.FileInfo fileInfo = new HFile.FileInfo();
192     fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
193     byte [] keyValueFormatVersion = fileInfo.get(HFileWriterV3.KEY_VALUE_VERSION);
194     boolean includeMemstoreTS = keyValueFormatVersion != null &&
195       Bytes.toInt(keyValueFormatVersion) > 0;
196 
197     // Counters for the number of key/value pairs and the number of blocks
198     int entriesRead = 0;
199     int blocksRead = 0;
200     long memstoreTS = 0;
201 
202     DataBlockEncoder encoder = dataBlockEncoding.getEncoder();
203     long curBlockPos = scanBlocks(entryCount, context, keyValues, fsdis, trailer,
204       meta, blockReader, entriesRead, blocksRead, encoder);
205 
206 
207     // Meta blocks. We can scan until the load-on-open data offset (which is
208     // the root block index offset in version 2) because we are not testing
209     // intermediate-level index blocks here.
210 
211     int metaCounter = 0;
212     while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
213       LOG.info("Current offset: {}, scanning until {}", fsdis.getPos(),
214         trailer.getLoadOnOpenDataOffset());
215       HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false)
216         .unpack(context, blockReader);
217       Assert.assertEquals(BlockType.META, block.getBlockType());
218       Text t = new Text();
219       ByteBuffer buf = block.getBufferWithoutHeader();
220       if (Writables.getWritable(buf.array(), buf.arrayOffset(), buf.limit(), t) == null) {
221         throw new IOException("Failed to deserialize block " + this +
222           " into a " + t.getClass().getSimpleName());
223       }
224       Text expectedText =
225         (metaCounter == 0 ? new Text("Paris") : metaCounter == 1 ? new Text(
226           "Moscow") : new Text("Washington, D.C."));
227       Assert.assertEquals(expectedText, t);
228       LOG.info("Read meta block data: " + t);
229       ++metaCounter;
230       curBlockPos += block.getOnDiskSizeWithHeader();
231     }
232 
233     fsdis.close();
234   }
235 
236   private long scanBlocks(int entryCount, HFileContext context, List<KeyValue> keyValues,
237       FSDataInputStream fsdis, FixedFileTrailer trailer, HFileContext meta,
238       HFileBlock.FSReader blockReader, int entriesRead, int blocksRead,
239       DataBlockEncoder encoder) throws IOException {
240     // Scan blocks the way the reader would scan them
241     fsdis.seek(0);
242     long curBlockPos = 0;
243     while (curBlockPos <= trailer.getLastDataBlockOffset()) {
244       HFileBlockDecodingContext ctx = blockReader.getBlockDecodingContext();
245       HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false)
246         .unpack(context, blockReader);
247       Assert.assertEquals(BlockType.ENCODED_DATA, block.getBlockType());
248       ByteBuffer origBlock = block.getBufferReadOnly();
249       int pos = block.headerSize() + DataBlockEncoding.ID_SIZE;
250       origBlock.position(pos);
251       origBlock.limit(pos + block.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE);
252       ByteBuffer buf =  origBlock.slice();
253       DataBlockEncoder.EncodedSeeker seeker =
254         encoder.createSeeker(KeyValue.COMPARATOR,
255           encoder.newDataBlockDecodingContext(meta));
256       seeker.setCurrentBuffer(buf);
257       Cell res = seeker.getKeyValue();
258       KeyValue kv = keyValues.get(entriesRead);
259       Assert.assertEquals(0, KeyValue.COMPARATOR.compare(res, kv));
260       ++entriesRead;
261       while(seeker.next()) {
262         res = seeker.getKeyValue();
263         kv = keyValues.get(entriesRead);
264         Assert.assertEquals(0, KeyValue.COMPARATOR.compare(res, kv));
265         ++entriesRead;
266       }
267       ++blocksRead;
268       curBlockPos += block.getOnDiskSizeWithHeader();
269     }
270     LOG.info("Finished reading: entries={}, blocksRead = {}", entriesRead, blocksRead);
271     Assert.assertEquals(entryCount, entriesRead);
272     return curBlockPos;
273   }
274 
275   private void writeKeyValues(int entryCount, boolean useTags, HFile.Writer writer,
276       Random rand, List<KeyValue> keyValues) throws IOException {
277 
278     for (int i = 0; i < entryCount; ++i) {
279       byte[] keyBytes = RandomKeyValueUtil.randomOrderedKey(rand, i);
280 
281       // A random-length random value.
282       byte[] valueBytes = RandomKeyValueUtil.randomValue(rand);
283       KeyValue keyValue = null;
284       if (useTags) {
285         ArrayList<Tag> tags = new ArrayList<>();
286         for (int j = 0; j < 1 + rand.nextInt(4); j++) {
287           byte[] tagBytes = new byte[16];
288           rand.nextBytes(tagBytes);
289           tags.add(new Tag((byte) 1, tagBytes));
290         }
291         keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP,
292           valueBytes, tags);
293       } else {
294         keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP,
295           valueBytes);
296       }
297       writer.append(keyValue);
298       keyValues.add(keyValue);
299     }
300 
301     // Add in an arbitrary order. They will be sorted lexicographically by
302     // the key.
303     writer.appendMetaBlock("CAPITAL_OF_USA", new Text("Washington, D.C."));
304     writer.appendMetaBlock("CAPITAL_OF_RUSSIA", new Text("Moscow"));
305     writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));
306 
307     writer.close();
308   }
309 
310 }