View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.io.hfile;
21  
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertTrue;
25  
26  import java.io.ByteArrayInputStream;
27  import java.io.DataInputStream;
28  import java.io.IOException;
29  import java.nio.ByteBuffer;
30  import java.util.ArrayList;
31  import java.util.Collection;
32  import java.util.List;
33  import java.util.Random;
34  
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.fs.FSDataInputStream;
39  import org.apache.hadoop.fs.FileSystem;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.hbase.HBaseTestingUtility;
42  import org.apache.hadoop.hbase.HConstants;
43  import org.apache.hadoop.hbase.KeyValue;
44  import org.apache.hadoop.hbase.KeyValue.KVComparator;
45  import org.apache.hadoop.hbase.testclassification.SmallTests;
46  import org.apache.hadoop.hbase.Tag;
47  import org.apache.hadoop.hbase.io.compress.Compression;
48  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
49  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
50  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
51  import org.apache.hadoop.hbase.util.Bytes;
52  import org.apache.hadoop.hbase.util.Writables;
53  import org.apache.hadoop.io.Text;
54  import org.apache.hadoop.io.WritableUtils;
55  import org.junit.Before;
56  import org.junit.Test;
57  import org.junit.experimental.categories.Category;
58  import org.junit.runner.RunWith;
59  import org.junit.runners.Parameterized;
60  import org.junit.runners.Parameterized.Parameters;
61  
62  /**
63   * Testing writing a version 3 {@link HFile}. This is a low-level test written
64   * during the development of {@link HFileWriterV3}.
65   */
66  @RunWith(Parameterized.class)
67  @Category(SmallTests.class)
68  public class TestHFileWriterV3 {
69  
70    private static final Log LOG = LogFactory.getLog(TestHFileWriterV3.class);
71  
72    private static final HBaseTestingUtility TEST_UTIL =
73        new HBaseTestingUtility();
74  
75    private Configuration conf;
76    private FileSystem fs;
77    private boolean useTags;
78    public TestHFileWriterV3(boolean useTags) {
79      this.useTags = useTags;
80    }
81    @Parameters
82    public static Collection<Object[]> parameters() {
83      return HBaseTestingUtility.BOOLEAN_PARAMETERIZED;
84    }
85  
86    @Before
87    public void setUp() throws IOException {
88      conf = TEST_UTIL.getConfiguration();
89      fs = FileSystem.get(conf);
90    }
91  
92    @Test
93    public void testHFileFormatV3() throws IOException {
94      testHFileFormatV3Internals(useTags);
95    }
96  
97    private void testHFileFormatV3Internals(boolean useTags) throws IOException {
98      Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "testHFileFormatV3");
99      final Compression.Algorithm compressAlgo = Compression.Algorithm.GZ;
100     final int entryCount = 10000;
101     writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, false, useTags);
102   }
103 
104   @Test
105   public void testMidKeyInHFile() throws IOException{
106     testMidKeyInHFileInternals(useTags);
107   }
108 
109   private void testMidKeyInHFileInternals(boolean useTags) throws IOException {
110     Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
111     "testMidKeyInHFile");
112     Compression.Algorithm compressAlgo = Compression.Algorithm.NONE;
113     int entryCount = 50000;
114     writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, true, useTags);
115   }
116 
117   private void writeDataAndReadFromHFile(Path hfilePath,
118       Algorithm compressAlgo, int entryCount, boolean findMidKey, boolean useTags) throws IOException {
119     HFileContext context = new HFileContextBuilder()
120       .withBlockSize(4096)
121       .withIncludesTags(useTags)
122       .withDataBlockEncoding(DataBlockEncoding.NONE)
123       .withCompression(compressAlgo).build();
124     HFileWriterV3 writer = (HFileWriterV3)
125         new HFileWriterV3.WriterFactoryV3(conf, new CacheConfig(conf))
126             .withPath(fs, hfilePath)
127             .withFileContext(context)
128             .withComparator(KeyValue.COMPARATOR)
129             .create();
130 
131     Random rand = new Random(9713312); // Just a fixed seed.
132     List<KeyValue> keyValues = new ArrayList<KeyValue>(entryCount);
133 
134     for (int i = 0; i < entryCount; ++i) {
135       byte[] keyBytes = TestHFileWriterV2.randomOrderedKey(rand, i);
136 
137       // A random-length random value.
138       byte[] valueBytes = TestHFileWriterV2.randomValue(rand);
139       KeyValue keyValue = null;
140       if (useTags) {
141         ArrayList<Tag> tags = new ArrayList<Tag>();
142         for (int j = 0; j < 1 + rand.nextInt(4); j++) {
143           byte[] tagBytes = new byte[16];
144           rand.nextBytes(tagBytes);
145           tags.add(new Tag((byte) 1, tagBytes));
146         }
147         keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP,
148             valueBytes, tags);
149       } else {
150         keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP,
151             valueBytes);
152       }
153       writer.append(keyValue);
154       keyValues.add(keyValue);
155     }
156 
157     // Add in an arbitrary order. They will be sorted lexicographically by
158     // the key.
159     writer.appendMetaBlock("CAPITAL_OF_USA", new Text("Washington, D.C."));
160     writer.appendMetaBlock("CAPITAL_OF_RUSSIA", new Text("Moscow"));
161     writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));
162 
163     writer.close();
164 
165 
166     FSDataInputStream fsdis = fs.open(hfilePath);
167 
168     long fileSize = fs.getFileStatus(hfilePath).getLen();
169     FixedFileTrailer trailer =
170         FixedFileTrailer.readFromStream(fsdis, fileSize);
171 
172     assertEquals(3, trailer.getMajorVersion());
173     assertEquals(entryCount, trailer.getEntryCount());
174     HFileContext meta = new HFileContextBuilder()
175       .withCompression(compressAlgo)
176       .withIncludesMvcc(false)
177       .withIncludesTags(useTags)
178       .withDataBlockEncoding(DataBlockEncoding.NONE)
179       .withHBaseCheckSum(true).build();
180     HFileBlock.FSReader blockReader =
181         new HFileBlock.FSReaderImpl(fsdis, fileSize, meta);
182  // Comparator class name is stored in the trailer in version 2.
183     KVComparator comparator = trailer.createComparator();
184     HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
185         new HFileBlockIndex.BlockIndexReader(comparator,
186             trailer.getNumDataIndexLevels());
187     HFileBlockIndex.BlockIndexReader metaBlockIndexReader =
188         new HFileBlockIndex.BlockIndexReader(
189             KeyValue.RAW_COMPARATOR, 1);
190 
191     HFileBlock.BlockIterator blockIter = blockReader.blockRange(
192         trailer.getLoadOnOpenDataOffset(),
193         fileSize - trailer.getTrailerSize());
194     // Data index. We also read statistics about the block index written after
195     // the root level.
196     dataBlockIndexReader.readMultiLevelIndexRoot(
197         blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX), trailer.getDataIndexCount());
198 
199     if (findMidKey) {
200       byte[] midkey = dataBlockIndexReader.midkey();
201       assertNotNull("Midkey should not be null", midkey);
202     }
203 
204     // Meta index.
205     metaBlockIndexReader.readRootIndex(
206         blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX)
207           .getByteStream(), trailer.getMetaIndexCount());
208     // File info
209     FileInfo fileInfo = new FileInfo();
210     fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
211     byte [] keyValueFormatVersion = fileInfo.get(
212         HFileWriterV3.KEY_VALUE_VERSION);
213     boolean includeMemstoreTS = keyValueFormatVersion != null &&
214         Bytes.toInt(keyValueFormatVersion) > 0;
215 
216     // Counters for the number of key/value pairs and the number of blocks
217     int entriesRead = 0;
218     int blocksRead = 0;
219     long memstoreTS = 0;
220 
221     // Scan blocks the way the reader would scan them
222     fsdis.seek(0);
223     long curBlockPos = 0;
224     while (curBlockPos <= trailer.getLastDataBlockOffset()) {
225       HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false)
226         .unpack(context, blockReader);
227       assertEquals(BlockType.DATA, block.getBlockType());
228       ByteBuffer buf = block.getBufferWithoutHeader();
229       int keyLen = -1;
230       while (buf.hasRemaining()) {
231 
232         keyLen = buf.getInt();
233 
234         int valueLen = buf.getInt();
235 
236         byte[] key = new byte[keyLen];
237         buf.get(key);
238 
239         byte[] value = new byte[valueLen];
240         buf.get(value);
241         byte[] tagValue = null;
242         if (useTags) {
243           int tagLen = ((buf.get() & 0xff) << 8) ^ (buf.get() & 0xff);
244           tagValue = new byte[tagLen];
245           buf.get(tagValue);
246         }
247 
248         if (includeMemstoreTS) {
249           ByteArrayInputStream byte_input = new ByteArrayInputStream(buf.array(), buf.arrayOffset()
250               + buf.position(), buf.remaining());
251           DataInputStream data_input = new DataInputStream(byte_input);
252 
253           memstoreTS = WritableUtils.readVLong(data_input);
254           buf.position(buf.position() + WritableUtils.getVIntSize(memstoreTS));
255         }
256 
257         // A brute-force check to see that all keys and values are correct.
258         assertTrue(Bytes.compareTo(key, keyValues.get(entriesRead).getKey()) == 0);
259         assertTrue(Bytes.compareTo(value, keyValues.get(entriesRead).getValue()) == 0);
260         if (useTags) {
261           assertNotNull(tagValue);
262           KeyValue tkv =  keyValues.get(entriesRead);
263           assertEquals(tagValue.length, tkv.getTagsLength());
264           assertTrue(Bytes.compareTo(tagValue, 0, tagValue.length, tkv.getTagsArray(),
265               tkv.getTagsOffset(), tkv.getTagsLength()) == 0);
266         }
267         ++entriesRead;
268       }
269       ++blocksRead;
270       curBlockPos += block.getOnDiskSizeWithHeader();
271     }
272     LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead="
273         + blocksRead);
274     assertEquals(entryCount, entriesRead);
275 
276     // Meta blocks. We can scan until the load-on-open data offset (which is
277     // the root block index offset in version 2) because we are not testing
278     // intermediate-level index blocks here.
279 
280     int metaCounter = 0;
281     while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
282       LOG.info("Current offset: " + fsdis.getPos() + ", scanning until " +
283           trailer.getLoadOnOpenDataOffset());
284       HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false)
285         .unpack(context, blockReader);
286       assertEquals(BlockType.META, block.getBlockType());
287       Text t = new Text();
288       ByteBuffer buf = block.getBufferWithoutHeader();
289       if (Writables.getWritable(buf.array(), buf.arrayOffset(), buf.limit(), t) == null) {
290         throw new IOException("Failed to deserialize block " + this +
291             " into a " + t.getClass().getSimpleName());
292       }
293       Text expectedText =
294           (metaCounter == 0 ? new Text("Paris") : metaCounter == 1 ? new Text(
295               "Moscow") : new Text("Washington, D.C."));
296       assertEquals(expectedText, t);
297       LOG.info("Read meta block data: " + t);
298       ++metaCounter;
299       curBlockPos += block.getOnDiskSizeWithHeader();
300     }
301 
302     fsdis.close();
303   }
304 
305 }
306