/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 package org.apache.hadoop.hbase.io.hfile;
20
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.Collection;
26 import java.util.List;
27 import java.util.Random;
28
29 import org.apache.hadoop.conf.Configuration;
30 import org.apache.hadoop.fs.FSDataInputStream;
31 import org.apache.hadoop.fs.FileSystem;
32 import org.apache.hadoop.fs.Path;
33 import org.apache.hadoop.hbase.Cell;
34 import org.apache.hadoop.hbase.HBaseTestingUtility;
35 import org.apache.hadoop.hbase.HConstants;
36 import org.apache.hadoop.hbase.KeyValue;
37 import org.apache.hadoop.hbase.Tag;
38 import org.apache.hadoop.hbase.io.compress.Compression;
39 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
40 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
41 import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
42 import org.apache.hadoop.hbase.testclassification.IOTests;
43 import org.apache.hadoop.hbase.testclassification.SmallTests;
44 import org.apache.hadoop.hbase.util.Bytes;
45 import org.apache.hadoop.hbase.util.Writables;
46 import org.apache.hadoop.io.Text;
47 import org.junit.Assert;
48 import org.junit.Before;
49 import org.junit.Test;
50 import org.junit.experimental.categories.Category;
51 import org.junit.runner.RunWith;
52 import org.junit.runners.Parameterized;
53 import org.slf4j.Logger;
54 import org.slf4j.LoggerFactory;
55
56
57
58
59 @RunWith(Parameterized.class)
60 @Category({IOTests.class, SmallTests.class})
61 public class TestHFileWriterV3WithDataEncoders {
62
63 private static final Logger LOG =
64 LoggerFactory.getLogger(TestHFileWriterV3WithDataEncoders.class);
65
66 private static final HBaseTestingUtility TEST_UTIL =
67 new HBaseTestingUtility();
68
69 private Configuration conf;
70 private FileSystem fs;
71 private boolean useTags;
72 private DataBlockEncoding dataBlockEncoding;
73
74 public TestHFileWriterV3WithDataEncoders(boolean useTags,
75 DataBlockEncoding dataBlockEncoding) {
76 this.useTags = useTags;
77 this.dataBlockEncoding = dataBlockEncoding;
78 }
79
80 @Parameterized.Parameters
81 public static Collection<Object[]> parameters() {
82 DataBlockEncoding[] dataBlockEncodings = DataBlockEncoding.values();
83 Object[][] params = new Object[dataBlockEncodings.length * 2 - 2][];
84 int i = 0;
85 for (DataBlockEncoding dataBlockEncoding : dataBlockEncodings) {
86 if (dataBlockEncoding == DataBlockEncoding.NONE) {
87 continue;
88 }
89 params[i++] = new Object[]{false, dataBlockEncoding};
90 params[i++] = new Object[]{true, dataBlockEncoding};
91 }
92 return Arrays.asList(params);
93 }
94
95 @Before
96 public void setUp() throws IOException {
97 conf = TEST_UTIL.getConfiguration();
98 fs = FileSystem.get(conf);
99 }
100
101 @Test
102 public void testHFileFormatV3() throws IOException {
103 testHFileFormatV3Internals(useTags);
104 }
105
106 private void testHFileFormatV3Internals(boolean useTags) throws IOException {
107 Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "testHFileFormatV3");
108 final Compression.Algorithm compressAlgo = Compression.Algorithm.GZ;
109 final int entryCount = 10000;
110 writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, false, useTags);
111 }
112
113 @Test
114 public void testMidKeyInHFile() throws IOException{
115 testMidKeyInHFileInternals(useTags);
116 }
117
118 private void testMidKeyInHFileInternals(boolean useTags) throws IOException {
119 Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
120 "testMidKeyInHFile");
121 Compression.Algorithm compressAlgo = Compression.Algorithm.NONE;
122 int entryCount = 50000;
123 writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, true, useTags);
124 }
125
126 private void writeDataAndReadFromHFile(Path hfilePath,
127 Compression.Algorithm compressAlgo, int entryCount, boolean findMidKey, boolean useTags)
128 throws IOException {
129
130 HFileContext context = new HFileContextBuilder()
131 .withBlockSize(4096)
132 .withIncludesTags(useTags)
133 .withDataBlockEncoding(dataBlockEncoding)
134 .withCompression(compressAlgo).build();
135 CacheConfig cacheConfig = new CacheConfig(conf);
136 HFileWriterV3 writer = (HFileWriterV3)
137 new HFileWriterV3.WriterFactoryV3(conf, new CacheConfig(conf))
138 .withPath(fs, hfilePath)
139 .withFileContext(context)
140 .create();
141
142 Random rand = new Random(9713312);
143 List<KeyValue> keyValues = new ArrayList<>(entryCount);
144
145 writeKeyValues(entryCount, useTags, writer, rand, keyValues);
146
147
148 FSDataInputStream fsdis = fs.open(hfilePath);
149
150 long fileSize = fs.getFileStatus(hfilePath).getLen();
151 FixedFileTrailer trailer =
152 FixedFileTrailer.readFromStream(fsdis, fileSize);
153
154 Assert.assertEquals(3, trailer.getMajorVersion());
155 Assert.assertEquals(entryCount, trailer.getEntryCount());
156 HFileContext meta = new HFileContextBuilder()
157 .withCompression(compressAlgo)
158 .withIncludesMvcc(true)
159 .withIncludesTags(useTags)
160 .withDataBlockEncoding(dataBlockEncoding)
161 .withHBaseCheckSum(true).build();
162 HFileBlock.FSReader blockReader =
163 new HFileBlock.FSReaderImpl(fsdis, fileSize, meta);
164
165 KeyValue.KVComparator comparator = trailer.createComparator();
166 HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
167 new HFileBlockIndex.BlockIndexReader(comparator,
168 trailer.getNumDataIndexLevels());
169 HFileBlockIndex.BlockIndexReader metaBlockIndexReader =
170 new HFileBlockIndex.BlockIndexReader(
171 KeyValue.RAW_COMPARATOR, 1);
172
173 HFileBlock.BlockIterator blockIter = blockReader.blockRange(
174 trailer.getLoadOnOpenDataOffset(),
175 fileSize - trailer.getTrailerSize());
176
177
178 dataBlockIndexReader.readMultiLevelIndexRoot(
179 blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX), trailer.getDataIndexCount());
180
181 if (findMidKey) {
182 byte[] midkey = dataBlockIndexReader.midkey();
183 Assert.assertNotNull("Midkey should not be null", midkey);
184 }
185
186
187 metaBlockIndexReader.readRootIndex(
188 blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX)
189 .getByteStream(), trailer.getMetaIndexCount());
190
191 HFile.FileInfo fileInfo = new HFile.FileInfo();
192 fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
193 byte [] keyValueFormatVersion = fileInfo.get(HFileWriterV3.KEY_VALUE_VERSION);
194 boolean includeMemstoreTS = keyValueFormatVersion != null &&
195 Bytes.toInt(keyValueFormatVersion) > 0;
196
197
198 int entriesRead = 0;
199 int blocksRead = 0;
200 long memstoreTS = 0;
201
202 DataBlockEncoder encoder = dataBlockEncoding.getEncoder();
203 long curBlockPos = scanBlocks(entryCount, context, keyValues, fsdis, trailer,
204 meta, blockReader, entriesRead, blocksRead, encoder);
205
206
207
208
209
210
211 int metaCounter = 0;
212 while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
213 LOG.info("Current offset: {}, scanning until {}", fsdis.getPos(),
214 trailer.getLoadOnOpenDataOffset());
215 HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false)
216 .unpack(context, blockReader);
217 Assert.assertEquals(BlockType.META, block.getBlockType());
218 Text t = new Text();
219 ByteBuffer buf = block.getBufferWithoutHeader();
220 if (Writables.getWritable(buf.array(), buf.arrayOffset(), buf.limit(), t) == null) {
221 throw new IOException("Failed to deserialize block " + this +
222 " into a " + t.getClass().getSimpleName());
223 }
224 Text expectedText =
225 (metaCounter == 0 ? new Text("Paris") : metaCounter == 1 ? new Text(
226 "Moscow") : new Text("Washington, D.C."));
227 Assert.assertEquals(expectedText, t);
228 LOG.info("Read meta block data: " + t);
229 ++metaCounter;
230 curBlockPos += block.getOnDiskSizeWithHeader();
231 }
232
233 fsdis.close();
234 }
235
236 private long scanBlocks(int entryCount, HFileContext context, List<KeyValue> keyValues,
237 FSDataInputStream fsdis, FixedFileTrailer trailer, HFileContext meta,
238 HFileBlock.FSReader blockReader, int entriesRead, int blocksRead,
239 DataBlockEncoder encoder) throws IOException {
240
241 fsdis.seek(0);
242 long curBlockPos = 0;
243 while (curBlockPos <= trailer.getLastDataBlockOffset()) {
244 HFileBlockDecodingContext ctx = blockReader.getBlockDecodingContext();
245 HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false)
246 .unpack(context, blockReader);
247 Assert.assertEquals(BlockType.ENCODED_DATA, block.getBlockType());
248 ByteBuffer origBlock = block.getBufferReadOnly();
249 int pos = block.headerSize() + DataBlockEncoding.ID_SIZE;
250 origBlock.position(pos);
251 origBlock.limit(pos + block.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE);
252 ByteBuffer buf = origBlock.slice();
253 DataBlockEncoder.EncodedSeeker seeker =
254 encoder.createSeeker(KeyValue.COMPARATOR,
255 encoder.newDataBlockDecodingContext(meta));
256 seeker.setCurrentBuffer(buf);
257 Cell res = seeker.getKeyValue();
258 KeyValue kv = keyValues.get(entriesRead);
259 Assert.assertEquals(0, KeyValue.COMPARATOR.compare(res, kv));
260 ++entriesRead;
261 while(seeker.next()) {
262 res = seeker.getKeyValue();
263 kv = keyValues.get(entriesRead);
264 Assert.assertEquals(0, KeyValue.COMPARATOR.compare(res, kv));
265 ++entriesRead;
266 }
267 ++blocksRead;
268 curBlockPos += block.getOnDiskSizeWithHeader();
269 }
270 LOG.info("Finished reading: entries={}, blocksRead = {}", entriesRead, blocksRead);
271 Assert.assertEquals(entryCount, entriesRead);
272 return curBlockPos;
273 }
274
275 private void writeKeyValues(int entryCount, boolean useTags, HFile.Writer writer,
276 Random rand, List<KeyValue> keyValues) throws IOException {
277
278 for (int i = 0; i < entryCount; ++i) {
279 byte[] keyBytes = RandomKeyValueUtil.randomOrderedKey(rand, i);
280
281
282 byte[] valueBytes = RandomKeyValueUtil.randomValue(rand);
283 KeyValue keyValue = null;
284 if (useTags) {
285 ArrayList<Tag> tags = new ArrayList<>();
286 for (int j = 0; j < 1 + rand.nextInt(4); j++) {
287 byte[] tagBytes = new byte[16];
288 rand.nextBytes(tagBytes);
289 tags.add(new Tag((byte) 1, tagBytes));
290 }
291 keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP,
292 valueBytes, tags);
293 } else {
294 keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP,
295 valueBytes);
296 }
297 writer.append(keyValue);
298 keyValues.add(keyValue);
299 }
300
301
302
303 writer.appendMetaBlock("CAPITAL_OF_USA", new Text("Washington, D.C."));
304 writer.appendMetaBlock("CAPITAL_OF_RUSSIA", new Text("Moscow"));
305 writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));
306
307 writer.close();
308 }
309
310 }