1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.hadoop.hbase.io.encoding;
18
19 import java.io.DataInputStream;
20 import java.io.DataOutputStream;
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23
24 import org.apache.hadoop.hbase.Cell;
25 import org.apache.hadoop.hbase.CellUtil;
26 import org.apache.hadoop.hbase.KeyValue;
27 import org.apache.hadoop.hbase.KeyValue.KVComparator;
28 import org.apache.hadoop.hbase.KeyValueUtil;
29 import org.apache.hadoop.hbase.classification.InterfaceAudience;
30 import org.apache.hadoop.hbase.util.ByteBufferUtils;
31 import org.apache.hadoop.hbase.util.Bytes;
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58 @InterfaceAudience.Private
59 public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder {
60 final int MASK_TIMESTAMP_LENGTH = (1 << 0) | (1 << 1) | (1 << 2);
61 final int SHIFT_TIMESTAMP_LENGTH = 0;
62 final int FLAG_SAME_KEY_LENGTH = 1 << 3;
63 final int FLAG_SAME_VALUE_LENGTH = 1 << 4;
64 final int FLAG_SAME_TYPE = 1 << 5;
65 final int FLAG_SAME_VALUE = 1 << 6;
66
67 private static class FastDiffCompressionState extends CompressionState {
68 byte[] timestamp = new byte[KeyValue.TIMESTAMP_SIZE];
69 int prevTimestampOffset;
70
71 @Override
72 protected void readTimestamp(ByteBuffer in) {
73 in.get(timestamp);
74 }
75
76 @Override
77 void copyFrom(CompressionState state) {
78 super.copyFrom(state);
79 FastDiffCompressionState state2 = (FastDiffCompressionState) state;
80 System.arraycopy(state2.timestamp, 0, timestamp, 0,
81 KeyValue.TIMESTAMP_SIZE);
82 prevTimestampOffset = state2.prevTimestampOffset;
83 }
84
85
86
87
88
89
90
91 private void decompressFirstKV(ByteBuffer out, DataInputStream in)
92 throws IOException {
93 int kvPos = out.position();
94 out.putInt(keyLength);
95 out.putInt(valueLength);
96 prevTimestampOffset = out.position() + keyLength -
97 KeyValue.TIMESTAMP_TYPE_SIZE;
98 ByteBufferUtils.copyFromStreamToBuffer(out, in, keyLength + valueLength);
99 rowLength = out.getShort(kvPos + KeyValue.ROW_OFFSET);
100 familyLength = out.get(kvPos + KeyValue.ROW_OFFSET +
101 KeyValue.ROW_LENGTH_SIZE + rowLength);
102 type = out.get(prevTimestampOffset + KeyValue.TIMESTAMP_SIZE);
103 }
104
105 }
106
107 private int findCommonTimestampPrefix(byte[] curTsBuf, byte[] prevTsBuf) {
108 int commonPrefix = 0;
109 while (commonPrefix < (KeyValue.TIMESTAMP_SIZE - 1)
110 && curTsBuf[commonPrefix] == prevTsBuf[commonPrefix]) {
111 commonPrefix++;
112 }
113 return commonPrefix;
114 }
115
/**
 * Decodes the next key/value from {@code source} and appends it to
 * {@code out} as two length ints followed by the key bytes and the value
 * bytes. Everything that the flag byte marks as unchanged is copied from the
 * previously decoded key/value, which is still present in {@code out} at
 * {@code state.prevOffset}; the rest is read from the stream.
 *
 * @param source stream positioned at the flag byte of the next encoded key/value
 * @param out buffer that receives the decoded key/value at its current position
 * @param state decoder state describing the previous key/value; updated in place
 * @throws IOException if reading from {@code source} fails
 * @throws EncoderBufferTooSmallException declared for {@code ensureSpace}
 *         (presumably thrown when {@code out} cannot hold the key/value)
 */
private void uncompressSingleKeyValue(DataInputStream source,
    ByteBuffer out, FastDiffCompressionState state)
    throws IOException, EncoderBufferTooSmallException {
  byte flag = source.readByte();
  // Remember the previous key length: it is needed further down to locate the
  // previous value, and state.keyLength may be overwritten right below.
  int prevKeyLength = state.keyLength;

  // A length is present in the stream only when it differs from the previous one.
  if ((flag & FLAG_SAME_KEY_LENGTH) == 0) {
    state.keyLength = ByteBufferUtils.readCompressedInt(source);
  }
  if ((flag & FLAG_SAME_VALUE_LENGTH) == 0) {
    state.valueLength = ByteBufferUtils.readCompressedInt(source);
  }
  // Number of leading key bytes shared with the previous key.
  int commonLength = ByteBufferUtils.readCompressedInt(source);

  ensureSpace(out, state.keyLength + state.valueLength + KeyValue.ROW_OFFSET);

  int kvPos = out.position();

  if (!state.isFirst()) {
    // Copy the shared prefix, together with as much of the two-int length
    // header as is unchanged, out of the previous key/value.
    int common;
    int prevOffset;

    if ((flag & FLAG_SAME_VALUE_LENGTH) == 0) {
      // Value length changed: write both lengths, copy only the key prefix.
      out.putInt(state.keyLength);
      out.putInt(state.valueLength);
      prevOffset = state.prevOffset + KeyValue.ROW_OFFSET;
      common = commonLength;
    } else {
      if ((flag & FLAG_SAME_KEY_LENGTH) != 0) {
        // Both lengths unchanged: copy the whole header plus the key prefix.
        prevOffset = state.prevOffset;
        common = commonLength + KeyValue.ROW_OFFSET;
      } else {
        // Only the key length changed: write it, then copy the value length
        // field plus the key prefix.
        out.putInt(state.keyLength);
        prevOffset = state.prevOffset + KeyValue.KEY_LENGTH_SIZE;
        common = commonLength + KeyValue.KEY_LENGTH_SIZE;
      }
    }

    ByteBufferUtils.copyFromBufferToBuffer(out, out, prevOffset, common);

    // Read the part of the key that precedes the timestamp.
    int keyRestLength;
    if (commonLength < state.rowLength + KeyValue.ROW_LENGTH_SIZE) {
      // The shared prefix ends inside the previous row (length field + row
      // bytes). The stream carries the rest of the row and then the
      // qualifier; the column family is taken from the previous key/value.
      int rowWithSizeLength;
      int rowRestLength;

      // Find out the new row length.
      if (commonLength < KeyValue.ROW_LENGTH_SIZE) {
        // The row-length field itself is not fully covered by the prefix:
        // read its remaining bytes from the stream first.
        ByteBufferUtils.copyFromStreamToBuffer(out, source,
            KeyValue.ROW_LENGTH_SIZE - commonLength);

        rowWithSizeLength = out.getShort(out.position() -
            KeyValue.ROW_LENGTH_SIZE) + KeyValue.ROW_LENGTH_SIZE;
        rowRestLength = rowWithSizeLength - KeyValue.ROW_LENGTH_SIZE;
      } else {
        // The row-length field was part of the copied prefix; read it back.
        rowWithSizeLength = out.getShort(kvPos + KeyValue.ROW_OFFSET) +
            KeyValue.ROW_LENGTH_SIZE;
        rowRestLength = rowWithSizeLength - commonLength;
      }

      // Remaining row bytes come from the stream.
      ByteBufferUtils.copyFromStreamToBuffer(out, source, rowRestLength);

      // Family length byte + family bytes are copied from the previous
      // key/value; state.rowLength still holds the previous row length here.
      ByteBufferUtils.copyFromBufferToBuffer(out, out,
          state.prevOffset + KeyValue.ROW_OFFSET + KeyValue.ROW_LENGTH_SIZE
              + state.rowLength, state.familyLength
              + KeyValue.FAMILY_LENGTH_SIZE);
      state.rowLength = (short) (rowWithSizeLength -
          KeyValue.ROW_LENGTH_SIZE);

      // What remains of the key before the timestamp/type tail.
      keyRestLength = state.keyLength - rowWithSizeLength -
          state.familyLength -
          (KeyValue.FAMILY_LENGTH_SIZE + KeyValue.TIMESTAMP_TYPE_SIZE);
    } else {
      // The shared prefix covers at least the whole row; everything between
      // the prefix and the timestamp/type tail comes from the stream.
      keyRestLength = state.keyLength - commonLength -
          KeyValue.TIMESTAMP_TYPE_SIZE;
    }

    ByteBufferUtils.copyFromStreamToBuffer(out, source, keyRestLength);

    // Timestamp: shared leading bytes from the previous timestamp, the rest
    // from the stream.
    int prefixTimestamp =
        (flag & MASK_TIMESTAMP_LENGTH) >>> SHIFT_TIMESTAMP_LENGTH;
    ByteBufferUtils.copyFromBufferToBuffer(out, out,
        state.prevTimestampOffset, prefixTimestamp);
    // From here on prevTimestampOffset points at THIS key/value's timestamp.
    state.prevTimestampOffset = out.position() - prefixTimestamp;
    ByteBufferUtils.copyFromStreamToBuffer(out, source,
        KeyValue.TIMESTAMP_SIZE - prefixTimestamp);

    // Type byte and value.
    if ((flag & FLAG_SAME_TYPE) != 0) {
      out.put(state.type);
      if ((flag & FLAG_SAME_VALUE) != 0) {
        // Same value: copy it from just behind the previous key.
        ByteBufferUtils.copyFromBufferToBuffer(out, out, state.prevOffset +
            KeyValue.ROW_OFFSET + prevKeyLength, state.valueLength);
      } else {
        ByteBufferUtils.copyFromStreamToBuffer(out, source,
            state.valueLength);
      }
    } else {
      if ((flag & FLAG_SAME_VALUE) != 0) {
        // New type from the stream, value from the previous key/value.
        ByteBufferUtils.copyFromStreamToBuffer(out, source,
            KeyValue.TYPE_SIZE);
        ByteBufferUtils.copyFromBufferToBuffer(out, out, state.prevOffset +
            KeyValue.ROW_OFFSET + prevKeyLength, state.valueLength);
      } else {
        // Type and value are adjacent in the stream; copy them in one go.
        ByteBufferUtils.copyFromStreamToBuffer(out, source,
            state.valueLength + KeyValue.TYPE_SIZE);
      }
      // Remember the new type; it sits right behind the timestamp just written.
      state.type = out.get(state.prevTimestampOffset +
          KeyValue.TIMESTAMP_SIZE);
    }
  } else {
    // First key/value: it is stored uncompressed after the header fields.
    state.decompressFirstKV(out, source);
  }

  // The key/value just written becomes the reference for the next call.
  state.prevOffset = kvPos;
}
240
241 @Override
242 public int internalEncode(Cell cell, HFileBlockDefaultEncodingContext encodingContext,
243 DataOutputStream out) throws IOException {
244 EncodingState state = encodingContext.getEncodingState();
245 int size = compressSingleKeyValue(out, cell, state.prevCell);
246 size += afterEncodingKeyValue(cell, out, encodingContext);
247 state.prevCell = cell;
248 return size;
249 }
250
251 private int compressSingleKeyValue(DataOutputStream out, Cell cell, Cell prevCell)
252 throws IOException {
253 int flag = 0;
254 int kLength = KeyValueUtil.keyLength(cell);
255 int vLength = cell.getValueLength();
256
257 if (prevCell == null) {
258
259 out.write(flag);
260 ByteBufferUtils.putCompressedInt(out, kLength);
261 ByteBufferUtils.putCompressedInt(out, vLength);
262 ByteBufferUtils.putCompressedInt(out, 0);
263 CellUtil.writeFlatKey(cell, out);
264
265 out.write(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
266 } else {
267 int preKeyLength = KeyValueUtil.keyLength(prevCell);
268 int preValLength = prevCell.getValueLength();
269
270 int commonPrefix = CellUtil.findCommonPrefixInFlatKey(cell, prevCell, true, false);
271
272 if (kLength == preKeyLength) {
273 flag |= FLAG_SAME_KEY_LENGTH;
274 }
275 if (vLength == prevCell.getValueLength()) {
276 flag |= FLAG_SAME_VALUE_LENGTH;
277 }
278 if (cell.getTypeByte() == prevCell.getTypeByte()) {
279 flag |= FLAG_SAME_TYPE;
280 }
281
282 byte[] curTsBuf = Bytes.toBytes(cell.getTimestamp());
283 int commonTimestampPrefix = findCommonTimestampPrefix(curTsBuf,
284 Bytes.toBytes(prevCell.getTimestamp()));
285
286 flag |= commonTimestampPrefix << SHIFT_TIMESTAMP_LENGTH;
287
288
289
290 if (vLength == preValLength
291 && Bytes.equals(cell.getValueArray(), cell.getValueOffset(), vLength,
292 prevCell.getValueArray(), prevCell.getValueOffset(), preValLength)) {
293 flag |= FLAG_SAME_VALUE;
294 }
295
296 out.write(flag);
297 if ((flag & FLAG_SAME_KEY_LENGTH) == 0) {
298 ByteBufferUtils.putCompressedInt(out, kLength);
299 }
300 if ((flag & FLAG_SAME_VALUE_LENGTH) == 0) {
301 ByteBufferUtils.putCompressedInt(out, vLength);
302 }
303 ByteBufferUtils.putCompressedInt(out, commonPrefix);
304 short rLen = cell.getRowLength();
305 if (commonPrefix < rLen + KeyValue.ROW_LENGTH_SIZE) {
306
307
308 CellUtil.writeRowKeyExcludingCommon(cell, rLen, commonPrefix, out);
309 out.write(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
310 } else {
311
312
313
314
315 int commonQualPrefix = commonPrefix - (rLen + KeyValue.ROW_LENGTH_SIZE)
316 - (cell.getFamilyLength() + KeyValue.FAMILY_LENGTH_SIZE);
317 out.write(cell.getQualifierArray(), cell.getQualifierOffset() + commonQualPrefix,
318 cell.getQualifierLength() - commonQualPrefix);
319 }
320
321 out.write(curTsBuf, commonTimestampPrefix, KeyValue.TIMESTAMP_SIZE - commonTimestampPrefix);
322
323
324 if ((flag & FLAG_SAME_TYPE) == 0) {
325 out.write(cell.getTypeByte());
326 }
327
328
329 if ((flag & FLAG_SAME_VALUE) == 0) {
330 out.write(cell.getValueArray(), cell.getValueOffset(), vLength);
331 }
332 }
333 return kLength + vLength + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE;
334 }
335
336 @Override
337 protected ByteBuffer internalDecodeKeyValues(DataInputStream source, int allocateHeaderLength,
338 int skipLastBytes, HFileBlockDefaultDecodingContext decodingCtx) throws IOException {
339 int decompressedSize = source.readInt();
340 ByteBuffer buffer = ByteBuffer.allocate(decompressedSize +
341 allocateHeaderLength);
342 buffer.position(allocateHeaderLength);
343 FastDiffCompressionState state = new FastDiffCompressionState();
344 while (source.available() > skipLastBytes) {
345 uncompressSingleKeyValue(source, buffer, state);
346 afterDecodingKeyValue(source, buffer, decodingCtx);
347 }
348
349 if (source.available() != skipLastBytes) {
350 throw new IllegalStateException("Read too much bytes.");
351 }
352
353 return buffer;
354 }
355
356 @Override
357 public ByteBuffer getFirstKeyInBlock(ByteBuffer block) {
358 block.mark();
359 block.position(Bytes.SIZEOF_INT + Bytes.SIZEOF_BYTE);
360 int keyLength = ByteBufferUtils.readCompressedInt(block);
361 ByteBufferUtils.readCompressedInt(block);
362 ByteBufferUtils.readCompressedInt(block);
363 int pos = block.position();
364 block.reset();
365 ByteBuffer dup = block.duplicate();
366 dup.position(pos);
367 dup.limit(pos + keyLength);
368 return dup.slice();
369 }
370
371 @Override
372 public String toString() {
373 return FastDiffDeltaEncoder.class.getSimpleName();
374 }
375
376 protected static class FastDiffSeekerState extends SeekerState {
377 private byte[] prevTimestampAndType =
378 new byte[KeyValue.TIMESTAMP_TYPE_SIZE];
379 private int rowLengthWithSize;
380 private int familyLengthWithSize;
381
382 public FastDiffSeekerState() {
383 super();
384 }
385
386 @Override
387 protected void copyFromNext(SeekerState that) {
388 super.copyFromNext(that);
389 FastDiffSeekerState other = (FastDiffSeekerState) that;
390 System.arraycopy(other.prevTimestampAndType, 0,
391 prevTimestampAndType, 0,
392 KeyValue.TIMESTAMP_TYPE_SIZE);
393 rowLengthWithSize = other.rowLengthWithSize;
394 familyLengthWithSize = other.familyLengthWithSize;
395 }
396 }
397
398 @Override
399 public EncodedSeeker createSeeker(KVComparator comparator,
400 final HFileBlockDecodingContext decodingCtx) {
401 return new FastDiffSeekerStateBufferedEncodedSeeker(comparator, decodingCtx);
402 }
403
/**
 * Seeker over a fast-diff encoded block. It decodes one key/value at a time
 * into {@code current}, reusing the key bytes already present in
 * {@code current.keyBuffer} for everything the flag byte marks as unchanged.
 * <p>
 * {@code current}, {@code currentBuffer}, {@code includesTags()},
 * {@code decodeTags()} and {@code includesMvcc()} are not declared in this
 * class; presumably they are inherited from {@code BufferedEncodedSeeker}.
 */
private class FastDiffSeekerStateBufferedEncodedSeeker
    extends BufferedEncodedSeeker<FastDiffSeekerState> {

  private FastDiffSeekerStateBufferedEncodedSeeker(KVComparator comparator,
      HFileBlockDecodingContext decodingCtx) {
    super(comparator, decodingCtx);
  }

  /**
   * Decodes the key/value at the current position of {@code currentBuffer}
   * into {@code current}.
   *
   * @param isFirst {@code true} for the first key/value of a block, whose key
   *        is stored in full; {@code false} to decode relative to the key
   *        currently held in {@code current.keyBuffer}
   */
  private void decode(boolean isFirst) {
    byte flag = currentBuffer.get();
    if ((flag & FLAG_SAME_KEY_LENGTH) == 0) {
      if (!isFirst) {
        // The key length is about to change, so the timestamp/type tail will
        // land at a different offset. Save the old tail before keyLength is
        // overwritten; shared timestamp bytes and the type are restored from
        // this copy further down.
        System.arraycopy(current.keyBuffer,
            current.keyLength - current.prevTimestampAndType.length,
            current.prevTimestampAndType, 0,
            current.prevTimestampAndType.length);
      }
      current.keyLength = ByteBufferUtils.readCompressedInt(currentBuffer);
    }
    if ((flag & FLAG_SAME_VALUE_LENGTH) == 0) {
      current.valueLength =
          ByteBufferUtils.readCompressedInt(currentBuffer);
    }
    current.lastCommonPrefix =
        ByteBufferUtils.readCompressedInt(currentBuffer);

    // NOTE(review): presumably grows keyBuffer to hold keyLength bytes while
    // keeping its content - confirm in SeekerState.
    current.ensureSpaceForKey();

    if (isFirst) {
      // First key: read everything up to the timestamp in one go, then
      // derive the row and family section sizes from the bytes just read.
      currentBuffer.get(current.keyBuffer, current.lastCommonPrefix,
          current.keyLength - current.prevTimestampAndType.length);
      current.rowLengthWithSize = Bytes.toShort(current.keyBuffer, 0) +
          Bytes.SIZEOF_SHORT;
      current.familyLengthWithSize =
          current.keyBuffer[current.rowLengthWithSize] + Bytes.SIZEOF_BYTE;
    } else if (current.lastCommonPrefix < Bytes.SIZEOF_SHORT) {
      // The common prefix ends inside the row-length field, so the row
      // length may change and the family section has to move.

      // Complete the row-length field from the buffer.
      int oldRowLengthWithSize = current.rowLengthWithSize;
      currentBuffer.get(current.keyBuffer, current.lastCommonPrefix,
          Bytes.SIZEOF_SHORT - current.lastCommonPrefix);
      current.rowLengthWithSize = Bytes.toShort(current.keyBuffer, 0) +
          Bytes.SIZEOF_SHORT;

      // Move the family section to its new offset before the row bytes are
      // written, because they may overwrite its old location.
      System.arraycopy(current.keyBuffer, oldRowLengthWithSize,
          current.keyBuffer, current.rowLengthWithSize,
          current.familyLengthWithSize);

      // Row bytes.
      currentBuffer.get(current.keyBuffer, Bytes.SIZEOF_SHORT,
          current.rowLengthWithSize - Bytes.SIZEOF_SHORT);

      // Qualifier: everything between the family section and the timestamp.
      currentBuffer.get(current.keyBuffer, current.rowLengthWithSize
          + current.familyLengthWithSize, current.keyLength
          - current.rowLengthWithSize - current.familyLengthWithSize
          - current.prevTimestampAndType.length);
    } else if (current.lastCommonPrefix < current.rowLengthWithSize) {
      // The common prefix ends inside the row bytes: the row length is
      // unchanged and the family section is already in place.

      // Rest of the row, in front of the family section.
      currentBuffer.get(current.keyBuffer, current.lastCommonPrefix,
          current.rowLengthWithSize - current.lastCommonPrefix);

      // Qualifier, behind the family section.
      currentBuffer.get(current.keyBuffer, current.rowLengthWithSize
          + current.familyLengthWithSize, current.keyLength
          - current.rowLengthWithSize - current.familyLengthWithSize
          - current.prevTimestampAndType.length);
    } else {
      // The common prefix covers at least the whole row: read only what lies
      // between the prefix and the timestamp.
      currentBuffer.get(current.keyBuffer, current.lastCommonPrefix,
          current.keyLength - current.prevTimestampAndType.length
              - current.lastCommonPrefix);
    }

    // Timestamp.
    int pos = current.keyLength - current.prevTimestampAndType.length;
    int commonTimestampPrefix = (flag & MASK_TIMESTAMP_LENGTH) >>>
        SHIFT_TIMESTAMP_LENGTH;
    if ((flag & FLAG_SAME_KEY_LENGTH) == 0) {
      // The tail moved: restore the shared timestamp bytes from the saved
      // copy. With an unchanged key length they are already in place.
      System.arraycopy(current.prevTimestampAndType, 0, current.keyBuffer,
          pos, commonTimestampPrefix);
    }
    pos += commonTimestampPrefix;
    currentBuffer.get(current.keyBuffer, pos,
        Bytes.SIZEOF_LONG - commonTimestampPrefix);
    pos += Bytes.SIZEOF_LONG - commonTimestampPrefix;

    // Type byte: read it, restore it from the saved tail, or leave it as is.
    if ((flag & FLAG_SAME_TYPE) == 0) {
      currentBuffer.get(current.keyBuffer, pos, Bytes.SIZEOF_BYTE);
    } else if ((flag & FLAG_SAME_KEY_LENGTH) == 0) {
      current.keyBuffer[pos] =
          current.prevTimestampAndType[Bytes.SIZEOF_LONG];
    }

    // Value: when it equals the previous one, nothing is stored and
    // valueOffset keeps pointing at the earlier occurrence.
    if ((flag & FLAG_SAME_VALUE) == 0) {
      current.valueOffset = currentBuffer.position();
      ByteBufferUtils.skip(currentBuffer, current.valueLength);
    }

    if (includesTags()) {
      decodeTags();
    }
    if (includesMvcc()) {
      current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
    } else {
      current.memstoreTS = 0;
    }
    current.nextKvOffset = currentBuffer.position();
  }

  /**
   * Decodes the first key/value of the block after skipping the leading int
   * (the decoded size that the block decoder reads with {@code readInt()}).
   */
  @Override
  protected void decodeFirst() {
    ByteBufferUtils.skip(currentBuffer, Bytes.SIZEOF_INT);
    decode(true);
  }

  /** Decodes the next key/value relative to the one currently held. */
  @Override
  protected void decodeNext() {
    decode(false);
  }

  /** Creates the fast-diff specific seeker state used by this seeker. */
  @Override
  protected FastDiffSeekerState createSeekerState() {
    return new FastDiffSeekerState();
  }
}
538 }