View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
19  import java.nio.ByteBuffer;
20  
21  import org.apache.hadoop.hbase.Cell;
22  import org.apache.hadoop.hbase.CellUtil;
23  import org.apache.hadoop.hbase.HConstants;
24  import org.apache.hadoop.hbase.KeyValue;
25  import org.apache.hadoop.hbase.KeyValue.KVComparator;
26  import org.apache.hadoop.hbase.NoTagsKeyValue;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.io.encoding.AbstractDataBlockEncoder.AbstractEncodedSeeker;
29  import org.apache.hadoop.hbase.util.ByteBufferUtils;
30  import org.apache.hadoop.hbase.util.Bytes;
31  import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
32  import org.apache.hadoop.io.WritableUtils;
33  
34  @InterfaceAudience.Private
35  public class RowIndexSeekerV1 extends AbstractEncodedSeeker {
36  
37    private ByteBuffer currentBuffer;
38    private SeekerState current = new SeekerState(); // always valid
39    private SeekerState previous = new SeekerState(); // may not be valid
40  
41    private int rowNumber;
42    private ByteBuffer rowOffsets = null;
43  
44    public RowIndexSeekerV1(KVComparator comparator,
45        HFileBlockDecodingContext decodingCtx) {
46      super(comparator, decodingCtx);
47    }
48  
49    @Override
50    public void setCurrentBuffer(ByteBuffer buffer) {
51      int onDiskSize = Bytes.toIntUnsafe(buffer.array(), buffer.arrayOffset()
52          + buffer.limit() - Bytes.SIZEOF_INT);
53      // int onDiskSize = buffer.getInt(buffer.limit() - Bytes.SIZEOF_INT);
54  
55      // Data part
56      ByteBuffer dup = buffer.duplicate();
57      dup.position(buffer.position());
58      dup.limit(buffer.position() + onDiskSize);
59      currentBuffer = dup.slice();
60      current.currentBuffer = currentBuffer;
61      ByteBufferUtils.skip(buffer, onDiskSize);
62  
63      // Row offset
64      rowNumber = buffer.getInt();
65      // equals Bytes.SIZEOF_INT * rowNumber
66      int totalRowOffsetsLength = rowNumber << 2;
67      ByteBuffer rowDup = buffer.duplicate();
68      rowDup.position(buffer.position());
69      rowDup.limit(buffer.position() + totalRowOffsetsLength);
70      rowOffsets = rowDup.slice();
71  
72      decodeFirst();
73    }
74  
75    @Override
76    public ByteBuffer getKeyDeepCopy() {
77      ByteBuffer keyBuffer = ByteBuffer.allocate(current.keyLength);
78      keyBuffer.put(current.keyBuffer.getBytes(), current.keyBuffer.getOffset(),
79          current.keyLength);
80      keyBuffer.rewind();
81      return keyBuffer;
82    }
83  
84    @Override
85    public ByteBuffer getValueShallowCopy() {
86      ByteBuffer dup = currentBuffer.duplicate();
87      dup.position(current.valueOffset);
88      dup.limit(current.valueOffset + current.valueLength);
89      return dup.slice();
90    }
91  
92    ByteBuffer getKeyValueBuffer() {
93      ByteBuffer kvBuffer = createKVBuffer();
94      kvBuffer.putInt(current.keyLength);
95      kvBuffer.putInt(current.valueLength);
96      kvBuffer.put(current.keyBuffer.getBytes(), current.keyBuffer.getOffset(),
97          current.keyLength);
98      ByteBufferUtils.copyFromBufferToBuffer(kvBuffer, currentBuffer,
99          current.valueOffset, current.valueLength);
100     if (current.tagsLength > 0) {
101       // Put short as unsigned
102       kvBuffer.put((byte) (current.tagsLength >> 8 & 0xff));
103       kvBuffer.put((byte) (current.tagsLength & 0xff));
104       if (current.tagsOffset != -1) {
105         ByteBufferUtils.copyFromBufferToBuffer(kvBuffer, currentBuffer,
106             current.tagsOffset, current.tagsLength);
107       }
108     }
109     if (includesMvcc()) {
110       ByteBufferUtils.writeVLong(kvBuffer, current.getSequenceId());
111     }
112     kvBuffer.rewind();
113     return kvBuffer;
114   }
115 
116   protected ByteBuffer createKVBuffer() {
117     int kvBufSize = (int) KeyValue.getKeyValueDataStructureSize(
118         current.keyLength, current.valueLength, current.tagsLength);
119     if (includesMvcc()) {
120       kvBufSize += WritableUtils.getVIntSize(current.getSequenceId());
121     }
122     ByteBuffer kvBuffer = ByteBuffer.allocate(kvBufSize);
123     return kvBuffer;
124   }
125 
126   @Override
127   public Cell getKeyValue() {
128     return current.toCell();
129   }
130 
131   @Override
132   public void rewind() {
133     currentBuffer.rewind();
134     decodeFirst();
135   }
136 
137   @Override
138   public boolean next() {
139     if (!currentBuffer.hasRemaining()) {
140       return false;
141     }
142     decodeNext();
143     previous.invalidate();
144     return true;
145   }
146 
147   @Override
148   public int seekToKeyInBlock(byte[] key, int offset, int length,
149       boolean seekBefore) {
150     return seekToKeyInBlock(new KeyValue.KeyOnlyKeyValue(key, offset, length),
151         seekBefore);
152   }
153 
154   private int binarySearch(Cell seekCell, boolean seekBefore) {
155     int low = 0;
156     int high = rowNumber - 1;
157     int mid = low + ((high - low) >> 1);
158     int comp = 0;
159     SimpleMutableByteRange row = new SimpleMutableByteRange();
160     while (low <= high) {
161       mid = low + ((high - low) >> 1);
162       getRow(mid, row);
163       comp = compareRows(row, seekCell);
164       if (comp < 0) {
165         low = mid + 1;
166       } else if (comp > 0) {
167         high = mid - 1;
168       } else {
169         // key found
170         if (seekBefore) {
171           return mid - 1;
172         } else {
173           return mid;
174         }
175       }
176     }
177     // key not found.
178     if (comp > 0) {
179       return mid - 1;
180     } else {
181       return mid;
182     }
183   }
184 
185   private int compareRows(SimpleMutableByteRange row, Cell seekCell) {
186     return comparator.compareRows(row.getBytes(), row.getOffset(),
187       row.getLength(), seekCell.getRowArray(), seekCell.getRowOffset(),
188       seekCell.getRowLength());
189   }
190 
191   private void getRow(int index, SimpleMutableByteRange row) {
192     int offset = Bytes.toIntUnsafe(rowOffsets.array(), rowOffsets.arrayOffset()
193         + (index << 2)); // index * Bytes.SIZEOF_INT
194     int position = currentBuffer.arrayOffset() + offset + Bytes.SIZEOF_LONG;
195     short rowLen = Bytes.toShortUnsafe(currentBuffer.array(), position);
196     row.set(currentBuffer.array(), position + Bytes.SIZEOF_SHORT, rowLen);
197   }
198 
199   @Override
200   public int seekToKeyInBlock(Cell seekCell, boolean seekBefore) {
201     previous.invalidate();
202     int index = binarySearch(seekCell, seekBefore);
203     if (index < 0) {
204       return HConstants.INDEX_KEY_MAGIC; // using optimized index key
205     } else {
206       int offset = Bytes.toIntUnsafe(rowOffsets.array(),
207           rowOffsets.arrayOffset() + (index << 2));
208       if (offset != 0) {
209         decodeAtPosition(offset);
210       }
211     }
212     do {
213       int comp;
214       comp = comparator.compareOnlyKeyPortion(seekCell, current.currentKey);
215       if (comp == 0) { // exact match
216         if (seekBefore) {
217           if (!previous.isValid()) {
218             // The caller (seekBefore) has to ensure that we are not at the
219             // first key in the block.
220             throw new IllegalStateException("Cannot seekBefore if "
221                 + "positioned at the first key in the block: key="
222                 + Bytes.toStringBinary(seekCell.getRowArray()));
223           }
224           moveToPrevious();
225           return 1;
226         }
227         return 0;
228       }
229 
230       if (comp < 0) { // already too large, check previous
231         if (previous.isValid()) {
232           moveToPrevious();
233         } else {
234           return HConstants.INDEX_KEY_MAGIC; // using optimized index key
235         }
236         return 1;
237       }
238 
239       // move to next, if more data is available
240       if (currentBuffer.hasRemaining()) {
241         previous.copyFromNext(current);
242         decodeNext();
243       } else {
244         break;
245       }
246     } while (true);
247 
248     // we hit the end of the block, not an exact match
249     return 1;
250   }
251 
252   private void moveToPrevious() {
253     if (!previous.isValid()) {
254       throw new IllegalStateException(
255           "Can move back only once and not in first key in the block.");
256     }
257 
258     SeekerState tmp = previous;
259     previous = current;
260     current = tmp;
261 
262     // move after last key value
263     currentBuffer.position(current.nextKvOffset);
264     previous.invalidate();
265   }
266 
267   @Override
268   public int compareKey(KVComparator comparator, byte[] key, int offset,
269       int length) {
270     return comparator.compareFlatKey(key, offset, length,
271         current.keyBuffer.getBytes(), current.keyBuffer.getOffset(),
272         current.keyBuffer.getLength());
273   }
274 
275   @Override
276   public int compareKey(KVComparator comparator, Cell key) {
277     return comparator.compareOnlyKeyPortion(key, new KeyValue.KeyOnlyKeyValue(
278         current.keyBuffer.getBytes(), current.keyBuffer.getOffset(),
279         current.keyBuffer.getLength()));
280   }
281 
282   protected void decodeFirst() {
283     decodeNext();
284     previous.invalidate();
285   }
286 
287   protected void decodeAtPosition(int position) {
288     currentBuffer.position(position);
289     decodeNext();
290     previous.invalidate();
291   }
292 
293   protected void decodeNext() {
294     current.startOffset = currentBuffer.position();
295     int p = currentBuffer.position() + currentBuffer.arrayOffset();
296     long ll = Bytes.toLong(currentBuffer.array(), p);
297     // Read top half as an int of key length and bottom int as value length
298     current.keyLength = (int) (ll >> Integer.SIZE);
299     current.valueLength = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
300     ByteBufferUtils.skip(currentBuffer, Bytes.SIZEOF_LONG);
301     // key part
302     current.keyBuffer.set(currentBuffer.array(), currentBuffer.arrayOffset()
303         + currentBuffer.position(), current.keyLength);
304     ByteBufferUtils.skip(currentBuffer, current.keyLength);
305     // value part
306     current.valueOffset = currentBuffer.position();
307     ByteBufferUtils.skip(currentBuffer, current.valueLength);
308     if (includesTags()) {
309       decodeTags();
310     }
311     if (includesMvcc()) {
312       current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
313     } else {
314       current.memstoreTS = 0;
315     }
316     current.nextKvOffset = currentBuffer.position();
317     current.setKey(current.keyBuffer.getBytes(), current.keyBuffer.getOffset(),
318         current.keyBuffer.getLength());
319   }
320 
321   protected void decodeTags() {
322     current.tagsLength = currentBuffer.getShort();
323     current.tagsOffset = currentBuffer.position();
324     ByteBufferUtils.skip(currentBuffer, current.tagsLength);
325   }
326 
327   protected class SeekerState {
328     /**
329      * The size of a (key length, value length) tuple that prefixes each entry
330      * in a data block.
331      */
332     public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
333 
334     protected ByteBuffer currentBuffer;
335     protected int startOffset = -1;
336     protected int valueOffset = -1;
337     protected int keyLength;
338     protected int valueLength;
339     protected int tagsLength = 0;
340     protected int tagsOffset = -1;
341 
342     protected SimpleMutableByteRange keyBuffer = new SimpleMutableByteRange();
343     protected long memstoreTS;
344     protected int nextKvOffset;
345     protected KeyValue.KeyOnlyKeyValue currentKey = new KeyValue.KeyOnlyKeyValue();
346 
347     protected boolean isValid() {
348       return valueOffset != -1;
349     }
350 
351     protected void invalidate() {
352       valueOffset = -1;
353       currentKey = new KeyValue.KeyOnlyKeyValue();
354       currentBuffer = null;
355     }
356 
357     protected void setKey(byte[] key, int offset, int length) {
358       currentKey.setKey(key, offset, length);
359     }
360 
361     protected long getSequenceId() {
362       return memstoreTS;
363     }
364 
365     /**
366      * Copy the state from the next one into this instance (the previous state
367      * placeholder). Used to save the previous state when we are advancing the
368      * seeker to the next key/value.
369      */
370     protected void copyFromNext(SeekerState nextState) {
371       keyBuffer.set(nextState.keyBuffer.getBytes(),
372           nextState.keyBuffer.getOffset(), nextState.keyBuffer.getLength());
373       currentKey.setKey(nextState.keyBuffer.getBytes(),
374           nextState.keyBuffer.getOffset(), nextState.keyBuffer.getLength());
375 
376       startOffset = nextState.startOffset;
377       valueOffset = nextState.valueOffset;
378       keyLength = nextState.keyLength;
379       valueLength = nextState.valueLength;
380       nextKvOffset = nextState.nextKvOffset;
381       memstoreTS = nextState.memstoreTS;
382       currentBuffer = nextState.currentBuffer;
383       tagsOffset = nextState.tagsOffset;
384       tagsLength = nextState.tagsLength;
385     }
386 
387     @Override
388     public String toString() {
389       return CellUtil.getCellKeyAsString(toCell());
390     }
391 
392     protected int getCellBufSize() {
393       int kvBufSize = KEY_VALUE_LEN_SIZE + keyLength + valueLength;
394       if (includesTags() && tagsLength > 0) {
395         kvBufSize += Bytes.SIZEOF_SHORT + tagsLength;
396       }
397       return kvBufSize;
398     }
399 
400     protected Cell formNoTagsKeyValue() {
401       NoTagsKeyValue ret = new NoTagsKeyValue(currentBuffer.array(),
402           currentBuffer.arrayOffset() + startOffset, getCellBufSize());
403       if (includesMvcc()) {
404         ret.setSequenceId(memstoreTS);
405       }
406       return ret;
407     }
408 
409     public Cell toCell() {
410       if (tagsOffset > 0) {
411         KeyValue ret = new KeyValue(currentBuffer.array(),
412             currentBuffer.arrayOffset() + startOffset, getCellBufSize());
413         if (includesMvcc()) {
414           ret.setSequenceId(memstoreTS);
415         }
416         return ret;
417       } else {
418         return formNoTagsKeyValue();
419       }
420     }
421   }
422 
423 }