View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.IOException;
21  import java.nio.BufferUnderflowException;
22  import java.security.Key;
23  import java.security.KeyException;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.hbase.Cell;
31  import org.apache.hadoop.hbase.CellUtil;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.KeyValue;
34  import org.apache.hadoop.hbase.NoTagsKeyValue;
35  import org.apache.hadoop.hbase.fs.HFileSystem;
36  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
37  import org.apache.hadoop.hbase.io.crypto.Cipher;
38  import org.apache.hadoop.hbase.io.crypto.Encryption;
39  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
40  import org.apache.hadoop.hbase.security.EncryptionUtil;
41  import org.apache.hadoop.hbase.security.User;
42  import org.apache.hadoop.hbase.util.ByteBufferUtils;
43  import org.apache.hadoop.hbase.util.Bytes;
44  import org.apache.hadoop.io.WritableUtils;
45  
46  /**
47   * {@link HFile} reader for version 3.
48   */
49  @InterfaceAudience.Private
50  public class HFileReaderV3 extends HFileReaderV2 {
51  
52    private static final Log LOG = LogFactory.getLog(HFileReaderV3.class);
53  
54    public static final int MAX_MINOR_VERSION = 0;
55  
56    /**
57     * Opens a HFile. You must load the index before you can use it by calling
58     * {@link #loadFileInfo()}.
59     * @param path
60     *          Path to HFile.
61     * @param trailer
62     *          File trailer.
63     * @param fsdis
64     *          input stream.
65     * @param size
66     *          Length of the stream.
67     * @param cacheConf
68     *          Cache configuration.
69     * @param hfs
70     *          The file system.
71     * @param conf
72     *          Configuration
73     */
74    public HFileReaderV3(final Path path, FixedFileTrailer trailer,
75        final FSDataInputStreamWrapper fsdis,
76        final long size, final CacheConfig cacheConf, final HFileSystem hfs,
77        final Configuration conf) throws IOException {
78      super(path, trailer, fsdis, size, cacheConf, hfs, conf);
79      byte[] tmp = fileInfo.get(FileInfo.MAX_TAGS_LEN);
80      // max tag length is not present in the HFile means tags were not at all written to file.
81      if (tmp != null) {
82        hfileContext.setIncludesTags(true);
83        tmp = fileInfo.get(FileInfo.TAGS_COMPRESSED);
84        if (tmp != null && Bytes.toBoolean(tmp)) {
85          hfileContext.setCompressTags(true);
86        }
87      }
88    }
89  
90    @Override
91    protected HFileContext createHFileContext(FSDataInputStreamWrapper fsdis, long fileSize,
92        HFileSystem hfs, Path path, FixedFileTrailer trailer) throws IOException {
93      trailer.expectMajorVersion(3);
94      HFileContextBuilder builder = new HFileContextBuilder()
95        .withIncludesMvcc(shouldIncludeMemstoreTS())
96        .withHBaseCheckSum(true)
97        .withCompression(this.compressAlgo);
98  
99      // Check for any key material available
100     byte[] keyBytes = trailer.getEncryptionKey();
101     if (keyBytes != null) {
102       Encryption.Context cryptoContext = Encryption.newContext(conf);
103       Key key;
104       String masterKeyName = conf.get(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY,
105         User.getCurrent().getShortName());
106       try {
107         // First try the master key
108         key = EncryptionUtil.unwrapKey(conf, masterKeyName, keyBytes);
109       } catch (KeyException e) {
110         // If the current master key fails to unwrap, try the alternate, if
111         // one is configured
112         if (LOG.isDebugEnabled()) {
113           LOG.debug("Unable to unwrap key with current master key '" + masterKeyName + "'");
114         }
115         String alternateKeyName =
116           conf.get(HConstants.CRYPTO_MASTERKEY_ALTERNATE_NAME_CONF_KEY);
117         if (alternateKeyName != null) {
118           try {
119             key = EncryptionUtil.unwrapKey(conf, alternateKeyName, keyBytes);
120           } catch (KeyException ex) {
121             throw new IOException(ex);
122           }
123         } else {
124           throw new IOException(e);
125         }
126       }
127       // Use the algorithm the key wants
128       Cipher cipher = Encryption.getCipher(conf, key.getAlgorithm());
129       if (cipher == null) {
130         throw new IOException("Cipher '" + key.getAlgorithm() + "' is not available");
131       }
132       cryptoContext.setCipher(cipher);
133       cryptoContext.setKey(key);
134       builder.withEncryptionContext(cryptoContext);
135     }
136 
137     HFileContext context = builder.build();
138 
139     if (LOG.isTraceEnabled()) {
140       LOG.trace("Reader" + (path != null ? " for " + path : "" ) +
141         " initialized with cacheConf: " + cacheConf +
142         " comparator: " + comparator.getClass().getSimpleName() +
143         " fileContext: " + context);
144     }
145 
146     return context;
147   }
148 
149   /**
150    * Create a Scanner on this file. No seeks or reads are done on creation. Call
151    * {@link HFileScanner#seekTo(byte[])} to position an start the read. There is
152    * nothing to clean up in a Scanner. Letting go of your references to the
153    * scanner is sufficient.
154    * @param cacheBlocks
155    *          True if we should cache blocks read in by this scanner.
156    * @param pread
157    *          Use positional read rather than seek+read if true (pread is better
158    *          for random reads, seek+read is better scanning).
159    * @param isCompaction
160    *          is scanner being used for a compaction?
161    * @return Scanner on this file.
162    */
163   @Override
164   public HFileScanner getScanner(boolean cacheBlocks, final boolean pread,
165       final boolean isCompaction) {
166     if (dataBlockEncoder.useEncodedScanner()) {
167       return new EncodedScannerV3(this, cacheBlocks, pread, isCompaction, this.hfileContext);
168     }
169     return new ScannerV3(this, cacheBlocks, pread, isCompaction);
170   }
171 
172   /**
173    * Implementation of {@link HFileScanner} interface.
174    */
175   protected static class ScannerV3 extends ScannerV2 {
176 
177     private HFileReaderV3 reader;
178     private int currTagsLen;
179 
180     public ScannerV3(HFileReaderV3 r, boolean cacheBlocks, final boolean pread,
181         final boolean isCompaction) {
182       super(r, cacheBlocks, pread, isCompaction);
183       this.reader = r;
184     }
185 
186     @Override
187     protected int getCellBufSize() {
188       int kvBufSize = super.getCellBufSize();
189       if (reader.hfileContext.isIncludesTags()) {
190         kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen;
191       }
192       return kvBufSize;
193     }
194 
195     @Override
196     public Cell getKeyValue() {
197       if (!isSeeked())
198         return null;
199       if (currTagsLen > 0) {
200         KeyValue ret = new KeyValue(blockBuffer.array(), blockBuffer.arrayOffset()
201             + blockBuffer.position(), getCellBufSize());
202         if (this.reader.shouldIncludeMemstoreTS()) {
203           ret.setSequenceId(currMemstoreTS);
204         }
205         return ret;
206       } else {
207         return formNoTagsKeyValue();
208       }
209     }
210 
211     protected void setNonSeekedState() {
212       super.setNonSeekedState();
213       currTagsLen = 0;
214     }
215 
216     @Override
217     protected int getNextCellStartPosition() {
218       int nextKvPos = super.getNextCellStartPosition();
219       if (reader.hfileContext.isIncludesTags()) {
220         nextKvPos += Bytes.SIZEOF_SHORT + currTagsLen;
221       }
222       return nextKvPos;
223     }
224 
225     private final void checkTagsLen() {
226       if (checkLen(this.currTagsLen)) {
227         throw new IllegalStateException("Invalid currTagsLen " + this.currTagsLen +
228           ". Block offset: " + block.getOffset() + ", block length: " + this.blockBuffer.limit() +
229           ", position: " + this.blockBuffer.position() + " (without header).");
230       }
231     }
232 
233     protected final void readKeyValueLen() {
234       // TODO: METHOD (mostly) DUPLICATED IN V2!!!! FIXED in master branch by collapsing v3 and v2.
235       // This is a hot method. We go out of our way to make this method short so it can be
236       // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves
237       // because it is faster than going via range-checked ByteBuffer methods or going through a
238       // byte buffer array a byte at a time.
239       int p = blockBuffer.position() + blockBuffer.arrayOffset();
240       // Get a long at a time rather than read two individual ints. In micro-benchmarking, even
241       // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints.
242       long ll = Bytes.toLong(blockBuffer.array(), p);
243       // Read top half as an int of key length and bottom int as value length
244       this.currKeyLen = (int)(ll >> Integer.SIZE);
245       this.currValueLen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll);
246       checkKeyValueLen();
247       // Move position past the key and value lengths and then beyond the key and value
248       p += (Bytes.SIZEOF_LONG + currKeyLen + currValueLen);
249       if (reader.hfileContext.isIncludesTags()) {
250         // Tags length is a short.
251         this.currTagsLen = Bytes.toShort(blockBuffer.array(), p);
252         checkTagsLen();
253         p += (Bytes.SIZEOF_SHORT + currTagsLen);
254       }
255       readMvccVersion(p);
256     }
257 
258     /**
259      * Within a loaded block, seek looking for the last key that is smaller than
260      * (or equal to?) the key we are interested in.
261      * A note on the seekBefore: if you have seekBefore = true, AND the first
262      * key in the block = key, then you'll get thrown exceptions. The caller has
263      * to check for that case and load the previous block as appropriate.
264      * @param key
265      *          the key to find
266      * @param seekBefore
267      *          find the key before the given key in case of exact match.
268      * @return 0 in case of an exact key match, 1 in case of an inexact match,
269      *         -2 in case of an inexact match and furthermore, the input key
270      *         less than the first key of current block(e.g. using a faked index
271      *         key)
272      */
273     @Override
274     protected int blockSeek(Cell key, boolean seekBefore) {
275       int klen, vlen, tlen = 0;
276       long memstoreTS = 0;
277       int memstoreTSLen = 0;
278       int lastKeyValueSize = -1;
279       KeyValue.KeyOnlyKeyValue keyOnlyKv = new KeyValue.KeyOnlyKeyValue();
280       do {
281         try {
282           blockBuffer.mark();
283           klen = blockBuffer.getInt();
284           vlen = blockBuffer.getInt();
285         } catch (BufferUnderflowException bufe) {
286           LOG.error("this.blockBuffer=" + this.blockBuffer, bufe);
287           throw bufe;
288         }
289         if (klen < 0 || vlen < 0 || klen > blockBuffer.limit() || vlen > blockBuffer.limit()) {
290           throw new IllegalStateException("Invalid klen " + klen + " or vlen "
291               + vlen + ". Block offset: "
292               + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
293               + blockBuffer.position() + " (without header).");
294         }
295         ByteBufferUtils.skip(blockBuffer, klen + vlen);
296         if (reader.hfileContext.isIncludesTags()) {
297           // Read short as unsigned, high byte first
298           tlen = ((blockBuffer.get() & 0xff) << 8) ^ (blockBuffer.get() & 0xff);
299           if (tlen < 0 || tlen > blockBuffer.limit()) {
300             throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: "
301                 + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
302                 + blockBuffer.position() + " (without header).");
303           }
304           ByteBufferUtils.skip(blockBuffer, tlen);
305         }
306         if (this.reader.shouldIncludeMemstoreTS()) {
307           if (this.reader.decodeMemstoreTS) {
308             memstoreTS = Bytes.readAsVLong(blockBuffer.array(), blockBuffer.arrayOffset()
309                 + blockBuffer.position());
310             memstoreTSLen = WritableUtils.getVIntSize(memstoreTS);
311           } else {
312             memstoreTS = 0;
313             memstoreTSLen = 1;
314           }
315         }
316         blockBuffer.reset();
317         int keyOffset =
318           blockBuffer.arrayOffset() + blockBuffer.position() + (Bytes.SIZEOF_INT * 2);
319         keyOnlyKv.setKey(blockBuffer.array(), keyOffset, klen);
320         int comp = reader.getComparator().compareOnlyKeyPortion(key, keyOnlyKv);
321 
322         if (comp == 0) {
323           if (seekBefore) {
324             if (lastKeyValueSize < 0) {
325               throw new IllegalStateException("blockSeek with seekBefore "
326                   + "at the first key of the block: key="
327                   + CellUtil.getCellKeyAsString(key)
328                   + ", blockOffset=" + block.getOffset() + ", onDiskSize="
329                   + block.getOnDiskSizeWithHeader());
330             }
331             blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
332             readKeyValueLen();
333             return 1; // non exact match.
334           }
335           currKeyLen = klen;
336           currValueLen = vlen;
337           currTagsLen = tlen;
338           if (this.reader.shouldIncludeMemstoreTS()) {
339             currMemstoreTS = memstoreTS;
340             currMemstoreTSLen = memstoreTSLen;
341           }
342           return 0; // indicate exact match
343         } else if (comp < 0) {
344           if (lastKeyValueSize > 0)
345             blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
346           readKeyValueLen();
347           if (lastKeyValueSize == -1 && blockBuffer.position() == 0) {
348             return HConstants.INDEX_KEY_MAGIC;
349           }
350           return 1;
351         }
352 
353         // The size of this key/value tuple, including key/value length fields.
354         lastKeyValueSize = klen + vlen + memstoreTSLen + KEY_VALUE_LEN_SIZE;
355         // include tag length also if tags included with KV
356         if (reader.hfileContext.isIncludesTags()) {
357           lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT;
358         }
359         blockBuffer.position(blockBuffer.position() + lastKeyValueSize);
360       } while (blockBuffer.remaining() > 0);
361 
362       // Seek to the last key we successfully read. This will happen if this is
363       // the last key/value pair in the file, in which case the following call
364       // to next() has to return false.
365       blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
366       readKeyValueLen();
367       return 1; // didn't exactly find it.
368     }
369   }
370 
371   /**
372    * ScannerV3 that operates on encoded data blocks.
373    */
374   protected static class EncodedScannerV3 extends EncodedScannerV2 {
375     public EncodedScannerV3(HFileReaderV3 reader, boolean cacheBlocks, boolean pread,
376         boolean isCompaction, HFileContext context) {
377       super(reader, cacheBlocks, pread, isCompaction, context);
378     }
379   }
380 
381   @Override
382   public int getMajorVersion() {
383     return 3;
384   }
385 }