View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.io.hfile;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.Closeable;
23  import java.io.DataInput;
24  import java.io.DataInputStream;
25  import java.io.DataOutputStream;
26  import java.io.IOException;
27  import java.io.SequenceInputStream;
28  import java.net.InetSocketAddress;
29  import java.nio.ByteBuffer;
30  import java.util.ArrayList;
31  import java.util.Collection;
32  import java.util.Comparator;
33  import java.util.List;
34  import java.util.Map;
35  import java.util.Set;
36  import java.util.SortedMap;
37  import java.util.TreeMap;
38  
39  import org.apache.commons.logging.Log;
40  import org.apache.commons.logging.LogFactory;
41  import org.apache.hadoop.hbase.classification.InterfaceAudience;
42  import org.apache.hadoop.conf.Configuration;
43  import org.apache.hadoop.fs.FSDataInputStream;
44  import org.apache.hadoop.fs.FSDataOutputStream;
45  import org.apache.hadoop.fs.FileStatus;
46  import org.apache.hadoop.fs.FileSystem;
47  import org.apache.hadoop.fs.Path;
48  import org.apache.hadoop.fs.PathFilter;
49  import org.apache.hadoop.hbase.Cell;
50  import org.apache.hadoop.hbase.HConstants;
51  import org.apache.hadoop.hbase.KeyValue;
52  import org.apache.hadoop.hbase.KeyValue.KVComparator;
53  import org.apache.hadoop.hbase.fs.HFileSystem;
54  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
55  import org.apache.hadoop.hbase.io.MetricsIO;
56  import org.apache.hadoop.hbase.io.MetricsIOWrapperImpl;
57  import org.apache.hadoop.hbase.io.compress.Compression;
58  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
59  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
60  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
61  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
62  import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
63  import org.apache.hadoop.hbase.util.BloomFilterWriter;
64  import org.apache.hadoop.hbase.util.ByteStringer;
65  import org.apache.hadoop.hbase.util.Bytes;
66  import org.apache.hadoop.hbase.util.Counter;
67  import org.apache.hadoop.hbase.util.FSUtils;
68  import org.apache.hadoop.io.Writable;
69  
70  import com.google.common.base.Preconditions;
71  
72  /**
73   * File format for hbase.
74   * A file of sorted key/value pairs. Both keys and values are byte arrays.
75   * <p>
76   * The memory footprint of a HFile includes the following (below is taken from the
77   * <a
78   * href=https://issues.apache.org/jira/browse/HADOOP-3315>TFile</a> documentation
79   * but applies also to HFile):
80   * <ul>
81   * <li>Some constant overhead of reading or writing a compressed block.
82   * <ul>
83   * <li>Each compressed block requires one compression/decompression codec for
84   * I/O.
85   * <li>Temporary space to buffer the key.
86   * <li>Temporary space to buffer the value.
87   * </ul>
88   * <li>HFile index, which is proportional to the total number of Data Blocks.
89   * The total amount of memory needed to hold the index can be estimated as
90   * (56+AvgKeySize)*NumBlocks.
91   * </ul>
92   * Suggestions on performance optimization.
93   * <ul>
94   * <li>Minimum block size. We recommend a setting of minimum block size between
95   * 8KB to 1MB for general usage. Larger block size is preferred if files are
96   * primarily for sequential access. However, it would lead to inefficient random
97   * access (because there are more data to decompress). Smaller blocks are good
98   * for random access, but require more memory to hold the block index, and may
99   * be slower to create (because we must flush the compressor stream at the
100  * conclusion of each data block, which leads to an FS I/O flush). Further, due
101  * to the internal caching in Compression codec, the smallest possible block
102  * size would be around 20KB-30KB.
103  * <li>The current implementation does not offer true multi-threading for
104  * reading. The implementation uses FSDataInputStream seek()+read(), which is
105  * shown to be much faster than positioned-read call in single thread mode.
106  * However, it also means that if multiple threads attempt to access the same
107  * HFile (using multiple scanners) simultaneously, the actual I/O is carried out
108  * sequentially even if they access different DFS blocks (Reexamine! pread seems
109  * to be 10% faster than seek+read in my testing -- stack).
110  * <li>Compression codec. Use "none" if the data is not very compressible (by
111  * compressible, I mean a compression ratio at least 2:1). Generally, use "lzo"
112  * as the starting point for experimenting. "gz" offers slightly better
113  * compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to
114  * decompress, compared to "lzo".
115  * </ul>
116  *
117  * For more on the background behind HFile, see <a
118  * href=https://issues.apache.org/jira/browse/HBASE-61>HBASE-61</a>.
119  * <p>
120  * File is made of data blocks followed by meta data blocks (if any), a fileinfo
121  * block, data block index, meta data block index, and a fixed size trailer
122  * which records the offsets at which file changes content type.
123  * <pre>&lt;data blocks&gt;&lt;meta blocks&gt;&lt;fileinfo&gt;&lt;
124  * data index&gt;&lt;meta index&gt;&lt;trailer&gt;</pre>
125  * Each block has a bit of magic at its start.  Blocks are comprised of
126  * key/values.  In data blocks, they are both byte arrays.  Metadata blocks are
127  * a String key and a byte array value.  An empty file looks like this:
128  * <pre>&lt;fileinfo&gt;&lt;trailer&gt;</pre>.  That is, there are neither data nor meta
129  * blocks present.
130  * <p>
131  * TODO: Do scanners need to be able to take a start and end row?
132  * TODO: Should BlockIndex know the name of its file?  Should it have a Path
133  * that points at its file say for the case where an index lives apart from
134  * an HFile instance?
135  */
136 @InterfaceAudience.Private
137 public class HFile {
138   // LOG is being used in HFileBlock and CheckSumUtil
139   static final Log LOG = LogFactory.getLog(HFile.class);
140 
141   /**
142    * Maximum length of key in HFile.
143    */
144   public final static int MAXIMUM_KEY_LENGTH = Integer.MAX_VALUE;
145 
146   /**
147    * Default compression: none.
148    */
149   public final static Compression.Algorithm DEFAULT_COMPRESSION_ALGORITHM =
150     Compression.Algorithm.NONE;
151 
152   /** Minimum supported HFile format version */
153   public static final int MIN_FORMAT_VERSION = 2;
154 
155   /** Maximum supported HFile format version
156    */
157   public static final int MAX_FORMAT_VERSION = 3;
158 
159   /**
160    * Minimum HFile format version with support for persisting cell tags
161    */
162   public static final int MIN_FORMAT_VERSION_WITH_TAGS = 3;
163 
164   /** Default compression name: none. */
165   public final static String DEFAULT_COMPRESSION =
166     DEFAULT_COMPRESSION_ALGORITHM.getName();
167 
168   /** Meta data block name for bloom filter bits. */
169   public static final String BLOOM_FILTER_DATA_KEY = "BLOOM_FILTER_DATA";
170 
171   /**
172    * We assume that HFile path ends with
173    * ROOT_DIR/TABLE_NAME/REGION_NAME/CF_NAME/HFILE, so it has at least this
174    * many levels of nesting. This is needed for identifying table and CF name
175    * from an HFile path.
176    */
177   public final static int MIN_NUM_HFILE_PATH_LEVELS = 5;
178 
179   /**
180    * The number of bytes per checksum.
181    */
182   public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
183 
184   // For measuring number of checksum failures
185   static final Counter CHECKSUM_FAILURES = new Counter();
186 
187   // For tests. Gets incremented when we read a block whether from HDFS or from Cache.
188   public static final Counter DATABLOCK_READ_COUNT = new Counter();
189 
190   /** Static instance for the metrics so that HFileReaders access the same instance */
191   static final MetricsIO metrics = new MetricsIO(new MetricsIOWrapperImpl());
192 
193   /**
194    * Number of checksum verification failures. It also
195    * clears the counter.
196    */
197   public static final long getAndResetChecksumFailuresCount() {
198     long count = CHECKSUM_FAILURES.get();
199     CHECKSUM_FAILURES.set(0);
200     return count;
201   }
202 
203   /**
204    * Number of checksum verification failures.
205    */
206   public static final long getChecksumFailuresCount() {
207     long count = CHECKSUM_FAILURES.get();
208     return count;
209   }
210 
211   public static final void updateReadLatency(long latencyMillis, boolean pread) {
212     if (pread) {
213       metrics.updateFsPreadTime(latencyMillis);
214     } else {
215       metrics.updateFsReadTime(latencyMillis);
216     }
217   }
218 
219   public static final void updateWriteLatency(long latencyMillis) {
220     metrics.updateFsWriteTime(latencyMillis);
221   }
222 
223   /** API required to write an {@link HFile} */
224   public interface Writer extends Closeable {
225 
226     /** Add an element to the file info map. */
227     void appendFileInfo(byte[] key, byte[] value) throws IOException;
228 
229     void append(Cell cell) throws IOException;
230 
231     /** @return the path to this {@link HFile} */
232     Path getPath();
233 
234     /**
235      * Adds an inline block writer such as a multi-level block index writer or
236      * a compound Bloom filter writer.
237      */
238     void addInlineBlockWriter(InlineBlockWriter bloomWriter);
239 
240     // The below three methods take Writables.  We'd like to undo Writables but undoing the below would be pretty
241     // painful.  Could take a byte [] or a Message but we want to be backward compatible around hfiles so would need
242     // to map between Message and Writable or byte [] and current Writable serialization.  This would be a bit of work
243     // to little gain.  Thats my thinking at moment.  St.Ack 20121129
244 
245     void appendMetaBlock(String bloomFilterMetaKey, Writable metaWriter);
246 
247     /**
248      * Store general Bloom filter in the file. This does not deal with Bloom filter
249      * internals but is necessary, since Bloom filters are stored differently
250      * in HFile version 1 and version 2.
251      */
252     void addGeneralBloomFilter(BloomFilterWriter bfw);
253 
254     /**
255      * Store delete family Bloom filter in the file, which is only supported in
256      * HFile V2.
257      */
258     void addDeleteFamilyBloomFilter(BloomFilterWriter bfw) throws IOException;
259 
260     /**
261      * Return the file context for the HFile this writer belongs to
262      */
263     HFileContext getFileContext();
264   }
265 
266   /**
267    * This variety of ways to construct writers is used throughout the code, and
268    * we want to be able to swap writer implementations.
269    */
270   public static abstract class WriterFactory {
271     protected final Configuration conf;
272     protected final CacheConfig cacheConf;
273     protected FileSystem fs;
274     protected Path path;
275     protected FSDataOutputStream ostream;
276     protected KVComparator comparator = KeyValue.COMPARATOR;
277     protected InetSocketAddress[] favoredNodes;
278     private HFileContext fileContext;
279     protected boolean shouldDropBehind = false;
280 
281     WriterFactory(Configuration conf, CacheConfig cacheConf) {
282       this.conf = conf;
283       this.cacheConf = cacheConf;
284     }
285 
286     public WriterFactory withPath(FileSystem fs, Path path) {
287       Preconditions.checkNotNull(fs);
288       Preconditions.checkNotNull(path);
289       this.fs = fs;
290       this.path = path;
291       return this;
292     }
293 
294     public WriterFactory withOutputStream(FSDataOutputStream ostream) {
295       Preconditions.checkNotNull(ostream);
296       this.ostream = ostream;
297       return this;
298     }
299 
300     public WriterFactory withComparator(KVComparator comparator) {
301       Preconditions.checkNotNull(comparator);
302       this.comparator = comparator;
303       return this;
304     }
305 
306     public WriterFactory withFavoredNodes(InetSocketAddress[] favoredNodes) {
307       // Deliberately not checking for null here.
308       this.favoredNodes = favoredNodes;
309       return this;
310     }
311 
312     public WriterFactory withFileContext(HFileContext fileContext) {
313       this.fileContext = fileContext;
314       return this;
315     }
316 
317     public WriterFactory withShouldDropCacheBehind(boolean shouldDropBehind) {
318       this.shouldDropBehind = shouldDropBehind;
319       return this;
320     }
321 
322 
323     public Writer create() throws IOException {
324       if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
325         throw new AssertionError("Please specify exactly one of " +
326             "filesystem/path or path");
327       }
328       if (path != null) {
329         ostream = AbstractHFileWriter.createOutputStream(conf, fs, path, favoredNodes);
330         try {
331           ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction());
332         } catch (UnsupportedOperationException uoe) {
333           if (LOG.isTraceEnabled()) LOG.trace("Unable to set drop behind on " + path, uoe);
334           else if (LOG.isDebugEnabled()) LOG.debug("Unable to set drop behind on " + path);
335         }
336       }
337       return createWriter(fs, path, ostream,
338                    comparator, fileContext);
339     }
340 
341     protected abstract Writer createWriter(FileSystem fs, Path path, FSDataOutputStream ostream,
342         KVComparator comparator, HFileContext fileContext) throws IOException;
343   }
344 
345   /** The configuration key for HFile version to use for new files */
346   public static final String FORMAT_VERSION_KEY = "hfile.format.version";
347 
348   public static int getFormatVersion(Configuration conf) {
349     int version = conf.getInt(FORMAT_VERSION_KEY, MAX_FORMAT_VERSION);
350     checkFormatVersion(version);
351     return version;
352   }
353 
354   /**
355    * Returns the factory to be used to create {@link HFile} writers.
356    * Disables block cache access for all writers created through the
357    * returned factory.
358    */
359   public static final WriterFactory getWriterFactoryNoCache(Configuration
360        conf) {
361     Configuration tempConf = new Configuration(conf);
362     tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
363     return HFile.getWriterFactory(conf, new CacheConfig(tempConf));
364   }
365 
366   /**
367    * Returns the factory to be used to create {@link HFile} writers
368    */
369   public static final WriterFactory getWriterFactory(Configuration conf,
370       CacheConfig cacheConf) {
371     int version = getFormatVersion(conf);
372     switch (version) {
373     case 2:
374       return new HFileWriterV2.WriterFactoryV2(conf, cacheConf);
375     case 3:
376       return new HFileWriterV3.WriterFactoryV3(conf, cacheConf);
377     default:
378       throw new IllegalArgumentException("Cannot create writer for HFile " +
379           "format version " + version);
380     }
381   }
382 
383   /**
384    * An abstraction used by the block index.
385    * Implementations will check cache for any asked-for block and return cached block if found.
386    * Otherwise, after reading from fs, will try and put block into cache before returning.
387    */
388   public interface CachingBlockReader {
389     /**
390      * Read in a file block.
391      * @param offset offset to read.
392      * @param onDiskBlockSize size of the block
393      * @param cacheBlock
394      * @param pread
395      * @param isCompaction is this block being read as part of a compaction
396      * @param expectedBlockType the block type we are expecting to read with this read operation,
397      *  or null to read whatever block type is available and avoid checking (that might reduce
398      *  caching efficiency of encoded data blocks)
399      * @param expectedDataBlockEncoding the data block encoding the caller is expecting data blocks
400      *  to be in, or null to not perform this check and return the block irrespective of the
401      *  encoding. This check only applies to data blocks and can be set to null when the caller is
402      *  expecting to read a non-data block and has set expectedBlockType accordingly.
403      * @return Block wrapped in a ByteBuffer.
404      * @throws IOException
405      */
406     HFileBlock readBlock(long offset, long onDiskBlockSize,
407         boolean cacheBlock, final boolean pread, final boolean isCompaction,
408         final boolean updateCacheMetrics, BlockType expectedBlockType,
409         DataBlockEncoding expectedDataBlockEncoding)
410         throws IOException;
411   }
412 
413   /** An interface used by clients to open and iterate an {@link HFile}. */
414   public interface Reader extends Closeable, CachingBlockReader {
415     /**
416      * Returns this reader's "name". Usually the last component of the path.
417      * Needs to be constant as the file is being moved to support caching on
418      * write.
419      */
420     String getName();
421 
422     KVComparator getComparator();
423 
424     HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction);
425 
426     ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;
427 
428     Map<byte[], byte[]> loadFileInfo() throws IOException;
429 
430     byte[] getLastKey();
431 
432     byte[] midkey() throws IOException;
433 
434     long length();
435 
436     long getEntries();
437 
438     byte[] getFirstKey();
439 
440     long indexSize();
441 
442     byte[] getFirstRowKey();
443 
444     byte[] getLastRowKey();
445 
446     FixedFileTrailer getTrailer();
447 
448     HFileBlockIndex.BlockIndexReader getDataBlockIndexReader();
449 
450     HFileScanner getScanner(boolean cacheBlocks, boolean pread);
451 
452     Compression.Algorithm getCompressionAlgorithm();
453 
454     /**
455      * Retrieves general Bloom filter metadata as appropriate for each
456      * {@link HFile} version.
457      * Knows nothing about how that metadata is structured.
458      */
459     DataInput getGeneralBloomFilterMetadata() throws IOException;
460 
461     /**
462      * Retrieves delete family Bloom filter metadata as appropriate for each
463      * {@link HFile}  version.
464      * Knows nothing about how that metadata is structured.
465      */
466     DataInput getDeleteBloomFilterMetadata() throws IOException;
467 
468     Path getPath();
469 
470     /** Close method with optional evictOnClose */
471     void close(boolean evictOnClose) throws IOException;
472 
473     DataBlockEncoding getDataBlockEncoding();
474 
475     boolean hasMVCCInfo();
476 
477     /**
478      * Return the file context of the HFile this reader belongs to
479      */
480     HFileContext getFileContext();
481 
482     boolean isPrimaryReplicaReader();
483 
484     void setPrimaryReplicaReader(boolean isPrimaryReplicaReader);
485 
486     /**
487      * To close the stream's socket. Note: This can be concurrently called from multiple threads and
488      * implementation should take care of thread safety.
489      */
490     void unbufferStream();
491   }
492 
493   /**
494    * Method returns the reader given the specified arguments.
495    * TODO This is a bad abstraction.  See HBASE-6635.
496    *
497    * @param path hfile's path
498    * @param fsdis stream of path's file
499    * @param size max size of the trailer.
500    * @param cacheConf Cache configuation values, cannot be null.
501    * @param hfs
502    * @return an appropriate instance of HFileReader
503    * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
504    */
505   @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SF_SWITCH_FALLTHROUGH",
506       justification="Intentional")
507   private static Reader openReader(Path path, FSDataInputStreamWrapper fsdis, long size,
508       CacheConfig cacheConf, HFileSystem hfs, Configuration conf) throws IOException {
509     FixedFileTrailer trailer = null;
510     try {
511       boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
512       assert !isHBaseChecksum; // Initially we must read with FS checksum.
513       trailer = FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
514       switch (trailer.getMajorVersion()) {
515       case 2:
516         return new HFileReaderV2(path, trailer, fsdis, size, cacheConf, hfs, conf);
517       case 3 :
518         return new HFileReaderV3(path, trailer, fsdis, size, cacheConf, hfs, conf);
519       default:
520         throw new IllegalArgumentException("Invalid HFile version " + trailer.getMajorVersion());
521       }
522     } catch (Throwable t) {
523       try {
524         fsdis.close();
525       } catch (Throwable t2) {
526         LOG.warn("Error closing fsdis FSDataInputStreamWrapper", t2);
527       }
528       throw new CorruptHFileException("Problem reading HFile Trailer from file " + path, t);
529     } finally {
530       fsdis.unbuffer();
531     }
532   }
533 
534   /**
535    * The sockets and the file descriptors held by the method parameter
536    * {@code FSDataInputStreamWrapper} passed will be freed after its usage so caller needs to ensure
537    * that no other threads have access to the same passed reference.
538    * @param fs A file system
539    * @param path Path to HFile
540    * @param fsdis a stream of path's file
541    * @param size max size of the trailer.
542    * @param cacheConf Cache configuration for hfile's contents
543    * @param conf Configuration
544    * @return A version specific Hfile Reader
545    * @throws IOException If file is invalid, will throw CorruptHFileException flavored IOException
546    */
547   public static Reader createReader(FileSystem fs, Path path,
548       FSDataInputStreamWrapper fsdis, long size, CacheConfig cacheConf, Configuration conf)
549       throws IOException {
550     HFileSystem hfs = null;
551 
552     // If the fs is not an instance of HFileSystem, then create an
553     // instance of HFileSystem that wraps over the specified fs.
554     // In this case, we will not be able to avoid checksumming inside
555     // the filesystem.
556     if (!(fs instanceof HFileSystem)) {
557       hfs = new HFileSystem(fs);
558     } else {
559       hfs = (HFileSystem)fs;
560     }
561     return openReader(path, fsdis, size, cacheConf, hfs, conf);
562   }
563 
564   /**
565    *
566    * @param fs filesystem
567    * @param path Path to file to read
568    * @param cacheConf This must not be null.  @see {@link org.apache.hadoop.hbase.io.hfile.CacheConfig#CacheConfig(Configuration)}
569    * @return an active Reader instance
570    * @throws IOException Will throw a CorruptHFileException (DoNotRetryIOException subtype) if hfile is corrupt/invalid.
571    */
572   public static Reader createReader(
573       FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf) throws IOException {
574     Preconditions.checkNotNull(cacheConf, "Cannot create Reader with null CacheConf");
575     FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
576     return openReader(path, stream, fs.getFileStatus(path).getLen(),
577       cacheConf, stream.getHfs(), conf);
578   }
579 
580   /**
581    * This factory method is used only by unit tests. <br/>
582    * The sockets and the file descriptors held by the method parameter
583    * {@code FSDataInputStreamWrapper} passed will be freed after its usage so caller needs to ensure
584    * that no other threads have access to the same passed reference.
585    */
586   static Reader createReaderFromStream(Path path,
587       FSDataInputStream fsdis, long size, CacheConfig cacheConf, Configuration conf)
588       throws IOException {
589     FSDataInputStreamWrapper wrapper = new FSDataInputStreamWrapper(fsdis);
590     return openReader(path, wrapper, size, cacheConf, null, conf);
591   }
592 
593   /**
594    * Returns true if the specified file has a valid HFile Trailer.
595    * @param fs filesystem
596    * @param path Path to file to verify
597    * @return true if the file has a valid HFile Trailer, otherwise false
598    * @throws IOException if failed to read from the underlying stream
599    */
600   public static boolean isHFileFormat(final FileSystem fs, final Path path) throws IOException {
601     return isHFileFormat(fs, fs.getFileStatus(path));
602   }
603 
604   /**
605    * Returns true if the specified file has a valid HFile Trailer.
606    * @param fs filesystem
607    * @param fileStatus the file to verify
608    * @return true if the file has a valid HFile Trailer, otherwise false
609    * @throws IOException if failed to read from the underlying stream
610    */
611   public static boolean isHFileFormat(final FileSystem fs, final FileStatus fileStatus)
612       throws IOException {
613     final Path path = fileStatus.getPath();
614     final long size = fileStatus.getLen();
615     FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, path);
616     try {
617       boolean isHBaseChecksum = fsdis.shouldUseHBaseChecksum();
618       assert !isHBaseChecksum; // Initially we must read with FS checksum.
619       FixedFileTrailer.readFromStream(fsdis.getStream(isHBaseChecksum), size);
620       return true;
621     } catch (IllegalArgumentException e) {
622       return false;
623     } catch (IOException e) {
624       throw e;
625     } finally {
626       try {
627         fsdis.close();
628       } catch (Throwable t) {
629         LOG.warn("Error closing fsdis FSDataInputStreamWrapper: " + path, t);
630       }
631     }
632   }
633 
634   /**
635    * Metadata for this file. Conjured by the writer. Read in by the reader.
636    */
637   public static class FileInfo implements SortedMap<byte[], byte[]> {
/** Prefix of the keys that are reserved for the file format itself. */
static final String RESERVED_PREFIX = "hfile.";
static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);

// Reserved keys; each one is the reserved prefix followed by a fixed name.
static final byte[] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
static final byte[] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
static final byte[] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
static final byte[] CREATE_TIME_TS = Bytes.toBytes(RESERVED_PREFIX + "CREATE_TIME_TS");
static final byte[] COMPARATOR = Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
static final byte[] TAGS_COMPRESSED = Bytes.toBytes(RESERVED_PREFIX + "TAGS_COMPRESSED");
public static final byte[] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN");

/** Backing map; the SortedMap methods of this class delegate to it. */
private final SortedMap<byte[], byte[]> map =
    new TreeMap<byte[], byte[]>(Bytes.BYTES_COMPARATOR);

/** Creates an empty file info map. */
public FileInfo() {
}
652 
653     /**
654      * Append the given key/value pair to the file info, optionally checking the
655      * key prefix.
656      *
657      * @param k key to add
658      * @param v value to add
659      * @param checkPrefix whether to check that the provided key does not start
660      *          with the reserved prefix
661      * @return this file info object
662      * @throws IOException if the key or value is invalid
663      */
664     public FileInfo append(final byte[] k, final byte[] v,
665         final boolean checkPrefix) throws IOException {
666       if (k == null || v == null) {
667         throw new NullPointerException("Key nor value may be null");
668       }
669       if (checkPrefix && isReservedFileInfoKey(k)) {
670         throw new IOException("Keys with a " + FileInfo.RESERVED_PREFIX
671             + " are reserved");
672       }
673       put(k, v);
674       return this;
675     }
676 
677     @Override
678     public void clear() {
679       this.map.clear();
680     }
681 
682     @Override
683     public Comparator<? super byte[]> comparator() {
684       return map.comparator();
685     }
686 
687     @Override
688     public boolean containsKey(Object key) {
689       return map.containsKey(key);
690     }
691 
692     @Override
693     public boolean containsValue(Object value) {
694       return map.containsValue(value);
695     }
696 
697     @Override
698     public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
699       return map.entrySet();
700     }
701 
702     @Override
703     public boolean equals(Object o) {
704       return map.equals(o);
705     }
706 
707     @Override
708     public byte[] firstKey() {
709       return map.firstKey();
710     }
711 
712     @Override
713     public byte[] get(Object key) {
714       return map.get(key);
715     }
716 
717     @Override
718     public int hashCode() {
719       return map.hashCode();
720     }
721 
722     @Override
723     public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
724       return this.map.headMap(toKey);
725     }
726 
727     @Override
728     public boolean isEmpty() {
729       return map.isEmpty();
730     }
731 
732     @Override
733     public Set<byte[]> keySet() {
734       return map.keySet();
735     }
736 
737     @Override
738     public byte[] lastKey() {
739       return map.lastKey();
740     }
741 
742     @Override
743     public byte[] put(byte[] key, byte[] value) {
744       return this.map.put(key, value);
745     }
746 
747     @Override
748     public void putAll(Map<? extends byte[], ? extends byte[]> m) {
749       this.map.putAll(m);
750     }
751 
752     @Override
753     public byte[] remove(Object key) {
754       return this.map.remove(key);
755     }
756 
757     @Override
758     public int size() {
759       return map.size();
760     }
761 
762     @Override
763     public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
764       return this.map.subMap(fromKey, toKey);
765     }
766 
767     @Override
768     public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
769       return this.map.tailMap(fromKey);
770     }
771 
772     @Override
773     public Collection<byte[]> values() {
774       return map.values();
775     }
776 
777     /**
778      * Write out this instance on the passed in <code>out</code> stream.
779      * We write it as a protobuf.
780      * @param out
781      * @throws IOException
782      * @see #read(DataInputStream)
783      */
784     void write(final DataOutputStream out) throws IOException {
785       HFileProtos.FileInfoProto.Builder builder = HFileProtos.FileInfoProto.newBuilder();
786       for (Map.Entry<byte [], byte[]> e: this.map.entrySet()) {
787         HBaseProtos.BytesBytesPair.Builder bbpBuilder = HBaseProtos.BytesBytesPair.newBuilder();
788         bbpBuilder.setFirst(ByteStringer.wrap(e.getKey()));
789         bbpBuilder.setSecond(ByteStringer.wrap(e.getValue()));
790         builder.addMapEntry(bbpBuilder.build());
791       }
792       out.write(ProtobufUtil.PB_MAGIC);
793       builder.build().writeDelimitedTo(out);
794     }
795 
796     /**
797      * Populate this instance with what we find on the passed in <code>in</code> stream.
798      * Can deserialize protobuf of old Writables format.
799      * @param in
800      * @throws IOException
801      * @see #write(DataOutputStream)
802      */
803     void read(final DataInputStream in) throws IOException {
804       // This code is tested over in TestHFileReaderV1 where we read an old hfile w/ this new code.
805       int pblen = ProtobufUtil.lengthOfPBMagic();
806       byte [] pbuf = new byte[pblen];
807       if (in.markSupported()) in.mark(pblen);
808       int read = in.read(pbuf);
809       if (read != pblen) throw new IOException("read=" + read + ", wanted=" + pblen);
810       if (ProtobufUtil.isPBMagicPrefix(pbuf)) {
811         parsePB(HFileProtos.FileInfoProto.parseDelimitedFrom(in));
812       } else {
813         if (in.markSupported()) {
814           in.reset();
815           parseWritable(in);
816         } else {
817           // We cannot use BufferedInputStream, it consumes more than we read from the underlying IS
818           ByteArrayInputStream bais = new ByteArrayInputStream(pbuf);
819           SequenceInputStream sis = new SequenceInputStream(bais, in); // Concatenate input streams
820           // TODO: Am I leaking anything here wrapping the passed in stream?  We are not calling close on the wrapped
821           // streams but they should be let go after we leave this context?  I see that we keep a reference to the
822           // passed in inputstream but since we no longer have a reference to this after we leave, we should be ok.
823           parseWritable(new DataInputStream(sis));
824         }
825       }
826     }
827 
828     /** Now parse the old Writable format.  It was a list of Map entries.  Each map entry was a key and a value of
829      * a byte [].  The old map format had a byte before each entry that held a code which was short for the key or
830      * value type.  We know it was a byte [] so in below we just read and dump it.
831      * @throws IOException
832      */
833     void parseWritable(final DataInputStream in) throws IOException {
834       // First clear the map.  Otherwise we will just accumulate entries every time this method is called.
835       this.map.clear();
836       // Read the number of entries in the map
837       int entries = in.readInt();
838       // Then read each key/value pair
839       for (int i = 0; i < entries; i++) {
840         byte [] key = Bytes.readByteArray(in);
841         // We used to read a byte that encoded the class type.  Read and ignore it because it is always byte [] in hfile
842         in.readByte();
843         byte [] value = Bytes.readByteArray(in);
844         this.map.put(key, value);
845       }
846     }
847 
848     /**
849      * Fill our map with content of the pb we read off disk
850      * @param fip protobuf message to read
851      */
852     void parsePB(final HFileProtos.FileInfoProto fip) {
853       this.map.clear();
854       for (BytesBytesPair pair: fip.getMapEntryList()) {
855         this.map.put(pair.getFirst().toByteArray(), pair.getSecond().toByteArray());
856       }
857     }
858   }
859 
860   /** Return true if the given file info key is reserved for internal use. */
861   public static boolean isReservedFileInfoKey(byte[] key) {
862     return Bytes.startsWith(key, FileInfo.RESERVED_PREFIX_BYTES);
863   }
864 
865   /**
866    * Get names of supported compression algorithms. The names are acceptable by
867    * HFile.Writer.
868    *
869    * @return Array of strings, each represents a supported compression
870    *         algorithm. Currently, the following compression algorithms are
871    *         supported.
872    *         <ul>
873    *         <li>"none" - No compression.
874    *         <li>"gz" - GZIP compression.
875    *         </ul>
876    */
877   public static String[] getSupportedCompressionAlgorithms() {
878     return Compression.getSupportedAlgorithms();
879   }
880 
881   // Utility methods.
882   /*
883    * @param l Long to convert to an int.
884    * @return <code>l</code> cast as an int.
885    */
886   static int longToInt(final long l) {
887     // Expecting the size() of a block not exceeding 4GB. Assuming the
888     // size() will wrap to negative integer if it exceeds 2GB (From tfile).
889     return (int)(l & 0x00000000ffffffffL);
890   }
891 
892   /**
893    * Returns all HFiles belonging to the given region directory. Could return an
894    * empty list.
895    *
896    * @param fs  The file system reference.
897    * @param regionDir  The region directory to scan.
898    * @return The list of files found.
899    * @throws IOException When scanning the files fails.
900    */
901   static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
902       throws IOException {
903     List<Path> regionHFiles = new ArrayList<Path>();
904     PathFilter dirFilter = new FSUtils.DirFilter(fs);
905     FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
906     for(FileStatus dir : familyDirs) {
907       FileStatus[] files = fs.listStatus(dir.getPath());
908       for (FileStatus file : files) {
909         if (!file.isDirectory() &&
910             (!file.getPath().toString().contains(HConstants.HREGION_OLDLOGDIR_NAME)) &&
911             (!file.getPath().toString().contains(HConstants.RECOVERED_EDITS_DIR))) {
912           regionHFiles.add(file.getPath());
913         }
914       }
915     }
916     return regionHFiles;
917   }
918 
919   /**
920    * Checks the given {@link HFile} format version, and throws an exception if
921    * invalid. Note that if the version number comes from an input file and has
922    * not been verified, the caller needs to re-throw an {@link IOException} to
923    * indicate that this is not a software error, but corrupted input.
924    *
925    * @param version an HFile version
926    * @throws IllegalArgumentException if the version is invalid
927    */
928   public static void checkFormatVersion(int version)
929       throws IllegalArgumentException {
930     if (version < MIN_FORMAT_VERSION || version > MAX_FORMAT_VERSION) {
931       throw new IllegalArgumentException("Invalid HFile version: " + version
932           + " (expected to be " + "between " + MIN_FORMAT_VERSION + " and "
933           + MAX_FORMAT_VERSION + ")");
934     }
935   }
936 
937   public static void main(String[] args) throws Exception {
938     // delegate to preserve old behavior
939     HFilePrettyPrinter.main(args);
940   }
941 }