View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.io.hfile;
21  
22  import java.io.DataOutput;
23  import java.io.DataOutputStream;
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.List;
27  
28  import org.apache.commons.logging.Log;
29  import org.apache.commons.logging.LogFactory;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FSDataOutputStream;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.Cell;
35  import org.apache.hadoop.hbase.CellComparator;
36  import org.apache.hadoop.hbase.CellUtil;
37  import org.apache.hadoop.hbase.KeyValue.KVComparator;
38  import org.apache.hadoop.hbase.classification.InterfaceAudience;
39  import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
40  import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable;
41  import org.apache.hadoop.hbase.util.BloomFilterWriter;
42  import org.apache.hadoop.hbase.util.Bytes;
43  import org.apache.hadoop.io.Writable;
44  
/**
 * Writes HFile format version 2.
 * <p>
 * Cells are appended one at a time via {@link #append(Cell)} into the current
 * data block; when the block reaches its (possibly encoded) size limit, it is
 * flushed and registered in the data block index, and every registered
 * {@link InlineBlockWriter} (block-index chunks, compound Bloom filter chunks)
 * gets a chance to emit inline blocks. {@link #close()} then writes the meta
 * blocks, the "load-on-open" section (root data block index, meta block index,
 * file info, and any Bloom filter metadata) and finally the fixed file trailer.
 * <p>
 * Not thread-safe: a writer instance is intended to be used by a single thread.
 */
@InterfaceAudience.Private
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD",
  justification="Understood but doing it anyway; HBASE-14730")
public class HFileWriterV2 extends AbstractHFileWriter {
  static final Log LOG = LogFactory.getLog(HFileWriterV2.class);

  /** Max memstore (mvcc) timestamp in FileInfo */
  public static final byte [] MAX_MEMSTORE_TS_KEY =
      Bytes.toBytes("MAX_MEMSTORE_TS_KEY");

  /** KeyValue version in FileInfo */
  public static final byte [] KEY_VALUE_VERSION =
      Bytes.toBytes("KEY_VALUE_VERSION");

  /** Version for KeyValue which includes memstore timestamp */
  public static final int KEY_VALUE_VER_WITH_MEMSTORE = 1;

  /** Inline block writers for multi-level block index and compound Blooms. */
  private List<InlineBlockWriter> inlineBlockWriters =
      new ArrayList<InlineBlockWriter>();

  /** Unified version 2 block writer */
  protected HFileBlock.Writer fsBlockWriter;

  /** Writer for the (possibly multi-level) data block index; also an inline block writer. */
  private HFileBlockIndex.BlockIndexWriter dataBlockIndexWriter;
  /** Writer for the single-level meta block index written at close time. */
  private HFileBlockIndex.BlockIndexWriter metaBlockIndexWriter;

  /** The offset of the first data block or -1 if the file is empty. */
  private long firstDataBlockOffset = -1;

  /** The offset of the last data block or 0 if the file is empty. */
  protected long lastDataBlockOffset;

  /**
   * The last(stop) Cell of the previous data block.
   * This reference should be short-lived since we write hfiles in a burst.
   */
  private Cell lastCellOfPreviousBlock = null;

  /** Additional data items to be written to the "load-on-open" section. */
  private List<BlockWritable> additionalLoadOnOpenData =
    new ArrayList<BlockWritable>();

  /** Highest sequence id (mvcc timestamp) observed among appended cells. */
  protected long maxMemstoreTS = 0;

  /**
   * warn on cell with tags.
   * Static on purpose so the warning is logged once per JVM rather than once
   * per writer; the write from an instance method is acknowledged by the
   * class-level FindBugs suppression (HBASE-14730).
   */
  private static boolean warnCellWithTags = true;


  /** if this feature is enabled, preCalculate encoded data size before real encoding happens*/
  public static final String UNIFIED_ENCODED_BLOCKSIZE_RATIO = "hbase.writer.unified.encoded.blocksize.ratio";

  /** Block size limit after encoding, used to unify encoded block Cache entry size*/
  private final int encodedBlockSizeLimit;


  /** Factory through which {@link HFile} creates version 2 writers. */
  static class WriterFactoryV2 extends HFile.WriterFactory {
    WriterFactoryV2(Configuration conf, CacheConfig cacheConf) {
      super(conf, cacheConf);
    }

    @Override
    public Writer createWriter(FileSystem fs, Path path,
        FSDataOutputStream ostream,
        KVComparator comparator, HFileContext context) throws IOException {
      context.setIncludesTags(false);// HFile V2 does not deal with tags at all!
      return new HFileWriterV2(conf, cacheConf, fs, path, ostream,
          comparator, context);
    }
  }

  /**
   * Constructor that takes a path, creates and closes the output stream.
   * <p>
   * If {@code ostream} is null a new output stream is created for {@code path};
   * otherwise the supplied stream is written to directly. The encoded block
   * size limit is derived from the context block size scaled by the
   * {@link #UNIFIED_ENCODED_BLOCKSIZE_RATIO} configuration (default 1.0, i.e.
   * same limit as the unencoded block size).
   */
  public HFileWriterV2(Configuration conf, CacheConfig cacheConf,
      FileSystem fs, Path path, FSDataOutputStream ostream,
      final KVComparator comparator, final HFileContext context) throws IOException {
    super(cacheConf,
        ostream == null ? createOutputStream(conf, fs, path, null) : ostream,
        path, comparator, context);
    float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 1f);
    this.encodedBlockSizeLimit = (int)(hFileContext.getBlocksize() * encodeBlockSizeRatio);
    finishInit(conf);
  }

  /**
   * Additional initialization steps: creates the block writer and the data and
   * meta block index writers. Must run exactly once per writer instance
   * (guarded by the {@code fsBlockWriter != null} check).
   */
  protected void finishInit(final Configuration conf) {
    if (fsBlockWriter != null)
      throw new IllegalStateException("finishInit called twice");

    fsBlockWriter = new HFileBlock.Writer(blockEncoder, hFileContext);

    // Data block index writer; only given the cache config / file name when
    // index blocks should be cached as they are written.
    boolean cacheIndexesOnWrite = cacheConf.shouldCacheIndexesOnWrite();
    dataBlockIndexWriter = new HFileBlockIndex.BlockIndexWriter(fsBlockWriter,
        cacheIndexesOnWrite ? cacheConf : null,
        cacheIndexesOnWrite ? name : null);
    dataBlockIndexWriter.setMaxChunkSize(
        HFileBlockIndex.getMaxChunkSize(conf));
    dataBlockIndexWriter.setMinIndexNumEntries(
        HFileBlockIndex.getMinIndexNumEntries(conf));
    // The data block index participates in inline-block writing at block
    // boundaries; the meta block index is only written at close().
    inlineBlockWriters.add(dataBlockIndexWriter);

    // Meta data block index writer
    metaBlockIndexWriter = new HFileBlockIndex.BlockIndexWriter();
    if (LOG.isTraceEnabled()) LOG.trace("Initialized with " + cacheConf);
  }

  /**
   * At a block boundary, write all the inline blocks and opens new block.
   *
   * @throws IOException
   */
  protected void checkBlockBoundary() throws IOException {
    //for encoder like prefixTree, encoded size is not available, so we have to compare both encoded size
    //and unencoded size to blocksize limit.
    if (fsBlockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit
        || fsBlockWriter.blockSizeWritten() >= hFileContext.getBlocksize()) {
      finishBlock();
      writeInlineBlocks(false);
      newBlock();
    }
  }

  /**
   * Clean up the current data block: flush it to the output stream, add an
   * index entry for it, and account its uncompressed size. No-op when the
   * block writer is not in writing state or the current block is empty.
   */
  private void finishBlock() throws IOException {
    if (!fsBlockWriter.isWriting() || fsBlockWriter.blockSizeWritten() == 0)
      return;

    // Update the first data block offset for scanning.
    if (firstDataBlockOffset == -1) {
      firstDataBlockOffset = outputStream.getPos();
    }
    // Update the last data block offset
    lastDataBlockOffset = outputStream.getPos();
    fsBlockWriter.writeHeaderAndData(outputStream);
    int onDiskSize = fsBlockWriter.getOnDiskSizeWithHeader();

    // Index the block under a "fake" key: a midpoint between the last cell of
    // the previous block and the first cell of this one, which can be shorter
    // than either actual cell (lastCellOfPreviousBlock is null for the first
    // block; getMidpoint is expected to handle that).
    Cell indexEntry =
      CellComparator.getMidpoint(this.comparator, lastCellOfPreviousBlock, firstCellInBlock);
    dataBlockIndexWriter.addEntry(CellUtil.getCellKeySerializedAsKeyValueKey(indexEntry),
      lastDataBlockOffset, onDiskSize);
    totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();
    if (cacheConf.shouldCacheDataOnWrite()) {
      doCacheOnWrite(lastDataBlockOffset);
    }
  }

  /**
   * Gives inline block writers an opportunity to contribute blocks.
   * Each writer may emit several blocks in a row; on {@code closing == true}
   * writers are expected to flush whatever they have left.
   */
  private void writeInlineBlocks(boolean closing) throws IOException {
    for (InlineBlockWriter ibw : inlineBlockWriters) {
      while (ibw.shouldWriteBlock(closing)) {
        // Capture the offset before the write so the writer can record where
        // its block landed.
        long offset = outputStream.getPos();
        boolean cacheThisBlock = ibw.getCacheOnWrite();
        ibw.writeInlineBlock(fsBlockWriter.startWriting(
            ibw.getInlineBlockType()));
        fsBlockWriter.writeHeaderAndData(outputStream);
        ibw.blockWritten(offset, fsBlockWriter.getOnDiskSizeWithHeader(),
            fsBlockWriter.getUncompressedSizeWithoutHeader());
        totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();

        if (cacheThisBlock) {
          doCacheOnWrite(offset);
        }
      }
    }
  }

  /**
   * Caches the last written HFile block.
   * @param offset the offset of the block we want to cache. Used to determine
   *          the cache key.
   */
  private void doCacheOnWrite(long offset) {
    HFileBlock cacheFormatBlock = fsBlockWriter.getBlockForCaching(cacheConf);
    cacheConf.getBlockCache().cacheBlock(
        new BlockCacheKey(name, offset, true, cacheFormatBlock.getBlockType()), cacheFormatBlock);
  }

  /**
   * Ready a new block for writing.
   *
   * @throws IOException
   */
  protected void newBlock() throws IOException {
    // This is where the next block begins.
    fsBlockWriter.startWriting(BlockType.DATA);
    firstCellInBlock = null;
    // Remember the last cell written so finishBlock() can compute a midpoint
    // index key between the two adjacent blocks.
    if (lastCell != null) {
      lastCellOfPreviousBlock = lastCell;
    }
  }

  /**
   * Add a meta block to the end of the file. Call before close(). Metadata
   * blocks are expensive. Fill one with a bunch of serialized data rather than
   * do a metadata block per metadata instance. If metadata is small, consider
   * adding to file info using {@link #appendFileInfo(byte[], byte[])}
   *
   * @param metaBlockName
   *          name of the block
   * @param content
   *          will call readFields to get data later (DO NOT REUSE)
   */
  @Override
  public void appendMetaBlock(String metaBlockName, Writable content) {
    byte[] key = Bytes.toBytes(metaBlockName);
    // Insertion sort: keep metaNames (and the parallel metaData list) ordered
    // by raw-byte comparison of the block names.
    int i;
    for (i = 0; i < metaNames.size(); ++i) {
      // stop when the current key is greater than our own
      byte[] cur = metaNames.get(i);
      if (Bytes.BYTES_RAWCOMPARATOR.compare(cur, 0, cur.length, key, 0,
          key.length) > 0) {
        break;
      }
    }
    metaNames.add(i, key);
    metaData.add(i, content);
  }

  /**
   * Add key/value to file. Keys must be added in an order that agrees with the
   * Comparator passed on construction.
   *
   * @param cell Cell to add. Cannot be empty nor null.
   * @throws IOException
   */
  @Override
  public void append(final Cell cell) throws IOException {
    byte[] value = cell.getValueArray();
    int voffset = cell.getValueOffset();
    int vlength = cell.getValueLength();
    // checkKey uses comparator to check we are writing in order.
    boolean dupKey = checkKey(cell);
    checkValue(value, voffset, vlength);
    // A duplicate key must stay in the same block as its predecessor, so only
    // consider rolling the block for a new key.
    if (!dupKey) {
      checkBlockBoundary();
    }

    if (!fsBlockWriter.isWriting()) {
      newBlock();
    }

    // V2 cannot persist tags; warn once per JVM if cells carry them.
    if (warnCellWithTags && getFileContext().isIncludesTags()) {
      LOG.warn("A minimum HFile version of " + HFile.MIN_FORMAT_VERSION_WITH_TAGS
          + " is required to support cell attributes/tags. Consider setting "
          + HFile.FORMAT_VERSION_KEY + " accordingly.");
      warnCellWithTags = false;
    }

    fsBlockWriter.write(cell);

    totalKeyLength += CellUtil.estimatedSerializedSizeOfKey(cell);
    totalValueLength += vlength;

    // Are we the first key in this block?
    if (firstCellInBlock == null) {
      // If cell is big, block will be closed and this firstCellInBlock reference will only last
      // a short while.
      firstCellInBlock = cell;
    }

    // TODO: What if cell is 10MB and we write infrequently?  We'll hold on to the cell here
    // indefinetly?
    lastCell = cell;
    entryCount++;
    this.maxMemstoreTS = Math.max(this.maxMemstoreTS, cell.getSequenceId());
  }

  /**
   * Finishes the file: flushes the current data block and remaining inline
   * blocks, then writes meta blocks, the load-on-open section (root data block
   * index, meta block index, file info, extra load-on-open data such as Bloom
   * filters) and the fixed file trailer. Idempotent with respect to a null
   * output stream (returns immediately).
   */
  @Override
  public void close() throws IOException {
    if (outputStream == null) {
      return;
    }
    // Save data block encoder metadata in the file info.
    blockEncoder.saveMetadata(this);
    // Write out the end of the data blocks, then write meta data blocks.
    // followed by fileinfo, data block index and meta block index.

    finishBlock();
    writeInlineBlocks(true);

    FixedFileTrailer trailer = new FixedFileTrailer(getMajorVersion(), getMinorVersion());

    // Write out the metadata blocks if any.
    if (!metaNames.isEmpty()) {
      for (int i = 0; i < metaNames.size(); ++i) {
        // store the beginning offset
        long offset = outputStream.getPos();
        // write the metadata content
        DataOutputStream dos = fsBlockWriter.startWriting(BlockType.META);
        metaData.get(i).write(dos);

        fsBlockWriter.writeHeaderAndData(outputStream);
        totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();

        // Add the new meta block to the meta index.
        metaBlockIndexWriter.addEntry(metaNames.get(i), offset,
            fsBlockWriter.getOnDiskSizeWithHeader());
      }
    }

    // Load-on-open section.

    // Data block index.
    //
    // In version 2, this section of the file starts with the root level data
    // block index. We call a function that writes intermediate-level blocks
    // first, then root level, and returns the offset of the root level block
    // index.

    long rootIndexOffset = dataBlockIndexWriter.writeIndexBlocks(outputStream);
    trailer.setLoadOnOpenOffset(rootIndexOffset);

    // Meta block index.
    metaBlockIndexWriter.writeSingleLevelIndex(fsBlockWriter.startWriting(
        BlockType.ROOT_INDEX), "meta");
    fsBlockWriter.writeHeaderAndData(outputStream);
    totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();

    // Record mvcc bookkeeping in the file info only when mvcc is included.
    if (this.hFileContext.isIncludesMvcc()) {
      appendFileInfo(MAX_MEMSTORE_TS_KEY, Bytes.toBytes(maxMemstoreTS));
      appendFileInfo(KEY_VALUE_VERSION, Bytes.toBytes(KEY_VALUE_VER_WITH_MEMSTORE));
    }

    // File info
    writeFileInfo(trailer, fsBlockWriter.startWriting(BlockType.FILE_INFO));
    fsBlockWriter.writeHeaderAndData(outputStream);
    totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();

    // Load-on-open data supplied by higher levels, e.g. Bloom filters.
    for (BlockWritable w : additionalLoadOnOpenData){
      fsBlockWriter.writeBlock(w, outputStream);
      totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();
    }

    // Now finish off the trailer.
    trailer.setNumDataIndexLevels(dataBlockIndexWriter.getNumLevels());
    trailer.setUncompressedDataIndexSize(
        dataBlockIndexWriter.getTotalUncompressedSize());
    trailer.setFirstDataBlockOffset(firstDataBlockOffset);
    trailer.setLastDataBlockOffset(lastDataBlockOffset);
    trailer.setComparatorClass(comparator.getClass());
    trailer.setDataIndexCount(dataBlockIndexWriter.getNumRootEntries());


    finishClose(trailer);

    fsBlockWriter.release();
  }

  @Override
  public void addInlineBlockWriter(InlineBlockWriter ibw) {
    inlineBlockWriters.add(ibw);
  }

  @Override
  public void addGeneralBloomFilter(final BloomFilterWriter bfw) {
    this.addBloomFilter(bfw, BlockType.GENERAL_BLOOM_META);
  }

  @Override
  public void addDeleteFamilyBloomFilter(final BloomFilterWriter bfw) {
    this.addBloomFilter(bfw, BlockType.DELETE_FAMILY_BLOOM_META);
  }

  /**
   * Queues a Bloom filter's metadata (and optional data) for the load-on-open
   * section written by {@link #close()}. Empty filters are skipped; only the
   * two Bloom meta block types are accepted.
   */
  private void addBloomFilter(final BloomFilterWriter bfw,
      final BlockType blockType) {
    if (bfw.getKeyCount() <= 0)
      return;

    if (blockType != BlockType.GENERAL_BLOOM_META &&
        blockType != BlockType.DELETE_FAMILY_BLOOM_META) {
      throw new RuntimeException("Block Type: " + blockType.toString() +
          "is not supported");
    }
    additionalLoadOnOpenData.add(new BlockWritable() {
      @Override
      public BlockType getBlockType() {
        return blockType;
      }

      @Override
      public void writeToBlock(DataOutput out) throws IOException {
        // Meta writer first, then the (optional) data writer.
        bfw.getMetaWriter().write(out);
        Writable dataWriter = bfw.getDataWriter();
        if (dataWriter != null)
          dataWriter.write(out);
      }
    });
  }

  /** @return the HFile major format version written by this class (2). */
  protected int getMajorVersion() {
    return 2;
  }

  /** @return the minor format version, the newest the V2 reader understands. */
  protected int getMinorVersion() {
    return HFileReaderV2.MAX_MINOR_VERSION;
  }

  @Override
  public HFileContext getFileContext() {
    return hFileContext;
  }
}