View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.io.encoding;
18  
import com.google.common.base.Preconditions;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.Compressor;
39  
40  /**
41   * Encapsulates a data block compressed using a particular encoding algorithm.
42   * Useful for testing and benchmarking.
43   * This is used only in testing.
44   */
45  @InterfaceAudience.Private
46  public class EncodedDataBlock {
47    private byte[] rawKVs;
48    private ByteBuffer rawBuffer;
49    private DataBlockEncoder dataBlockEncoder;
50  
51    private byte[] cachedEncodedData;
52  
53    private final HFileBlockEncodingContext encodingCtx;
54    private HFileContext meta;
55  
56    /**
57     * Create a buffer which will be encoded using dataBlockEncoder.
58     * @param dataBlockEncoder Algorithm used for compression.
59     * @param encoding encoding type used
60     * @param rawKVs
61     * @param meta
62     */
63    public EncodedDataBlock(DataBlockEncoder dataBlockEncoder, DataBlockEncoding encoding,
64        byte[] rawKVs, HFileContext meta) {
65      Preconditions.checkNotNull(encoding,
66          "Cannot create encoded data block with null encoder");
67      this.dataBlockEncoder = dataBlockEncoder;
68      encodingCtx = dataBlockEncoder.newDataBlockEncodingContext(encoding,
69          HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
70      this.rawKVs = rawKVs;
71      this.meta = meta;
72    }
73  
74    /**
75     * Provides access to compressed value.
76     * @param headerSize header size of the block.
77     * @return Forwards sequential iterator.
78     */
79    public Iterator<Cell> getIterator(int headerSize) {
80      final int rawSize = rawKVs.length;
81      byte[] encodedDataWithHeader = getEncodedData();
82      int bytesToSkip = headerSize + Bytes.SIZEOF_SHORT;
83      ByteArrayInputStream bais = new ByteArrayInputStream(encodedDataWithHeader,
84          bytesToSkip, encodedDataWithHeader.length - bytesToSkip);
85      final DataInputStream dis = new DataInputStream(bais);
86  
87      return new Iterator<Cell>() {
88        private ByteBuffer decompressedData = null;
89  
90        @Override
91        public boolean hasNext() {
92          if (decompressedData == null) {
93            return rawSize > 0;
94          }
95          return decompressedData.hasRemaining();
96        }
97  
98        @Override
99        public Cell next() {
100         if (decompressedData == null) {
101           try {
102             decompressedData = dataBlockEncoder.decodeKeyValues(dis, dataBlockEncoder
103                 .newDataBlockDecodingContext(meta));
104           } catch (IOException e) {
105             throw new RuntimeException("Problem with data block encoder, " +
106                 "most likely it requested more bytes than are available.", e);
107           }
108           decompressedData.rewind();
109         }
110         int offset = decompressedData.position();
111         int klen = decompressedData.getInt();
112         int vlen = decompressedData.getInt();
113         int tagsLen = 0;
114         ByteBufferUtils.skip(decompressedData, klen + vlen);
115         // Read the tag length in case when steam contain tags
116         if (meta.isIncludesTags()) {
117           tagsLen = ((decompressedData.get() & 0xff) << 8) ^ (decompressedData.get() & 0xff);
118           ByteBufferUtils.skip(decompressedData, tagsLen);
119         }
120         KeyValue kv = new KeyValue(decompressedData.array(), offset,
121             (int) KeyValue.getKeyValueDataStructureSize(klen, vlen, tagsLen));
122         if (meta.isIncludesMvcc()) {
123           long mvccVersion = ByteBufferUtils.readVLong(decompressedData);
124           kv.setSequenceId(mvccVersion);
125         }
126         return kv;
127       }
128 
129       @Override
130       public void remove() {
131         throw new NotImplementedException("remove() is not supported!");
132       }
133 
134       @Override
135       public String toString() {
136         return "Iterator of: " + dataBlockEncoder.getClass().getName();
137       }
138 
139     };
140   }
141 
142   /**
143    * Find the size of minimal buffer that could store compressed data.
144    * @return Size in bytes of compressed data.
145    */
146   public int getSize() {
147     return getEncodedData().length;
148   }
149 
150   /**
151    * Find the size of compressed data assuming that buffer will be compressed
152    * using given algorithm.
153    * @param algo compression algorithm
154    * @param compressor compressor already requested from codec
155    * @param inputBuffer Array to be compressed.
156    * @param offset Offset to beginning of the data.
157    * @param length Length to be compressed.
158    * @return Size of compressed data in bytes.
159    * @throws IOException
160    */
161   public static int getCompressedSize(Algorithm algo, Compressor compressor,
162       byte[] inputBuffer, int offset, int length) throws IOException {
163 
164     // Create streams
165     // Storing them so we can close them
166     final IOUtils.NullOutputStream nullOutputStream = new IOUtils.NullOutputStream();
167     final DataOutputStream compressedStream = new DataOutputStream(nullOutputStream);
168     OutputStream compressingStream = null;
169 
170 
171     try {
172       if (compressor != null) {
173         compressor.reset();
174       }
175 
176       compressingStream = algo.createCompressionStream(compressedStream, compressor, 0);
177 
178       compressingStream.write(inputBuffer, offset, length);
179       compressingStream.flush();
180 
181       return compressedStream.size();
182     } finally {
183       nullOutputStream.close();
184       compressedStream.close();
185       if (compressingStream != null) compressingStream.close();
186     }
187   }
188 
189   /**
190    * Estimate size after second stage of compression (e.g. LZO).
191    * @param comprAlgo compression algorithm to be used for compression
192    * @param compressor compressor corresponding to the given compression
193    *          algorithm
194    * @return Size after second stage of compression.
195    */
196   public int getEncodedCompressedSize(Algorithm comprAlgo,
197       Compressor compressor) throws IOException {
198     byte[] compressedBytes = getEncodedData();
199     return getCompressedSize(comprAlgo, compressor, compressedBytes, 0,
200         compressedBytes.length);
201   }
202 
203   /** @return encoded data with header */
204   private byte[] getEncodedData() {
205     if (cachedEncodedData != null) {
206       return cachedEncodedData;
207     }
208     cachedEncodedData = encodeData();
209     return cachedEncodedData;
210   }
211 
212   private ByteBuffer getUncompressedBuffer() {
213     if (rawBuffer == null || rawBuffer.limit() < rawKVs.length) {
214       rawBuffer = ByteBuffer.wrap(rawKVs);
215     }
216     return rawBuffer;
217   }
218 
219   /**
220    * Do the encoding, but do not cache the encoded data.
221    * @return encoded data block with header and checksum
222    */
223   public byte[] encodeData() {
224     ByteArrayOutputStream baos = new ByteArrayOutputStream();
225     try {
226       baos.write(HConstants.HFILEBLOCK_DUMMY_HEADER);
227       DataOutputStream out = new DataOutputStream(baos);
228       this.dataBlockEncoder.startBlockEncoding(encodingCtx, out);
229       ByteBuffer in = getUncompressedBuffer();
230       in.rewind();
231       int klength, vlength;
232       int tagsLength = 0;
233       long memstoreTS = 0L;
234       KeyValue kv = null;
235       while (in.hasRemaining()) {
236         int kvOffset = in.position();
237         klength = in.getInt();
238         vlength = in.getInt();
239         ByteBufferUtils.skip(in, klength + vlength);
240         if (this.meta.isIncludesTags()) {
241           tagsLength = ((in.get() & 0xff) << 8) ^ (in.get() & 0xff);
242           ByteBufferUtils.skip(in, tagsLength);
243         }
244         if (this.meta.isIncludesMvcc()) {
245           memstoreTS = ByteBufferUtils.readVLong(in);
246         }
247         kv = new KeyValue(in.array(), kvOffset, (int) KeyValue.getKeyValueDataStructureSize(
248             klength, vlength, tagsLength));
249         kv.setSequenceId(memstoreTS);
250         this.dataBlockEncoder.encode(kv, encodingCtx, out);
251       }
252       BufferGrabbingByteArrayOutputStream stream = new BufferGrabbingByteArrayOutputStream();
253       baos.writeTo(stream);
254       this.dataBlockEncoder.endBlockEncoding(encodingCtx, out, stream.toByteArray());
255     } catch (IOException e) {
256       throw new RuntimeException(String.format(
257           "Bug in encoding part of algorithm %s. " +
258           "Probably it requested more bytes than are available.",
259           toString()), e);
260     }
261     return baos.toByteArray();
262   }
263 
264   private static class BufferGrabbingByteArrayOutputStream extends ByteArrayOutputStream {
265     private byte[] ourBytes;
266 
267     @Override
268     public synchronized void write(byte[] b, int off, int len) {
269       this.ourBytes = b;
270     }
271 
272     @Override
273     public synchronized byte[] toByteArray() {
274       return ourBytes;
275     }
276   }
277 
278   @Override
279   public String toString() {
280     return dataBlockEncoder.toString();
281   }
282 }