1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with this
4 * work for additional information regarding copyright ownership. The ASF
5 * licenses this file to you under the Apache License, Version 2.0 (the
6 * "License"); you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 * License for the specific language governing permissions and limitations
15 * under the License.
16 */
17 package org.apache.hadoop.hbase.io.encoding;
18
19 import java.io.DataInputStream;
20 import java.io.DataOutputStream;
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23
24 import org.apache.hadoop.hbase.classification.InterfaceAudience;
25 import org.apache.hadoop.hbase.Cell;
26 import org.apache.hadoop.hbase.KeyValue.KVComparator;
27 import org.apache.hadoop.hbase.io.hfile.HFileContext;
28
29 /**
30 * Encoding of KeyValue. It aims to be fast and efficient using assumptions:
31 * <ul>
32 * <li>the KeyValues are stored sorted by key</li>
33 * <li>we know the structure of KeyValue</li>
34 * <li>the values are always iterated forward from beginning of block</li>
35 * <li>knowledge of Key Value format</li>
36 * </ul>
37 * It is designed to work fast enough to be feasible as in memory compression.
38 */
39 @InterfaceAudience.Private
40 public interface DataBlockEncoder {
41 // TODO: This Interface should be deprecated and replaced. It presumes hfile and carnal knowledge of
42 // Cell internals. It was done for a different time. Remove. Purge.
43 /**
44 * Starts encoding for a block of KeyValues. Call
45 * {@link #endBlockEncoding(HFileBlockEncodingContext, DataOutputStream, byte[])} to finish
46 * encoding of a block.
47 * @param encodingCtx
48 * @param out
49 * @throws IOException
50 */
51 void startBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out)
52 throws IOException;
53
54 /**
55 * Encodes a KeyValue.
56 * @param cell
57 * @param encodingCtx
58 * @param out
59 * @return unencoded kv size written
60 * @throws IOException
61 */
62 int encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out)
63 throws IOException;
64
65 /**
66 * Ends encoding for a block of KeyValues. Gives a chance for the encoder to do the finishing
67 * stuff for the encoded block. It must be called at the end of block encoding.
68 * @param encodingCtx
69 * @param out
70 * @param uncompressedBytesWithHeader
71 * @throws IOException
72 */
73 void endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out,
74 byte[] uncompressedBytesWithHeader) throws IOException;
75
76 /**
77 * Decode.
78 * @param source Compressed stream of KeyValues.
79 * @param decodingCtx
80 * @return Uncompressed block of KeyValues.
81 * @throws IOException If there is an error in source.
82 */
83 ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx)
84 throws IOException;
85
86 /**
87 * Return first key in block. Useful for indexing. Typically does not make
88 * a deep copy but returns a buffer wrapping a segment of the actual block's
89 * byte array. This is because the first key in block is usually stored
90 * unencoded.
91 * @param block encoded block we want index, the position will not change
92 * @return First key in block.
93 */
94 ByteBuffer getFirstKeyInBlock(ByteBuffer block);
95
96 /**
97 * Create a HFileBlock seeker which find KeyValues within a block.
98 * @param comparator what kind of comparison should be used
99 * @param decodingCtx
100 * @return A newly created seeker.
101 */
102 EncodedSeeker createSeeker(KVComparator comparator,
103 HFileBlockDecodingContext decodingCtx);
104
105 /**
106 * Creates a encoder specific encoding context
107 *
108 * @param encoding
109 * encoding strategy used
110 * @param headerBytes
111 * header bytes to be written, put a dummy header here if the header
112 * is unknown
113 * @param meta
114 * HFile meta data
115 * @return a newly created encoding context
116 */
117 HFileBlockEncodingContext newDataBlockEncodingContext(
118 DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta);
119
120 /**
121 * Creates an encoder specific decoding context, which will prepare the data
122 * before actual decoding
123 *
124 * @param meta
125 * HFile meta data
126 * @return a newly created decoding context
127 */
128 HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta);
129
130 /**
131 * An interface which enable to seek while underlying data is encoded.
132 *
133 * It works on one HFileBlock, but it is reusable. See
134 * {@link #setCurrentBuffer(ByteBuffer)}.
135 */
136 interface EncodedSeeker {
137 /**
138 * Set on which buffer there will be done seeking.
139 * @param buffer Used for seeking.
140 */
141 void setCurrentBuffer(ByteBuffer buffer);
142
143 /**
144 * Does a deep copy of the key at the current position. A deep copy is
145 * necessary because buffers are reused in the decoder.
146 * @return key at current position
147 */
148 ByteBuffer getKeyDeepCopy();
149
150 /**
151 * Does a shallow copy of the value at the current position. A shallow
152 * copy is possible because the returned buffer refers to the backing array
153 * of the original encoded buffer.
154 * @return value at current position
155 */
156 ByteBuffer getValueShallowCopy();
157
158
159 /**
160 * @return the KeyValue object at the current position. Includes memstore
161 * timestamp.
162 */
163 Cell getKeyValue();
164
165 /** Set position to beginning of given block */
166 void rewind();
167
168 /**
169 * Move to next position
170 * @return true on success, false if there is no more positions.
171 */
172 boolean next();
173
174 /**
175 * Moves the seeker position within the current block to:
176 * <ul>
177 * <li>the last key that that is less than or equal to the given key if
178 * <code>seekBefore</code> is false</li>
179 * <li>the last key that is strictly less than the given key if <code>
180 * seekBefore</code> is true. The caller is responsible for loading the
181 * previous block if the requested key turns out to be the first key of the
182 * current block.</li>
183 * </ul>
184 * @param key byte array containing the key
185 * @param offset key position the array
186 * @param length key length in bytes
187 * @param seekBefore find the key strictly less than the given key in case
188 * of an exact match. Does not matter in case of an inexact match.
189 * @return 0 on exact match, 1 on inexact match.
190 */
191 @Deprecated
192 int seekToKeyInBlock(
193 byte[] key, int offset, int length, boolean seekBefore
194 );
195 /**
196 * Moves the seeker position within the current block to:
197 * <ul>
198 * <li>the last key that that is less than or equal to the given key if
199 * <code>seekBefore</code> is false</li>
200 * <li>the last key that is strictly less than the given key if <code>
201 * seekBefore</code> is true. The caller is responsible for loading the
202 * previous block if the requested key turns out to be the first key of the
203 * current block.</li>
204 * </ul>
205 * @param key - Cell to which the seek should happen
206 * @param seekBefore find the key strictly less than the given key in case
207 * of an exact match. Does not matter in case of an inexact match.
208 * @return 0 on exact match, 1 on inexact match.
209 */
210 int seekToKeyInBlock(Cell key, boolean seekBefore);
211
212 /**
213 * Compare the given key against the current key
214 * @param comparator
215 * @param key
216 * @param offset
217 * @param length
218 * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater
219 */
220 public int compareKey(KVComparator comparator, byte[] key, int offset, int length);
221
222 public int compareKey(KVComparator comparator, Cell key);
223 }
224 }