View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase;
20  
21  import java.io.EOFException;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.io.OutputStream;
25  import java.nio.ByteBuffer;
26  import java.util.ArrayList;
27  import java.util.List;
28  
29  import org.apache.hadoop.hbase.KeyValue.Type;
30  import org.apache.hadoop.hbase.classification.InterfaceAudience;
31  import org.apache.hadoop.hbase.io.util.StreamUtils;
32  import org.apache.hadoop.hbase.util.ByteBufferUtils;
33  import org.apache.hadoop.hbase.util.Bytes;
34  import org.apache.hadoop.hbase.util.IterableUtils;
35  import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
36  import org.apache.hadoop.io.IOUtils;
37  import org.apache.hadoop.io.WritableUtils;
38  
39  import com.google.common.base.Function;
40  import com.google.common.collect.Lists;
41  
42  /**
43   * static convenience methods for dealing with KeyValues and collections of KeyValues
44   */
45  @InterfaceAudience.Private
46  public class KeyValueUtil {
47  
48    /**************** length *********************/
49  
50    /**
51     * Returns number of bytes this cell would have been used if serialized as in {@link KeyValue}
52     * @param cell
53     * @return the length
54     */
55    public static int length(final Cell cell) {
56      return length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(),
57          cell.getValueLength(), cell.getTagsLength(), true);
58    }
59  
60    private static int length(short rlen, byte flen, int qlen, int vlen, int tlen, boolean withTags) {
61      if (withTags) {
62        return (int) (KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen, tlen));
63      }
64      return (int) (KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen));
65    }
66  
67    /**
68     * Returns number of bytes this cell's key part would have been used if serialized as in
69     * {@link KeyValue}. Key includes rowkey, family, qualifier, timestamp and type.
70     * @param cell
71     * @return the key length
72     */
73    public static int keyLength(final Cell cell) {
74      return keyLength(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength());
75    }
76  
77    private static int keyLength(short rlen, byte flen, int qlen) {
78      return (int) KeyValue.getKeyDataStructureSize(rlen, flen, qlen);
79    }
80  
81    public static int lengthWithMvccVersion(final KeyValue kv, final boolean includeMvccVersion) {
82      int length = kv.getLength();
83      if (includeMvccVersion) {
84        length += WritableUtils.getVIntSize(kv.getMvccVersion());
85      }
86      return length;
87    }
88  
89    public static int totalLengthWithMvccVersion(final Iterable<? extends KeyValue> kvs,
90        final boolean includeMvccVersion) {
91      int length = 0;
92      for (KeyValue kv : IterableUtils.nullSafe(kvs)) {
93        length += lengthWithMvccVersion(kv, includeMvccVersion);
94      }
95      return length;
96    }
97  
98  
99    /**************** copy key only *********************/
100 
101   public static KeyValue copyToNewKeyValue(final Cell cell) {
102     byte[] bytes = copyToNewByteArray(cell);
103     KeyValue kvCell = new KeyValue(bytes, 0, bytes.length);
104     kvCell.setSequenceId(cell.getMvccVersion());
105     return kvCell;
106   }
107 
108   /**
109    * The position will be set to the beginning of the new ByteBuffer
110    * @param cell
111    * @return the Bytebuffer containing the key part of the cell
112    */
113   public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) {
114     byte[] bytes = new byte[keyLength(cell)];
115     appendKeyTo(cell, bytes, 0);
116     ByteBuffer buffer = ByteBuffer.wrap(bytes);
117     return buffer;
118   }
119 
120   public static byte[] copyToNewByteArray(final Cell cell) {
121     int v1Length = length(cell);
122     byte[] backingBytes = new byte[v1Length];
123     appendToByteArray(cell, backingBytes, 0);
124     return backingBytes;
125   }
126 
127   public static int appendKeyTo(final Cell cell, final byte[] output,
128       final int offset) {
129     int nextOffset = offset;
130     nextOffset = Bytes.putShort(output, nextOffset, cell.getRowLength());
131     nextOffset = CellUtil.copyRowTo(cell, output, nextOffset);
132     nextOffset = Bytes.putByte(output, nextOffset, cell.getFamilyLength());
133     nextOffset = CellUtil.copyFamilyTo(cell, output, nextOffset);
134     nextOffset = CellUtil.copyQualifierTo(cell, output, nextOffset);
135     nextOffset = Bytes.putLong(output, nextOffset, cell.getTimestamp());
136     nextOffset = Bytes.putByte(output, nextOffset, cell.getTypeByte());
137     return nextOffset;
138   }
139 
140 
141   /**************** copy key and value *********************/
142 
143   public static int appendToByteArray(final Cell cell, final byte[] output, final int offset) {
144     // TODO when cell instance of KV we can bypass all steps and just do backing single array
145     // copy(?)
146     int pos = offset;
147     pos = Bytes.putInt(output, pos, keyLength(cell));
148     pos = Bytes.putInt(output, pos, cell.getValueLength());
149     pos = appendKeyTo(cell, output, pos);
150     pos = CellUtil.copyValueTo(cell, output, pos);
151     if ((cell.getTagsLength() > 0)) {
152       pos = Bytes.putAsShort(output, pos, cell.getTagsLength());
153       pos = CellUtil.copyTagTo(cell, output, pos);
154     }
155     return pos;
156   }
157 
158   /**
159    * The position will be set to the beginning of the new ByteBuffer
160    * @param cell
161    * @return the ByteBuffer containing the cell
162    */
163   public static ByteBuffer copyToNewByteBuffer(final Cell cell) {
164     byte[] bytes = new byte[length(cell)];
165     appendToByteArray(cell, bytes, 0);
166     ByteBuffer buffer = ByteBuffer.wrap(bytes);
167     return buffer;
168   }
169 
170   public static void appendToByteBuffer(final ByteBuffer bb, final KeyValue kv,
171       final boolean includeMvccVersion) {
172     // keep pushing the limit out. assume enough capacity
173     bb.limit(bb.position() + kv.getLength());
174     bb.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
175     if (includeMvccVersion) {
176       int numMvccVersionBytes = WritableUtils.getVIntSize(kv.getMvccVersion());
177       ByteBufferUtils.extendLimit(bb, numMvccVersionBytes);
178       ByteBufferUtils.writeVLong(bb, kv.getMvccVersion());
179     }
180   }
181 
182 
183   /**************** iterating *******************************/
184 
185   /**
186    * Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's
187    * position to the start of the next KeyValue. Does not allocate a new array or copy data.
188    * @param bb
189    * @param includesMvccVersion
190    * @param includesTags
191    */
192   public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion,
193       boolean includesTags) {
194     if (bb.isDirect()) {
195       throw new IllegalArgumentException("only supports heap buffers");
196     }
197     if (bb.remaining() < 1) {
198       return null;
199     }
200     KeyValue keyValue = null;
201     int underlyingArrayOffset = bb.arrayOffset() + bb.position();
202     int keyLength = bb.getInt();
203     int valueLength = bb.getInt();
204     ByteBufferUtils.skip(bb, keyLength + valueLength);
205     int tagsLength = 0;
206     if (includesTags) {
207       // Read short as unsigned, high byte first
208       tagsLength = ((bb.get() & 0xff) << 8) ^ (bb.get() & 0xff);
209       ByteBufferUtils.skip(bb, tagsLength);
210     }
211     int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength);
212     keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength);
213     if (includesMvccVersion) {
214       long mvccVersion = ByteBufferUtils.readVLong(bb);
215       keyValue.setSequenceId(mvccVersion);
216     }
217     return keyValue;
218   }
219 
220 
221   /*************** next/previous **********************************/
222 
223   /**
224    * Append single byte 0x00 to the end of the input row key
225    */
226   public static KeyValue createFirstKeyInNextRow(final Cell in){
227     byte[] nextRow = new byte[in.getRowLength() + 1];
228     System.arraycopy(in.getRowArray(), in.getRowOffset(), nextRow, 0, in.getRowLength());
229     nextRow[nextRow.length - 1] = 0;//maybe not necessary
230     return createFirstOnRow(nextRow);
231   }
232 
233   /**
234    * Increment the row bytes and clear the other fields
235    */
236   public static KeyValue createFirstKeyInIncrementedRow(final Cell in){
237     byte[] thisRow = new SimpleMutableByteRange(in.getRowArray(), in.getRowOffset(),
238         in.getRowLength()).deepCopyToNewArray();
239     byte[] nextRow = Bytes.unsignedCopyAndIncrement(thisRow);
240     return createFirstOnRow(nextRow);
241   }
242 
243   /**
244    * Decrement the timestamp.  For tests (currently wasteful)
245    *
246    * Remember timestamps are sorted reverse chronologically.
247    * @param in
248    * @return previous key
249    */
250   public static KeyValue previousKey(final KeyValue in) {
251     return createFirstOnRow(CellUtil.cloneRow(in), CellUtil.cloneFamily(in),
252       CellUtil.cloneQualifier(in), in.getTimestamp() - 1);
253   }
254 
255 
256   /**
257    * Create a KeyValue for the specified row, family and qualifier that would be
258    * larger than or equal to all other possible KeyValues that have the same
259    * row, family, qualifier. Used for reseeking. Should NEVER be returned to a client.
260    *
261    * @param row
262    *          row key
263    * @param roffset
264    *         row offset
265    * @param rlength
266    *         row length
267    * @param family
268    *         family name
269    * @param foffset
270    *         family offset
271    * @param flength
272    *         family length
273    * @param qualifier
274    *        column qualifier
275    * @param qoffset
276    *        qualifier offset
277    * @param qlength
278    *        qualifier length
279    * @return Last possible key on passed row, family, qualifier.
280    */
281   public static KeyValue createLastOnRow(final byte[] row, final int roffset, final int rlength,
282       final byte[] family, final int foffset, final int flength, final byte[] qualifier,
283       final int qoffset, final int qlength) {
284     return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
285         qlength, HConstants.OLDEST_TIMESTAMP, Type.Minimum, null, 0, 0);
286   }
287   
288   /**
289    * Creates a keyValue for the specified keyvalue larger than or equal to all other possible
290    * KeyValues that have the same row, family, qualifer.  Used for reseeking
291    * @param kv
292    * @return KeyValue
293    */
294   public static KeyValue createLastOnRow(Cell kv) {
295     return createLastOnRow(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), null, 0, 0,
296         null, 0, 0);
297   }
298 
299   /**
300    * Similar to
301    * {@link #createLastOnRow(byte[], int, int, byte[], int, int, byte[], int, int)}
302    * but creates the last key on the row/column of this KV (the value part of
303    * the returned KV is always empty). Used in creating "fake keys" for the
304    * multi-column Bloom filter optimization to skip the row/column we already
305    * know is not in the file. Not to be returned to clients.
306    * 
307    * @param kv - cell
308    * @return the last key on the row/column of the given key-value pair
309    */
310   public static KeyValue createLastOnRowCol(Cell kv) {
311     return new KeyValue(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
312         kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(), kv.getQualifierArray(),
313         kv.getQualifierOffset(), kv.getQualifierLength(), HConstants.OLDEST_TIMESTAMP,
314         Type.Minimum, null, 0, 0);
315   }
316 
317   /**
318    * Creates the first KV with the row/family/qualifier of this KV and the given
319    * timestamp. Uses the "maximum" KV type that guarantees that the new KV is
320    * the lowest possible for this combination of row, family, qualifier, and
321    * timestamp. This KV's own timestamp is ignored. While this function copies
322    * the value from this KV, it is normally used on key-only KVs.
323    * 
324    * @param kv - cell
325    * @param ts
326    */
327   public static KeyValue createFirstOnRowColTS(Cell kv, long ts) {
328     return new KeyValue(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
329         kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(), kv.getQualifierArray(),
330         kv.getQualifierOffset(), kv.getQualifierLength(), ts, Type.Maximum, kv.getValueArray(),
331         kv.getValueOffset(), kv.getValueLength());
332   }
333   
334   /**
335    * Create a KeyValue that is smaller than all other possible KeyValues
336    * for the given row. That is any (valid) KeyValue on 'row' would sort
337    * _after_ the result.
338    *
339    * @param row - row key (arbitrary byte array)
340    * @return First possible KeyValue on passed <code>row</code>
341    */
342   public static KeyValue createFirstOnRow(final byte [] row, int roffset, short rlength) {
343     return new KeyValue(row, roffset, rlength,
344         null, 0, 0, null, 0, 0, HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0);
345   }
346   
347 
348   /**
349    * Creates a KeyValue that is last on the specified row id. That is,
350    * every other possible KeyValue for the given row would compareTo()
351    * less than the result of this call.
352    * @param row row key
353    * @return Last possible KeyValue on passed <code>row</code>
354    */
355   public static KeyValue createLastOnRow(final byte[] row) {
356     return new KeyValue(row, null, null, HConstants.LATEST_TIMESTAMP, Type.Minimum);
357   }
358 
359   /**
360    * Create a KeyValue that is smaller than all other possible KeyValues
361    * for the given row. That is any (valid) KeyValue on 'row' would sort
362    * _after_ the result.
363    *
364    * @param row - row key (arbitrary byte array)
365    * @return First possible KeyValue on passed <code>row</code>
366    */
367   public static KeyValue createFirstOnRow(final byte [] row) {
368     return createFirstOnRow(row, HConstants.LATEST_TIMESTAMP);
369   }
370 
371   /**
372    * Creates a KeyValue that is smaller than all other KeyValues that
373    * are older than the passed timestamp.
374    * @param row - row key (arbitrary byte array)
375    * @param ts - timestamp
376    * @return First possible key on passed <code>row</code> and timestamp.
377    */
378   public static KeyValue createFirstOnRow(final byte [] row,
379       final long ts) {
380     return new KeyValue(row, null, null, ts, Type.Maximum);
381   }
382 
383   /**
384    * Create a KeyValue for the specified row, family and qualifier that would be
385    * smaller than all other possible KeyValues that have the same row,family,qualifier.
386    * Used for seeking.
387    * @param row - row key (arbitrary byte array)
388    * @param family - family name
389    * @param qualifier - column qualifier
390    * @return First possible key on passed <code>row</code>, and column.
391    */
392   public static KeyValue createFirstOnRow(final byte [] row, final byte [] family,
393       final byte [] qualifier) {
394     return new KeyValue(row, family, qualifier, HConstants.LATEST_TIMESTAMP, Type.Maximum);
395   }
396 
397   /**
398    * Create a Delete Family KeyValue for the specified row and family that would
399    * be smaller than all other possible Delete Family KeyValues that have the
400    * same row and family.
401    * Used for seeking.
402    * @param row - row key (arbitrary byte array)
403    * @param family - family name
404    * @return First Delete Family possible key on passed <code>row</code>.
405    */
406   public static KeyValue createFirstDeleteFamilyOnRow(final byte [] row,
407       final byte [] family) {
408     return new KeyValue(row, family, null, HConstants.LATEST_TIMESTAMP,
409         Type.DeleteFamily);
410   }
411 
412   /**
413    * @param row - row key (arbitrary byte array)
414    * @param f - family name
415    * @param q - column qualifier
416    * @param ts - timestamp
417    * @return First possible key on passed <code>row</code>, column and timestamp
418    */
419   public static KeyValue createFirstOnRow(final byte [] row, final byte [] f,
420       final byte [] q, final long ts) {
421     return new KeyValue(row, f, q, ts, Type.Maximum);
422   }
423 
424   /**
425    * Create a KeyValue for the specified row, family and qualifier that would be
426    * smaller than all other possible KeyValues that have the same row,
427    * family, qualifier.
428    * Used for seeking.
429    * @param row row key
430    * @param roffset row offset
431    * @param rlength row length
432    * @param family family name
433    * @param foffset family offset
434    * @param flength family length
435    * @param qualifier column qualifier
436    * @param qoffset qualifier offset
437    * @param qlength qualifier length
438    * @return First possible key on passed Row, Family, Qualifier.
439    */
440   public static KeyValue createFirstOnRow(final byte [] row,
441       final int roffset, final int rlength, final byte [] family,
442       final int foffset, final int flength, final byte [] qualifier,
443       final int qoffset, final int qlength) {
444     return new KeyValue(row, roffset, rlength, family,
445         foffset, flength, qualifier, qoffset, qlength,
446         HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0);
447   }
448 
449   /**
450    * Create a KeyValue for the specified row, family and qualifier that would be
451    * smaller than all other possible KeyValues that have the same row,
452    * family, qualifier.
453    * Used for seeking.
454    *
455    * @param buffer the buffer to use for the new <code>KeyValue</code> object
456    * @param row the value key
457    * @param family family name
458    * @param qualifier column qualifier
459    *
460    * @return First possible key on passed Row, Family, Qualifier.
461    *
462    * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
463    * than the provided buffer or than <code>Integer.MAX_VALUE</code>
464    */
465   public static KeyValue createFirstOnRow(byte [] buffer, final byte [] row,
466       final byte [] family, final byte [] qualifier)
467           throws IllegalArgumentException {
468     return createFirstOnRow(buffer, 0, row, 0, row.length,
469         family, 0, family.length,
470         qualifier, 0, qualifier.length);
471   }
472 
473   /**
474    * Create a KeyValue for the specified row, family and qualifier that would be
475    * smaller than all other possible KeyValues that have the same row,
476    * family, qualifier.
477    * Used for seeking.
478    *
479    * @param buffer the buffer to use for the new <code>KeyValue</code> object
480    * @param boffset buffer offset
481    * @param row the value key
482    * @param roffset row offset
483    * @param rlength row length
484    * @param family family name
485    * @param foffset family offset
486    * @param flength family length
487    * @param qualifier column qualifier
488    * @param qoffset qualifier offset
489    * @param qlength qualifier length
490    *
491    * @return First possible key on passed Row, Family, Qualifier.
492    *
493    * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
494    * than the provided buffer or than <code>Integer.MAX_VALUE</code>
495    */
496   public static KeyValue createFirstOnRow(byte[] buffer, final int boffset, final byte[] row,
497       final int roffset, final int rlength, final byte[] family, final int foffset,
498       final int flength, final byte[] qualifier, final int qoffset, final int qlength)
499       throws IllegalArgumentException {
500 
501     long lLength = KeyValue.getKeyValueDataStructureSize(rlength, flength, qlength, 0);
502 
503     if (lLength > Integer.MAX_VALUE) {
504       throw new IllegalArgumentException("KeyValue length " + lLength + " > " + Integer.MAX_VALUE);
505     }
506     int iLength = (int) lLength;
507     if (buffer.length - boffset < iLength) {
508       throw new IllegalArgumentException("Buffer size " + (buffer.length - boffset) + " < "
509           + iLength);
510     }
511 
512     int len = KeyValue.writeByteArray(buffer, boffset, row, roffset, rlength, family, foffset,
513         flength, qualifier, qoffset, qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum,
514         null, 0, 0, null);
515     return new KeyValue(buffer, boffset, len);
516   }
517 
518   /**
519    * Creates the first KV with the row/family/qualifier of this KV and the
520    * given timestamp. Uses the "maximum" KV type that guarantees that the new
521    * KV is the lowest possible for this combination of row, family, qualifier,
522    * and timestamp. This KV's own timestamp is ignored. While this function
523    * copies the value from this KV, it is normally used on key-only KVs.
524    */
525   public static KeyValue createFirstOnRowColTS(KeyValue kv, long ts) {
526     return new KeyValue(
527         kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
528         kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
529         kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength(),
530         ts, Type.Maximum, kv.getValueArray(), kv.getValueOffset(), kv.getValueLength());
531   }
532 
533   /*************** misc **********************************/
534   /**
535    * @param cell
536    * @return <code>cell</code> if it is an instance of {@link KeyValue} else we will return a
537    * new {@link KeyValue} instance made from <code>cell</code>
538    * @deprecated without any replacement.
539    */
540   @Deprecated
541   public static KeyValue ensureKeyValue(final Cell cell) {
542     if (cell == null) return null;
543     return cell instanceof KeyValue? (KeyValue)cell: copyToNewKeyValue(cell);
544   }
545 
546   /**
547    * @param cell
548    * @return <code>cell</code> if it is an object of class {@link KeyValue} else we will return a
549    * new {@link KeyValue} instance made from <code>cell</code> Note: Even if the cell is an object
550    * of any of the subclass of {@link KeyValue}, we will create a new {@link KeyValue} object
551    * wrapping same buffer. This API should be used only with MR based tools which expect the type
552    * to be exactly KeyValue. That is the reason for doing this way.
553    *
554    * @deprecated without any replacement.
555    */
556   @Deprecated
557   public static KeyValue ensureKeyValueTypeForMR(final Cell cell) {
558     if (cell == null) return null;
559     if (cell instanceof KeyValue) {
560       if (cell.getClass().getName().equals(KeyValue.class.getName())) {
561         return (KeyValue) cell;
562       }
563       // Cell is an Object of any of the sub classes of KeyValue. Make a new KeyValue wrapping the
564       // same byte[]
565       KeyValue kv = (KeyValue) cell;
566       KeyValue newKv = new KeyValue(kv.bytes, kv.offset, kv.length);
567       newKv.setSequenceId(kv.getSequenceId());
568       return newKv;
569     }
570     return copyToNewKeyValue(cell);
571   }
572 
573   @Deprecated
574   public static List<KeyValue> ensureKeyValues(List<Cell> cells) {
575     List<KeyValue> lazyList = Lists.transform(cells, new Function<Cell, KeyValue>() {
576       @Override
577       public KeyValue apply(Cell arg0) {
578         return KeyValueUtil.ensureKeyValue(arg0);
579       }
580     });
581     return new ArrayList<KeyValue>(lazyList);
582   }
583 
584   static String bytesToHex(byte[] buf, int offset, int length) {
585     String bufferContents = buf != null ? Bytes.toStringBinary(buf, offset, length) : "<null>";
586     return ", KeyValueBytesHex=" + bufferContents + ", offset=" + offset
587         + ", length=" + length;
588   }
589 
590   static void checkKeyValueBytes(byte[] buf, int offset, int length, boolean withTags) {
591     if (buf == null) {
592       throw new IllegalArgumentException("Invalid to have null " +
593           "byte array in KeyValue.");
594     }
595 
596     int pos = offset, endOffset = offset + length;
597     // check the key
598     if (pos + Bytes.SIZEOF_INT > endOffset) {
599       throw new IllegalArgumentException(
600           "Overflow when reading key length at position=" + pos + bytesToHex(buf, offset, length));
601     }
602     int keyLen = Bytes.toInt(buf, pos, Bytes.SIZEOF_INT);
603     pos += Bytes.SIZEOF_INT;
604     if (keyLen <= 0 || pos + keyLen > endOffset) {
605       throw new IllegalArgumentException(
606           "Invalid key length in KeyValue. keyLength=" + keyLen + bytesToHex(buf, offset, length));
607     }
608     // check the value
609     if (pos + Bytes.SIZEOF_INT > endOffset) {
610       throw new IllegalArgumentException("Overflow when reading value length at position=" + pos
611           + bytesToHex(buf, offset, length));
612     }
613     int valLen = Bytes.toInt(buf, pos, Bytes.SIZEOF_INT);
614     pos += Bytes.SIZEOF_INT;
615     if (valLen < 0 || pos + valLen > endOffset) {
616       throw new IllegalArgumentException("Invalid value length in KeyValue, valueLength=" + valLen
617           + bytesToHex(buf, offset, length));
618     }
619     // check the row
620     if (pos + Bytes.SIZEOF_SHORT > endOffset) {
621       throw new IllegalArgumentException(
622           "Overflow when reading row length at position=" + pos + bytesToHex(buf, offset, length));
623     }
624     short rowLen = Bytes.toShort(buf, pos, Bytes.SIZEOF_SHORT);
625     pos += Bytes.SIZEOF_SHORT;
626     if (rowLen < 0 || pos + rowLen > endOffset) {
627       throw new IllegalArgumentException(
628           "Invalid row length in KeyValue, rowLength=" + rowLen + bytesToHex(buf, offset, length));
629     }
630     pos += rowLen;
631     // check the family
632     if (pos + Bytes.SIZEOF_BYTE > endOffset) {
633       throw new IllegalArgumentException("Overflow when reading family length at position=" + pos
634           + bytesToHex(buf, offset, length));
635     }
636     int familyLen = buf[pos];
637     pos += Bytes.SIZEOF_BYTE;
638     if (familyLen < 0 || pos + familyLen > endOffset) {
639       throw new IllegalArgumentException("Invalid family length in KeyValue, familyLength="
640           + familyLen + bytesToHex(buf, offset, length));
641     }
642     pos += familyLen;
643     // check the qualifier
644     int qualifierLen = keyLen - Bytes.SIZEOF_SHORT - rowLen - Bytes.SIZEOF_BYTE - familyLen
645         - Bytes.SIZEOF_LONG - Bytes.SIZEOF_BYTE;
646     if (qualifierLen < 0 || pos + qualifierLen > endOffset) {
647       throw new IllegalArgumentException("Invalid qualifier length in KeyValue, qualifierLen="
648           + qualifierLen + bytesToHex(buf, offset, length));
649     }
650     pos += qualifierLen;
651     // check the timestamp
652     if (pos + Bytes.SIZEOF_LONG > endOffset) {
653       throw new IllegalArgumentException(
654           "Overflow when reading timestamp at position=" + pos + bytesToHex(buf, offset, length));
655     }
656     long timestamp = Bytes.toLong(buf, pos, Bytes.SIZEOF_LONG);
657     if (timestamp < 0) {
658       throw new IllegalArgumentException(
659           "Timestamp cannot be negative, ts=" + timestamp + bytesToHex(buf, offset, length));
660     }
661     pos += Bytes.SIZEOF_LONG;
662     // check the type
663     if (pos + Bytes.SIZEOF_BYTE > endOffset) {
664       throw new IllegalArgumentException(
665           "Overflow when reading type at position=" + pos + bytesToHex(buf, offset, length));
666     }
667     byte type = buf[pos];
668     if (!Type.isValidType(type)) {
669       throw new IllegalArgumentException(
670           "Invalid type in KeyValue, type=" + type + bytesToHex(buf, offset, length));
671     }
672     pos += Bytes.SIZEOF_BYTE;
673     // check the value
674     if (pos + valLen > endOffset) {
675       throw new IllegalArgumentException(
676           "Overflow when reading value part at position=" + pos + bytesToHex(buf, offset, length));
677     }
678     pos += valLen;
679     // check the tags
680     if (withTags) {
681       if (pos == endOffset) {
682         // withTags is true but no tag in the cell.
683         return;
684       }
685       if (pos + Bytes.SIZEOF_SHORT > endOffset) {
686         throw new IllegalArgumentException("Overflow when reading tags length at position=" + pos
687             + bytesToHex(buf, offset, length));
688       }
689       short tagsLen = Bytes.toShort(buf, pos);
690       pos += Bytes.SIZEOF_SHORT;
691       if (tagsLen < 0 || pos + tagsLen > endOffset) {
692         throw new IllegalArgumentException("Invalid tags length in KeyValue at position="
693             + (pos - Bytes.SIZEOF_SHORT) + bytesToHex(buf, offset, length));
694       }
695       int tagsEndOffset = pos + tagsLen;
696       for (; pos < tagsEndOffset;) {
697         if (pos + Tag.TAG_LENGTH_SIZE > endOffset) {
698           throw new IllegalArgumentException("Overflow when reading tag length at position=" + pos
699               + bytesToHex(buf, offset, length));
700         }
701         short tagLen = Bytes.toShort(buf, pos);
702         pos += Tag.TAG_LENGTH_SIZE;
703         // tagLen contains one byte tag type, so must be not less than 1.
704         if (tagLen < 1 || pos + tagLen > endOffset) {
705           throw new IllegalArgumentException(
706               "Invalid tag length at position=" + (pos - Tag.TAG_LENGTH_SIZE) + ", tagLength="
707                   + tagLen + bytesToHex(buf, offset, length));
708         }
709         pos += tagLen;
710       }
711     }
712     if (pos != endOffset) {
713       throw new IllegalArgumentException("Some redundant bytes in KeyValue's buffer, startOffset="
714           + pos + ", endOffset=" + endOffset + bytesToHex(buf, offset, length));
715     }
716   }
717 
718   /**
719    * Create a KeyValue reading from the raw InputStream.
720    * @param in inputStream to read.
721    * @param withTags whether the keyvalue should include tags are not
722    * @return Created KeyValue OR if we find a length of zero, we will return null which can be
723    *         useful marking a stream as done.
724    * @throws IOException
725    */
726   public static KeyValue createKeyValueFromInputStream(InputStream in, boolean withTags)
727       throws IOException {
728     byte[] intBytes = new byte[Bytes.SIZEOF_INT];
729     int bytesRead = 0;
730     while (bytesRead < intBytes.length) {
731       int n = in.read(intBytes, bytesRead, intBytes.length - bytesRead);
732       if (n < 0) {
733         if (bytesRead == 0) {
734           throw new EOFException();
735         }
736         throw new IOException("Failed read of int, read " + bytesRead + " bytes");
737       }
738       bytesRead += n;
739     }
740     byte[] bytes = new byte[Bytes.toInt(intBytes)];
741     IOUtils.readFully(in, bytes, 0, bytes.length);
742     return withTags ? new KeyValue(bytes, 0, bytes.length)
743         : new NoTagsKeyValue(bytes, 0, bytes.length);
744   }
745 
746   public static int getSerializedSize(Cell cell, boolean withTags) {
747     return length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(),
748       cell.getValueLength(), cell.getTagsLength(), withTags);
749   }
750 
751   public static void oswrite(final Cell cell, final OutputStream out, final boolean withTags)
752       throws IOException {
753     if (cell instanceof KeyValue) {
754       KeyValue.oswrite((KeyValue) cell, out, withTags);
755     } else {
756       short rlen = cell.getRowLength();
757       byte flen = cell.getFamilyLength();
758       int qlen = cell.getQualifierLength();
759       int vlen = cell.getValueLength();
760       int tlen = cell.getTagsLength();
761 
762       // write total length
763       StreamUtils.writeInt(out, length(rlen, flen, qlen, vlen, tlen, withTags));
764       // write key length
765       StreamUtils.writeInt(out, keyLength(rlen, flen, qlen));
766       // write value length
767       StreamUtils.writeInt(out, vlen);
768       // Write rowkey - 2 bytes rk length followed by rowkey bytes
769       StreamUtils.writeShort(out, rlen);
770       out.write(cell.getRowArray(), cell.getRowOffset(), rlen);
771       // Write cf - 1 byte of cf length followed by the family bytes
772       out.write(flen);
773       out.write(cell.getFamilyArray(), cell.getFamilyOffset(), flen);
774       // write qualifier
775       out.write(cell.getQualifierArray(), cell.getQualifierOffset(), qlen);
776       // write timestamp
777       StreamUtils.writeLong(out, cell.getTimestamp());
778       // write the type
779       out.write(cell.getTypeByte());
780       // write value
781       out.write(cell.getValueArray(), cell.getValueOffset(), vlen);
782       // write tags if we have to
783       if (withTags && tlen > 0) {
784         // 2 bytes tags length followed by tags bytes
785         // tags length is serialized with 2 bytes only(short way) even if the type is int. As this
786         // is non -ve numbers, we save the sign bit. See HBASE-11437
787         out.write((byte) (0xff & (tlen >> 8)));
788         out.write((byte) (0xff & tlen));
789         out.write(cell.getTagsArray(), cell.getTagsOffset(), tlen);
790       }
791     }
792   }
793 
794   /**
795    * @return A KeyValue made of a byte array that holds the key-only part.
796    *         Needed to convert hfile index members to KeyValues.
797    */
798   public static KeyValue createKeyValueFromKey(final byte[] b) {
799     return createKeyValueFromKey(b, 0, b.length);
800   }
801 
802   /**
803    * @return A KeyValue made of a byte buffer that holds the key-only part.
804    *         Needed to convert hfile index members to KeyValues.
805    */
806   public static KeyValue createKeyValueFromKey(final ByteBuffer bb) {
807     return createKeyValueFromKey(bb.array(), bb.arrayOffset(), bb.limit());
808   }
809 
810   /**
811    * @return A KeyValue made of a byte array that holds the key-only part.
812    *         Needed to convert hfile index members to KeyValues.
813    */
814   public static KeyValue createKeyValueFromKey(final byte[] b, final int o, final int l) {
815     byte[] newb = new byte[l + KeyValue.ROW_OFFSET];
816     System.arraycopy(b, o, newb, KeyValue.ROW_OFFSET, l);
817     Bytes.putInt(newb, 0, l);
818     Bytes.putInt(newb, Bytes.SIZEOF_INT, 0);
819     return new KeyValue(newb);
820   }
821 }