1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.hadoop.hbase.util;
19
20 import static org.apache.hadoop.hbase.util.Order.ASCENDING;
21 import static org.apache.hadoop.hbase.util.Order.DESCENDING;
22
23 import java.math.BigDecimal;
24 import java.math.BigInteger;
25 import java.math.MathContext;
26 import java.math.RoundingMode;
27 import java.nio.charset.Charset;
28
29 import org.apache.hadoop.hbase.classification.InterfaceAudience;
30 import org.apache.hadoop.hbase.classification.InterfaceStability;
31
32 /**
33 * Utility class that handles ordered byte arrays. That is, unlike
34 * {@link Bytes}, these methods produce byte arrays which maintain the sort
35 * order of the original values.
36 * <h3>Encoding Format summary</h3>
37 * <p>
38 * Each value is encoded as one or more bytes. The first byte of the encoding,
39 * its meaning, and a terse description of the bytes that follow is given by
40 * the following table:
41 * </p>
42 * <table summary="Encodings">
43 * <tr><th>Content Type</th><th>Encoding</th></tr>
44 * <tr><td>NULL</td><td>0x05</td></tr>
45 * <tr><td>negative infinity</td><td>0x07</td></tr>
46 * <tr><td>negative large</td><td>0x08, ~E, ~M</td></tr>
47 * <tr><td>negative medium</td><td>0x13-E, ~M</td></tr>
48 * <tr><td>negative small</td><td>0x14, -E, ~M</td></tr>
49 * <tr><td>zero</td><td>0x15</td></tr>
50 * <tr><td>positive small</td><td>0x16, ~-E, M</td></tr>
51 * <tr><td>positive medium</td><td>0x17+E, M</td></tr>
52 * <tr><td>positive large</td><td>0x22, E, M</td></tr>
53 * <tr><td>positive infinity</td><td>0x23</td></tr>
54 * <tr><td>NaN</td><td>0x25</td></tr>
55 * <tr><td>fixed-length 32-bit integer</td><td>0x27, I</td></tr>
56 * <tr><td>fixed-length 64-bit integer</td><td>0x28, I</td></tr>
57 * <tr><td>fixed-length 8-bit integer</td><td>0x29</td></tr>
58 * <tr><td>fixed-length 16-bit integer</td><td>0x2a</td></tr>
59 * <tr><td>fixed-length 32-bit float</td><td>0x30, F</td></tr>
60 * <tr><td>fixed-length 64-bit float</td><td>0x31, F</td></tr>
61 * <tr><td>TEXT</td><td>0x33, T</td></tr>
62 * <tr><td>variable length BLOB</td><td>0x35, B</td></tr>
63 * <tr><td>byte-for-byte BLOB</td><td>0x36, X</td></tr>
64 * </table>
65 *
66 * <h3>Null Encoding</h3>
67 * <p>
68 * Each value that is a NULL encodes as a single byte of 0x05. Since every
69 * other value encoding begins with a byte greater than 0x05, this forces NULL
70 * values to sort first.
71 * </p>
72 * <h3>Text Encoding</h3>
73 * <p>
74 * Each text value begins with a single byte of 0x33 and ends with a single
75 * byte of 0x00. There are zero or more intervening bytes that encode the text
76 * value. The intervening bytes are chosen so that the encoding will sort in
77 * the desired collating order. The intervening bytes may not contain a 0x00
78 * character; the only 0x00 byte allowed in a text encoding is the final byte.
79 * </p>
80 * <p>
81 * The text encoding ends in 0x00 in order to ensure that when there are two
82 * strings where one is a prefix of the other that the shorter string will
83 * sort first.
84 * </p>
85 * <h3>Binary Encoding</h3>
86 * <p>
87 * There are two encoding strategies for binary fields, referred to as
88 * "BlobVar" and "BlobCopy". BlobVar is less efficient in both space and
89 * encoding time. It has no limitations on the range of encoded values.
90 * BlobCopy is a byte-for-byte copy of the input data followed by a
91 * termination byte. It is extremely fast to encode and decode. It carries the
92 * restriction of not allowing a 0x00 value in the input byte[] as this value
93 * is used as the termination byte.
94 * </p>
95 * <h4>BlobVar</h4>
96 * <p>
97 * "BlobVar" encodes the input byte[] in a manner similar to a variable length
98 * integer encoding. As with the other {@code OrderedBytes} encodings,
99 * the first encoded byte is used to indicate what kind of value follows. This
100 * header byte is 0x37 for BlobVar encoded values. As with the traditional
101 * varint encoding, the most significant bit of each subsequent encoded
102 * {@code byte} is used as a continuation marker. The 7 remaining bits
103 * contain the 7 most significant bits of the first unencoded byte. The next
104 * encoded byte starts with a continuation marker in the MSB. The least
105 * significant bit from the first unencoded byte follows, and the remaining 6
106 * bits contain the 6 MSBs of the second unencoded byte. The encoding
107 * continues, encoding 7 bytes on to 8 encoded bytes. The MSB of the final
108 * encoded byte contains a termination marker rather than a continuation
109 * marker, and any remaining bits from the final input byte. Any trailing bits
110 * in the final encoded byte are zeros.
111 * </p>
112 * <h4>BlobCopy</h4>
113 * <p>
114 * "BlobCopy" is a simple byte-for-byte copy of the input data. It uses 0x38
115 * as the header byte, and is terminated by 0x00 in the DESCENDING case. This
116 * alternative encoding is faster and more space-efficient, but it cannot
117 * accept values containing a 0x00 byte in DESCENDING order.
118 * </p>
119 * <h3>Variable-length Numeric Encoding</h3>
120 * <p>
121 * Numeric values must be coded so as to sort in numeric order. We assume that
122 * numeric values can be both integer and floating point values. Clients must
123 * be careful to use inspection methods for encoded values (such as
124 * {@link #isNumericInfinite(PositionedByteRange)} and
125 * {@link #isNumericNaN(PositionedByteRange)} to protect against decoding
126 * values into object which do not support these numeric concepts (such as
127 * {@link Long} and {@link BigDecimal}).
128 * </p>
129 * <p>
130 * Simplest cases first: If the numeric value is a NaN, then the encoding is a
131 * single byte of 0x25. This causes NaN values to sort after every other
132 * numeric value.
133 * </p>
134 * <p>
135 * If the numeric value is a negative infinity then the encoding is a single
136 * byte of 0x07. Since every other numeric value except NaN has a larger
137 * initial byte, this encoding ensures that negative infinity will sort prior
138 * to every other numeric value other than NaN.
139 * </p>
140 * <p>
141 * If the numeric value is a positive infinity then the encoding is a single
142 * byte of 0x23. Every other numeric value encoding begins with a smaller
143 * byte, ensuring that positive infinity always sorts last among numeric
144 * values. 0x23 is also smaller than 0x33, the initial byte of a text value,
145 * ensuring that every numeric value sorts before every text value.
146 * </p>
147 * <p>
148 * If the numeric value is exactly zero then it is encoded as a single byte of
149 * 0x15. Finite negative values will have initial bytes of 0x08 through 0x14
150 * and finite positive values will have initial bytes of 0x16 through 0x22.
151 * </p>
152 * <p>
153 * For all numeric values, we compute a mantissa M and an exponent E. The
154 * mantissa is a base-100 representation of the value. The exponent E
155 * determines where to put the decimal point.
156 * </p>
157 * <p>
158 * Each centimal digit of the mantissa is stored in a byte. If the value of
159 * the centimal digit is X (hence X≥0 and X≤99) then the byte value will
160 * be 2*X+1 for every byte of the mantissa, except for the last byte which
161 * will be 2*X+0. The mantissa must be the minimum number of bytes necessary
162 * to represent the value; trailing X==0 digits are omitted. This means that
163 * the mantissa will never contain a byte with the value 0x00.
164 * </p>
165 * <p>
166 * If we assume all digits of the mantissa occur to the right of the decimal
167 * point, then the exponent E is the power of one hundred by which one must
168 * multiply the mantissa to recover the original value.
169 * </p>
170 * <p>
171 * Values are classified as large, medium, or small according to the value of
172 * E. If E is 11 or more, the value is large. For E between 0 and 10, the
173 * value is medium. For E less than zero, the value is small.
174 * </p>
175 * <p>
176 * Large positive values are encoded as a single byte 0x22 followed by E as a
177 * varint and then M. Medium positive values are a single byte of 0x17+E
178 * followed by M. Small positive values are encoded as a single byte 0x16
179 * followed by the ones-complement of the varint for -E followed by M.
180 * </p>
181 * <p>
182 * Small negative values are encoded as a single byte 0x14 followed by -E as a
183 * varint and then the ones-complement of M. Medium negative values are
184 * encoded as a byte 0x13-E followed by the ones-complement of M. Large
185 * negative values consist of the single byte 0x08 followed by the
186 * ones-complement of the varint encoding of E followed by the ones-complement
187 * of M.
188 * </p>
189 * <h3>Fixed-length Integer Encoding</h3>
190 * <p>
191 * All 4-byte integers are serialized to a 5-byte, fixed-width, sortable byte
192 * format. All 8-byte integers are serialized to the equivelant 9-byte format.
193 * Serialization is performed by writing a header byte, inverting the integer
194 * sign bit and writing the resulting bytes to the byte array in big endian
195 * order.
196 * </p>
197 * <h3>Fixed-length Floating Point Encoding</h3>
198 * <p>
199 * 32-bit and 64-bit floating point numbers are encoded to a 5-byte and 9-byte
200 * encoding format, respectively. The format is identical, save for the
201 * precision respected in each step of the operation.
202 * <p>
203 * This format ensures the following total ordering of floating point values:
204 * Float.NEGATIVE_INFINITY < -Float.MAX_VALUE < ... <
205 * -Float.MIN_VALUE < -0.0 < +0.0; < Float.MIN_VALUE < ... <
206 * Float.MAX_VALUE < Float.POSITIVE_INFINITY < Float.NaN
207 * </p>
208 * <p>
209 * Floating point numbers are encoded as specified in IEEE 754. A 32-bit
210 * single precision float consists of a sign bit, 8-bit unsigned exponent
211 * encoded in offset-127 notation, and a 23-bit significand. The format is
212 * described further in the <a
213 * href="http://en.wikipedia.org/wiki/Single_precision"> Single Precision
214 * Floating Point Wikipedia page</a>
215 * </p>
216 * <p>
217 * The value of a normal float is -1 <sup>sign bit</sup> ×
218 * 2<sup>exponent - 127</sup> × 1.significand
219 * </p>
220 * <p>
221 * The IEE754 floating point format already preserves sort ordering for
222 * positive floating point numbers when the raw bytes are compared in most
223 * significant byte order. This is discussed further at <a href=
224 * "http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm">
225 * http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm</a>
226 * </p>
227 * <p>
228 * Thus, we need only ensure that negative numbers sort in the the exact
229 * opposite order as positive numbers (so that say, negative infinity is less
230 * than negative 1), and that all negative numbers compare less than any
231 * positive number. To accomplish this, we invert the sign bit of all floating
232 * point numbers, and we also invert the exponent and significand bits if the
233 * floating point number was negative.
234 * </p>
235 * <p>
236 * More specifically, we first store the floating point bits into a 32-bit int
237 * {@code j} using {@link Float#floatToIntBits}. This method collapses
238 * all NaNs into a single, canonical NaN value but otherwise leaves the bits
239 * unchanged. We then compute
240 * </p>
241 *
242 * <pre>
243 * j ˆ= (j >> (Integer.SIZE - 1)) | Integer.MIN_SIZE
244 * </pre>
245 * <p>
246 * which inverts the sign bit and XOR's all other bits with the sign bit
247 * itself. Comparing the raw bytes of {@code j} in most significant byte
248 * order is equivalent to performing a single precision floating point
249 * comparison on the underlying bits (ignoring NaN comparisons, as NaNs don't
250 * compare equal to anything when performing floating point comparisons).
251 * </p>
252 * <p>
253 * The resulting integer is then converted into a byte array by serializing
254 * the integer one byte at a time in most significant byte order. The
255 * serialized integer is prefixed by a single header byte. All serialized
256 * values are 5 bytes in length.
257 * </p>
258 * <p>
259 * {@code OrderedBytes} encodings are heavily influenced by the
260 * <a href="http://sqlite.org/src4/doc/trunk/www/key_encoding.wiki">SQLite4 Key
261 * Encoding</a>. Slight deviations are make in the interest of order
262 * correctness and user extensibility. Fixed-width {@code Long} and
263 * {@link Double} encodings are based on implementations from the now defunct
264 * Orderly library.
265 * </p>
266 */
267 @InterfaceAudience.Public
268 @InterfaceStability.Evolving
269 public class OrderedBytes {
270
271 /*
272 * These constants define header bytes used to identify encoded values. Note
273 * that the values here are not exhaustive as the Numeric format encodes
274 * portions of its value within the header byte. The values listed here are
275 * directly applied to persisted data -- DO NOT modify the values specified
276 * here. Instead, gaps are placed intentionally between values so that new
277 * implementations can be inserted into the total ordering enforced here.
278 */
279 private static final byte NULL = 0x05;
280 // room for 1 expansion type
281 private static final byte NEG_INF = 0x07;
282 private static final byte NEG_LARGE = 0x08;
283 private static final byte NEG_MED_MIN = 0x09;
284 private static final byte NEG_MED_MAX = 0x13;
285 private static final byte NEG_SMALL = 0x14;
286 private static final byte ZERO = 0x15;
287 private static final byte POS_SMALL = 0x16;
288 private static final byte POS_MED_MIN = 0x17;
289 private static final byte POS_MED_MAX = 0x21;
290 private static final byte POS_LARGE = 0x22;
291 private static final byte POS_INF = 0x23;
292 // room for 2 expansion type
293 private static final byte NAN = 0x26;
294 // room for 2 expansion types
295 private static final byte FIXED_INT8 = 0x29;
296 private static final byte FIXED_INT16 = 0x2a;
297 private static final byte FIXED_INT32 = 0x2b;
298 private static final byte FIXED_INT64 = 0x2c;
299 // room for 3 expansion types
300 private static final byte FIXED_FLOAT32 = 0x30;
301 private static final byte FIXED_FLOAT64 = 0x31;
302 // room for 2 expansion type
303 private static final byte TEXT = 0x34;
304 // room for 2 expansion type
305 private static final byte BLOB_VAR = 0x37;
306 private static final byte BLOB_COPY = 0x38;
307
308 /*
309 * The following constant values are used by encoding implementations
310 */
311
312 public static final Charset UTF8 = Charset.forName("UTF-8");
313 private static final byte TERM = 0x00;
314 private static final BigDecimal E8 = BigDecimal.valueOf(1e8);
315 private static final BigDecimal E32 = BigDecimal.valueOf(1e32);
316 private static final BigDecimal EN2 = BigDecimal.valueOf(1e-2);
317 private static final BigDecimal EN10 = BigDecimal.valueOf(1e-10);
318
319 /**
320 * Max precision guaranteed to fit into a {@code long}.
321 */
322 public static final int MAX_PRECISION = 31;
323
324 /**
325 * The context used to normalize {@link BigDecimal} values.
326 */
327 public static final MathContext DEFAULT_MATH_CONTEXT =
328 new MathContext(MAX_PRECISION, RoundingMode.HALF_UP);
329
330 /**
331 * Creates the standard exception when the encoded header byte is unexpected for the decoding
332 * context.
333 * @param header value used in error message.
334 */
335 private static IllegalArgumentException unexpectedHeader(byte header) {
336 throw new IllegalArgumentException("unexpected value in first byte: 0x"
337 + Long.toHexString(header));
338 }
339
340 /**
341 * Perform unsigned comparison between two long values. Conforms to the same interface as
342 * {@link Comparator#compare(Object, Object)}.
343 */
344 private static int unsignedCmp(long x1, long x2) {
345 int cmp;
346 if ((cmp = (x1 < x2 ? -1 : (x1 == x2 ? 0 : 1))) == 0) return 0;
347 // invert the result when either value is negative
348 if ((x1 < 0) != (x2 < 0)) return -cmp;
349 return cmp;
350 }
351
352 /**
353 * Write a 32-bit unsigned integer to {@code dst} as 4 big-endian bytes.
354 * @return number of bytes written.
355 */
356 private static int putUint32(PositionedByteRange dst, int val) {
357 dst.put((byte) (val >>> 24))
358 .put((byte) (val >>> 16))
359 .put((byte) (val >>> 8))
360 .put((byte) val);
361 return 4;
362 }
363
364 /**
365 * Encode an unsigned 64-bit unsigned integer {@code val} into {@code dst}.
366 * @param dst The destination to which encoded bytes are written.
367 * @param val The value to write.
368 * @param comp Compliment the encoded value when {@code comp} is true.
369 * @return number of bytes written.
370 */
371 @InterfaceAudience.Private
372 static int putVaruint64(PositionedByteRange dst, long val, boolean comp) {
373 int w, y, len = 0;
374 final int offset = dst.getOffset(), start = dst.getPosition();
375 byte[] a = dst.getBytes();
376 Order ord = comp ? DESCENDING : ASCENDING;
377 if (-1 == unsignedCmp(val, 241L)) {
378 dst.put((byte) val);
379 len = dst.getPosition() - start;
380 ord.apply(a, offset + start, len);
381 return len;
382 }
383 if (-1 == unsignedCmp(val, 2288L)) {
384 y = (int) (val - 240);
385 dst.put((byte) (y / 256 + 241))
386 .put((byte) (y % 256));
387 len = dst.getPosition() - start;
388 ord.apply(a, offset + start, len);
389 return len;
390 }
391 if (-1 == unsignedCmp(val, 67824L)) {
392 y = (int) (val - 2288);
393 dst.put((byte) 249)
394 .put((byte) (y / 256))
395 .put((byte) (y % 256));
396 len = dst.getPosition() - start;
397 ord.apply(a, offset + start, len);
398 return len;
399 }
400 y = (int) val;
401 w = (int) (val >>> 32);
402 if (w == 0) {
403 if (-1 == unsignedCmp(y, 16777216L)) {
404 dst.put((byte) 250)
405 .put((byte) (y >>> 16))
406 .put((byte) (y >>> 8))
407 .put((byte) y);
408 len = dst.getPosition() - start;
409 ord.apply(a, offset + start, len);
410 return len;
411 }
412 dst.put((byte) 251);
413 putUint32(dst, y);
414 len = dst.getPosition() - start;
415 ord.apply(a, offset + start, len);
416 return len;
417 }
418 if (-1 == unsignedCmp(w, 256L)) {
419 dst.put((byte) 252)
420 .put((byte) w);
421 putUint32(dst, y);
422 len = dst.getPosition() - start;
423 ord.apply(a, offset + start, len);
424 return len;
425 }
426 if (-1 == unsignedCmp(w, 65536L)) {
427 dst.put((byte) 253)
428 .put((byte) (w >>> 8))
429 .put((byte) w);
430 putUint32(dst, y);
431 len = dst.getPosition() - start;
432 ord.apply(a, offset + start, len);
433 return len;
434 }
435 if (-1 == unsignedCmp(w, 16777216L)) {
436 dst.put((byte) 254)
437 .put((byte) (w >>> 16))
438 .put((byte) (w >>> 8))
439 .put((byte) w);
440 putUint32(dst, y);
441 len = dst.getPosition() - start;
442 ord.apply(a, offset + start, len);
443 return len;
444 }
445 dst.put((byte) 255);
446 putUint32(dst, w);
447 putUint32(dst, y);
448 len = dst.getPosition() - start;
449 ord.apply(a, offset + start, len);
450 return len;
451 }
452
453 /**
454 * Inspect {@code src} for an encoded varuint64 for its length in bytes.
455 * Preserves the state of {@code src}.
456 * @param src source buffer
457 * @param comp if true, parse the compliment of the value.
458 * @return the number of bytes consumed by this value.
459 */
460 @InterfaceAudience.Private
461 static int lengthVaruint64(PositionedByteRange src, boolean comp) {
462 int a0 = (comp ? DESCENDING : ASCENDING).apply(src.peek()) & 0xff;
463 if (a0 <= 240) return 1;
464 if (a0 <= 248) return 2;
465 if (a0 == 249) return 3;
466 if (a0 == 250) return 4;
467 if (a0 == 251) return 5;
468 if (a0 == 252) return 6;
469 if (a0 == 253) return 7;
470 if (a0 == 254) return 8;
471 if (a0 == 255) return 9;
472 throw unexpectedHeader(src.peek());
473 }
474
475 /**
476 * Skip {@code src} over the encoded varuint64.
477 * @param src source buffer
478 * @param cmp if true, parse the compliment of the value.
479 * @return the number of bytes skipped.
480 */
481 @InterfaceAudience.Private
482 static int skipVaruint64(PositionedByteRange src, boolean cmp) {
483 final int len = lengthVaruint64(src, cmp);
484 src.setPosition(src.getPosition() + len);
485 return len;
486 }
487
488 /**
489 * Decode a sequence of bytes in {@code src} as a varuint64. Compliment the
490 * encoded value when {@code comp} is true.
491 * @return the decoded value.
492 */
493 @InterfaceAudience.Private
494 static long getVaruint64(PositionedByteRange src, boolean comp) {
495 assert src.getRemaining() >= lengthVaruint64(src, comp);
496 final long ret;
497 Order ord = comp ? DESCENDING : ASCENDING;
498 byte x = src.get();
499 final int a0 = ord.apply(x) & 0xff, a1, a2, a3, a4, a5, a6, a7, a8;
500 if (-1 == unsignedCmp(a0, 241)) {
501 return a0;
502 }
503 x = src.get();
504 a1 = ord.apply(x) & 0xff;
505 if (-1 == unsignedCmp(a0, 249)) {
506 return (a0 - 241L) * 256 + a1 + 240;
507 }
508 x = src.get();
509 a2 = ord.apply(x) & 0xff;
510 if (a0 == 249) {
511 return 2288L + 256 * a1 + a2;
512 }
513 x = src.get();
514 a3 = ord.apply(x) & 0xff;
515 if (a0 == 250) {
516 return ((long) a1 << 16L) | (a2 << 8) | a3;
517 }
518 x = src.get();
519 a4 = ord.apply(x) & 0xff;
520 ret = (((long) a1) << 24) | (a2 << 16) | (a3 << 8) | a4;
521 if (a0 == 251) {
522 return ret;
523 }
524 x = src.get();
525 a5 = ord.apply(x) & 0xff;
526 if (a0 == 252) {
527 return (ret << 8) | a5;
528 }
529 x = src.get();
530 a6 = ord.apply(x) & 0xff;
531 if (a0 == 253) {
532 return (ret << 16) | (a5 << 8) | a6;
533 }
534 x = src.get();
535 a7 = ord.apply(x) & 0xff;
536 if (a0 == 254) {
537 return (ret << 24) | (a5 << 16) | (a6 << 8) | a7;
538 }
539 x = src.get();
540 a8 = ord.apply(x) & 0xff;
541 return (ret << 32) | (((long) a5) << 24) | (a6 << 16) | (a7 << 8) | a8;
542 }
543
544 /**
545 * Strip all trailing zeros to ensure that no digit will be zero and round
546 * using our default context to ensure precision doesn't exceed max allowed.
547 * From Phoenix's {@code NumberUtil}.
548 * @return new {@link BigDecimal} instance
549 */
550 @InterfaceAudience.Private
551 static BigDecimal normalize(BigDecimal val) {
552 return null == val ? null : val.stripTrailingZeros().round(DEFAULT_MATH_CONTEXT);
553 }
554
555 /**
556 * Read significand digits from {@code src} according to the magnitude
557 * of {@code e}.
558 * @param src The source from which to read encoded digits.
559 * @param e The magnitude of the first digit read.
560 * @param comp Treat encoded bytes as compliments when {@code comp} is true.
561 * @return The decoded value.
562 * @throws IllegalArgumentException when read exceeds the remaining length
563 * of {@code src}.
564 */
565 private static BigDecimal decodeSignificand(PositionedByteRange src, int e, boolean comp) {
566 // TODO: can this be made faster?
567 byte[] a = src.getBytes();
568 final int start = src.getPosition(), offset = src.getOffset(), remaining = src.getRemaining();
569 Order ord = comp ? DESCENDING : ASCENDING;
570 BigDecimal m = BigDecimal.ZERO;
571 e--;
572 for (int i = 0;; i++) {
573 if (i > remaining) {
574 // we've exceeded this range's window
575 src.setPosition(start);
576 throw new IllegalArgumentException(
577 "Read exceeds range before termination byte found. offset: " + offset + " position: "
578 + (start + i));
579 }
580 // base-100 digits are encoded as val * 2 + 1 except for the termination digit.
581 m = m.add( // m +=
582 new BigDecimal(BigInteger.ONE, e * -2).multiply( // 100 ^ p * [decoded digit]
583 BigDecimal.valueOf((ord.apply(a[offset + start + i]) & 0xff) / 2)));
584 e--;
585 // detect termination digit
586 if ((ord.apply(a[offset + start + i]) & 1) == 0) {
587 src.setPosition(start + i + 1);
588 break;
589 }
590 }
591 return normalize(m);
592 }
593
594 /**
595 * Skip {@code src} over the significand bytes.
596 * @param src The source from which to read encoded digits.
597 * @param comp Treat encoded bytes as compliments when {@code comp} is true.
598 * @return the number of bytes skipped.
599 */
600 private static int skipSignificand(PositionedByteRange src, boolean comp) {
601 byte[] a = src.getBytes();
602 final int offset = src.getOffset(), start = src.getPosition();
603 int i = src.getPosition();
604 while (((comp ? DESCENDING : ASCENDING).apply(a[offset + i++]) & 1) != 0)
605 ;
606 src.setPosition(i);
607 return i - start;
608 }
609
610 /**
611 * <p>
612 * Encode the small magnitude floating point number {@code val} using the
613 * key encoding. The caller guarantees that 1.0 > abs(val) > 0.0.
614 * </p>
615 * <p>
616 * A floating point value is encoded as an integer exponent {@code E} and a
617 * mantissa {@code M}. The original value is equal to {@code (M * 100^E)}.
618 * {@code E} is set to the smallest value possible without making {@code M}
619 * greater than or equal to 1.0.
620 * </p>
621 * <p>
622 * For this routine, {@code E} will always be zero or negative, since the
623 * original value is less than one. The encoding written by this routine is
624 * the ones-complement of the varint of the negative of {@code E} followed
625 * by the mantissa:
626 * <pre>
627 * Encoding: ~-E M
628 * </pre>
629 * </p>
630 * @param dst The destination to which encoded digits are written.
631 * @param val The value to encode.
632 * @return the number of bytes written.
633 */
634 private static int encodeNumericSmall(PositionedByteRange dst, BigDecimal val) {
635 // TODO: this can be done faster?
636 // assert 1.0 > abs(val) > 0.0
637 BigDecimal abs = val.abs();
638 assert BigDecimal.ZERO.compareTo(abs) < 0 && BigDecimal.ONE.compareTo(abs) > 0;
639 byte[] a = dst.getBytes();
640 boolean isNeg = val.signum() == -1;
641 final int offset = dst.getOffset(), start = dst.getPosition();
642 int e = 0, d, startM;
643
644 if (isNeg) { /* Small negative number: 0x14, -E, ~M */
645 dst.put(NEG_SMALL);
646 } else { /* Small positive number: 0x16, ~-E, M */
647 dst.put(POS_SMALL);
648 }
649
650 // normalize abs(val) to determine E
651 while (abs.compareTo(EN10) < 0) { abs = abs.movePointRight(8); e += 4; }
652 while (abs.compareTo(EN2) < 0) { abs = abs.movePointRight(2); e++; }
653
654 putVaruint64(dst, e, !isNeg); // encode appropriate E value.
655
656 // encode M by peeling off centimal digits, encoding x as 2x+1
657 startM = dst.getPosition();
658 // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
659 // numeric scale.
660 for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
661 abs = abs.movePointRight(2);
662 d = abs.intValue();
663 dst.put((byte) ((2 * d + 1) & 0xff));
664 abs = abs.subtract(BigDecimal.valueOf(d));
665 }
666 // terminal digit should be 2x
667 a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
668 if (isNeg) {
669 // negative values encoded as ~M
670 DESCENDING.apply(a, offset + startM, dst.getPosition() - startM);
671 }
672 return dst.getPosition() - start;
673 }
674
675 /**
676 * Encode the large magnitude floating point number {@code val} using
677 * the key encoding. The caller guarantees that {@code val} will be
678 * finite and abs(val) >= 1.0.
679 * <p>
680 * A floating point value is encoded as an integer exponent {@code E}
681 * and a mantissa {@code M}. The original value is equal to
682 * {@code (M * 100^E)}. {@code E} is set to the smallest value
683 * possible without making {@code M} greater than or equal to 1.0.
684 * </p>
685 * <p>
686 * Each centimal digit of the mantissa is stored in a byte. If the value of
687 * the centimal digit is {@code X} (hence {@code X>=0} and
688 * {@code X<=99}) then the byte value will be {@code 2*X+1} for
689 * every byte of the mantissa, except for the last byte which will be
690 * {@code 2*X+0}. The mantissa must be the minimum number of bytes
691 * necessary to represent the value; trailing {@code X==0} digits are
692 * omitted. This means that the mantissa will never contain a byte with the
693 * value {@code 0x00}.
694 * </p>
695 * <p>
696 * If {@code E > 10}, then this routine writes of {@code E} as a
697 * varint followed by the mantissa as described above. Otherwise, if
698 * {@code E <= 10}, this routine only writes the mantissa and leaves
699 * the {@code E} value to be encoded as part of the opening byte of the
700 * field by the calling function.
701 *
702 * <pre>
703 * Encoding: M (if E<=10)
704 * E M (if E>10)
705 * </pre>
706 * </p>
707 * @param dst The destination to which encoded digits are written.
708 * @param val The value to encode.
709 * @return the number of bytes written.
710 */
711 private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
712 // TODO: this can be done faster
713 BigDecimal abs = val.abs();
714 byte[] a = dst.getBytes();
715 boolean isNeg = val.signum() == -1;
716 final int start = dst.getPosition(), offset = dst.getOffset();
717 int e = 0, d, startM;
718
719 if (isNeg) { /* Large negative number: 0x08, ~E, ~M */
720 dst.put(NEG_LARGE);
721 } else { /* Large positive number: 0x22, E, M */
722 dst.put(POS_LARGE);
723 }
724
725 // normalize abs(val) to determine E
726 while (abs.compareTo(E32) >= 0 && e <= 350) { abs = abs.movePointLeft(32); e +=16; }
727 while (abs.compareTo(E8) >= 0 && e <= 350) { abs = abs.movePointLeft(8); e+= 4; }
728 while (abs.compareTo(BigDecimal.ONE) >= 0 && e <= 350) { abs = abs.movePointLeft(2); e++; }
729
730 // encode appropriate header byte and/or E value.
731 if (e > 10) { /* large number, write out {~,}E */
732 putVaruint64(dst, e, isNeg);
733 } else {
734 if (isNeg) { /* Medium negative number: 0x13-E, ~M */
735 dst.put(start, (byte) (NEG_MED_MAX - e));
736 } else { /* Medium positive number: 0x17+E, M */
737 dst.put(start, (byte) (POS_MED_MIN + e));
738 }
739 }
740
741 // encode M by peeling off centimal digits, encoding x as 2x+1
742 startM = dst.getPosition();
743 // TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
744 // numeric scale.
745 for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
746 abs = abs.movePointRight(2);
747 d = abs.intValue();
748 dst.put((byte) (2 * d + 1));
749 abs = abs.subtract(BigDecimal.valueOf(d));
750 }
751 // terminal digit should be 2x
752 a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
753 if (isNeg) {
754 // negative values encoded as ~M
755 DESCENDING.apply(a, offset + startM, dst.getPosition() - startM);
756 }
757 return dst.getPosition() - start;
758 }
759
760 /**
761 * Encode a numerical value using the variable-length encoding.
762 * @param dst The destination to which encoded digits are written.
763 * @param val The value to encode.
764 * @param ord The {@link Order} to respect while encoding {@code val}.
765 * @return the number of bytes written.
766 */
767 public static int encodeNumeric(PositionedByteRange dst, long val, Order ord) {
768 return encodeNumeric(dst, BigDecimal.valueOf(val), ord);
769 }
770
771 /**
772 * Encode a numerical value using the variable-length encoding.
773 * @param dst The destination to which encoded digits are written.
774 * @param val The value to encode.
775 * @param ord The {@link Order} to respect while encoding {@code val}.
776 * @return the number of bytes written.
777 */
778 public static int encodeNumeric(PositionedByteRange dst, double val, Order ord) {
779 if (val == 0.0) {
780 dst.put(ord.apply(ZERO));
781 return 1;
782 }
783 if (Double.isNaN(val)) {
784 dst.put(ord.apply(NAN));
785 return 1;
786 }
787 if (val == Double.NEGATIVE_INFINITY) {
788 dst.put(ord.apply(NEG_INF));
789 return 1;
790 }
791 if (val == Double.POSITIVE_INFINITY) {
792 dst.put(ord.apply(POS_INF));
793 return 1;
794 }
795 return encodeNumeric(dst, BigDecimal.valueOf(val), ord);
796 }
797
798 /**
799 * Encode a numerical value using the variable-length encoding.
800 * @param dst The destination to which encoded digits are written.
801 * @param val The value to encode.
802 * @param ord The {@link Order} to respect while encoding {@code val}.
803 * @return the number of bytes written.
804 */
805 public static int encodeNumeric(PositionedByteRange dst, BigDecimal val, Order ord) {
806 final int len, offset = dst.getOffset(), start = dst.getPosition();
807 if (null == val) {
808 return encodeNull(dst, ord);
809 } else if (BigDecimal.ZERO.compareTo(val) == 0) {
810 dst.put(ord.apply(ZERO));
811 return 1;
812 }
813 BigDecimal abs = val.abs();
814 if (BigDecimal.ONE.compareTo(abs) <= 0) { // abs(v) >= 1.0
815 len = encodeNumericLarge(dst, normalize(val));
816 } else { // 1.0 > abs(v) >= 0.0
817 len = encodeNumericSmall(dst, normalize(val));
818 }
819 ord.apply(dst.getBytes(), offset + start, len);
820 return len;
821 }
822
823 /**
824 * Decode a {@link BigDecimal} from {@code src}. Assumes {@code src} encodes
825 * a value in Numeric encoding and is within the valid range of
826 * {@link BigDecimal} values. {@link BigDecimal} does not support {@code NaN}
827 * or {@code Infinte} values.
828 * @see #decodeNumericAsDouble(PositionedByteRange)
829 */
830 private static BigDecimal decodeNumericValue(PositionedByteRange src) {
831 final int e;
832 byte header = src.get();
833 boolean dsc = -1 == Integer.signum(header);
834 header = dsc ? DESCENDING.apply(header) : header;
835
836 if (header == NULL) return null;
837 if (header == NEG_LARGE) { /* Large negative number: 0x08, ~E, ~M */
838 e = (int) getVaruint64(src, !dsc);
839 return decodeSignificand(src, e, !dsc).negate();
840 }
841 if (header >= NEG_MED_MIN && header <= NEG_MED_MAX) {
842 /* Medium negative number: 0x13-E, ~M */
843 e = NEG_MED_MAX - header;
844 return decodeSignificand(src, e, !dsc).negate();
845 }
846 if (header == NEG_SMALL) { /* Small negative number: 0x14, -E, ~M */
847 e = (int) -getVaruint64(src, dsc);
848 return decodeSignificand(src, e, !dsc).negate();
849 }
850 if (header == ZERO) {
851 return BigDecimal.ZERO;
852 }
853 if (header == POS_SMALL) { /* Small positive number: 0x16, ~-E, M */
854 e = (int) -getVaruint64(src, !dsc);
855 return decodeSignificand(src, e, dsc);
856 }
857 if (header >= POS_MED_MIN && header <= POS_MED_MAX) {
858 /* Medium positive number: 0x17+E, M */
859 e = header - POS_MED_MIN;
860 return decodeSignificand(src, e, dsc);
861 }
862 if (header == POS_LARGE) { /* Large positive number: 0x22, E, M */
863 e = (int) getVaruint64(src, dsc);
864 return decodeSignificand(src, e, dsc);
865 }
866 throw unexpectedHeader(header);
867 }
868
869 /**
870 * Decode a primitive {@code double} value from the Numeric encoding. Numeric
871 * encoding is based on {@link BigDecimal}; in the event the encoded value is
872 * larger than can be represented in a {@code double}, this method performs
873 * an implicit narrowing conversion as described in
874 * {@link BigDecimal#doubleValue()}.
875 * @throws NullPointerException when the encoded value is {@code NULL}.
876 * @throws IllegalArgumentException when the encoded value is not a Numeric.
877 * @see #encodeNumeric(PositionedByteRange, double, Order)
878 * @see BigDecimal#doubleValue()
879 */
880 public static double decodeNumericAsDouble(PositionedByteRange src) {
881 // TODO: should an encoded NULL value throw unexpectedHeader() instead?
882 if (isNull(src)) {
883 throw new NullPointerException("A null value cannot be decoded to a double.");
884 }
885 if (isNumericNaN(src)) {
886 src.get();
887 return Double.NaN;
888 }
889 if (isNumericZero(src)) {
890 src.get();
891 return Double.valueOf(0.0);
892 }
893
894 byte header = -1 == Integer.signum(src.peek()) ? DESCENDING.apply(src.peek()) : src.peek();
895
896 if (header == NEG_INF) {
897 src.get();
898 return Double.NEGATIVE_INFINITY;
899 } else if (header == POS_INF) {
900 src.get();
901 return Double.POSITIVE_INFINITY;
902 } else {
903 return decodeNumericValue(src).doubleValue();
904 }
905 }
906
907 /**
908 * Decode a primitive {@code long} value from the Numeric encoding. Numeric
909 * encoding is based on {@link BigDecimal}; in the event the encoded value is
910 * larger than can be represented in a {@code long}, this method performs an
911 * implicit narrowing conversion as described in
912 * {@link BigDecimal#doubleValue()}.
913 * @throws NullPointerException when the encoded value is {@code NULL}.
914 * @throws IllegalArgumentException when the encoded value is not a Numeric.
915 * @see #encodeNumeric(PositionedByteRange, long, Order)
916 * @see BigDecimal#longValue()
917 */
918 public static long decodeNumericAsLong(PositionedByteRange src) {
919 // TODO: should an encoded NULL value throw unexpectedHeader() instead?
920 if (isNull(src)) throw new NullPointerException();
921 if (!isNumeric(src)) throw unexpectedHeader(src.peek());
922 if (isNumericNaN(src)) throw unexpectedHeader(src.peek());
923 if (isNumericInfinite(src)) throw unexpectedHeader(src.peek());
924
925 if (isNumericZero(src)) {
926 src.get();
927 return Long.valueOf(0);
928 }
929 return decodeNumericValue(src).longValue();
930 }
931
932 /**
933 * Decode a {@link BigDecimal} value from the variable-length encoding.
934 * @throws IllegalArgumentException when the encoded value is not a Numeric.
935 * @see #encodeNumeric(PositionedByteRange, BigDecimal, Order)
936 */
937 public static BigDecimal decodeNumericAsBigDecimal(PositionedByteRange src) {
938 if (isNull(src)) {
939 src.get();
940 return null;
941 }
942 if (!isNumeric(src)) throw unexpectedHeader(src.peek());
943 if (isNumericNaN(src)) throw unexpectedHeader(src.peek());
944 if (isNumericInfinite(src)) throw unexpectedHeader(src.peek());
945 return decodeNumericValue(src);
946 }
947
948 /**
949 * Encode a String value. String encoding is 0x00-terminated and so it does
950 * not support {@code \u0000} codepoints in the value.
951 * @param dst The destination to which the encoded value is written.
952 * @param val The value to encode.
953 * @param ord The {@link Order} to respect while encoding {@code val}.
954 * @return the number of bytes written.
955 * @throws IllegalArgumentException when {@code val} contains a {@code \u0000}.
956 */
957 public static int encodeString(PositionedByteRange dst, String val, Order ord) {
958 if (null == val) {
959 return encodeNull(dst, ord);
960 }
961 if (val.contains("\u0000"))
962 throw new IllegalArgumentException("Cannot encode String values containing '\\u0000'");
963 final int offset = dst.getOffset(), start = dst.getPosition();
964 dst.put(TEXT);
965 // TODO: is there no way to decode into dst directly?
966 dst.put(val.getBytes(UTF8));
967 dst.put(TERM);
968 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start);
969 return dst.getPosition() - start;
970 }
971
972 /**
973 * Decode a String value.
974 */
975 public static String decodeString(PositionedByteRange src) {
976 final byte header = src.get();
977 if (header == NULL || header == DESCENDING.apply(NULL))
978 return null;
979 assert header == TEXT || header == DESCENDING.apply(TEXT);
980 Order ord = header == TEXT ? ASCENDING : DESCENDING;
981 byte[] a = src.getBytes();
982 final int offset = src.getOffset(), start = src.getPosition();
983 final byte terminator = ord.apply(TERM);
984 int rawStartPos = offset + start, rawTermPos = rawStartPos;
985 for (; a[rawTermPos] != terminator; rawTermPos++)
986 ;
987 src.setPosition(rawTermPos - offset + 1); // advance position to TERM + 1
988 if (DESCENDING == ord) {
989 // make a copy so that we don't disturb encoded value with ord.
990 byte[] copy = new byte[rawTermPos - rawStartPos];
991 System.arraycopy(a, rawStartPos, copy, 0, copy.length);
992 ord.apply(copy);
993 return new String(copy, UTF8);
994 } else {
995 return new String(a, rawStartPos, rawTermPos - rawStartPos, UTF8);
996 }
997 }
998
999 /**
1000 * Calculate the expected BlobVar encoded length based on unencoded length.
1001 */
1002 public static int blobVarEncodedLength(int len) {
1003 if (0 == len)
1004 return 2; // 1-byte header + 1-byte terminator
1005 else
1006 return (int)
1007 Math.ceil(
1008 (len * 8) // 8-bits per input byte
1009 / 7.0) // 7-bits of input data per encoded byte, rounded up
1010 + 1; // + 1-byte header
1011 }
1012
1013 /**
1014 * Calculate the expected BlobVar decoded length based on encoded length.
1015 */
1016 @InterfaceAudience.Private
1017 static int blobVarDecodedLength(int len) {
1018 return
1019 ((len
1020 - 1) // 1-byte header
1021 * 7) // 7-bits of payload per encoded byte
1022 / 8; // 8-bits per byte
1023 }
1024
1025 /**
1026 * Encode a Blob value using a modified varint encoding scheme.
1027 * <p>
1028 * This format encodes a byte[] value such that no limitations on the input
1029 * value are imposed. The first byte encodes the encoding scheme that
1030 * follows, {@link #BLOB_VAR}. Each encoded byte thereafter consists of a
1031 * header bit followed by 7 bits of payload. A header bit of '1' indicates
1032 * continuation of the encoding. A header bit of '0' indicates this byte
1033 * contains the last of the payload. An empty input value is encoded as the
1034 * header byte immediately followed by a termination byte {@code 0x00}. This
1035 * is not ambiguous with the encoded value of {@code []}, which results in
1036 * {@code [0x80, 0x00]}.
1037 * </p>
1038 * @return the number of bytes written.
1039 */
1040 public static int encodeBlobVar(PositionedByteRange dst, byte[] val, int voff, int vlen,
1041 Order ord) {
1042 if (null == val) {
1043 return encodeNull(dst, ord);
1044 }
1045 // Empty value is null-terminated. All other values are encoded as 7-bits per byte.
1046 assert dst.getRemaining() >= blobVarEncodedLength(vlen) : "buffer overflow expected.";
1047 final int offset = dst.getOffset(), start = dst.getPosition();
1048 dst.put(BLOB_VAR);
1049 if (0 == vlen) {
1050 dst.put(TERM);
1051 } else {
1052 byte s = 1, t = 0;
1053 for (int i = voff; i < vlen; i++) {
1054 dst.put((byte) (0x80 | t | ((val[i] & 0xff) >>> s)));
1055 if (s < 7) {
1056 t = (byte) (val[i] << (7 - s));
1057 s++;
1058 } else {
1059 dst.put((byte) (0x80 | val[i]));
1060 s = 1;
1061 t = 0;
1062 }
1063 }
1064 if (s > 1) {
1065 dst.put((byte) (0x7f & t));
1066 } else {
1067 dst.getBytes()[offset + dst.getPosition() - 1] =
1068 (byte) (dst.getBytes()[offset + dst.getPosition() - 1] & 0x7f);
1069 }
1070 }
1071 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start);
1072 return dst.getPosition() - start;
1073 }
1074
1075 /**
1076 * Encode a blob value using a modified varint encoding scheme.
1077 * @return the number of bytes written.
1078 * @see #encodeBlobVar(PositionedByteRange, byte[], int, int, Order)
1079 */
1080 public static int encodeBlobVar(PositionedByteRange dst, byte[] val, Order ord) {
1081 return encodeBlobVar(dst, val, 0, null != val ? val.length : 0, ord);
1082 }
1083
1084 /**
1085 * Decode a blob value that was encoded using BlobVar encoding.
1086 */
1087 public static byte[] decodeBlobVar(PositionedByteRange src) {
1088 final byte header = src.get();
1089 if (header == NULL || header == DESCENDING.apply(NULL)) {
1090 return null;
1091 }
1092 assert header == BLOB_VAR || header == DESCENDING.apply(BLOB_VAR);
1093 Order ord = BLOB_VAR == header ? ASCENDING : DESCENDING;
1094 if (src.peek() == ord.apply(TERM)) {
1095 // skip empty input buffer.
1096 src.get();
1097 return new byte[0];
1098 }
1099 final int offset = src.getOffset(), start = src.getPosition();
1100 int end;
1101 byte[] a = src.getBytes();
1102 for (end = start; (byte) (ord.apply(a[offset + end]) & 0x80) != TERM; end++)
1103 ;
1104 end++; // increment end to 1-past last byte
1105 // create ret buffer using length of encoded data + 1 (header byte)
1106 PositionedByteRange ret = new SimplePositionedMutableByteRange(blobVarDecodedLength(end - start
1107 + 1));
1108 int s = 6;
1109 byte t = (byte) ((ord.apply(a[offset + start]) << 1) & 0xff);
1110 for (int i = start + 1; i < end; i++) {
1111 if (s == 7) {
1112 ret.put((byte) (t | (ord.apply(a[offset + i]) & 0x7f)));
1113 i++;
1114 // explicitly reset t -- clean up overflow buffer after decoding
1115 // a full cycle and retain assertion condition below. This happens
1116 t = 0; // when the LSB in the last encoded byte is 1. (HBASE-9893)
1117 } else {
1118 ret.put((byte) (t | ((ord.apply(a[offset + i]) & 0x7f) >>> s)));
1119 }
1120 if (i == end) break;
1121 t = (byte) ((ord.apply(a[offset + i]) << (8 - s)) & 0xff);
1122 s = s == 1 ? 7 : s - 1;
1123 }
1124 src.setPosition(end);
1125 assert t == 0 : "Unexpected bits remaining after decoding blob.";
1126 assert ret.getPosition() == ret.getLength() : "Allocated unnecessarily large return buffer.";
1127 return ret.getBytes();
1128 }
1129
1130 /**
1131 * Encode a Blob value as a byte-for-byte copy. BlobCopy encoding in
1132 * DESCENDING order is NULL terminated so as to preserve proper sorting of
1133 * {@code []} and so it does not support {@code 0x00} in the value.
1134 * @return the number of bytes written.
1135 * @throws IllegalArgumentException when {@code ord} is DESCENDING and
1136 * {@code val} contains a {@code 0x00} byte.
1137 */
1138 public static int encodeBlobCopy(PositionedByteRange dst, byte[] val, int voff, int vlen,
1139 Order ord) {
1140 if (null == val) {
1141 encodeNull(dst, ord);
1142 if (ASCENDING == ord) return 1;
1143 else {
1144 // DESCENDING ordered BlobCopy requires a termination bit to preserve
1145 // sort-order semantics of null values.
1146 dst.put(ord.apply(TERM));
1147 return 2;
1148 }
1149 }
1150 // Blobs as final entry in a compound key are written unencoded.
1151 assert dst.getRemaining() >= vlen + (ASCENDING == ord ? 1 : 2);
1152 if (DESCENDING == ord) {
1153 for (int i = 0; i < vlen; i++) {
1154 if (TERM == val[voff + i]) {
1155 throw new IllegalArgumentException("0x00 bytes not permitted in value.");
1156 }
1157 }
1158 }
1159 final int offset = dst.getOffset(), start = dst.getPosition();
1160 dst.put(BLOB_COPY);
1161 dst.put(val, voff, vlen);
1162 // DESCENDING ordered BlobCopy requires a termination bit to preserve
1163 // sort-order semantics of null values.
1164 if (DESCENDING == ord) dst.put(TERM);
1165 ord.apply(dst.getBytes(), offset + start, dst.getPosition() - start);
1166 return dst.getPosition() - start;
1167 }
1168
1169 /**
1170 * Encode a Blob value as a byte-for-byte copy. BlobCopy encoding in
1171 * DESCENDING order is NULL terminated so as to preserve proper sorting of
1172 * {@code []} and so it does not support {@code 0x00} in the value.
1173 * @return the number of bytes written.
1174 * @throws IllegalArgumentException when {@code ord} is DESCENDING and
1175 * {@code val} contains a {@code 0x00} byte.
1176 * @see #encodeBlobCopy(PositionedByteRange, byte[], int, int, Order)
1177 */
1178 public static int encodeBlobCopy(PositionedByteRange dst, byte[] val, Order ord) {
1179 return encodeBlobCopy(dst, val, 0, null != val ? val.length : 0, ord);
1180 }
1181
1182 /**
1183 * Decode a Blob value, byte-for-byte copy.
1184 * @see #encodeBlobCopy(PositionedByteRange, byte[], int, int, Order)
1185 */
1186 public static byte[] decodeBlobCopy(PositionedByteRange src) {
1187 byte header = src.get();
1188 if (header == NULL || header == DESCENDING.apply(NULL)) {
1189 return null;
1190 }
1191 assert header == BLOB_COPY || header == DESCENDING.apply(BLOB_COPY);
1192 Order ord = header == BLOB_COPY ? ASCENDING : DESCENDING;
1193 final int length = src.getRemaining() - (ASCENDING == ord ? 0 : 1);
1194 byte[] ret = new byte[length];
1195 src.get(ret);
1196 ord.apply(ret, 0, ret.length);
1197 // DESCENDING ordered BlobCopy requires a termination bit to preserve
1198 // sort-order semantics of null values.
1199 if (DESCENDING == ord) src.get();
1200 return ret;
1201 }
1202
1203 /**
1204 * Encode a null value.
1205 * @param dst The destination to which encoded digits are written.
1206 * @param ord The {@link Order} to respect while encoding {@code val}.
1207 * @return the number of bytes written.
1208 */
1209 public static int encodeNull(PositionedByteRange dst, Order ord) {
1210 dst.put(ord.apply(NULL));
1211 return 1;
1212 }
1213
1214 /**
1215 * Encode an {@code int8} value using the fixed-length encoding.
1216 * @return the number of bytes written.
1217 * @see #encodeInt64(PositionedByteRange, long, Order)
1218 * @see #decodeInt8(PositionedByteRange)
1219 */
1220 public static int encodeInt8(PositionedByteRange dst, byte val, Order ord) {
1221 final int offset = dst.getOffset(), start = dst.getPosition();
1222 dst.put(FIXED_INT8)
1223 .put((byte) (val ^ 0x80));
1224 ord.apply(dst.getBytes(), offset + start, 2);
1225 return 2;
1226 }
1227
1228 /**
1229 * Decode an {@code int8} value.
1230 * @see #encodeInt8(PositionedByteRange, byte, Order)
1231 */
1232 public static byte decodeInt8(PositionedByteRange src) {
1233 final byte header = src.get();
1234 assert header == FIXED_INT8 || header == DESCENDING.apply(FIXED_INT8);
1235 Order ord = header == FIXED_INT8 ? ASCENDING : DESCENDING;
1236 return (byte)((ord.apply(src.get()) ^ 0x80) & 0xff);
1237 }
1238
1239 /**
1240 * Encode an {@code int16} value using the fixed-length encoding.
1241 * @return the number of bytes written.
1242 * @see #encodeInt64(PositionedByteRange, long, Order)
1243 * @see #decodeInt16(PositionedByteRange)
1244 */
1245 public static int encodeInt16(PositionedByteRange dst, short val, Order ord) {
1246 final int offset = dst.getOffset(), start = dst.getPosition();
1247 dst.put(FIXED_INT16)
1248 .put((byte) ((val >> 8) ^ 0x80))
1249 .put((byte) val);
1250 ord.apply(dst.getBytes(), offset + start, 3);
1251 return 3;
1252 }
1253
1254 /**
1255 * Decode an {@code int16} value.
1256 * @see #encodeInt16(PositionedByteRange, short, Order)
1257 */
1258 public static short decodeInt16(PositionedByteRange src) {
1259 final byte header = src.get();
1260 assert header == FIXED_INT16 || header == DESCENDING.apply(FIXED_INT16);
1261 Order ord = header == FIXED_INT16 ? ASCENDING : DESCENDING;
1262 short val = (short) ((ord.apply(src.get()) ^ 0x80) & 0xff);
1263 val = (short) ((val << 8) + (ord.apply(src.get()) & 0xff));
1264 return val;
1265 }
1266
1267 /**
1268 * Encode an {@code int32} value using the fixed-length encoding.
1269 * @return the number of bytes written.
1270 * @see #encodeInt64(PositionedByteRange, long, Order)
1271 * @see #decodeInt32(PositionedByteRange)
1272 */
1273 public static int encodeInt32(PositionedByteRange dst, int val, Order ord) {
1274 final int offset = dst.getOffset(), start = dst.getPosition();
1275 dst.put(FIXED_INT32)
1276 .put((byte) ((val >> 24) ^ 0x80))
1277 .put((byte) (val >> 16))
1278 .put((byte) (val >> 8))
1279 .put((byte) val);
1280 ord.apply(dst.getBytes(), offset + start, 5);
1281 return 5;
1282 }
1283
1284 /**
1285 * Decode an {@code int32} value.
1286 * @see #encodeInt32(PositionedByteRange, int, Order)
1287 */
1288 public static int decodeInt32(PositionedByteRange src) {
1289 final byte header = src.get();
1290 assert header == FIXED_INT32 || header == DESCENDING.apply(FIXED_INT32);
1291 Order ord = header == FIXED_INT32 ? ASCENDING : DESCENDING;
1292 int val = (ord.apply(src.get()) ^ 0x80) & 0xff;
1293 for (int i = 1; i < 4; i++) {
1294 val = (val << 8) + (ord.apply(src.get()) & 0xff);
1295 }
1296 return val;
1297 }
1298
1299 /**
1300 * Encode an {@code int64} value using the fixed-length encoding.
1301 * <p>
1302 * This format ensures that all longs sort in their natural order, as they
1303 * would sort when using signed long comparison.
1304 * </p>
1305 * <p>
1306 * All Longs are serialized to an 8-byte, fixed-width sortable byte format.
1307 * Serialization is performed by inverting the integer sign bit and writing
1308 * the resulting bytes to the byte array in big endian order. The encoded
1309 * value is prefixed by the {@link #FIXED_INT64} header byte. This encoding
1310 * is designed to handle java language primitives and so Null values are NOT
1311 * supported by this implementation.
1312 * </p>
1313 * <p>
1314 * For example:
1315 * </p>
1316 * <pre>
1317 * Input: 0x0000000000000005 (5)
1318 * Result: 0x288000000000000005
1319 *
1320 * Input: 0xfffffffffffffffb (-4)
1321 * Result: 0x280000000000000004
1322 *
1323 * Input: 0x7fffffffffffffff (Long.MAX_VALUE)
1324 * Result: 0x28ffffffffffffffff
1325 *
1326 * Input: 0x8000000000000000 (Long.MIN_VALUE)
1327 * Result: 0x287fffffffffffffff
1328 * </pre>
1329 * <p>
1330 * This encoding format, and much of this documentation string, is based on
1331 * Orderly's {@code FixedIntWritableRowKey}.
1332 * </p>
1333 * @return the number of bytes written.
1334 * @see #decodeInt64(PositionedByteRange)
1335 */
1336 public static int encodeInt64(PositionedByteRange dst, long val, Order ord) {
1337 final int offset = dst.getOffset(), start = dst.getPosition();
1338 dst.put(FIXED_INT64)
1339 .put((byte) ((val >> 56) ^ 0x80))
1340 .put((byte) (val >> 48))
1341 .put((byte) (val >> 40))
1342 .put((byte) (val >> 32))
1343 .put((byte) (val >> 24))
1344 .put((byte) (val >> 16))
1345 .put((byte) (val >> 8))
1346 .put((byte) val);
1347 ord.apply(dst.getBytes(), offset + start, 9);
1348 return 9;
1349 }
1350
1351 /**
1352 * Decode an {@code int64} value.
1353 * @see #encodeInt64(PositionedByteRange, long, Order)
1354 */
1355 public static long decodeInt64(PositionedByteRange src) {
1356 final byte header = src.get();
1357 assert header == FIXED_INT64 || header == DESCENDING.apply(FIXED_INT64);
1358 Order ord = header == FIXED_INT64 ? ASCENDING : DESCENDING;
1359 long val = (ord.apply(src.get()) ^ 0x80) & 0xff;
1360 for (int i = 1; i < 8; i++) {
1361 val = (val << 8) + (ord.apply(src.get()) & 0xff);
1362 }
1363 return val;
1364 }
1365
1366 /**
1367 * Encode a 32-bit floating point value using the fixed-length encoding.
1368 * Encoding format is described at length in
1369 * {@link #encodeFloat64(PositionedByteRange, double, Order)}.
1370 * @return the number of bytes written.
1371 * @see #decodeFloat32(PositionedByteRange)
1372 * @see #encodeFloat64(PositionedByteRange, double, Order)
1373 */
1374 public static int encodeFloat32(PositionedByteRange dst, float val, Order ord) {
1375 final int offset = dst.getOffset(), start = dst.getPosition();
1376 int i = Float.floatToIntBits(val);
1377 i ^= ((i >> (Integer.SIZE - 1)) | Integer.MIN_VALUE);
1378 dst.put(FIXED_FLOAT32)
1379 .put((byte) (i >> 24))
1380 .put((byte) (i >> 16))
1381 .put((byte) (i >> 8))
1382 .put((byte) i);
1383 ord.apply(dst.getBytes(), offset + start, 5);
1384 return 5;
1385 }
1386
1387 /**
1388 * Decode a 32-bit floating point value using the fixed-length encoding.
1389 * @see #encodeFloat32(PositionedByteRange, float, Order)
1390 */
1391 public static float decodeFloat32(PositionedByteRange src) {
1392 final byte header = src.get();
1393 assert header == FIXED_FLOAT32 || header == DESCENDING.apply(FIXED_FLOAT32);
1394 Order ord = header == FIXED_FLOAT32 ? ASCENDING : DESCENDING;
1395 int val = ord.apply(src.get()) & 0xff;
1396 for (int i = 1; i < 4; i++) {
1397 val = (val << 8) + (ord.apply(src.get()) & 0xff);
1398 }
1399 val ^= (~val >> (Integer.SIZE - 1)) | Integer.MIN_VALUE;
1400 return Float.intBitsToFloat(val);
1401 }
1402
1403 /**
1404 * Encode a 64-bit floating point value using the fixed-length encoding.
1405 * <p>
1406 * This format ensures the following total ordering of floating point
1407 * values: Double.NEGATIVE_INFINITY < -Double.MAX_VALUE < ... <
1408 * -Double.MIN_VALUE < -0.0 < +0.0; < Double.MIN_VALUE < ...
1409 * < Double.MAX_VALUE < Double.POSITIVE_INFINITY < Double.NaN
1410 * </p>
1411 * <p>
1412 * Floating point numbers are encoded as specified in IEEE 754. A 64-bit
1413 * double precision float consists of a sign bit, 11-bit unsigned exponent
1414 * encoded in offset-1023 notation, and a 52-bit significand. The format is
1415 * described further in the <a
1416 * href="http://en.wikipedia.org/wiki/Double_precision"> Double Precision
1417 * Floating Point Wikipedia page</a> </p>
1418 * <p>
1419 * The value of a normal float is -1 <sup>sign bit</sup> ×
1420 * 2<sup>exponent - 1023</sup> × 1.significand
1421 * </p>
1422 * <p>
1423 * The IEE754 floating point format already preserves sort ordering for
1424 * positive floating point numbers when the raw bytes are compared in most
1425 * significant byte order. This is discussed further at <a href=
1426 * "http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm"
1427 * > http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.
1428 * htm</a>
1429 * </p>
1430 * <p>
1431 * Thus, we need only ensure that negative numbers sort in the the exact
1432 * opposite order as positive numbers (so that say, negative infinity is
1433 * less than negative 1), and that all negative numbers compare less than
1434 * any positive number. To accomplish this, we invert the sign bit of all
1435 * floating point numbers, and we also invert the exponent and significand
1436 * bits if the floating point number was negative.
1437 * </p>
1438 * <p>
1439 * More specifically, we first store the floating point bits into a 64-bit
1440 * long {@code l} using {@link Double#doubleToLongBits}. This method
1441 * collapses all NaNs into a single, canonical NaN value but otherwise
1442 * leaves the bits unchanged. We then compute
1443 * </p>
1444 * <pre>
1445 * l ˆ= (l >> (Long.SIZE - 1)) | Long.MIN_SIZE
1446 * </pre>
1447 * <p>
1448 * which inverts the sign bit and XOR's all other bits with the sign bit
1449 * itself. Comparing the raw bytes of {@code l} in most significant
1450 * byte order is equivalent to performing a double precision floating point
1451 * comparison on the underlying bits (ignoring NaN comparisons, as NaNs
1452 * don't compare equal to anything when performing floating point
1453 * comparisons).
1454 * </p>
1455 * <p>
1456 * The resulting long integer is then converted into a byte array by
1457 * serializing the long one byte at a time in most significant byte order.
1458 * The serialized integer is prefixed by a single header byte. All
1459 * serialized values are 9 bytes in length.
1460 * </p>
1461 * <p>
1462 * This encoding format, and much of this highly detailed documentation
1463 * string, is based on Orderly's {@code DoubleWritableRowKey}.
1464 * </p>
1465 * @return the number of bytes written.
1466 * @see #decodeFloat64(PositionedByteRange)
1467 */
1468 public static int encodeFloat64(PositionedByteRange dst, double val, Order ord) {
1469 final int offset = dst.getOffset(), start = dst.getPosition();
1470 long lng = Double.doubleToLongBits(val);
1471 lng ^= ((lng >> (Long.SIZE - 1)) | Long.MIN_VALUE);
1472 dst.put(FIXED_FLOAT64)
1473 .put((byte) (lng >> 56))
1474 .put((byte) (lng >> 48))
1475 .put((byte) (lng >> 40))
1476 .put((byte) (lng >> 32))
1477 .put((byte) (lng >> 24))
1478 .put((byte) (lng >> 16))
1479 .put((byte) (lng >> 8))
1480 .put((byte) lng);
1481 ord.apply(dst.getBytes(), offset + start, 9);
1482 return 9;
1483 }
1484
1485 /**
1486 * Decode a 64-bit floating point value using the fixed-length encoding.
1487 * @see #encodeFloat64(PositionedByteRange, double, Order)
1488 */
1489 public static double decodeFloat64(PositionedByteRange src) {
1490 final byte header = src.get();
1491 assert header == FIXED_FLOAT64 || header == DESCENDING.apply(FIXED_FLOAT64);
1492 Order ord = header == FIXED_FLOAT64 ? ASCENDING : DESCENDING;
1493 long val = ord.apply(src.get()) & 0xff;
1494 for (int i = 1; i < 8; i++) {
1495 val = (val << 8) + (ord.apply(src.get()) & 0xff);
1496 }
1497 val ^= (~val >> (Long.SIZE - 1)) | Long.MIN_VALUE;
1498 return Double.longBitsToDouble(val);
1499 }
1500
1501 /**
1502 * Returns true when {@code src} appears to be positioned an encoded value,
1503 * false otherwise.
1504 */
1505 public static boolean isEncodedValue(PositionedByteRange src) {
1506 return isNull(src) || isNumeric(src) || isFixedInt8(src) || isFixedInt16(src)
1507 || isFixedInt32(src) || isFixedInt64(src)
1508 || isFixedFloat32(src) || isFixedFloat64(src) || isText(src) || isBlobCopy(src)
1509 || isBlobVar(src);
1510 }
1511
1512 /**
1513 * Return true when the next encoded value in {@code src} is null, false
1514 * otherwise.
1515 */
1516 public static boolean isNull(PositionedByteRange src) {
1517 return NULL ==
1518 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1519 }
1520
1521 /**
1522 * Return true when the next encoded value in {@code src} uses Numeric
1523 * encoding, false otherwise. {@code NaN}, {@code +/-Inf} are valid Numeric
1524 * values.
1525 */
1526 public static boolean isNumeric(PositionedByteRange src) {
1527 byte x = (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1528 return x >= NEG_INF && x <= NAN;
1529 }
1530
1531 /**
1532 * Return true when the next encoded value in {@code src} uses Numeric
1533 * encoding and is {@code Infinite}, false otherwise.
1534 */
1535 public static boolean isNumericInfinite(PositionedByteRange src) {
1536 byte x = (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1537 return NEG_INF == x || POS_INF == x;
1538 }
1539
1540 /**
1541 * Return true when the next encoded value in {@code src} uses Numeric
1542 * encoding and is {@code NaN}, false otherwise.
1543 */
1544 public static boolean isNumericNaN(PositionedByteRange src) {
1545 return NAN == (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1546 }
1547
1548 /**
1549 * Return true when the next encoded value in {@code src} uses Numeric
1550 * encoding and is {@code 0}, false otherwise.
1551 */
1552 public static boolean isNumericZero(PositionedByteRange src) {
1553 return ZERO ==
1554 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1555 }
1556
1557 /**
1558 * Return true when the next encoded value in {@code src} uses fixed-width
1559 * Int8 encoding, false otherwise.
1560 */
1561 public static boolean isFixedInt8(PositionedByteRange src) {
1562 return FIXED_INT8 ==
1563 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1564 }
1565
1566 /**
1567 * Return true when the next encoded value in {@code src} uses fixed-width
1568 * Int16 encoding, false otherwise.
1569 */
1570 public static boolean isFixedInt16(PositionedByteRange src) {
1571 return FIXED_INT16 ==
1572 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1573 }
1574
1575 /**
1576 * Return true when the next encoded value in {@code src} uses fixed-width
1577 * Int32 encoding, false otherwise.
1578 */
1579 public static boolean isFixedInt32(PositionedByteRange src) {
1580 return FIXED_INT32 ==
1581 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1582 }
1583
1584 /**
1585 * Return true when the next encoded value in {@code src} uses fixed-width
1586 * Int64 encoding, false otherwise.
1587 */
1588 public static boolean isFixedInt64(PositionedByteRange src) {
1589 return FIXED_INT64 ==
1590 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1591 }
1592
1593 /**
1594 * Return true when the next encoded value in {@code src} uses fixed-width
1595 * Float32 encoding, false otherwise.
1596 */
1597 public static boolean isFixedFloat32(PositionedByteRange src) {
1598 return FIXED_FLOAT32 ==
1599 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1600 }
1601
1602 /**
1603 * Return true when the next encoded value in {@code src} uses fixed-width
1604 * Float64 encoding, false otherwise.
1605 */
1606 public static boolean isFixedFloat64(PositionedByteRange src) {
1607 return FIXED_FLOAT64 ==
1608 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1609 }
1610
1611 /**
1612 * Return true when the next encoded value in {@code src} uses Text encoding,
1613 * false otherwise.
1614 */
1615 public static boolean isText(PositionedByteRange src) {
1616 return TEXT ==
1617 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1618 }
1619
1620 /**
1621 * Return true when the next encoded value in {@code src} uses BlobVar
1622 * encoding, false otherwise.
1623 */
1624 public static boolean isBlobVar(PositionedByteRange src) {
1625 return BLOB_VAR ==
1626 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1627 }
1628
1629 /**
1630 * Return true when the next encoded value in {@code src} uses BlobCopy
1631 * encoding, false otherwise.
1632 */
1633 public static boolean isBlobCopy(PositionedByteRange src) {
1634 return BLOB_COPY ==
1635 (-1 == Integer.signum(src.peek()) ? DESCENDING : ASCENDING).apply(src.peek());
1636 }
1637
1638 /**
1639 * Skip {@code buff}'s position forward over one encoded value.
1640 * @return number of bytes skipped.
1641 */
1642 public static int skip(PositionedByteRange src) {
1643 final int start = src.getPosition();
1644 byte header = src.get();
1645 Order ord = (-1 == Integer.signum(header)) ? DESCENDING : ASCENDING;
1646 header = ord.apply(header);
1647
1648 switch (header) {
1649 case NULL:
1650 case NEG_INF:
1651 return 1;
1652 case NEG_LARGE: /* Large negative number: 0x08, ~E, ~M */
1653 skipVaruint64(src, DESCENDING != ord);
1654 skipSignificand(src, DESCENDING != ord);
1655 return src.getPosition() - start;
1656 case NEG_MED_MIN: /* Medium negative number: 0x13-E, ~M */
1657 case NEG_MED_MIN + 0x01:
1658 case NEG_MED_MIN + 0x02:
1659 case NEG_MED_MIN + 0x03:
1660 case NEG_MED_MIN + 0x04:
1661 case NEG_MED_MIN + 0x05:
1662 case NEG_MED_MIN + 0x06:
1663 case NEG_MED_MIN + 0x07:
1664 case NEG_MED_MIN + 0x08:
1665 case NEG_MED_MIN + 0x09:
1666 case NEG_MED_MAX:
1667 skipSignificand(src, DESCENDING != ord);
1668 return src.getPosition() - start;
1669 case NEG_SMALL: /* Small negative number: 0x14, -E, ~M */
1670 skipVaruint64(src, DESCENDING == ord);
1671 skipSignificand(src, DESCENDING != ord);
1672 return src.getPosition() - start;
1673 case ZERO:
1674 return 1;
1675 case POS_SMALL: /* Small positive number: 0x16, ~-E, M */
1676 skipVaruint64(src, DESCENDING != ord);
1677 skipSignificand(src, DESCENDING == ord);
1678 return src.getPosition() - start;
1679 case POS_MED_MIN: /* Medium positive number: 0x17+E, M */
1680 case POS_MED_MIN + 0x01:
1681 case POS_MED_MIN + 0x02:
1682 case POS_MED_MIN + 0x03:
1683 case POS_MED_MIN + 0x04:
1684 case POS_MED_MIN + 0x05:
1685 case POS_MED_MIN + 0x06:
1686 case POS_MED_MIN + 0x07:
1687 case POS_MED_MIN + 0x08:
1688 case POS_MED_MIN + 0x09:
1689 case POS_MED_MAX:
1690 skipSignificand(src, DESCENDING == ord);
1691 return src.getPosition() - start;
1692 case POS_LARGE: /* Large positive number: 0x22, E, M */
1693 skipVaruint64(src, DESCENDING == ord);
1694 skipSignificand(src, DESCENDING == ord);
1695 return src.getPosition() - start;
1696 case POS_INF:
1697 return 1;
1698 case NAN:
1699 return 1;
1700 case FIXED_INT8:
1701 src.setPosition(src.getPosition() + 1);
1702 return src.getPosition() - start;
1703 case FIXED_INT16:
1704 src.setPosition(src.getPosition() + 2);
1705 return src.getPosition() - start;
1706 case FIXED_INT32:
1707 src.setPosition(src.getPosition() + 4);
1708 return src.getPosition() - start;
1709 case FIXED_INT64:
1710 src.setPosition(src.getPosition() + 8);
1711 return src.getPosition() - start;
1712 case FIXED_FLOAT32:
1713 src.setPosition(src.getPosition() + 4);
1714 return src.getPosition() - start;
1715 case FIXED_FLOAT64:
1716 src.setPosition(src.getPosition() + 8);
1717 return src.getPosition() - start;
1718 case TEXT:
1719 // for null-terminated values, skip to the end.
1720 do {
1721 header = ord.apply(src.get());
1722 } while (header != TERM);
1723 return src.getPosition() - start;
1724 case BLOB_VAR:
1725 // read until we find a 0 in the MSB
1726 do {
1727 header = ord.apply(src.get());
1728 } while ((byte) (header & 0x80) != TERM);
1729 return src.getPosition() - start;
1730 case BLOB_COPY:
1731 if (Order.DESCENDING == ord) {
1732 // if descending, read to termination byte.
1733 do {
1734 header = ord.apply(src.get());
1735 } while (header != TERM);
1736 return src.getPosition() - start;
1737 } else {
1738 // otherwise, just skip to the end.
1739 src.setPosition(src.getLength());
1740 return src.getPosition() - start;
1741 }
1742 default:
1743 throw unexpectedHeader(header);
1744 }
1745 }
1746
1747 /**
1748 * Return the number of encoded entries remaining in {@code buff}. The
1749 * state of {@code buff} is not modified through use of this method.
1750 */
1751 public static int length(PositionedByteRange buff) {
1752 PositionedByteRange b =
1753 new SimplePositionedMutableByteRange(buff.getBytes(), buff.getOffset(), buff.getLength());
1754 b.setPosition(buff.getPosition());
1755 int cnt = 0;
1756 for (; isEncodedValue(b); skip(b), cnt++)
1757 ;
1758 return cnt;
1759 }
1760 }