1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.filter;
19
20 import com.google.protobuf.InvalidProtocolBufferException;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Comparator;
24 import java.util.List;
25 import java.util.Objects;
26 import java.util.PriorityQueue;
27 import org.apache.hadoop.hbase.Cell;
28 import org.apache.hadoop.hbase.KeyValueUtil;
29 import org.apache.hadoop.hbase.classification.InterfaceAudience;
30 import org.apache.hadoop.hbase.classification.InterfaceStability;
31 import org.apache.hadoop.hbase.exceptions.DeserializationException;
32 import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
33 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
34 import org.apache.hadoop.hbase.util.ByteStringer;
35 import org.apache.hadoop.hbase.util.Bytes;
36 import org.apache.hadoop.hbase.util.Pair;
37 import org.apache.hadoop.hbase.util.UnsafeAvailChecker;
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57 @InterfaceAudience.Public
58 @InterfaceStability.Evolving
59 public class FuzzyRowFilter extends FilterBase {
60 private static final boolean UNSAFE_UNALIGNED = UnsafeAvailChecker.unaligned();
61
62
63
64
65
66
67 static final byte V1_PROCESSED_WILDCARD_MASK = 0;
68 static final byte V2_PROCESSED_WILDCARD_MASK = 2;
69
70 private final byte processedWildcardMask;
71 private List<Pair<byte[], byte[]>> fuzzyKeysData;
72 private boolean done = false;
73
74
75
76
77
78
79 private int lastFoundIndex = -1;
80
81
82
83
84 private RowTracker tracker;
85
86
87
88 public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
89 this(fuzzyKeysData, V2_PROCESSED_WILDCARD_MASK);
90 }
91
92
93
94
95 FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData, byte processedWildcardMask) {
96 this.processedWildcardMask = processedWildcardMask;
97
98 List<Pair<byte[], byte[]>> fuzzyKeyDataCopy = new ArrayList<>(fuzzyKeysData.size());
99
100 for (Pair<byte[], byte[]> aFuzzyKeysData : fuzzyKeysData) {
101 if (aFuzzyKeysData.getFirst().length != aFuzzyKeysData.getSecond().length) {
102 Pair<String, String> readable =
103 new Pair<>(Bytes.toStringBinary(aFuzzyKeysData.getFirst()), Bytes.toStringBinary(aFuzzyKeysData.getSecond()));
104 throw new IllegalArgumentException("Fuzzy pair lengths do not match: " + readable);
105 }
106
107 Pair<byte[], byte[]> p = new Pair<>();
108
109 p.setFirst(Arrays.copyOf(aFuzzyKeysData.getFirst(), aFuzzyKeysData.getFirst().length));
110 p.setSecond(Arrays.copyOf(aFuzzyKeysData.getSecond(), aFuzzyKeysData.getSecond().length));
111
112
113 p.setSecond(preprocessMask(p.getSecond()));
114 preprocessSearchKey(p);
115
116 fuzzyKeyDataCopy.add(p);
117 }
118 this.fuzzyKeysData = fuzzyKeyDataCopy;
119 this.tracker = new RowTracker();
120 }
121
122 private void preprocessSearchKey(Pair<byte[], byte[]> p) {
123 if (!UNSAFE_UNALIGNED) {
124 return;
125 }
126 byte[] key = p.getFirst();
127 byte[] mask = p.getSecond();
128 for (int i = 0; i < mask.length; i++) {
129
130 if (mask[i] == processedWildcardMask) {
131 key[i] = 0;
132 }
133 }
134 }
135
136
137
138
139
140
141
142 private byte[] preprocessMask(byte[] mask) {
143 if (!UNSAFE_UNALIGNED) {
144 return mask;
145 }
146 if (isPreprocessedMask(mask)) return mask;
147 for (int i = 0; i < mask.length; i++) {
148 if (mask[i] == 0) {
149 mask[i] = -1;
150 } else if (mask[i] == 1) {
151 mask[i] = processedWildcardMask;
152 }
153 }
154 return mask;
155 }
156
157 private boolean isPreprocessedMask(byte[] mask) {
158 for (int i = 0; i < mask.length; i++) {
159 if (mask[i] != -1 && mask[i] != processedWildcardMask) {
160 return false;
161 }
162 }
163 return true;
164 }
165
166 @Override
167 public ReturnCode filterKeyValue(Cell c) {
168 final int startIndex = lastFoundIndex >= 0 ? lastFoundIndex : 0;
169 final int size = fuzzyKeysData.size();
170 for (int i = startIndex; i < size + startIndex; i++) {
171 final int index = i % size;
172 Pair<byte[], byte[]> fuzzyData = fuzzyKeysData.get(index);
173 idempotentMaskShift(fuzzyData.getSecond());
174 SatisfiesCode satisfiesCode =
175 satisfies(isReversed(), c.getRowArray(), c.getRowOffset(), c.getRowLength(),
176 fuzzyData.getFirst(), fuzzyData.getSecond());
177 if (satisfiesCode == SatisfiesCode.YES) {
178 lastFoundIndex = index;
179 return ReturnCode.INCLUDE;
180 }
181 }
182
183 lastFoundIndex = -1;
184
185 return ReturnCode.SEEK_NEXT_USING_HINT;
186 }
187
188 static void idempotentMaskShift(byte[] mask) {
189
190
191
192 for (int j = 0; j < mask.length; j++) {
193 mask[j] >>= 2;
194 }
195 }
196
197 @Override
198 public Cell getNextCellHint(Cell currentCell) {
199 boolean result = tracker.updateTracker(currentCell);
200 if (result == false) {
201 done = true;
202 return null;
203 }
204 byte[] nextRowKey = tracker.nextRow();
205 return KeyValueUtil.createFirstOnRow(nextRowKey);
206 }
207
208
209
210
211
212
213
214
215
216 private class RowTracker {
217 private final PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>> nextRows;
218 private boolean initialized = false;
219
220 RowTracker() {
221 nextRows =
222 new PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>>(fuzzyKeysData.size(),
223 new Comparator<Pair<byte[], Pair<byte[], byte[]>>>() {
224 @Override
225 public int compare(Pair<byte[], Pair<byte[], byte[]>> o1,
226 Pair<byte[], Pair<byte[], byte[]>> o2) {
227 return isReversed()? Bytes.compareTo(o2.getFirst(), o1.getFirst()):
228 Bytes.compareTo(o1.getFirst(), o2.getFirst());
229 }
230 });
231 }
232
233 byte[] nextRow() {
234 if (nextRows.isEmpty()) {
235 throw new IllegalStateException(
236 "NextRows should not be empty, make sure to call nextRow() after updateTracker() return true");
237 } else {
238 return nextRows.peek().getFirst();
239 }
240 }
241
242 boolean updateTracker(Cell currentCell) {
243 if (!initialized) {
244 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
245 updateWith(currentCell, fuzzyData);
246 }
247 initialized = true;
248 } else {
249 while (!nextRows.isEmpty() && !lessThan(currentCell, nextRows.peek().getFirst())) {
250 Pair<byte[], Pair<byte[], byte[]>> head = nextRows.poll();
251 Pair<byte[], byte[]> fuzzyData = head.getSecond();
252 updateWith(currentCell, fuzzyData);
253 }
254 }
255 return !nextRows.isEmpty();
256 }
257
258 boolean lessThan(Cell currentCell, byte[] nextRowKey) {
259 int compareResult =
260 Bytes.compareTo(currentCell.getRowArray(), currentCell.getRowOffset(),
261 currentCell.getRowLength(), nextRowKey, 0, nextRowKey.length);
262 return (!isReversed() && compareResult < 0) || (isReversed() && compareResult > 0);
263 }
264
265 void updateWith(Cell currentCell, Pair<byte[], byte[]> fuzzyData) {
266 byte[] nextRowKeyCandidate =
267 getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(),
268 currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
269 if (nextRowKeyCandidate != null) {
270 nextRows.add(new Pair<byte[], Pair<byte[], byte[]>>(nextRowKeyCandidate, fuzzyData));
271 }
272 }
273
274 }
275
276 @Override
277 public boolean filterAllRemaining() {
278 return done;
279 }
280
281
282
283
284 @Override
285 public byte[] toByteArray() {
286 FilterProtos.FuzzyRowFilter.Builder builder = FilterProtos.FuzzyRowFilter
287 .newBuilder()
288 .setIsMaskV2(processedWildcardMask == V2_PROCESSED_WILDCARD_MASK);
289 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
290 BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
291 bbpBuilder.setFirst(ByteStringer.wrap(fuzzyData.getFirst()));
292 bbpBuilder.setSecond(ByteStringer.wrap(fuzzyData.getSecond()));
293 builder.addFuzzyKeysData(bbpBuilder);
294 }
295 return builder.build().toByteArray();
296 }
297
298
299
300
301
302
303
304 public static FuzzyRowFilter parseFrom(final byte[] pbBytes) throws DeserializationException {
305 FilterProtos.FuzzyRowFilter proto;
306 try {
307 proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
308 } catch (InvalidProtocolBufferException e) {
309 throw new DeserializationException(e);
310 }
311 int count = proto.getFuzzyKeysDataCount();
312 ArrayList<Pair<byte[], byte[]>> fuzzyKeysData = new ArrayList<Pair<byte[], byte[]>>(count);
313 for (int i = 0; i < count; ++i) {
314 BytesBytesPair current = proto.getFuzzyKeysData(i);
315 byte[] keyBytes = current.getFirst().toByteArray();
316 byte[] keyMeta = current.getSecond().toByteArray();
317 fuzzyKeysData.add(new Pair<byte[], byte[]>(keyBytes, keyMeta));
318 }
319 byte processedWildcardMask = proto.hasIsMaskV2() && proto.getIsMaskV2()
320 ? V2_PROCESSED_WILDCARD_MASK
321 : V1_PROCESSED_WILDCARD_MASK;
322 return new FuzzyRowFilter(fuzzyKeysData, processedWildcardMask);
323 }
324
325 @Override
326 public String toString() {
327 final StringBuilder sb = new StringBuilder();
328 sb.append("FuzzyRowFilter");
329 sb.append("{fuzzyKeysData=");
330 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
331 sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
332 sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
333 }
334 sb.append("}, ");
335 return sb.toString();
336 }
337
338
339
340 static enum SatisfiesCode {
341
342 YES,
343
344 NEXT_EXISTS,
345
346 NO_NEXT
347 }
348
349 @InterfaceAudience.Private
350 static SatisfiesCode satisfies(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
351 return satisfies(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
352 }
353
354 @InterfaceAudience.Private
355 static SatisfiesCode satisfies(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
356 byte[] fuzzyKeyMeta) {
357 return satisfies(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
358 }
359
360 static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int length,
361 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
362
363 if (!UNSAFE_UNALIGNED) {
364 return satisfiesNoUnsafe(reverse, row, offset, length, fuzzyKeyBytes, fuzzyKeyMeta);
365 }
366
367 if (row == null) {
368
369 return SatisfiesCode.YES;
370 }
371 length = Math.min(length, fuzzyKeyBytes.length);
372 int numWords = length / Bytes.SIZEOF_LONG;
373
374 int j = numWords << 3;
375
376 for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) {
377 long fuzzyBytes = Bytes.toLong(fuzzyKeyBytes, i);
378 long fuzzyMeta = Bytes.toLong(fuzzyKeyMeta, i);
379 long rowValue = Bytes.toLong(row, offset + i);
380 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
381
382 return SatisfiesCode.NEXT_EXISTS;
383 }
384 }
385
386 int off = j;
387
388 if (length - off >= Bytes.SIZEOF_INT) {
389 int fuzzyBytes = Bytes.toInt(fuzzyKeyBytes, off);
390 int fuzzyMeta = Bytes.toInt(fuzzyKeyMeta, off);
391 int rowValue = Bytes.toInt(row, offset + off);
392 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
393
394 return SatisfiesCode.NEXT_EXISTS;
395 }
396 off += Bytes.SIZEOF_INT;
397 }
398
399 if (length - off >= Bytes.SIZEOF_SHORT) {
400 short fuzzyBytes = Bytes.toShort(fuzzyKeyBytes, off);
401 short fuzzyMeta = Bytes.toShort(fuzzyKeyMeta, off);
402 short rowValue = Bytes.toShort(row, offset + off);
403 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
404
405
406
407 return SatisfiesCode.NEXT_EXISTS;
408 }
409 off += Bytes.SIZEOF_SHORT;
410 }
411
412 if (length - off >= Bytes.SIZEOF_BYTE) {
413 int fuzzyBytes = fuzzyKeyBytes[off] & 0xff;
414 int fuzzyMeta = fuzzyKeyMeta[off] & 0xff;
415 int rowValue = row[offset + off] & 0xff;
416 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
417
418 return SatisfiesCode.NEXT_EXISTS;
419 }
420 }
421 return SatisfiesCode.YES;
422 }
423
424 static SatisfiesCode satisfiesNoUnsafe(boolean reverse, byte[] row, int offset, int length,
425 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
426 if (row == null) {
427
428 return SatisfiesCode.YES;
429 }
430
431 Order order = Order.orderFor(reverse);
432 boolean nextRowKeyCandidateExists = false;
433
434 for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
435
436 boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
437 boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
438 if (fixedByteIncorrect) {
439
440 if (nextRowKeyCandidateExists) {
441 return SatisfiesCode.NEXT_EXISTS;
442 }
443
444
445
446
447 boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
448 if (rowByteLessThanFixed && !reverse) {
449 return SatisfiesCode.NEXT_EXISTS;
450 } else if (!rowByteLessThanFixed && reverse) {
451 return SatisfiesCode.NEXT_EXISTS;
452 } else {
453 return SatisfiesCode.NO_NEXT;
454 }
455 }
456
457
458
459
460
461
462
463 if (fuzzyKeyMeta[i] == 1 && !order.isMax(fuzzyKeyBytes[i])) {
464 nextRowKeyCandidateExists = true;
465 }
466 }
467 return SatisfiesCode.YES;
468 }
469
470 @InterfaceAudience.Private
471 static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
472 return getNextForFuzzyRule(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
473 }
474
475 @InterfaceAudience.Private
476 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
477 byte[] fuzzyKeyMeta) {
478 return getNextForFuzzyRule(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
479 }
480
481
482 private enum Order {
483 ASC {
484 @Override
485 public boolean lt(int lhs, int rhs) {
486 return lhs < rhs;
487 }
488
489 @Override
490 public boolean gt(int lhs, int rhs) {
491 return lhs > rhs;
492 }
493
494 @Override
495 public byte inc(byte val) {
496
497 return (byte) (val + 1);
498 }
499
500 @Override
501 public boolean isMax(byte val) {
502 return val == (byte) 0xff;
503 }
504
505 @Override
506 public byte min() {
507 return 0;
508 }
509 },
510 DESC {
511 @Override
512 public boolean lt(int lhs, int rhs) {
513 return lhs > rhs;
514 }
515
516 @Override
517 public boolean gt(int lhs, int rhs) {
518 return lhs < rhs;
519 }
520
521 @Override
522 public byte inc(byte val) {
523
524 return (byte) (val - 1);
525 }
526
527 @Override
528 public boolean isMax(byte val) {
529 return val == 0;
530 }
531
532 @Override
533 public byte min() {
534 return (byte) 0xFF;
535 }
536 };
537
538 public static Order orderFor(boolean reverse) {
539 return reverse ? DESC : ASC;
540 }
541
542
543 public abstract boolean lt(int lhs, int rhs);
544
545
546 public abstract boolean gt(int lhs, int rhs);
547
548
549 public abstract byte inc(byte val);
550
551
552 public abstract boolean isMax(byte val);
553
554
555 public abstract byte min();
556 }
557
558
559
560
561
562 @InterfaceAudience.Private
563 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
564 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
565
566
567
568
569
570
571
572
573 byte[] result =
574 Arrays.copyOf(fuzzyKeyBytes, length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
575 if (reverse && length > fuzzyKeyBytes.length) {
576
577 for (int i = fuzzyKeyBytes.length; i < result.length; i++) {
578 result[i] = (byte) 0xFF;
579 }
580 }
581 int toInc = -1;
582 final Order order = Order.orderFor(reverse);
583
584 boolean increased = false;
585 for (int i = 0; i < result.length; i++) {
586 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0
587 result[i] = row[offset + i];
588 if (!order.isMax(row[offset + i])) {
589
590 toInc = i;
591 }
592 } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == -1
593 if (order.lt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
594
595
596 increased = true;
597 break;
598 }
599
600 if (order.gt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
601
602
603
604 break;
605 }
606 }
607 }
608
609 if (!increased) {
610 if (toInc < 0) {
611 return null;
612 }
613 result[toInc] = order.inc(result[toInc]);
614
615
616
617 for (int i = toInc + 1; i < result.length; i++) {
618 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0
619 result[i] = order.min();
620 }
621 }
622 }
623
624 return reverse? result: trimTrailingZeroes(result, fuzzyKeyMeta, toInc);
625 }
626
627
628
629
630
631
632
633
634
635
636
637
638
639 private static byte[] trimTrailingZeroes(byte[] result, byte[] fuzzyKeyMeta, int toInc) {
640 int off = fuzzyKeyMeta.length >= result.length? result.length -1:
641 fuzzyKeyMeta.length -1;
642 for( ; off >= 0; off--){
643 if(fuzzyKeyMeta[off] != 0) break;
644 }
645 if (off < toInc) off = toInc;
646 byte[] retValue = new byte[off+1];
647 System.arraycopy(result, 0, retValue, 0, retValue.length);
648 return retValue;
649 }
650
651
652
653
654
655 @Override
656 boolean areSerializedFieldsEqual(Filter o) {
657 if (o == this) return true;
658 if (!(o instanceof FuzzyRowFilter)) return false;
659
660 FuzzyRowFilter other = (FuzzyRowFilter) o;
661 if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false;
662 for (int i = 0; i < fuzzyKeysData.size(); ++i) {
663 Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
664 Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
665 if (!(Bytes.equals(thisData.getFirst(), otherData.getFirst()) && Bytes.equals(
666 thisData.getSecond(), otherData.getSecond()))) {
667 return false;
668 }
669 }
670 return true;
671 }
672
673 @Override
674 public boolean equals(Object obj) {
675 return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
676 }
677
678 @Override
679 public int hashCode() {
680 return Objects.hash(this.fuzzyKeysData);
681 }
682 }