1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.filter;
19
20 import com.google.protobuf.InvalidProtocolBufferException;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Comparator;
24 import java.util.List;
25 import java.util.Objects;
26 import java.util.PriorityQueue;
27 import org.apache.hadoop.hbase.Cell;
28 import org.apache.hadoop.hbase.KeyValueUtil;
29 import org.apache.hadoop.hbase.classification.InterfaceAudience;
30 import org.apache.hadoop.hbase.classification.InterfaceStability;
31 import org.apache.hadoop.hbase.exceptions.DeserializationException;
32 import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
33 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
34 import org.apache.hadoop.hbase.util.ByteStringer;
35 import org.apache.hadoop.hbase.util.Bytes;
36 import org.apache.hadoop.hbase.util.Pair;
37 import org.apache.hadoop.hbase.util.UnsafeAvailChecker;
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57 @InterfaceAudience.Public
58 @InterfaceStability.Evolving
59 public class FuzzyRowFilter extends FilterBase {
60 private static final boolean UNSAFE_UNALIGNED = UnsafeAvailChecker.unaligned();
61 private List<Pair<byte[], byte[]>> fuzzyKeysData;
62 private boolean done = false;
63
64
65
66
67
68
69 private int lastFoundIndex = -1;
70
71
72
73
74 private RowTracker tracker;
75
76 public FuzzyRowFilter(List<Pair<byte[], byte[]>> fuzzyKeysData) {
77 List<Pair<byte[], byte[]>> fuzzyKeyDataCopy = new ArrayList<>(fuzzyKeysData.size());
78
79 for (Pair<byte[], byte[]> aFuzzyKeysData : fuzzyKeysData) {
80 if (aFuzzyKeysData.getFirst().length != aFuzzyKeysData.getSecond().length) {
81 Pair<String, String> readable =
82 new Pair<>(Bytes.toStringBinary(aFuzzyKeysData.getFirst()), Bytes.toStringBinary(aFuzzyKeysData.getSecond()));
83 throw new IllegalArgumentException("Fuzzy pair lengths do not match: " + readable);
84 }
85
86 Pair<byte[], byte[]> p = new Pair<>();
87
88 p.setFirst(Arrays.copyOf(aFuzzyKeysData.getFirst(), aFuzzyKeysData.getFirst().length));
89 p.setSecond(Arrays.copyOf(aFuzzyKeysData.getSecond(), aFuzzyKeysData.getSecond().length));
90
91
92 p.setSecond(preprocessMask(p.getSecond()));
93 preprocessSearchKey(p);
94
95 fuzzyKeyDataCopy.add(p);
96 }
97 this.fuzzyKeysData = fuzzyKeyDataCopy;
98 this.tracker = new RowTracker();
99 }
100
101 private void preprocessSearchKey(Pair<byte[], byte[]> p) {
102 if (!UNSAFE_UNALIGNED) {
103 return;
104 }
105 byte[] key = p.getFirst();
106 byte[] mask = p.getSecond();
107 for (int i = 0; i < mask.length; i++) {
108
109 if (mask[i] == 2) {
110 key[i] = 0;
111 }
112 }
113 }
114
115
116
117
118
119
120
121 private byte[] preprocessMask(byte[] mask) {
122 if (!UNSAFE_UNALIGNED) {
123 return mask;
124 }
125 if (isPreprocessedMask(mask)) return mask;
126 for (int i = 0; i < mask.length; i++) {
127 if (mask[i] == 0) {
128 mask[i] = -1;
129 } else if (mask[i] == 1) {
130 mask[i] = 2;
131 }
132 }
133 return mask;
134 }
135
136 private boolean isPreprocessedMask(byte[] mask) {
137 for (int i = 0; i < mask.length; i++) {
138 if (mask[i] != -1 && mask[i] != 2) {
139 return false;
140 }
141 }
142 return true;
143 }
144
145 @Override
146 public ReturnCode filterKeyValue(Cell c) {
147 final int startIndex = lastFoundIndex >= 0 ? lastFoundIndex : 0;
148 final int size = fuzzyKeysData.size();
149 for (int i = startIndex; i < size + startIndex; i++) {
150 final int index = i % size;
151 Pair<byte[], byte[]> fuzzyData = fuzzyKeysData.get(index);
152
153 for (int j = 0; j < fuzzyData.getSecond().length; j++) {
154 fuzzyData.getSecond()[j] >>= 2;
155 }
156 SatisfiesCode satisfiesCode =
157 satisfies(isReversed(), c.getRowArray(), c.getRowOffset(), c.getRowLength(),
158 fuzzyData.getFirst(), fuzzyData.getSecond());
159 if (satisfiesCode == SatisfiesCode.YES) {
160 lastFoundIndex = index;
161 return ReturnCode.INCLUDE;
162 }
163 }
164
165 lastFoundIndex = -1;
166
167 return ReturnCode.SEEK_NEXT_USING_HINT;
168
169 }
170
171 @Override
172 public Cell getNextCellHint(Cell currentCell) {
173 boolean result = tracker.updateTracker(currentCell);
174 if (result == false) {
175 done = true;
176 return null;
177 }
178 byte[] nextRowKey = tracker.nextRow();
179 return KeyValueUtil.createFirstOnRow(nextRowKey);
180 }
181
182
183
184
185
186
187
188
189
190 private class RowTracker {
191 private final PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>> nextRows;
192 private boolean initialized = false;
193
194 RowTracker() {
195 nextRows =
196 new PriorityQueue<Pair<byte[], Pair<byte[], byte[]>>>(fuzzyKeysData.size(),
197 new Comparator<Pair<byte[], Pair<byte[], byte[]>>>() {
198 @Override
199 public int compare(Pair<byte[], Pair<byte[], byte[]>> o1,
200 Pair<byte[], Pair<byte[], byte[]>> o2) {
201 return isReversed()? Bytes.compareTo(o2.getFirst(), o1.getFirst()):
202 Bytes.compareTo(o1.getFirst(), o2.getFirst());
203 }
204 });
205 }
206
207 byte[] nextRow() {
208 if (nextRows.isEmpty()) {
209 throw new IllegalStateException(
210 "NextRows should not be empty, make sure to call nextRow() after updateTracker() return true");
211 } else {
212 return nextRows.peek().getFirst();
213 }
214 }
215
216 boolean updateTracker(Cell currentCell) {
217 if (!initialized) {
218 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
219 updateWith(currentCell, fuzzyData);
220 }
221 initialized = true;
222 } else {
223 while (!nextRows.isEmpty() && !lessThan(currentCell, nextRows.peek().getFirst())) {
224 Pair<byte[], Pair<byte[], byte[]>> head = nextRows.poll();
225 Pair<byte[], byte[]> fuzzyData = head.getSecond();
226 updateWith(currentCell, fuzzyData);
227 }
228 }
229 return !nextRows.isEmpty();
230 }
231
232 boolean lessThan(Cell currentCell, byte[] nextRowKey) {
233 int compareResult =
234 Bytes.compareTo(currentCell.getRowArray(), currentCell.getRowOffset(),
235 currentCell.getRowLength(), nextRowKey, 0, nextRowKey.length);
236 return (!isReversed() && compareResult < 0) || (isReversed() && compareResult > 0);
237 }
238
239 void updateWith(Cell currentCell, Pair<byte[], byte[]> fuzzyData) {
240 byte[] nextRowKeyCandidate =
241 getNextForFuzzyRule(isReversed(), currentCell.getRowArray(), currentCell.getRowOffset(),
242 currentCell.getRowLength(), fuzzyData.getFirst(), fuzzyData.getSecond());
243 if (nextRowKeyCandidate != null) {
244 nextRows.add(new Pair<byte[], Pair<byte[], byte[]>>(nextRowKeyCandidate, fuzzyData));
245 }
246 }
247
248 }
249
250 @Override
251 public boolean filterAllRemaining() {
252 return done;
253 }
254
255
256
257
258 @Override
259 public byte[] toByteArray() {
260 FilterProtos.FuzzyRowFilter.Builder builder = FilterProtos.FuzzyRowFilter.newBuilder();
261 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
262 BytesBytesPair.Builder bbpBuilder = BytesBytesPair.newBuilder();
263 bbpBuilder.setFirst(ByteStringer.wrap(fuzzyData.getFirst()));
264 bbpBuilder.setSecond(ByteStringer.wrap(fuzzyData.getSecond()));
265 builder.addFuzzyKeysData(bbpBuilder);
266 }
267 return builder.build().toByteArray();
268 }
269
270
271
272
273
274
275
276 public static FuzzyRowFilter parseFrom(final byte[] pbBytes) throws DeserializationException {
277 FilterProtos.FuzzyRowFilter proto;
278 try {
279 proto = FilterProtos.FuzzyRowFilter.parseFrom(pbBytes);
280 } catch (InvalidProtocolBufferException e) {
281 throw new DeserializationException(e);
282 }
283 int count = proto.getFuzzyKeysDataCount();
284 ArrayList<Pair<byte[], byte[]>> fuzzyKeysData = new ArrayList<Pair<byte[], byte[]>>(count);
285 for (int i = 0; i < count; ++i) {
286 BytesBytesPair current = proto.getFuzzyKeysData(i);
287 byte[] keyBytes = current.getFirst().toByteArray();
288 byte[] keyMeta = current.getSecond().toByteArray();
289 fuzzyKeysData.add(new Pair<byte[], byte[]>(keyBytes, keyMeta));
290 }
291 return new FuzzyRowFilter(fuzzyKeysData);
292 }
293
294 @Override
295 public String toString() {
296 final StringBuilder sb = new StringBuilder();
297 sb.append("FuzzyRowFilter");
298 sb.append("{fuzzyKeysData=");
299 for (Pair<byte[], byte[]> fuzzyData : fuzzyKeysData) {
300 sb.append('{').append(Bytes.toStringBinary(fuzzyData.getFirst())).append(":");
301 sb.append(Bytes.toStringBinary(fuzzyData.getSecond())).append('}');
302 }
303 sb.append("}, ");
304 return sb.toString();
305 }
306
307
308
309 static enum SatisfiesCode {
310
311 YES,
312
313 NEXT_EXISTS,
314
315 NO_NEXT
316 }
317
318 @InterfaceAudience.Private
319 static SatisfiesCode satisfies(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
320 return satisfies(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
321 }
322
323 @InterfaceAudience.Private
324 static SatisfiesCode satisfies(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
325 byte[] fuzzyKeyMeta) {
326 return satisfies(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
327 }
328
329 static SatisfiesCode satisfies(boolean reverse, byte[] row, int offset, int length,
330 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
331
332 if (!UNSAFE_UNALIGNED) {
333 return satisfiesNoUnsafe(reverse, row, offset, length, fuzzyKeyBytes, fuzzyKeyMeta);
334 }
335
336 if (row == null) {
337
338 return SatisfiesCode.YES;
339 }
340 length = Math.min(length, fuzzyKeyBytes.length);
341 int numWords = length / Bytes.SIZEOF_LONG;
342
343 int j = numWords << 3;
344
345 for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) {
346 long fuzzyBytes = Bytes.toLong(fuzzyKeyBytes, i);
347 long fuzzyMeta = Bytes.toLong(fuzzyKeyMeta, i);
348 long rowValue = Bytes.toLong(row, offset + i);
349 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
350
351 return SatisfiesCode.NEXT_EXISTS;
352 }
353 }
354
355 int off = j;
356
357 if (length - off >= Bytes.SIZEOF_INT) {
358 int fuzzyBytes = Bytes.toInt(fuzzyKeyBytes, off);
359 int fuzzyMeta = Bytes.toInt(fuzzyKeyMeta, off);
360 int rowValue = Bytes.toInt(row, offset + off);
361 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
362
363 return SatisfiesCode.NEXT_EXISTS;
364 }
365 off += Bytes.SIZEOF_INT;
366 }
367
368 if (length - off >= Bytes.SIZEOF_SHORT) {
369 short fuzzyBytes = Bytes.toShort(fuzzyKeyBytes, off);
370 short fuzzyMeta = Bytes.toShort(fuzzyKeyMeta, off);
371 short rowValue = Bytes.toShort(row, offset + off);
372 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
373
374
375
376 return SatisfiesCode.NEXT_EXISTS;
377 }
378 off += Bytes.SIZEOF_SHORT;
379 }
380
381 if (length - off >= Bytes.SIZEOF_BYTE) {
382 int fuzzyBytes = fuzzyKeyBytes[off] & 0xff;
383 int fuzzyMeta = fuzzyKeyMeta[off] & 0xff;
384 int rowValue = row[offset + off] & 0xff;
385 if ((rowValue & fuzzyMeta) != (fuzzyBytes)) {
386
387 return SatisfiesCode.NEXT_EXISTS;
388 }
389 }
390 return SatisfiesCode.YES;
391 }
392
393 static SatisfiesCode satisfiesNoUnsafe(boolean reverse, byte[] row, int offset, int length,
394 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
395 if (row == null) {
396
397 return SatisfiesCode.YES;
398 }
399
400 Order order = Order.orderFor(reverse);
401 boolean nextRowKeyCandidateExists = false;
402
403 for (int i = 0; i < fuzzyKeyMeta.length && i < length; i++) {
404
405 boolean byteAtPositionFixed = fuzzyKeyMeta[i] == 0;
406 boolean fixedByteIncorrect = byteAtPositionFixed && fuzzyKeyBytes[i] != row[i + offset];
407 if (fixedByteIncorrect) {
408
409 if (nextRowKeyCandidateExists) {
410 return SatisfiesCode.NEXT_EXISTS;
411 }
412
413
414
415
416 boolean rowByteLessThanFixed = (row[i + offset] & 0xFF) < (fuzzyKeyBytes[i] & 0xFF);
417 if (rowByteLessThanFixed && !reverse) {
418 return SatisfiesCode.NEXT_EXISTS;
419 } else if (!rowByteLessThanFixed && reverse) {
420 return SatisfiesCode.NEXT_EXISTS;
421 } else {
422 return SatisfiesCode.NO_NEXT;
423 }
424 }
425
426
427
428
429
430
431
432 if (fuzzyKeyMeta[i] == 1 && !order.isMax(fuzzyKeyBytes[i])) {
433 nextRowKeyCandidateExists = true;
434 }
435 }
436 return SatisfiesCode.YES;
437 }
438
439 @InterfaceAudience.Private
440 static byte[] getNextForFuzzyRule(byte[] row, byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
441 return getNextForFuzzyRule(false, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
442 }
443
444 @InterfaceAudience.Private
445 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, byte[] fuzzyKeyBytes,
446 byte[] fuzzyKeyMeta) {
447 return getNextForFuzzyRule(reverse, row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
448 }
449
450
451 private enum Order {
452 ASC {
453 @Override
454 public boolean lt(int lhs, int rhs) {
455 return lhs < rhs;
456 }
457
458 @Override
459 public boolean gt(int lhs, int rhs) {
460 return lhs > rhs;
461 }
462
463 @Override
464 public byte inc(byte val) {
465
466 return (byte) (val + 1);
467 }
468
469 @Override
470 public boolean isMax(byte val) {
471 return val == (byte) 0xff;
472 }
473
474 @Override
475 public byte min() {
476 return 0;
477 }
478 },
479 DESC {
480 @Override
481 public boolean lt(int lhs, int rhs) {
482 return lhs > rhs;
483 }
484
485 @Override
486 public boolean gt(int lhs, int rhs) {
487 return lhs < rhs;
488 }
489
490 @Override
491 public byte inc(byte val) {
492
493 return (byte) (val - 1);
494 }
495
496 @Override
497 public boolean isMax(byte val) {
498 return val == 0;
499 }
500
501 @Override
502 public byte min() {
503 return (byte) 0xFF;
504 }
505 };
506
507 public static Order orderFor(boolean reverse) {
508 return reverse ? DESC : ASC;
509 }
510
511
512 public abstract boolean lt(int lhs, int rhs);
513
514
515 public abstract boolean gt(int lhs, int rhs);
516
517
518 public abstract byte inc(byte val);
519
520
521 public abstract boolean isMax(byte val);
522
523
524 public abstract byte min();
525 }
526
527
528
529
530
531 @InterfaceAudience.Private
532 static byte[] getNextForFuzzyRule(boolean reverse, byte[] row, int offset, int length,
533 byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
534
535
536
537
538
539
540
541
542 byte[] result =
543 Arrays.copyOf(fuzzyKeyBytes, length > fuzzyKeyBytes.length ? length : fuzzyKeyBytes.length);
544 if (reverse && length > fuzzyKeyBytes.length) {
545
546 for (int i = fuzzyKeyBytes.length; i < result.length; i++) {
547 result[i] = (byte) 0xFF;
548 }
549 }
550 int toInc = -1;
551 final Order order = Order.orderFor(reverse);
552
553 boolean increased = false;
554 for (int i = 0; i < result.length; i++) {
555 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0
556 result[i] = row[offset + i];
557 if (!order.isMax(row[offset + i])) {
558
559 toInc = i;
560 }
561 } else if (i < fuzzyKeyMeta.length && fuzzyKeyMeta[i] == -1
562 if (order.lt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
563
564
565 increased = true;
566 break;
567 }
568
569 if (order.gt((row[i + offset] & 0xFF), (fuzzyKeyBytes[i] & 0xFF))) {
570
571
572
573 break;
574 }
575 }
576 }
577
578 if (!increased) {
579 if (toInc < 0) {
580 return null;
581 }
582 result[toInc] = order.inc(result[toInc]);
583
584
585
586 for (int i = toInc + 1; i < result.length; i++) {
587 if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 0
588 result[i] = order.min();
589 }
590 }
591 }
592
593 return reverse? result: trimTrailingZeroes(result, fuzzyKeyMeta, toInc);
594 }
595
596
597
598
599
600
601
602
603
604
605
606
607
608 private static byte[] trimTrailingZeroes(byte[] result, byte[] fuzzyKeyMeta, int toInc) {
609 int off = fuzzyKeyMeta.length >= result.length? result.length -1:
610 fuzzyKeyMeta.length -1;
611 for( ; off >= 0; off--){
612 if(fuzzyKeyMeta[off] != 0) break;
613 }
614 if (off < toInc) off = toInc;
615 byte[] retValue = new byte[off+1];
616 System.arraycopy(result, 0, retValue, 0, retValue.length);
617 return retValue;
618 }
619
620
621
622
623
624 @Override
625 boolean areSerializedFieldsEqual(Filter o) {
626 if (o == this) return true;
627 if (!(o instanceof FuzzyRowFilter)) return false;
628
629 FuzzyRowFilter other = (FuzzyRowFilter) o;
630 if (this.fuzzyKeysData.size() != other.fuzzyKeysData.size()) return false;
631 for (int i = 0; i < fuzzyKeysData.size(); ++i) {
632 Pair<byte[], byte[]> thisData = this.fuzzyKeysData.get(i);
633 Pair<byte[], byte[]> otherData = other.fuzzyKeysData.get(i);
634 if (!(Bytes.equals(thisData.getFirst(), otherData.getFirst()) && Bytes.equals(
635 thisData.getSecond(), otherData.getSecond()))) {
636 return false;
637 }
638 }
639 return true;
640 }
641
642 @Override
643 public boolean equals(Object obj) {
644 return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
645 }
646
647 @Override
648 public int hashCode() {
649 return Objects.hash(this.fuzzyKeysData);
650 }
651 }