View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.filter;
19  
20  import com.google.common.collect.Lists;
21  import org.apache.commons.logging.Log;
22  import org.apache.commons.logging.LogFactory;
23  import org.apache.hadoop.conf.Configuration;
24  import org.apache.hadoop.hbase.Cell;
25  import org.apache.hadoop.hbase.CellUtil;
26  import org.apache.hadoop.hbase.HBaseTestingUtility;
27  import org.apache.hadoop.hbase.HConstants;
28  import org.apache.hadoop.hbase.TableName;
29  import org.apache.hadoop.hbase.client.Durability;
30  import org.apache.hadoop.hbase.client.HTable;
31  import org.apache.hadoop.hbase.client.Put;
32  import org.apache.hadoop.hbase.client.Result;
33  import org.apache.hadoop.hbase.client.ResultScanner;
34  import org.apache.hadoop.hbase.client.Scan;
35  import org.apache.hadoop.hbase.client.Table;
36  import org.apache.hadoop.hbase.filter.FilterList.Operator;
37  import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
38  import org.apache.hadoop.hbase.regionserver.HRegion;
39  import org.apache.hadoop.hbase.regionserver.RegionScanner;
40  import org.apache.hadoop.hbase.testclassification.MediumTests;
41  import org.apache.hadoop.hbase.util.Bytes;
42  import org.apache.hadoop.hbase.util.Pair;
43  import org.junit.After;
44  import org.junit.AfterClass;
45  import org.junit.Before;
46  import org.junit.BeforeClass;
47  import org.junit.Test;
48  import org.junit.experimental.categories.Category;
49  
50  import java.io.IOException;
51  import java.nio.ByteBuffer;
52  import java.util.ArrayList;
53  import java.util.Arrays;
54  import java.util.List;
55  
56  import static org.junit.Assert.assertEquals;
57  
58  /**
59   */
60  @Category(MediumTests.class)
61  public class TestFuzzyRowFilterEndToEnd {
62    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
63    private final static byte fuzzyValue = (byte) 63;
64    private static final Log LOG = LogFactory.getLog(TestFuzzyRowFilterEndToEnd.class);
65  
66    private static int firstPartCardinality = 50;
67    private static int secondPartCardinality = 50;
68    private static int thirdPartCardinality = 50;
69    private static int colQualifiersTotal = 5;
70    private static int totalFuzzyKeys = thirdPartCardinality / 2;
71  
72    private static String table = "TestFuzzyRowFilterEndToEnd";
73  
74    /**
75     * @throws java.lang.Exception
76     */
77    @BeforeClass
78    public static void setUpBeforeClass() throws Exception {
79      Configuration conf = TEST_UTIL.getConfiguration();
80      conf.setInt("hbase.client.scanner.caching", 1000);
81      conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
82        ConstantSizeRegionSplitPolicy.class.getName());
83      // set no splits
84      conf.setLong(HConstants.HREGION_MAX_FILESIZE, ((long) 1024) * 1024 * 1024 * 10);
85  
86      TEST_UTIL.startMiniCluster();
87    }
88  
89    /**
90     * @throws java.lang.Exception
91     */
92    @AfterClass
93    public static void tearDownAfterClass() throws Exception {
94      TEST_UTIL.shutdownMiniCluster();
95    }
96  
97    /**
98     * @throws java.lang.Exception
99     */
100   @Before
101   public void setUp() throws Exception {
102     // Nothing to do.
103   }
104 
105   /**
106    * @throws java.lang.Exception
107    */
108   @After
109   public void tearDown() throws Exception {
110     // Nothing to do.
111   }
112 
113   // HBASE-15676 Test that fuzzy info of all fixed bits (0s) finds matching row.
114   @Test
115   public void testAllFixedBits() throws IOException {
116     String cf = "f";
117     String cq = "q";
118     String table = "testAllFixedBits";
119 
120     Table ht =
121         TEST_UTIL.createTable(TableName.valueOf(table), Bytes.toBytes(cf), Integer.MAX_VALUE);
122     // Load data
123     String[] rows = new String[] { "\\x9C\\x00\\x044\\x00\\x00\\x00\\x00",
124         "\\x9C\\x00\\x044\\x01\\x00\\x00\\x00", "\\x9C\\x00\\x044\\x00\\x01\\x00\\x00",
125         "\\x9B\\x00\\x044e\\x9B\\x02\\xBB", "\\x9C\\x00\\x044\\x00\\x00\\x01\\x00",
126         "\\x9C\\x00\\x044\\x00\\x01\\x00\\x01", "\\x9B\\x00\\x044e\\xBB\\xB2\\xBB", };
127 
128     for (int i = 0; i < rows.length; i++) {
129       Put p = new Put(Bytes.toBytesBinary(rows[i]));
130       p.addColumn(cf.getBytes(), cq.getBytes(), "value".getBytes());
131       ht.put(p);
132     }
133 
134     TEST_UTIL.flush();
135 
136     // v1 should match all rows, because v2 has the actual fix for this bug
137     testAllFixedBitsRunScanWithMask(ht, rows.length, FuzzyRowFilter.V1_PROCESSED_WILDCARD_MASK);
138     testAllFixedBitsRunScanWithMask(ht, 2, FuzzyRowFilter.V2_PROCESSED_WILDCARD_MASK);
139 
140     TEST_UTIL.deleteTable(TableName.valueOf(table));
141   }
142 
143   private void testAllFixedBitsRunScanWithMask(Table ht, int expectedRows, byte processedRowMask)
144     throws IOException {
145     List<Pair<byte[], byte[]>> data = new ArrayList<Pair<byte[], byte[]>>();
146     byte[] fuzzyKey = Bytes.toBytesBinary("\\x9B\\x00\\x044e");
147     byte[] mask = new byte[] { 0, 0, 0, 0, 0 };
148 
149     // copy the fuzzy key and mask to test HBASE-18617
150     byte[] copyFuzzyKey = Arrays.copyOf(fuzzyKey, fuzzyKey.length);
151     byte[] copyMask = Arrays.copyOf(mask, mask.length);
152 
153     data.add(new Pair<byte[], byte[]>(fuzzyKey, mask));
154     FuzzyRowFilter filter = new FuzzyRowFilter(data, processedRowMask);
155 
156     Scan scan = new Scan();
157     scan.setFilter(filter);
158 
159     ResultScanner scanner = ht.getScanner(scan);
160     int total = 0;
161     while (scanner.next() != null) {
162       total++;
163     }
164     assertEquals(expectedRows, total);
165 
166     assertEquals(true, Arrays.equals(copyFuzzyKey, fuzzyKey));
167     assertEquals(true, Arrays.equals(copyMask, mask));
168   }
169 
170   @Test
171   public void testHBASE14782() throws IOException
172   {
173     String cf = "f";
174     String cq = "q";
175     String table = "HBASE14782";
176 
177     Table ht =
178         TEST_UTIL.createTable(TableName.valueOf(table), Bytes.toBytes(cf), Integer.MAX_VALUE);
179     // Load data
180     String[] rows = new String[]{
181         "\\x9C\\x00\\x044\\x00\\x00\\x00\\x00",
182         "\\x9C\\x00\\x044\\x01\\x00\\x00\\x00", 
183         "\\x9C\\x00\\x044\\x00\\x01\\x00\\x00",
184         "\\x9C\\x00\\x044\\x00\\x00\\x01\\x00",
185         "\\x9C\\x00\\x044\\x00\\x01\\x00\\x01", 
186         "\\x9B\\x00\\x044e\\xBB\\xB2\\xBB", 
187     };
188     
189     String badRow = "\\x9C\\x00\\x03\\xE9e\\xBB{X\\x1Fwts\\x1F\\x15vRX";
190     
191     for(int i=0; i < rows.length; i++){
192       Put p = new Put(Bytes.toBytesBinary(rows[i]));
193       p.addColumn(cf.getBytes(), cq.getBytes(), "value".getBytes());
194       ht.put(p);            
195     }
196     
197     Put p = new Put(Bytes.toBytesBinary(badRow));
198     p.addColumn(cf.getBytes(), cq.getBytes(), "value".getBytes());
199     ht.put(p);            
200 
201     TEST_UTIL.flush();
202 
203     testHBASE14782RunScanWithMask(ht, rows.length, FuzzyRowFilter.V1_PROCESSED_WILDCARD_MASK);
204     testHBASE14782RunScanWithMask(ht, rows.length, FuzzyRowFilter.V2_PROCESSED_WILDCARD_MASK);
205 
206     TEST_UTIL.deleteTable(TableName.valueOf(table));
207   }
208 
209   private void testHBASE14782RunScanWithMask(Table ht, int expectedRows, byte processedRowMask)
210     throws IOException {
211     List<Pair<byte[], byte[]>> data = new ArrayList<Pair<byte[], byte[]>>();
212 
213     byte[] fuzzyKey = Bytes.toBytesBinary("\\x00\\x00\\x044");
214     byte[] mask = new byte[] { 1,0,0,0};
215     data.add(new Pair<byte[], byte[]>(fuzzyKey, mask));
216     FuzzyRowFilter filter = new FuzzyRowFilter(data, processedRowMask);
217 
218     Scan scan = new Scan();
219     scan.setFilter(filter);
220 
221     ResultScanner scanner = ht.getScanner(scan);
222     int total = 0;
223     while(scanner.next() != null){
224       total++;
225     }
226     assertEquals(expectedRows, total);
227   }
228   
229   @Test
230   public void testEndToEnd() throws Exception {
231     String cf = "f";
232 
233     HTable ht =
234         TEST_UTIL.createTable(TableName.valueOf(table), Bytes.toBytes(cf), Integer.MAX_VALUE);
235 
236     // 10 byte row key - (2 bytes 4 bytes 4 bytes)
237     // 4 byte qualifier
238     // 4 byte value
239 
240     for (int i0 = 0; i0 < firstPartCardinality; i0++) {
241 
242       for (int i1 = 0; i1 < secondPartCardinality; i1++) {
243 
244         for (int i2 = 0; i2 < thirdPartCardinality; i2++) {
245           byte[] rk = new byte[10];
246 
247           ByteBuffer buf = ByteBuffer.wrap(rk);
248           buf.clear();
249           buf.putShort((short) i0);
250           buf.putInt(i1);
251           buf.putInt(i2);
252           for (int c = 0; c < colQualifiersTotal; c++) {
253             byte[] cq = new byte[4];
254             Bytes.putBytes(cq, 0, Bytes.toBytes(c), 0, 4);
255 
256             Put p = new Put(rk);
257             p.setDurability(Durability.SKIP_WAL);
258             p.add(cf.getBytes(), cq, Bytes.toBytes(c));
259             ht.put(p);
260           }
261         }
262       }
263     }
264 
265     TEST_UTIL.flush();
266 
267     // test passes
268     runTest1(ht, FuzzyRowFilter.V1_PROCESSED_WILDCARD_MASK);
269     runTest1(ht, FuzzyRowFilter.V2_PROCESSED_WILDCARD_MASK);
270     runTest2(ht, FuzzyRowFilter.V1_PROCESSED_WILDCARD_MASK);
271     runTest2(ht, FuzzyRowFilter.V2_PROCESSED_WILDCARD_MASK);
272 
273   }
274 
275   private void runTest1(Table hTable, byte processedWildcardMask) throws IOException {
276     // [0, 2, ?, ?, ?, ?, 0, 0, 0, 1]
277 
278     byte[] mask = new byte[] { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 };
279 
280     List<Pair<byte[], byte[]>> list = new ArrayList<Pair<byte[], byte[]>>();
281     for (int i = 0; i < totalFuzzyKeys; i++) {
282       byte[] fuzzyKey = new byte[10];
283       ByteBuffer buf = ByteBuffer.wrap(fuzzyKey);
284       buf.clear();
285       buf.putShort((short) 2);
286       for (int j = 0; j < 4; j++) {
287         buf.put(fuzzyValue);
288       }
289       buf.putInt(i);
290 
291       Pair<byte[], byte[]> pair = new Pair<byte[], byte[]>(fuzzyKey, mask);
292       list.add(pair);
293     }
294 
295     int expectedSize = secondPartCardinality * totalFuzzyKeys * colQualifiersTotal;
296     FuzzyRowFilter fuzzyRowFilter0 = new FuzzyRowFilter(list, processedWildcardMask);
297     // Filters are not stateless - we can't reuse them
298     FuzzyRowFilter fuzzyRowFilter1 = new FuzzyRowFilter(list, processedWildcardMask);
299 
300     // regular test
301     runScanner(hTable, expectedSize, fuzzyRowFilter0);
302     // optimized from block cache
303     runScanner(hTable, expectedSize, fuzzyRowFilter1);
304 
305   }
306 
307   private void runTest2(Table hTable, byte processedWildcardMask) throws IOException {
308     // [0, 0, ?, ?, ?, ?, 0, 0, 0, 0] , [0, 1, ?, ?, ?, ?, 0, 0, 0, 1]...
309 
310     byte[] mask = new byte[] { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 };
311 
312     List<Pair<byte[], byte[]>> list = new ArrayList<Pair<byte[], byte[]>>();
313 
314     for (int i = 0; i < totalFuzzyKeys; i++) {
315       byte[] fuzzyKey = new byte[10];
316       ByteBuffer buf = ByteBuffer.wrap(fuzzyKey);
317       buf.clear();
318       buf.putShort((short) (i * 2));
319       for (int j = 0; j < 4; j++) {
320         buf.put(fuzzyValue);
321       }
322       buf.putInt(i * 2);
323 
324       Pair<byte[], byte[]> pair = new Pair<byte[], byte[]>(fuzzyKey, mask);
325       list.add(pair);
326     }
327 
328     int expectedSize = totalFuzzyKeys * secondPartCardinality * colQualifiersTotal;
329 
330     FuzzyRowFilter fuzzyRowFilter0 = new FuzzyRowFilter(list, processedWildcardMask);
331     // Filters are not stateless - we can't reuse them
332     FuzzyRowFilter fuzzyRowFilter1 = new FuzzyRowFilter(list, processedWildcardMask);
333 
334     // regular test
335     runScanner(hTable, expectedSize, fuzzyRowFilter0);
336     // optimized from block cache
337     runScanner(hTable, expectedSize, fuzzyRowFilter1);
338 
339   }
340 
341   private void runScanner(Table hTable, int expectedSize, Filter filter) throws IOException {
342 
343     String cf = "f";
344     Scan scan = new Scan();
345     scan.addFamily(cf.getBytes());
346     scan.setFilter(filter);
347     List<HRegion> regions = TEST_UTIL.getHBaseCluster().getRegions(table.getBytes());
348     HRegion first = regions.get(0);
349     first.getScanner(scan);
350     RegionScanner scanner = first.getScanner(scan);
351     List<Cell> results = new ArrayList<Cell>();
352     // Result result;
353     long timeBeforeScan = System.currentTimeMillis();
354     int found = 0;
355     while (scanner.next(results)) {
356       found += results.size();
357       results.clear();
358     }
359     found += results.size();
360     long scanTime = System.currentTimeMillis() - timeBeforeScan;
361     scanner.close();
362 
363     LOG.info("\nscan time = " + scanTime + "ms");
364     LOG.info("found " + found + " results\n");
365 
366     assertEquals(expectedSize, found);
367   }
368 
369   @SuppressWarnings("deprecation")
370   @Test
371   public void testFilterList() throws Exception {
372     String cf = "f";
373     String table = "TestFuzzyRowFiltersInFilterList";
374     HTable ht =
375         TEST_UTIL.createTable(TableName.valueOf(table), Bytes.toBytes(cf), Integer.MAX_VALUE);
376 
377     // 10 byte row key - (2 bytes 4 bytes 4 bytes)
378     // 4 byte qualifier
379     // 4 byte value
380 
381     for (int i1 = 0; i1 < 5; i1++) {
382       for (int i2 = 0; i2 < 5; i2++) {
383         byte[] rk = new byte[10];
384 
385         ByteBuffer buf = ByteBuffer.wrap(rk);
386         buf.clear();
387         buf.putShort((short) 2);
388         buf.putInt(i1);
389         buf.putInt(i2);
390 
391         // Each row contains 5 columns
392         for (int c = 0; c < 5; c++) {
393           byte[] cq = new byte[4];
394           Bytes.putBytes(cq, 0, Bytes.toBytes(c), 0, 4);
395 
396           Put p = new Put(rk);
397           p.setDurability(Durability.SKIP_WAL);
398           p.add(cf.getBytes(), cq, Bytes.toBytes(c));
399           ht.put(p);
400           LOG.info("Inserting: rk: " + Bytes.toStringBinary(rk) + " cq: "
401               + Bytes.toStringBinary(cq));
402         }
403       }
404     }
405 
406     TEST_UTIL.flush();
407 
408     // test passes if we get back 5 KV's (1 row)
409     runTest(ht, 5);
410 
411   }
412 
413   @SuppressWarnings("unchecked")
414   private void runTest(HTable hTable, int expectedSize) throws IOException {
415     // [0, 2, ?, ?, ?, ?, 0, 0, 0, 1]
416     byte[] fuzzyKey1 = new byte[10];
417     ByteBuffer buf = ByteBuffer.wrap(fuzzyKey1);
418     buf.clear();
419     buf.putShort((short) 2);
420     for (int i = 0; i < 4; i++)
421       buf.put(fuzzyValue);
422     buf.putInt((short) 1);
423     byte[] mask1 = new byte[] { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0 };
424 
425     byte[] fuzzyKey2 = new byte[10];
426     buf = ByteBuffer.wrap(fuzzyKey2);
427     buf.clear();
428     buf.putShort((short) 2);
429     buf.putInt((short) 2);
430     for (int i = 0; i < 4; i++)
431       buf.put(fuzzyValue);
432 
433     byte[] mask2 = new byte[] { 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 };
434 
435     Pair<byte[], byte[]> pair1 = new Pair<byte[], byte[]>(fuzzyKey1, mask1);
436     Pair<byte[], byte[]> pair2 = new Pair<byte[], byte[]>(fuzzyKey2, mask2);
437 
438     FuzzyRowFilter fuzzyRowFilter1 = new FuzzyRowFilter(Lists.newArrayList(pair1));
439     FuzzyRowFilter fuzzyRowFilter2 = new FuzzyRowFilter(Lists.newArrayList(pair2));
440     // regular test - we expect 1 row back (5 KVs)
441     runScanner(hTable, expectedSize, fuzzyRowFilter1, fuzzyRowFilter2);
442   }
443 
444   private void runScanner(Table hTable, int expectedSize, Filter filter1, Filter filter2)
445       throws IOException {
446     String cf = "f";
447     Scan scan = new Scan();
448     scan.addFamily(cf.getBytes());
449     FilterList filterList = new FilterList(Operator.MUST_PASS_ALL, filter1, filter2);
450     scan.setFilter(filterList);
451 
452     ResultScanner scanner = hTable.getScanner(scan);
453     List<Cell> results = new ArrayList<Cell>();
454     Result result;
455     long timeBeforeScan = System.currentTimeMillis();
456     while ((result = scanner.next()) != null) {
457       for (Cell kv : result.listCells()) {
458         LOG.info("Got rk: " + Bytes.toStringBinary(CellUtil.cloneRow(kv)) + " cq: "
459             + Bytes.toStringBinary(CellUtil.cloneQualifier(kv)));
460         results.add(kv);
461       }
462     }
463     long scanTime = System.currentTimeMillis() - timeBeforeScan;
464     scanner.close();
465 
466     LOG.info("scan time = " + scanTime + "ms");
467     LOG.info("found " + results.size() + " results");
468 
469     assertEquals(expectedSize, results.size());
470   }
471 }