1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.regionserver;
21
22 import static org.junit.Assert.assertEquals;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertTrue;
25 import static org.junit.Assert.fail;
26 import static org.mockito.Mockito.mock;
27 import static org.mockito.Mockito.when;
28
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.Collections;
32 import java.util.List;
33 import java.util.Random;
34
35 import org.apache.commons.logging.Log;
36 import org.apache.commons.logging.LogFactory;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.fs.FileSystem;
39 import org.apache.hadoop.fs.Path;
40 import org.apache.hadoop.hbase.HBaseTestingUtility;
41 import org.apache.hadoop.hbase.HColumnDescriptor;
42 import org.apache.hadoop.hbase.KeyValue;
43 import org.apache.hadoop.hbase.testclassification.MediumTests;
44 import org.apache.hadoop.hbase.client.Scan;
45 import org.apache.hadoop.hbase.io.hfile.BlockCache;
46 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
47 import org.apache.hadoop.hbase.io.hfile.HFile;
48 import org.apache.hadoop.hbase.io.hfile.HFileContext;
49 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
50 import org.apache.hadoop.hbase.io.hfile.TestHFileWriterV2;
51 import org.apache.hadoop.hbase.util.BloomFilterFactory;
52 import org.apache.hadoop.hbase.util.ByteBloomFilter;
53 import org.apache.hadoop.hbase.util.Bytes;
54 import org.apache.hadoop.hbase.util.CompoundBloomFilter;
55 import org.apache.hadoop.hbase.util.CompoundBloomFilterBase;
56 import org.apache.hadoop.hbase.util.CompoundBloomFilterWriter;
57 import org.junit.Before;
58 import org.junit.Test;
59 import org.junit.experimental.categories.Category;
60
61
62
63
64
65 @Category(MediumTests.class)
66 public class TestCompoundBloomFilter {
67
68 private static final HBaseTestingUtility TEST_UTIL =
69 new HBaseTestingUtility();
70
71 private static final Log LOG = LogFactory.getLog(
72 TestCompoundBloomFilter.class);
73
74 private static final int NUM_TESTS = 9;
75 private static final BloomType BLOOM_TYPES[] = { BloomType.ROW,
76 BloomType.ROW, BloomType.ROWCOL, BloomType.ROWCOL, BloomType.ROW,
77 BloomType.ROWCOL, BloomType.ROWCOL, BloomType.ROWCOL, BloomType.ROW };
78
79 private static final int NUM_KV[];
80 static {
81 final int N = 10000;
82 NUM_KV = new int[] { 21870, N, N, N, N, 1000, N, 7500, 7500};
83 assert NUM_KV.length == NUM_TESTS;
84 }
85
86 private static final int BLOCK_SIZES[];
87 static {
88 final int blkSize = 65536;
89 BLOCK_SIZES = new int[] { 512, 1000, blkSize, blkSize, blkSize, 128, 300,
90 blkSize, blkSize };
91 assert BLOCK_SIZES.length == NUM_TESTS;
92 }
93
94
95
96
97
98
99 private static final int BLOOM_BLOCK_SIZES[] = { 1000, 4096, 4096, 4096,
100 8192, 128, 1024, 600, 600 };
101 static { assert BLOOM_BLOCK_SIZES.length == NUM_TESTS; }
102
103 private static final double TARGET_ERROR_RATES[] = { 0.025, 0.01, 0.015,
104 0.01, 0.03, 0.01, 0.01, 0.07, 0.07 };
105 static { assert TARGET_ERROR_RATES.length == NUM_TESTS; }
106
107
108 private static final double TOO_HIGH_ERROR_RATE;
109 static {
110 double m = 0;
111 for (double errorRate : TARGET_ERROR_RATES)
112 m = Math.max(m, errorRate);
113 TOO_HIGH_ERROR_RATE = m + 0.03;
114 }
115
116 private static Configuration conf;
117 private static CacheConfig cacheConf;
118 private FileSystem fs;
119 private BlockCache blockCache;
120
121
122 private String testIdMsg;
123
124 private static final int GENERATION_SEED = 2319;
125 private static final int EVALUATION_SEED = 135;
126
127 @Before
128 public void setUp() throws IOException {
129 conf = TEST_UTIL.getConfiguration();
130
131
132 conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
133
134 fs = FileSystem.get(conf);
135
136 cacheConf = new CacheConfig(conf);
137 blockCache = cacheConf.getBlockCache();
138 assertNotNull(blockCache);
139 }
140
141 private List<KeyValue> createSortedKeyValues(Random rand, int n) {
142 List<KeyValue> kvList = new ArrayList<KeyValue>(n);
143 for (int i = 0; i < n; ++i)
144 kvList.add(TestHFileWriterV2.randomKeyValue(rand));
145 Collections.sort(kvList, KeyValue.COMPARATOR);
146 return kvList;
147 }
148
149 @Test
150 public void testCompoundBloomFilter() throws IOException {
151 conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
152 for (int t = 0; t < NUM_TESTS; ++t) {
153 conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE,
154 (float) TARGET_ERROR_RATES[t]);
155
156 testIdMsg = "in test #" + t + ":";
157 Random generationRand = new Random(GENERATION_SEED);
158 List<KeyValue> kvs = createSortedKeyValues(generationRand, NUM_KV[t]);
159 BloomType bt = BLOOM_TYPES[t];
160 Path sfPath = writeStoreFile(t, bt, kvs);
161 readStoreFile(t, bt, kvs, sfPath);
162 }
163 }
164
165
166
167
168
169
170
171
172
173
174
175
176
177 private void validateFalsePosRate(double falsePosRate, int nTrials,
178 double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
179 double p = BloomFilterFactory.getErrorRate(conf);
180 double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);
181
182 String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
183 + falsePosRate + ", nTrials=" + nTrials + ")";
184 LOG.info("z-value is " + zValue + assortedStatsStr);
185
186 boolean isUpperBound = zValueBoundary > 0;
187
188 if (isUpperBound && zValue > zValueBoundary ||
189 !isUpperBound && zValue < zValueBoundary) {
190 String errorMsg = "False positive rate z-value " + zValue + " is "
191 + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
192 + assortedStatsStr + ". Per-chunk stats:\n"
193 + cbf.formatTestingStats();
194 fail(errorMsg + additionalMsg);
195 }
196 }
197
198 private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
199 Path sfPath) throws IOException {
200 StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt);
201 StoreFile.Reader r = sf.createReader();
202 final boolean pread = true;
203 StoreFileScanner scanner = r.getStoreFileScanner(true, pread, false, 0, 0, false);
204
205 {
206
207 int numChecked = 0;
208 for (KeyValue kv : kvs) {
209 byte[] row = kv.getRow();
210 boolean present = isInBloom(scanner, row, kv.getQualifier());
211 assertTrue(testIdMsg + " Bloom filter false negative on row "
212 + Bytes.toStringBinary(row) + " after " + numChecked
213 + " successful checks", present);
214 ++numChecked;
215 }
216 }
217
218
219
220 for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
221 if (fakeLookupEnabled) {
222 ByteBloomFilter.setRandomGeneratorForTest(new Random(283742987L));
223 }
224 try {
225 String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
226 "enabled" : "disabled");
227 CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
228 cbf.enableTestingStats();
229 int numFalsePos = 0;
230 Random rand = new Random(EVALUATION_SEED);
231 int nTrials = NUM_KV[t] * 10;
232 for (int i = 0; i < nTrials; ++i) {
233 byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
234 if (isInBloom(scanner, query, bt, rand)) {
235 numFalsePos += 1;
236 }
237 }
238 double falsePosRate = numFalsePos * 1.0 / nTrials;
239 LOG.debug(String.format(testIdMsg
240 + " False positives: %d out of %d (%f)",
241 numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);
242
243
244 assertTrue("False positive is too high: " + falsePosRate + " (greater "
245 + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
246 falsePosRate < TOO_HIGH_ERROR_RATE);
247
248
249
250
251
252
253 double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
254 validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
255 fakeLookupModeStr);
256
257
258
259
260
261 int nChunks = cbf.getNumChunks();
262 if (nChunks > 1) {
263 numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
264 nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
265 falsePosRate = numFalsePos * 1.0 / nTrials;
266 LOG.info(testIdMsg + " False positive rate without last chunk is " +
267 falsePosRate + fakeLookupModeStr);
268 }
269
270 validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
271 fakeLookupModeStr);
272 } finally {
273 ByteBloomFilter.setRandomGeneratorForTest(null);
274 }
275 }
276
277 r.close(true);
278 }
279
280 private boolean isInBloom(StoreFileScanner scanner, byte[] row, BloomType bt,
281 Random rand) {
282 return isInBloom(scanner, row, TestHFileWriterV2.randomRowOrQualifier(rand));
283 }
284
285 private boolean isInBloom(StoreFileScanner scanner, byte[] row,
286 byte[] qualifier) {
287 Scan scan = new Scan(row, row);
288 scan.addColumn(Bytes.toBytes(TestHFileWriterV2.COLUMN_FAMILY_NAME), qualifier);
289 Store store = mock(Store.class);
290 HColumnDescriptor hcd = mock(HColumnDescriptor.class);
291 when(hcd.getName()).thenReturn(Bytes.toBytes(TestHFileWriterV2.COLUMN_FAMILY_NAME));
292 when(store.getFamily()).thenReturn(hcd);
293 return scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
294 }
295
296 private Path writeStoreFile(int t, BloomType bt, List<KeyValue> kvs)
297 throws IOException {
298 conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,
299 BLOOM_BLOCK_SIZES[t]);
300 conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
301 cacheConf = new CacheConfig(conf);
302 HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCK_SIZES[t]).build();
303 StoreFile.Writer w = new StoreFile.WriterBuilder(conf, cacheConf, fs)
304 .withOutputDir(TEST_UTIL.getDataTestDir())
305 .withBloomType(bt)
306 .withFileContext(meta)
307 .build();
308
309 assertTrue(w.hasGeneralBloom());
310 assertTrue(w.getGeneralBloomWriter() instanceof CompoundBloomFilterWriter);
311 CompoundBloomFilterWriter cbbf =
312 (CompoundBloomFilterWriter) w.getGeneralBloomWriter();
313
314 int keyCount = 0;
315 KeyValue prev = null;
316 LOG.debug("Total keys/values to insert: " + kvs.size());
317 for (KeyValue kv : kvs) {
318 w.append(kv);
319
320
321 boolean newKey = true;
322 if (prev != null) {
323 newKey = !(bt == BloomType.ROW ? KeyValue.COMPARATOR.matchingRows(kv,
324 prev) : KeyValue.COMPARATOR.matchingRowColumn(kv, prev));
325 }
326 if (newKey)
327 ++keyCount;
328 assertEquals(keyCount, cbbf.getKeyCount());
329
330 prev = kv;
331 }
332 w.close();
333
334 return w.getPath();
335 }
336
337 @Test
338 public void testCompoundBloomSizing() {
339 int bloomBlockByteSize = 4096;
340 int bloomBlockBitSize = bloomBlockByteSize * 8;
341 double targetErrorRate = 0.01;
342 long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
343 targetErrorRate);
344
345 long bloomSize1 = bloomBlockByteSize * 8;
346 long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
347 targetErrorRate);
348
349 double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
350 assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
351 }
352
353 @Test
354 public void testCreateKey() {
355 CompoundBloomFilterBase cbfb = new CompoundBloomFilterBase();
356 byte[] row = "myRow".getBytes();
357 byte[] qualifier = "myQualifier".getBytes();
358 byte[] rowKey = cbfb.createBloomKey(row, 0, row.length,
359 row, 0, 0);
360 byte[] rowColKey = cbfb.createBloomKey(row, 0, row.length,
361 qualifier, 0, qualifier.length);
362 KeyValue rowKV = KeyValue.createKeyValueFromKey(rowKey);
363 KeyValue rowColKV = KeyValue.createKeyValueFromKey(rowColKey);
364 assertEquals(rowKV.getTimestamp(), rowColKV.getTimestamp());
365 assertEquals(Bytes.toStringBinary(rowKV.getRow()),
366 Bytes.toStringBinary(rowColKV.getRow()));
367 assertEquals(0, rowKV.getQualifier().length);
368 }
369
370
371 }
372