1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertNotSame;
25 import static org.junit.Assert.assertTrue;
26 import static org.junit.Assert.fail;
27
28 import java.io.IOException;
29 import java.util.Arrays;
30 import java.util.HashMap;
31 import java.util.List;
32 import java.util.Map;
33 import java.util.Map.Entry;
34 import java.util.Random;
35 import java.util.Set;
36 import java.util.concurrent.Callable;
37
38 import junit.framework.Assert;
39
40 import org.apache.commons.logging.Log;
41 import org.apache.commons.logging.LogFactory;
42 import org.apache.hadoop.conf.Configuration;
43 import org.apache.hadoop.fs.FileStatus;
44 import org.apache.hadoop.fs.FileSystem;
45 import org.apache.hadoop.fs.Path;
46 import org.apache.hadoop.hbase.CategoryBasedTimeout;
47 import org.apache.hadoop.hbase.Cell;
48 import org.apache.hadoop.hbase.CellUtil;
49 import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
50 import org.apache.hadoop.hbase.HBaseConfiguration;
51 import org.apache.hadoop.hbase.HBaseTestingUtility;
52 import org.apache.hadoop.hbase.HColumnDescriptor;
53 import org.apache.hadoop.hbase.HConstants;
54 import org.apache.hadoop.hbase.HTableDescriptor;
55 import org.apache.hadoop.hbase.HadoopShims;
56 import org.apache.hadoop.hbase.KeyValue;
57 import org.apache.hadoop.hbase.PerformanceEvaluation;
58 import org.apache.hadoop.hbase.TableName;
59 import org.apache.hadoop.hbase.client.HBaseAdmin;
60 import org.apache.hadoop.hbase.client.HTable;
61 import org.apache.hadoop.hbase.client.Put;
62 import org.apache.hadoop.hbase.client.RegionLocator;
63 import org.apache.hadoop.hbase.client.Result;
64 import org.apache.hadoop.hbase.client.ResultScanner;
65 import org.apache.hadoop.hbase.client.Scan;
66 import org.apache.hadoop.hbase.client.Table;
67 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
68 import org.apache.hadoop.hbase.io.compress.Compression;
69 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
70 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
71 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
72 import org.apache.hadoop.hbase.io.hfile.HFile;
73 import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
74 import org.apache.hadoop.hbase.regionserver.BloomType;
75 import org.apache.hadoop.hbase.regionserver.HRegion;
76 import org.apache.hadoop.hbase.regionserver.HStore;
77 import org.apache.hadoop.hbase.regionserver.Store;
78 import org.apache.hadoop.hbase.regionserver.StoreFile;
79 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
80 import org.apache.hadoop.hbase.testclassification.LargeTests;
81 import org.apache.hadoop.hbase.util.Bytes;
82 import org.apache.hadoop.hbase.util.FSUtils;
83 import org.apache.hadoop.hbase.util.Threads;
84 import org.apache.hadoop.hbase.util.Writables;
85 import org.apache.hadoop.io.NullWritable;
86 import org.apache.hadoop.mapreduce.Job;
87 import org.apache.hadoop.mapreduce.Mapper;
88 import org.apache.hadoop.mapreduce.RecordWriter;
89 import org.apache.hadoop.mapreduce.TaskAttemptContext;
90 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
91 import org.junit.Ignore;
92 import org.junit.Rule;
93 import org.junit.Test;
94 import org.junit.experimental.categories.Category;
95 import org.junit.rules.TestRule;
96 import org.mockito.Mockito;
97
98
99
100
101
102
103
/**
 * Tests for {@link HFileOutputFormat}: timestamp/timerange handling in the
 * RecordWriter, incremental (bulk) load via MapReduce, per-family
 * configuration serialization, and compaction-exclusion of bulk-loaded files.
 */
@Category(LargeTests.class)
public class TestHFileOutputFormat {
  // Per-test timeout sized by the test category; also reports stuck threads.
  @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
      withTimeout(this.getClass()).withLookingForStuckThread(true).build();
  // Rows each map task emits (see RandomKVGeneratingMapper).
  private final static int ROWSPERSPLIT = 1024;

  // Base family name; the test table uses the two derived families below.
  private static final byte[] FAMILY_NAME = Bytes.toBytes("info");
  private static final byte[][] FAMILIES = {
    Bytes.add(FAMILY_NAME, Bytes.toBytes("-A")),
    Bytes.add(FAMILY_NAME, Bytes.toBytes("-B"))
  };
  private static final TableName TABLE_NAME =
      TableName.valueOf("TestTable");

  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat.class);
121
122
123
124
125 static class RandomKVGeneratingMapper extends
126 Mapper<NullWritable, NullWritable, ImmutableBytesWritable, KeyValue> {
127
128 private int keyLength;
129 private static final int KEYLEN_DEFAULT = 10;
130 private static final String KEYLEN_CONF = "randomkv.key.length";
131
132 private int valLength;
133 private static final int VALLEN_DEFAULT=10;
134 private static final String VALLEN_CONF="randomkv.val.length";
135 private static final byte [] QUALIFIER = Bytes.toBytes("data");
136
137 @Override
138 protected void setup(Context context) throws IOException,
139 InterruptedException {
140 super.setup(context);
141
142 Configuration conf = context.getConfiguration();
143 keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
144 valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
145 }
146
147 protected void map(
148 NullWritable n1, NullWritable n2,
149 Mapper<NullWritable, NullWritable,
150 ImmutableBytesWritable,KeyValue>.Context context)
151 throws java.io.IOException ,InterruptedException
152 {
153
154 byte keyBytes[] = new byte[keyLength];
155 byte valBytes[] = new byte[valLength];
156
157 int taskId = context.getTaskAttemptID().getTaskID().getId();
158 assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
159
160 Random random = new Random();
161 for (int i = 0; i < ROWSPERSPLIT; i++) {
162
163 random.nextBytes(keyBytes);
164
165 keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
166 random.nextBytes(valBytes);
167 ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
168
169 for (byte[] family : TestHFileOutputFormat.FAMILIES) {
170 KeyValue kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes);
171 context.write(key, kv);
172 }
173 }
174 }
175 }
176
  /**
   * Configures {@code job} to generate random KeyValues with
   * {@link RandomKVGeneratingMapper} using the synthetic NMapInputFormat
   * (no real input files required).
   */
  private void setupRandomGeneratorMapper(Job job) {
    job.setInputFormatClass(NMapInputFormat.class);
    job.setMapperClass(RandomKVGeneratingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
  }
183
184
185
186
187
188
189 @Test
190 public void test_LATEST_TIMESTAMP_isReplaced()
191 throws Exception {
192 Configuration conf = new Configuration(this.util.getConfiguration());
193 RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
194 TaskAttemptContext context = null;
195 Path dir =
196 util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
197 try {
198 Job job = new Job(conf);
199 FileOutputFormat.setOutputPath(job, dir);
200 context = createTestTaskAttemptContext(job);
201 HFileOutputFormat hof = new HFileOutputFormat();
202 writer = hof.getRecordWriter(context);
203 final byte [] b = Bytes.toBytes("b");
204
205
206
207 KeyValue kv = new KeyValue(b, b, b);
208 KeyValue original = kv.clone();
209 writer.write(new ImmutableBytesWritable(), kv);
210 assertFalse(original.equals(kv));
211 assertTrue(Bytes.equals(original.getRow(), kv.getRow()));
212 assertTrue(CellUtil.matchingColumn(original, kv.getFamily(), kv.getQualifier()));
213 assertNotSame(original.getTimestamp(), kv.getTimestamp());
214 assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
215
216
217
218 kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
219 original = kv.clone();
220 writer.write(new ImmutableBytesWritable(), kv);
221 assertTrue(original.equals(kv));
222 } finally {
223 if (writer != null && context != null) writer.close(context);
224 dir.getFileSystem(conf).delete(dir, true);
225 }
226 }
227
228 private TaskAttemptContext createTestTaskAttemptContext(final Job job)
229 throws IOException, Exception {
230 HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
231 TaskAttemptContext context = hadoop.createTestTaskAttemptContext(job, "attempt_200707121733_0001_m_000000_0");
232 return context;
233 }
234
235
236
237
238
239 @Test
240 public void test_TIMERANGE() throws Exception {
241 Configuration conf = new Configuration(this.util.getConfiguration());
242 RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
243 TaskAttemptContext context = null;
244 Path dir =
245 util.getDataTestDir("test_TIMERANGE_present");
246 LOG.info("Timerange dir writing to dir: "+ dir);
247 try {
248
249 Job job = new Job(conf);
250 FileOutputFormat.setOutputPath(job, dir);
251 context = createTestTaskAttemptContext(job);
252 HFileOutputFormat hof = new HFileOutputFormat();
253 writer = hof.getRecordWriter(context);
254
255
256 final byte [] b = Bytes.toBytes("b");
257
258
259 KeyValue kv = new KeyValue(b, b, b, 2000, b);
260 KeyValue original = kv.clone();
261 writer.write(new ImmutableBytesWritable(), kv);
262 assertEquals(original,kv);
263
264
265 kv = new KeyValue(b, b, b, 1000, b);
266 original = kv.clone();
267 writer.write(new ImmutableBytesWritable(), kv);
268 assertEquals(original, kv);
269
270
271 writer.close(context);
272
273
274
275
276 FileSystem fs = FileSystem.get(conf);
277 Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
278 FileStatus[] sub1 = fs.listStatus(attemptDirectory);
279 FileStatus[] file = fs.listStatus(sub1[0].getPath());
280
281
282 HFile.Reader rd = HFile.createReader(fs, file[0].getPath(),
283 new CacheConfig(conf), conf);
284 Map<byte[],byte[]> finfo = rd.loadFileInfo();
285 byte[] range = finfo.get("TIMERANGE".getBytes());
286 assertNotNull(range);
287
288
289 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
290 Writables.copyWritable(range, timeRangeTracker);
291 LOG.info(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax());
292 assertEquals(1000, timeRangeTracker.getMin());
293 assertEquals(2000, timeRangeTracker.getMax());
294 rd.close();
295 } finally {
296 if (writer != null && context != null) writer.close(context);
297 dir.getFileSystem(conf).delete(dir, true);
298 }
299 }
300
301
302
303
  /**
   * Runs a small end-to-end MR job: random KVs through
   * SimpleTotalOrderPartitioner and KeyValueSortReducer into
   * HFileOutputFormat, then checks that output files were produced.
   */
  @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Keep the MR sort buffer small for this test.
    conf.setInt("mapreduce.task.io.sort.mb", 20);
    // Small max region size so output is split across files.
    conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job);

    // Partition over the full key space [0x00.., 0xff..].
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte)0);
    Arrays.fill(endKey, (byte)0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Set start and end rows for the partitioner.
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setNumReduceTasks(4);
    // Register HBase serializations for Mutation/Result/KeyValue values.
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus [] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }
341
342 @Test
343 public void testJobConfiguration() throws Exception {
344 Configuration conf = new Configuration(this.util.getConfiguration());
345 conf.set("hbase.fs.tmp.dir", util.getDataTestDir("testJobConfiguration").toString());
346 Job job = new Job(conf);
347 job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
348 HTableDescriptor tableDescriptor = Mockito.mock(HTableDescriptor.class);
349 RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
350 setupMockStartKeys(regionLocator);
351 setupMockTableName(regionLocator);
352 HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
353 assertEquals(job.getNumReduceTasks(), 4);
354 }
355
356 private byte [][] generateRandomStartKeys(int numKeys) {
357 Random random = new Random();
358 byte[][] ret = new byte[numKeys][];
359
360 ret[0] = HConstants.EMPTY_BYTE_ARRAY;
361 for (int i = 1; i < numKeys; i++) {
362 ret[i] =
363 PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
364 }
365 return ret;
366 }
367
368 private byte[][] generateRandomSplitKeys(int numKeys) {
369 Random random = new Random();
370 byte[][] ret = new byte[numKeys][];
371 for (int i = 0; i < numKeys; i++) {
372 ret[i] =
373 PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
374 }
375 return ret;
376 }
377
  /** Incremental load without changing the table's regions mid-test. */
  @Test
  public void testMRIncrementalLoad() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoad\n");
    doIncrementalLoadTest(false);
  }
383
  /** Incremental load with the table recreated on new split points before the bulk load. */
  @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
    doIncrementalLoadTest(true);
  }
389
390 private void doIncrementalLoadTest(
391 boolean shouldChangeRegions) throws Exception {
392 util = new HBaseTestingUtility();
393 Configuration conf = util.getConfiguration();
394 byte[][] splitKeys = generateRandomSplitKeys(4);
395 HBaseAdmin admin = null;
396 try {
397 util.setJobWithoutMRCluster();
398 util.startMiniCluster();
399 Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
400 admin = util.getHBaseAdmin();
401 HTable table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
402 assertEquals("Should start with empty table",
403 0, util.countRows(table));
404 int numRegions = -1;
405 try(RegionLocator r = table.getRegionLocator()) {
406 numRegions = r.getStartKeys().length;
407 }
408 assertEquals("Should make 5 regions", numRegions, 5);
409
410
411 runIncrementalPELoad(conf, table, testDir);
412
413 assertEquals("HFOF should not touch actual table",
414 0, util.countRows(table));
415
416
417
418 int dir = 0;
419 for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
420 for (byte[] family : FAMILIES) {
421 if (Bytes.toString(family).equals(f.getPath().getName())) {
422 ++dir;
423 }
424 }
425 }
426 assertEquals("Column family not found in FS.", FAMILIES.length, dir);
427
428
429 if (shouldChangeRegions) {
430 LOG.info("Changing regions in table");
431 admin.disableTable(table.getTableName());
432 while(util.getMiniHBaseCluster().getMaster().getAssignmentManager().
433 getRegionStates().isRegionsInTransition()) {
434 Threads.sleep(200);
435 LOG.info("Waiting on table to finish disabling");
436 }
437 util.deleteTable(table.getName());
438 byte[][] newSplitKeys = generateRandomSplitKeys(14);
439 table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);
440 while (table.getRegionLocations().size() != 15 ||
441 !admin.isTableAvailable(table.getTableName())) {
442 Thread.sleep(200);
443 LOG.info("Waiting for new region assignment to happen");
444 }
445 }
446
447
448 new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
449
450
451 int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
452 assertEquals("LoadIncrementalHFiles should put expected data in table",
453 expectedRows, util.countRows(table));
454 Scan scan = new Scan();
455 ResultScanner results = table.getScanner(scan);
456 for (Result res : results) {
457 assertEquals(FAMILIES.length, res.rawCells().length);
458 Cell first = res.rawCells()[0];
459 for (Cell kv : res.rawCells()) {
460 assertTrue(CellUtil.matchingRow(first, kv));
461 assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
462 }
463 }
464 results.close();
465 String tableDigestBefore = util.checksumRows(table);
466
467
468 admin.disableTable(TABLE_NAME);
469 while (!admin.isTableDisabled(TABLE_NAME)) {
470 Thread.sleep(200);
471 LOG.info("Waiting for table to disable");
472 }
473 admin.enableTable(TABLE_NAME);
474 util.waitTableAvailable(TABLE_NAME);
475 assertEquals("Data should remain after reopening of regions",
476 tableDigestBefore, util.checksumRows(table));
477 } finally {
478 if (admin != null) admin.close();
479 util.shutdownMiniCluster();
480 }
481 }
482
483 private void runIncrementalPELoad(
484 Configuration conf, HTable table, Path outDir)
485 throws Exception {
486 Job job = new Job(conf, "testLocalMRIncrementalLoad");
487 job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
488 job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
489 MutationSerialization.class.getName(), ResultSerialization.class.getName(),
490 KeyValueSerialization.class.getName());
491 setupRandomGeneratorMapper(job);
492 HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(),
493 table.getRegionLocator());
494 FileOutputFormat.setOutputPath(job, outDir);
495
496 Assert.assertFalse( util.getTestFileSystem().exists(outDir)) ;
497
498 assertEquals(table.getRegionLocator().getAllRegionLocations().size(), job.getNumReduceTasks());
499
500 assertTrue(job.waitForCompletion(true));
501 }
502
503
504
505
506
507
508
509
510
511
  /**
   * Round-trips the family-to-compression mapping through the job
   * Configuration via {@code HFileOutputFormat.configureCompression} /
   * {@code createFamilyCompressionMap}, for 0..3 configured families.
   */
  @Test
  public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Compression.Algorithm> familyToCompression =
          getMockColumnFamiliesForCompression(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForCompression(table, familyToCompression);
      HFileOutputFormat.configureCompression(table, conf);

      // Deserialize the configuration string back into a map.
      Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat
          .createFamilyCompressionMap(conf);

      // Every family configured above must come back with the same algorithm.
      for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
        assertEquals("Compression configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
      }
    }
  }
535
536 private void setupMockColumnFamiliesForCompression(Table table,
537 Map<String, Compression.Algorithm> familyToCompression) throws IOException {
538 HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
539 for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
540 mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
541 .setMaxVersions(1)
542 .setCompressionType(entry.getValue())
543 .setBlockCacheEnabled(false)
544 .setTimeToLive(0));
545 }
546 Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
547 }
548
549
550
551
552
  /**
   * Returns up to {@code numCfs} family-name-to-compression entries; names
   * contain special characters to exercise configuration escaping.
   */
  private Map<String, Compression.Algorithm>
      getMockColumnFamiliesForCompression (int numCfs) {
    Map<String, Compression.Algorithm> familyToCompression = new HashMap<String, Compression.Algorithm>();

    if (numCfs-- > 0) {
      familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
    }
    if (numCfs-- > 0) {
      // NOTE(review): this reuses the key above, so GZ overwrites SNAPPY and
      // "Family3" below is unreachable for numCfs <= 3 — confirm whether the
      // duplicate key is intentional.
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family3", Compression.Algorithm.NONE);
    }
    return familyToCompression;
  }
571
572
573
574
575
576
577
578
579
580
581
  /**
   * Round-trips the family-to-bloom-type mapping through the job
   * Configuration via {@code HFileOutputFormat.configureBloomType} /
   * {@code createFamilyBloomTypeMap}, for 0..2 configured families.
   */
  @Test
  public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 2; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, BloomType> familyToBloomType =
          getMockColumnFamiliesForBloomType(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBloomType(table,
          familyToBloomType);
      HFileOutputFormat.configureBloomType(table, conf);

      // Deserialize the configuration string back into a map.
      Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
          HFileOutputFormat
              .createFamilyBloomTypeMap(conf);

      // Every family configured above must come back with the same type.
      for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
        assertEquals("BloomType configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes()));
      }
    }
  }
608
609 private void setupMockColumnFamiliesForBloomType(Table table,
610 Map<String, BloomType> familyToDataBlockEncoding) throws IOException {
611 HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
612 for (Entry<String, BloomType> entry : familyToDataBlockEncoding.entrySet()) {
613 mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
614 .setMaxVersions(1)
615 .setBloomFilterType(entry.getValue())
616 .setBlockCacheEnabled(false)
617 .setTimeToLive(0));
618 }
619 Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
620 }
621
622
623
624
625
626 private Map<String, BloomType>
627 getMockColumnFamiliesForBloomType (int numCfs) {
628 Map<String, BloomType> familyToBloomType =
629 new HashMap<String, BloomType>();
630
631 if (numCfs-- > 0) {
632 familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
633 }
634 if (numCfs-- > 0) {
635 familyToBloomType.put("Family2=asdads&!AASD",
636 BloomType.ROWCOL);
637 }
638 if (numCfs-- > 0) {
639 familyToBloomType.put("Family3", BloomType.NONE);
640 }
641 return familyToBloomType;
642 }
643
644
645
646
647
648
649
650
651
652
  /**
   * Round-trips the family-to-block-size mapping through the job
   * Configuration via {@code HFileOutputFormat.configureBlockSize} /
   * {@code createFamilyBlockSizeMap}, for 0..3 configured families.
   */
  @Test
  public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Integer> familyToBlockSize =
          getMockColumnFamiliesForBlockSize(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBlockSize(table,
          familyToBlockSize);
      HFileOutputFormat.configureBlockSize(table, conf);

      // Deserialize the configuration string back into a map.
      Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
          HFileOutputFormat
              .createFamilyBlockSizeMap(conf);

      // Every family configured above must come back with the same size.
      for (Entry<String, Integer> entry : familyToBlockSize.entrySet()
          ) {
        assertEquals("BlockSize configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
      }
    }
  }
680
681 private void setupMockColumnFamiliesForBlockSize(Table table,
682 Map<String, Integer> familyToDataBlockEncoding) throws IOException {
683 HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
684 for (Entry<String, Integer> entry : familyToDataBlockEncoding.entrySet()) {
685 mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
686 .setMaxVersions(1)
687 .setBlocksize(entry.getValue())
688 .setBlockCacheEnabled(false)
689 .setTimeToLive(0));
690 }
691 Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
692 }
693
694
695
696
697
  /**
   * Returns up to {@code numCfs} family-name-to-block-size entries; names
   * contain special characters to exercise configuration escaping.
   */
  private Map<String, Integer>
      getMockColumnFamiliesForBlockSize (int numCfs) {
    Map<String, Integer> familyToBlockSize =
        new HashMap<String, Integer>();

    if (numCfs-- > 0) {
      familyToBlockSize.put("Family1!@#!@#&", 1234);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD",
          Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      // NOTE(review): same key AND same value as the previous put, so this
      // branch is a no-op and "Family3" below is unreachable for numCfs <= 3
      // — confirm whether the duplicate is intentional.
      familyToBlockSize.put("Family2=asdads&!AASD",
          Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family3", 0);
    }
    return familyToBlockSize;
  }
719
720
721
722
723
724
725
726
727
728
  /**
   * Round-trips the family-to-data-block-encoding mapping through the job
   * Configuration via {@code HFileOutputFormat.configureDataBlockEncoding} /
   * {@code createFamilyDataBlockEncodingMap}, for 0..3 configured families.
   */
  @Test
  public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, DataBlockEncoding> familyToDataBlockEncoding =
          getMockColumnFamiliesForDataBlockEncoding(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForDataBlockEncoding(table,
          familyToDataBlockEncoding);
      HFileOutputFormat.configureDataBlockEncoding(table, conf);

      // Deserialize the configuration string back into a map.
      Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
          HFileOutputFormat
              .createFamilyDataBlockEncodingMap(conf);

      // Every family configured above must come back with the same encoding.
      for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
        assertEquals("DataBlockEncoding configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
      }
    }
  }
755
756 private void setupMockColumnFamiliesForDataBlockEncoding(Table table,
757 Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
758 HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
759 for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
760 mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
761 .setMaxVersions(1)
762 .setDataBlockEncoding(entry.getValue())
763 .setBlockCacheEnabled(false)
764 .setTimeToLive(0));
765 }
766 Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
767 }
768
769
770
771
772
  /**
   * Returns up to {@code numCfs} family-name-to-encoding entries; names
   * contain special characters to exercise configuration escaping.
   */
  private Map<String, DataBlockEncoding>
      getMockColumnFamiliesForDataBlockEncoding (int numCfs) {
    Map<String, DataBlockEncoding> familyToDataBlockEncoding =
        new HashMap<String, DataBlockEncoding>();

    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD",
          DataBlockEncoding.FAST_DIFF);
    }
    if (numCfs-- > 0) {
      // NOTE(review): same key as above, so PREFIX overwrites FAST_DIFF and
      // "Family3" below is unreachable for numCfs <= 3 — confirm whether the
      // duplicate key is intentional.
      familyToDataBlockEncoding.put("Family2=asdads&!AASD",
          DataBlockEncoding.PREFIX);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
    }
    return familyToDataBlockEncoding;
  }
794
795 private void setupMockStartKeys(RegionLocator regionLocator) throws IOException {
796 byte[][] mockKeys = new byte[][] {
797 HConstants.EMPTY_BYTE_ARRAY,
798 Bytes.toBytes("aaa"),
799 Bytes.toBytes("ggg"),
800 Bytes.toBytes("zzz")
801 };
802 Mockito.doReturn(mockKeys).when(regionLocator).getStartKeys();
803 }
804
  /** Stubs {@code table.getName()} to return a fixed "mock_table" name. */
  private void setupMockTableName(RegionLocator table) throws IOException {
    TableName mockTableName = TableName.valueOf("mock_table");
    Mockito.doReturn(mockTableName).when(table).getName();
  }
809
810
811
812
813
814 @Test
815 public void testColumnFamilySettings() throws Exception {
816 Configuration conf = new Configuration(this.util.getConfiguration());
817 RecordWriter<ImmutableBytesWritable, KeyValue> writer = null;
818 TaskAttemptContext context = null;
819 Path dir = util.getDataTestDir("testColumnFamilySettings");
820
821
822 HTable table = Mockito.mock(HTable.class);
823 RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
824 HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
825 Mockito.doReturn(htd).when(table).getTableDescriptor();
826 for (HColumnDescriptor hcd: this.util.generateColumnDescriptors()) {
827 htd.addFamily(hcd);
828 }
829
830
831 setupMockStartKeys(regionLocator);
832
833 try {
834
835
836
837 conf.set("io.seqfile.compression.type", "NONE");
838 conf.set("hbase.fs.tmp.dir", dir.toString());
839
840 conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
841 Job job = new Job(conf, "testLocalMRIncrementalLoad");
842 job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
843 setupRandomGeneratorMapper(job);
844 HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
845 FileOutputFormat.setOutputPath(job, dir);
846 context = createTestTaskAttemptContext(job);
847 HFileOutputFormat hof = new HFileOutputFormat();
848 writer = hof.getRecordWriter(context);
849
850
851 writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
852 writer.close(context);
853
854
855 FileSystem fs = dir.getFileSystem(conf);
856
857
858 hof.getOutputCommitter(context).commitTask(context);
859 hof.getOutputCommitter(context).commitJob(context);
860 FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
861 assertEquals(htd.getFamilies().size(), families.length);
862 for (FileStatus f : families) {
863 String familyStr = f.getPath().getName();
864 HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
865
866
867 Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
868 Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
869 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
870
871 byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
872 if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
873 assertEquals("Incorrect bloom filter used for column family " + familyStr +
874 "(reader: " + reader + ")",
875 hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
876 assertEquals("Incorrect compression used for column family " + familyStr +
877 "(reader: " + reader + ")", hcd.getCompression(), reader.getFileContext().getCompression());
878 }
879 } finally {
880 dir.getFileSystem(conf).delete(dir, true);
881 }
882 }
883
884
885
886
887
888 private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, KeyValue> writer,
889 TaskAttemptContext context, Set<byte[]> families, int numRows)
890 throws IOException, InterruptedException {
891 byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
892 int valLength = 10;
893 byte valBytes[] = new byte[valLength];
894
895 int taskId = context.getTaskAttemptID().getTaskID().getId();
896 assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
897 final byte [] qualifier = Bytes.toBytes("data");
898 Random random = new Random();
899 for (int i = 0; i < numRows; i++) {
900
901 Bytes.putInt(keyBytes, 0, i);
902 random.nextBytes(valBytes);
903 ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
904
905 for (byte[] family : families) {
906 KeyValue kv = new KeyValue(keyBytes, family, qualifier, valBytes);
907 writer.write(key, kv);
908 }
909 }
910 }
911
912
913
914
915
916
917
// Writes two rounds of bulk-load output with
// "hbase.mapreduce.hfileoutputformat.compaction.exclude" set, then checks that
// a minor compaction leaves both store files in place (the exclusion flag is
// honored) while a major compaction still collapses them into one file.
@Ignore ("Flakey: See HBASE-9051") @Test
public void testExcludeAllFromMinorCompaction() throws Exception {
  Configuration conf = util.getConfiguration();
  // Let compactions become eligible once there are at least two store files.
  conf.setInt("hbase.hstore.compaction.min", 2);
  generateRandomStartKeys(5);

  try {
    util.setJobWithoutMRCluster();
    util.startMiniCluster();
    final FileSystem fs = util.getDFSCluster().getFileSystem();
    HBaseAdmin admin = new HBaseAdmin(conf);
    HTable table = util.createTable(TABLE_NAME, FAMILIES);
    assertEquals("Should start with empty table", 0, util.countRows(table));

    // Store directory of the table's first region / first family, used below
    // to count HFiles on disk.
    final Path storePath = HStore.getStoreHomedir(
                        FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
                        admin.getTableRegions(TABLE_NAME).get(0),
                        FAMILIES[0]);
    assertEquals(0, fs.listStatus(storePath).length);

    // Mark every HFile produced by the MR job as excluded from minor
    // compactions.
    conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
        true);

    // Generate and bulk-load two batches of HFiles so the store ends up with
    // two files — enough to make a minor compaction eligible.
    for (int i = 0; i < 2; i++) {
      Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
      runIncrementalPELoad(conf, table, testDir);
      // Perform the actual bulk load of the generated HFiles.
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
    }

    // Ensure the loaded data is visible through the table.
    int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals("LoadIncrementalHFiles should put expected data in table",
        expectedRows, util.countRows(table));

    // Both bulk-loaded files should be present before any compaction runs.
    assertEquals(2, fs.listStatus(storePath).length);

    // Minor compaction: the excluded files must NOT be merged, so polling for
    // a single store file is expected to time out (quickPoll throws
    // AssertionError via fail()). Reaching the IOException means the files
    // were compacted, i.e. the exclusion flag was ignored — a test failure.
    admin.compact(TABLE_NAME.getName());
    try {
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);
      throw new IOException("SF# = " + fs.listStatus(storePath).length);
    } catch (AssertionError ae) {
      // This is the expected outcome: the excluded files were not compacted.
    }

    // Major compaction ignores the exclusion flag and must merge everything
    // down to a single store file.
    admin.majorCompact(TABLE_NAME.getName());
    quickPoll(new Callable<Boolean>() {
      public Boolean call() throws Exception {
        return fs.listStatus(storePath).length == 1;
      }
    }, 5000);

  } finally {
    util.shutdownMiniCluster();
  }
}
983
// Same scenario as testExcludeAllFromMinorCompaction, but the second store
// file comes from a normal put + flush rather than a second bulk load: one
// flushed file plus one excluded bulk-loaded file must survive a minor
// compaction, while a major compaction still merges them into one file.
@Test
public void testExcludeMinorCompaction() throws Exception {
  Configuration conf = util.getConfiguration();
  // Let compactions become eligible once there are at least two store files.
  conf.setInt("hbase.hstore.compaction.min", 2);
  generateRandomStartKeys(5);

  try {
    util.setJobWithoutMRCluster();
    util.startMiniCluster();
    Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
    final FileSystem fs = util.getTestFileSystem();
    HBaseAdmin admin = new HBaseAdmin(conf);
    HTable table = util.createTable(TABLE_NAME, FAMILIES);
    assertEquals("Should start with empty table", 0, util.countRows(table));

    // Store directory of the table's first region / first family, used below
    // to count HFiles on disk.
    final Path storePath = HStore.getStoreHomedir(
                        FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
                        admin.getTableRegions(TABLE_NAME).get(0),
                        FAMILIES[0]);
    assertEquals(0, fs.listStatus(storePath).length);

    // Put a single row and flush it so the store holds one ordinary
    // (non-excluded) HFile before the bulk load.
    Put p = new Put(Bytes.toBytes("test"));
    p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
    table.put(p);
    admin.flush(TABLE_NAME.getName());
    assertEquals(1, util.countRows(table));
    quickPoll(new Callable<Boolean>() {
      public Boolean call() throws Exception {
        return fs.listStatus(storePath).length == 1;
      }
    }, 5000);

    // Generate an HFile marked as excluded from minor compactions, then
    // bulk-load it, bringing the store file count to two.
    conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
        true);
    runIncrementalPELoad(conf, table, testDir);

    // Perform the actual bulk load of the generated HFile.
    new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

    // Ensure the loaded data is visible (bulk-loaded rows plus the one put).
    int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
    assertEquals("LoadIncrementalHFiles should put expected data in table",
        expectedRows + 1, util.countRows(table));

    // One flushed file plus one bulk-loaded file.
    assertEquals(2, fs.listStatus(storePath).length);

    // Minor compaction must skip the excluded file, so polling for a single
    // store file is expected to time out (quickPoll fails with
    // AssertionError). Reaching the IOException means the exclusion flag was
    // ignored. Compacted files are archived inside the poll so the on-disk
    // count reflects only live store files.
    admin.compact(TABLE_NAME.getName());
    try {
      quickPoll(new Callable<Boolean>() {
        public Boolean call() throws Exception {
          List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAME);
          for (HRegion region : regions) {
            for (Store store : region.getStores()) {
              store.closeAndArchiveCompactedFiles();
            }
          }
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);
      throw new IOException("SF# = " + fs.listStatus(storePath).length);
    } catch (AssertionError ae) {
      // This is the expected outcome: the excluded file was not compacted.
    }

    // Major compaction ignores the exclusion flag and must merge everything
    // down to a single store file.
    admin.majorCompact(TABLE_NAME.getName());
    quickPoll(new Callable<Boolean>() {
      public Boolean call() throws Exception {
        List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAME);
        for (HRegion region : regions) {
          for (Store store : region.getStores()) {
            store.closeAndArchiveCompactedFiles();
          }
        }
        return fs.listStatus(storePath).length == 1;
      }
    }, 5000);

  } finally {
    util.shutdownMiniCluster();
  }
}
1071
1072 private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
1073 int sleepMs = 10;
1074 int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
1075 while (retries-- > 0) {
1076 if (c.call().booleanValue()) {
1077 return;
1078 }
1079 Thread.sleep(sleepMs);
1080 }
1081 fail();
1082 }
1083
1084 public static void main(String args[]) throws Exception {
1085 new TestHFileOutputFormat().manualTest(args);
1086 }
1087
1088 public void manualTest(String args[]) throws Exception {
1089 Configuration conf = HBaseConfiguration.create();
1090 util = new HBaseTestingUtility(conf);
1091 if ("newtable".equals(args[0])) {
1092 TableName tname = TableName.valueOf(args[1]);
1093 byte[][] splitKeys = generateRandomSplitKeys(4);
1094 HTable table = util.createTable(tname, FAMILIES, splitKeys);
1095 } else if ("incremental".equals(args[0])) {
1096 TableName tname = TableName.valueOf(args[1]);
1097 HTable table = new HTable(conf, tname);
1098 Path outDir = new Path("incremental-out");
1099 runIncrementalPELoad(conf, table, outDir);
1100 } else {
1101 throw new RuntimeException(
1102 "usage: TestHFileOutputFormat newtable | incremental");
1103 }
1104 }
1105
1106 }
1107