1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
import static org.apache.hadoop.hbase.client.ConnectionFactory.createConnection;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hbase.CategoryBasedTimeout;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HadoopShims;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.BufferedMutatorParams;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.Store;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestRule;
import org.mockito.Mockito;
111
112
113
114
115
116
117
// Tests for HFileOutputFormat2: record-writer behavior (timestamps, time
// ranges, tags), per-family configuration serialization, and end-to-end
// incremental-load MapReduce jobs against a mini cluster.
@Category(LargeTests.class)
public class TestHFileOutputFormat2 {
  // Per-test timeout sized by the test category; also reports stuck threads.
  @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
      withTimeout(this.getClass()).withLookingForStuckThread(true).build();
  // Number of rows each generating mapper emits (per column family).
  private final static int ROWSPERSPLIT = 1024;

  // Base family name; the two test families below are derived from it.
  private static final byte[] FAMILY_NAME = Bytes.toBytes("info");
  private static final byte[][] FAMILIES = {
      Bytes.add(FAMILY_NAME, Bytes.toBytes("-A")),
      Bytes.add(FAMILY_NAME, Bytes.toBytes("-B"))
  };
  private static final TableName TABLE_NAME =
      TableName.valueOf("TestTable");

  // Mini-cluster helper; note doIncrementalLoadTest replaces this per run.
  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class);
135
136
137
138
139 static class RandomKVGeneratingMapper
140 extends Mapper<NullWritable, NullWritable,
141 ImmutableBytesWritable, Cell> {
142
143 private int keyLength;
144 private static final int KEYLEN_DEFAULT=10;
145 private static final String KEYLEN_CONF="randomkv.key.length";
146
147 private int valLength;
148 private static final int VALLEN_DEFAULT=10;
149 private static final String VALLEN_CONF="randomkv.val.length";
150 private static final byte [] QUALIFIER = Bytes.toBytes("data");
151
152 @Override
153 protected void setup(Context context) throws IOException,
154 InterruptedException {
155 super.setup(context);
156
157 Configuration conf = context.getConfiguration();
158 keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
159 valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
160 }
161
162 @Override
163 protected void map(
164 NullWritable n1, NullWritable n2,
165 Mapper<NullWritable, NullWritable,
166 ImmutableBytesWritable,Cell>.Context context)
167 throws java.io.IOException ,InterruptedException
168 {
169
170 byte keyBytes[] = new byte[keyLength];
171 byte valBytes[] = new byte[valLength];
172
173 int taskId = context.getTaskAttemptID().getTaskID().getId();
174 assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
175
176 Random random = new Random();
177 for (int i = 0; i < ROWSPERSPLIT; i++) {
178
179 random.nextBytes(keyBytes);
180
181 keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
182 random.nextBytes(valBytes);
183 ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
184
185 for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
186 Cell kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes);
187 context.write(key, kv);
188 }
189 }
190 }
191 }
192
193
194
195
196 static class RandomPutGeneratingMapper
197 extends Mapper<NullWritable, NullWritable,
198 ImmutableBytesWritable, Put> {
199
200 private int keyLength;
201 private static final int KEYLEN_DEFAULT=10;
202 private static final String KEYLEN_CONF="randomkv.key.length";
203
204 private int valLength;
205 private static final int VALLEN_DEFAULT=10;
206 private static final String VALLEN_CONF="randomkv.val.length";
207 private static final byte [] QUALIFIER = Bytes.toBytes("data");
208
209 @Override
210 protected void setup(Context context) throws IOException,
211 InterruptedException {
212 super.setup(context);
213
214 Configuration conf = context.getConfiguration();
215 keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
216 valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
217 }
218
219 @Override
220 protected void map(
221 NullWritable n1, NullWritable n2,
222 Mapper<NullWritable, NullWritable,
223 ImmutableBytesWritable,Put>.Context context)
224 throws java.io.IOException ,InterruptedException
225 {
226
227 byte keyBytes[] = new byte[keyLength];
228 byte valBytes[] = new byte[valLength];
229
230 int taskId = context.getTaskAttemptID().getTaskID().getId();
231 assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
232
233 Random random = new Random();
234 for (int i = 0; i < ROWSPERSPLIT; i++) {
235
236 random.nextBytes(keyBytes);
237
238 keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
239 random.nextBytes(valBytes);
240 ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
241
242 for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
243 Put p = new Put(keyBytes);
244 p.addColumn(family, QUALIFIER, valBytes);
245
246 p.setTTL(1l);
247 context.write(key, p);
248 }
249 }
250 }
251 }
252
253 private void setupRandomGeneratorMapper(Job job, boolean putSortReducer) {
254 if (putSortReducer) {
255 job.setInputFormatClass(NMapInputFormat.class);
256 job.setMapperClass(RandomPutGeneratingMapper.class);
257 job.setMapOutputKeyClass(ImmutableBytesWritable.class);
258 job.setMapOutputValueClass(Put.class);
259 } else {
260 job.setInputFormatClass(NMapInputFormat.class);
261 job.setMapperClass(RandomKVGeneratingMapper.class);
262 job.setMapOutputKeyClass(ImmutableBytesWritable.class);
263 job.setMapOutputValueClass(KeyValue.class);
264 }
265 }
266
267
268
269
270
271
272 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
273 public void test_LATEST_TIMESTAMP_isReplaced()
274 throws Exception {
275 Configuration conf = new Configuration(this.util.getConfiguration());
276 RecordWriter<ImmutableBytesWritable, Cell> writer = null;
277 TaskAttemptContext context = null;
278 Path dir =
279 util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
280 try {
281 Job job = new Job(conf);
282 FileOutputFormat.setOutputPath(job, dir);
283 context = createTestTaskAttemptContext(job);
284 HFileOutputFormat2 hof = new HFileOutputFormat2();
285 writer = hof.getRecordWriter(context);
286 final byte [] b = Bytes.toBytes("b");
287
288
289
290 KeyValue kv = new KeyValue(b, b, b);
291 KeyValue original = kv.clone();
292 writer.write(new ImmutableBytesWritable(), kv);
293 assertFalse(original.equals(kv));
294 assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
295 assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
296 assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
297 assertNotSame(original.getTimestamp(), kv.getTimestamp());
298 assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
299
300
301
302 kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
303 original = kv.clone();
304 writer.write(new ImmutableBytesWritable(), kv);
305 assertTrue(original.equals(kv));
306 } finally {
307 if (writer != null && context != null) writer.close(context);
308 dir.getFileSystem(conf).delete(dir, true);
309 }
310 }
311
312 private TaskAttemptContext createTestTaskAttemptContext(final Job job)
313 throws Exception {
314 HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
315 TaskAttemptContext context = hadoop.createTestTaskAttemptContext(
316 job, "attempt_201402131733_0001_m_000000_0");
317 return context;
318 }
319
320
321
322
323
324 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
325 public void test_TIMERANGE() throws Exception {
326 Configuration conf = new Configuration(this.util.getConfiguration());
327 RecordWriter<ImmutableBytesWritable, Cell> writer = null;
328 TaskAttemptContext context = null;
329 Path dir =
330 util.getDataTestDir("test_TIMERANGE_present");
331 LOG.info("Timerange dir writing to dir: " + dir);
332 try {
333
334 Job job = new Job(conf);
335 FileOutputFormat.setOutputPath(job, dir);
336 context = createTestTaskAttemptContext(job);
337 HFileOutputFormat2 hof = new HFileOutputFormat2();
338 writer = hof.getRecordWriter(context);
339
340
341 final byte [] b = Bytes.toBytes("b");
342
343
344 KeyValue kv = new KeyValue(b, b, b, 2000, b);
345 KeyValue original = kv.clone();
346 writer.write(new ImmutableBytesWritable(), kv);
347 assertEquals(original,kv);
348
349
350 kv = new KeyValue(b, b, b, 1000, b);
351 original = kv.clone();
352 writer.write(new ImmutableBytesWritable(), kv);
353 assertEquals(original, kv);
354
355
356 writer.close(context);
357
358
359
360
361 FileSystem fs = FileSystem.get(conf);
362 Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
363 FileStatus[] sub1 = fs.listStatus(attemptDirectory);
364 FileStatus[] file = fs.listStatus(sub1[0].getPath());
365
366
367 HFile.Reader rd = HFile.createReader(fs, file[0].getPath(),
368 new CacheConfig(conf), conf);
369 Map<byte[],byte[]> finfo = rd.loadFileInfo();
370 byte[] range = finfo.get("TIMERANGE".getBytes());
371 assertNotNull(range);
372
373
374 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
375 Writables.copyWritable(range, timeRangeTracker);
376 LOG.info(timeRangeTracker.getMin() +
377 "...." + timeRangeTracker.getMax());
378 assertEquals(1000, timeRangeTracker.getMin());
379 assertEquals(2000, timeRangeTracker.getMax());
380 rd.close();
381 } finally {
382 if (writer != null && context != null) writer.close(context);
383 dir.getFileSystem(conf).delete(dir, true);
384 }
385 }
386
387
388
389
  /**
   * Runs a full MapReduce job that writes PerformanceEvaluation-style random
   * data through HFileOutputFormat2 and checks that output files appear.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Small sort buffer and region size so the job exercises spilling/rolling.
    conf.setInt("mapreduce.task.io.sort.mb", 20);

    conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job, false);

    // Partition over the entire key space (all-0x00 .. all-0xff) so every
    // reducer receives a share of the random keys.
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte)0);
    Arrays.fill(endKey, (byte)0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);

    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);
    job.setNumReduceTasks(4);
    // Register the HBase writable serializations alongside the defaults.
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus [] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }
427
428
429
430
431
432 @Test
433 public void test_WritingTagData()
434 throws Exception {
435 Configuration conf = new Configuration(this.util.getConfiguration());
436 final String HFILE_FORMAT_VERSION_CONF_KEY = "hfile.format.version";
437 conf.setInt(HFILE_FORMAT_VERSION_CONF_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS);
438 RecordWriter<ImmutableBytesWritable, Cell> writer = null;
439 TaskAttemptContext context = null;
440 Path dir =
441 util.getDataTestDir("WritingTagData");
442 try {
443 Job job = new Job(conf);
444 FileOutputFormat.setOutputPath(job, dir);
445 context = createTestTaskAttemptContext(job);
446 HFileOutputFormat2 hof = new HFileOutputFormat2();
447 writer = hof.getRecordWriter(context);
448 final byte [] b = Bytes.toBytes("b");
449
450 KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b, new Tag[] {
451 new Tag(TagType.TTL_TAG_TYPE, Bytes.toBytes(978670)) });
452 writer.write(new ImmutableBytesWritable(), kv);
453 writer.close(context);
454 writer = null;
455 FileSystem fs = dir.getFileSystem(conf);
456 RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
457 while(iterator.hasNext()) {
458 LocatedFileStatus keyFileStatus = iterator.next();
459 HFile.Reader reader = HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf),
460 conf);
461 HFileScanner scanner = reader.getScanner(false, false, false);
462 scanner.seekTo();
463 Cell cell = scanner.getKeyValue();
464
465 Iterator<Tag> tagsIterator = CellUtil.tagsIterator(cell.getTagsArray(),
466 cell.getTagsOffset(), cell.getTagsLength());
467 assertTrue(tagsIterator.hasNext());
468 assertTrue(tagsIterator.next().getType() == TagType.TTL_TAG_TYPE);
469 }
470 } finally {
471 if (writer != null && context != null) writer.close(context);
472 dir.getFileSystem(conf).delete(dir, true);
473 }
474 }
475
476 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
477 public void testJobConfiguration() throws Exception {
478 Configuration conf = new Configuration(this.util.getConfiguration());
479 conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY, util.getDataTestDir("testJobConfiguration")
480 .toString());
481 Job job = new Job(conf);
482 job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
483 RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
484 setupMockStartKeys(regionLocator);
485 setupMockTableName(regionLocator);
486 HFileOutputFormat2.configureIncrementalLoad(job, new HTableDescriptor(), regionLocator);
487 assertEquals(job.getNumReduceTasks(), 4);
488 }
489
490 private byte [][] generateRandomStartKeys(int numKeys) {
491 Random random = new Random();
492 byte[][] ret = new byte[numKeys][];
493
494 ret[0] = HConstants.EMPTY_BYTE_ARRAY;
495 for (int i = 1; i < numKeys; i++) {
496 ret[i] =
497 PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
498 }
499 return ret;
500 }
501
502 private byte[][] generateRandomSplitKeys(int numKeys) {
503 Random random = new Random();
504 byte[][] ret = new byte[numKeys][];
505 for (int i = 0; i < numKeys; i++) {
506 ret[i] =
507 PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
508 }
509 return ret;
510 }
511
  /** Basic incremental load: no region change, no locality, KeyValue reducer. */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testMRIncrementalLoad() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoad\n");
    doIncrementalLoadTest(false, false, false, "testMRIncrementalLoad");
  }
517
  /** Incremental load where the table's regions are re-created between write and bulk load. */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
    doIncrementalLoadTest(true, false, false, "testMRIncrementalLoadWithSplit");
  }
523
524
525
526
527
528
529
530
531
  /**
   * Incremental load with locality-sensitive HFile placement enabled,
   * run once without and once with a region change in between.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testMRIncrementalLoadWithLocality() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithLocality\n");
    doIncrementalLoadTest(false, true, false, "testMRIncrementalLoadWithLocality1");
    doIncrementalLoadTest(true, true, false, "testMRIncrementalLoadWithLocality2");
  }
538
  /** Incremental load through PutSortReducer (Puts carry a 1 ms TTL, so zero rows are expected). */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testMRIncrementalLoadWithPutSortReducer() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithPutSortReducer\n");
    doIncrementalLoadTest(false, false, true, "testMRIncrementalLoadWithPutSortReducer");
  }
544
  /**
   * End-to-end incremental-load scenario: start a mini cluster, run the
   * HFileOutputFormat2 MapReduce job into a staging directory, optionally
   * re-create the table with different regions, bulk-load the HFiles, and
   * verify row counts, block locality (when requested) and data survival
   * across a disable/enable cycle.
   *
   * @param shouldChangeRegions delete and re-create the table (with 15
   *        regions) between writing the HFiles and bulk-loading them
   * @param shouldKeepLocality  enable locality-sensitive output and assert
   *        100% block locality on every datanode
   * @param putSortReducer      drive the job through PutSortReducer; the
   *        generated Puts expire immediately, so zero rows are expected
   * @param tableStr            test name (used for logging/naming only)
   */
  private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
      boolean putSortReducer, String tableStr) throws Exception {
    // Fresh utility per run so each scenario gets its own mini cluster.
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
    int hostCount = 1;
    int regionNum = 5;
    if (shouldKeepLocality) {
      // Locality checks need several datanodes and enough regions to spread.
      hostCount = 3;
      regionNum = 20;
    }
    byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
      hostnames[i] = "datanode_" + i;
    }
    util.setJobWithoutMRCluster();
    util.startMiniCluster(1, hostCount, hostnames);

    HTable table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    // NOTE(review): 'r' and 'admin' are bound to the ORIGINAL table; if
    // shouldChangeRegions re-creates the table below, 'table' is reassigned
    // without closing the old instance and 'admin' still comes from the old
    // table's connection -- presumably fine for the mini cluster, but worth
    // confirming.
    try (RegionLocator r = table.getRegionLocator(); Admin admin = table.getConnection().getAdmin()) {
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions = r.getStartKeys().length;
      assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);

      // Generate the HFiles into testDir via the MapReduce job.
      runIncrementalPELoad(conf, table.getTableDescriptor(), table.getRegionLocator(), testDir,
          putSortReducer);
      // The job only writes HFiles; the live table must remain empty.
      assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

      // One output subdirectory per column family must exist.
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // Optionally re-create the table with a different region layout so the
      // bulk load has to split HFiles across new region boundaries.
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getName());
        util.waitUntilNoRegionsInTransition();

        util.deleteTable(table.getName());
        byte[][] newSplitKeys = generateRandomSplitKeys(14);
        table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

        // 14 split keys -> 15 regions; wait until all are assigned.
        while (table.getRegionLocator().getAllRegionLocations().size() != 15
            || !admin.isTableAvailable(table.getName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual bulk load of the generated HFiles.
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      int expectedRows = 0;
      if (putSortReducer) {
        // The Put mapper sets a 1 ms TTL, so every cell has expired by now.
        assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
            util.countRows(table));
      } else {
        // One row per mapper iteration, per map task.
        expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
        assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
            util.countRows(table));
        Scan scan = new Scan();
        ResultScanner results = table.getScanner(scan);
        for (Result res : results) {
          assertEquals(FAMILIES.length, res.rawCells().length);
          Cell first = res.rawCells()[0];
          // Both families of a row were written from the same random buffer,
          // so all cells in the row must agree.
          for (Cell kv : res.rawCells()) {
            assertTrue(CellUtil.matchingRow(first, kv));
            assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
          }
        }
        results.close();
      }
      String tableDigestBefore = util.checksumRows(table);

      // With locality-sensitive output, every datanode must host all blocks
      // of its regions (100% locality index).
      HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
      for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) {
        hbd.add(region.getHDFSBlocksDistribution());
      }
      for (String hostname : hostnames) {
        float locality = hbd.getBlockLocalityIndex(hostname);
        LOG.info("locality of [" + hostname + "]: " + locality);
        assertEquals(100, (int) (locality * 100));
      }

      // Cycle the table through disable/enable and verify nothing was lost.
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME);
      assertEquals("Data should remain after reopening of regions", tableDigestBefore,
          util.checksumRows(table));
    } finally {
      testDir.getFileSystem(conf).delete(testDir, true);
      util.deleteTable(TABLE_NAME);
      util.shutdownMiniCluster();
    }
  }
661
  /**
   * Configures and runs the incremental-load MapReduce job: random-data
   * mapper, HFileOutputFormat2 configured from the table descriptor and
   * region locator, output to {@code outDir}. Asserts one reducer per
   * region and that the job succeeds.
   */
  private void runIncrementalPELoad(Configuration conf, HTableDescriptor tableDescriptor,
      RegionLocator regionLocator, Path outDir, boolean putSortReducer) throws IOException,
      UnsupportedEncodingException, InterruptedException, ClassNotFoundException {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
    // Register the HBase writable serializations alongside the defaults.
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    setupRandomGeneratorMapper(job, putSortReducer);
    HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
    FileOutputFormat.setOutputPath(job, outDir);

    // The output directory must not pre-exist, or the job would fail.
    assertFalse(util.getTestFileSystem().exists(outDir)) ;

    // configureIncrementalLoad must have set one reducer per region.
    assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());

    assertTrue(job.waitForCompletion(true));
  }
680
681
682
683
684
685
686
687
688
689
  /**
   * Round-trips family-to-compression settings through the job
   * configuration: what configureCompression serializes must be exactly
   * what createFamilyCompressionMap reads back, for 0..3 column families.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Compression.Algorithm> familyToCompression =
          getMockColumnFamiliesForCompression(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForCompression(table, familyToCompression);
      // Serialize the family->compression map into the configuration.
      HFileOutputFormat2.configureCompression(conf, table.getTableDescriptor());

      // Deserialize it back; keys come back as family-name bytes.
      Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2
          .createFamilyCompressionMap(conf);

      // Every configured family must round-trip with the same algorithm.
      for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
        assertEquals("Compression configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
      }
    }
  }
713
714 private void setupMockColumnFamiliesForCompression(Table table,
715 Map<String, Compression.Algorithm> familyToCompression) throws IOException {
716 HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
717 for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
718 mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
719 .setMaxVersions(1)
720 .setCompressionType(entry.getValue())
721 .setBlockCacheEnabled(false)
722 .setTimeToLive(0));
723 }
724 Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
725 }
726
727
728
729
730
731 private Map<String, Compression.Algorithm>
732 getMockColumnFamiliesForCompression (int numCfs) {
733 Map<String, Compression.Algorithm> familyToCompression
734 = new HashMap<String, Compression.Algorithm>();
735
736 if (numCfs-- > 0) {
737 familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
738 }
739 if (numCfs-- > 0) {
740 familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
741 }
742 if (numCfs-- > 0) {
743 familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
744 }
745 if (numCfs-- > 0) {
746 familyToCompression.put("Family3", Compression.Algorithm.NONE);
747 }
748 return familyToCompression;
749 }
750
751
752
753
754
755
756
757
758
759
760
  /**
   * Round-trips family-to-bloom-type settings through the job
   * configuration: what configureBloomType serializes must be exactly what
   * createFamilyBloomTypeMap reads back, for 0..2 column families.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 2; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, BloomType> familyToBloomType =
          getMockColumnFamiliesForBloomType(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBloomType(table,
          familyToBloomType);
      // Serialize the family->bloom-type map into the configuration.
      HFileOutputFormat2.configureBloomType(table.getTableDescriptor(), conf);

      // Deserialize it back; keys come back as family-name bytes.
      Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
          HFileOutputFormat2
              .createFamilyBloomTypeMap(conf);

      // Every configured family must round-trip with the same bloom type.
      for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
        assertEquals("BloomType configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes()));
      }
    }
  }
787
788 private void setupMockColumnFamiliesForBloomType(Table table,
789 Map<String, BloomType> familyToDataBlockEncoding) throws IOException {
790 HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
791 for (Entry<String, BloomType> entry : familyToDataBlockEncoding.entrySet()) {
792 mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
793 .setMaxVersions(1)
794 .setBloomFilterType(entry.getValue())
795 .setBlockCacheEnabled(false)
796 .setTimeToLive(0));
797 }
798 Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
799 }
800
801
802
803
804
805 private Map<String, BloomType>
806 getMockColumnFamiliesForBloomType (int numCfs) {
807 Map<String, BloomType> familyToBloomType =
808 new HashMap<String, BloomType>();
809
810 if (numCfs-- > 0) {
811 familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
812 }
813 if (numCfs-- > 0) {
814 familyToBloomType.put("Family2=asdads&!AASD",
815 BloomType.ROWCOL);
816 }
817 if (numCfs-- > 0) {
818 familyToBloomType.put("Family3", BloomType.NONE);
819 }
820 return familyToBloomType;
821 }
822
823
824
825
826
827
828
829
830
831
  /**
   * Round-trips family-to-block-size settings through the job
   * configuration: what configureBlockSize serializes must be exactly what
   * createFamilyBlockSizeMap reads back, for 0..3 column families.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Integer> familyToBlockSize =
          getMockColumnFamiliesForBlockSize(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForBlockSize(table,
          familyToBlockSize);
      // Serialize the family->block-size map into the configuration.
      HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf);

      // Deserialize it back; keys come back as family-name bytes.
      Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
          HFileOutputFormat2
              .createFamilyBlockSizeMap(conf);

      // Every configured family must round-trip with the same block size.
      for (Entry<String, Integer> entry : familyToBlockSize.entrySet()
          ) {
        assertEquals("BlockSize configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
      }
    }
  }
859
860 private void setupMockColumnFamiliesForBlockSize(Table table,
861 Map<String, Integer> familyToDataBlockEncoding) throws IOException {
862 HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
863 for (Entry<String, Integer> entry : familyToDataBlockEncoding.entrySet()) {
864 mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
865 .setMaxVersions(1)
866 .setBlocksize(entry.getValue())
867 .setBlockCacheEnabled(false)
868 .setTimeToLive(0));
869 }
870 Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
871 }
872
873
874
875
876
877 private Map<String, Integer>
878 getMockColumnFamiliesForBlockSize (int numCfs) {
879 Map<String, Integer> familyToBlockSize =
880 new HashMap<String, Integer>();
881
882 if (numCfs-- > 0) {
883 familyToBlockSize.put("Family1!@#!@#&", 1234);
884 }
885 if (numCfs-- > 0) {
886 familyToBlockSize.put("Family2=asdads&!AASD",
887 Integer.MAX_VALUE);
888 }
889 if (numCfs-- > 0) {
890 familyToBlockSize.put("Family2=asdads&!AASD",
891 Integer.MAX_VALUE);
892 }
893 if (numCfs-- > 0) {
894 familyToBlockSize.put("Family3", 0);
895 }
896 return familyToBlockSize;
897 }
898
899
900
901
902
903
904
905
906
  /**
   * Round-trips family-to-data-block-encoding settings through the job
   * configuration: what configureDataBlockEncoding serializes must be
   * exactly what createFamilyDataBlockEncodingMap reads back, for 0..3
   * column families.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, DataBlockEncoding> familyToDataBlockEncoding =
          getMockColumnFamiliesForDataBlockEncoding(numCfs);
      Table table = Mockito.mock(HTable.class);
      setupMockColumnFamiliesForDataBlockEncoding(table,
          familyToDataBlockEncoding);
      HTableDescriptor tableDescriptor = table.getTableDescriptor();
      // Serialize the family->encoding map into the configuration.
      HFileOutputFormat2.configureDataBlockEncoding(tableDescriptor, conf);

      // Deserialize it back; keys come back as family-name bytes.
      Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
          HFileOutputFormat2
              .createFamilyDataBlockEncodingMap(conf);

      // Every configured family must round-trip with the same encoding.
      for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
        assertEquals("DataBlockEncoding configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
      }
    }
  }
934
935 private void setupMockColumnFamiliesForDataBlockEncoding(Table table,
936 Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
937 HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
938 for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
939 mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
940 .setMaxVersions(1)
941 .setDataBlockEncoding(entry.getValue())
942 .setBlockCacheEnabled(false)
943 .setTimeToLive(0));
944 }
945 Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
946 }
947
948
949
950
951
952 private Map<String, DataBlockEncoding>
953 getMockColumnFamiliesForDataBlockEncoding (int numCfs) {
954 Map<String, DataBlockEncoding> familyToDataBlockEncoding =
955 new HashMap<String, DataBlockEncoding>();
956
957 if (numCfs-- > 0) {
958 familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
959 }
960 if (numCfs-- > 0) {
961 familyToDataBlockEncoding.put("Family2=asdads&!AASD",
962 DataBlockEncoding.FAST_DIFF);
963 }
964 if (numCfs-- > 0) {
965 familyToDataBlockEncoding.put("Family2=asdads&!AASD",
966 DataBlockEncoding.PREFIX);
967 }
968 if (numCfs-- > 0) {
969 familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
970 }
971 return familyToDataBlockEncoding;
972 }
973
974 private void setupMockStartKeys(RegionLocator table) throws IOException {
975 byte[][] mockKeys = new byte[][] {
976 HConstants.EMPTY_BYTE_ARRAY,
977 Bytes.toBytes("aaa"),
978 Bytes.toBytes("ggg"),
979 Bytes.toBytes("zzz")
980 };
981 Mockito.doReturn(mockKeys).when(table).getStartKeys();
982 }
983
984 private void setupMockTableName(RegionLocator table) throws IOException {
985 TableName mockTableName = TableName.valueOf("mock_table");
986 Mockito.doReturn(mockTableName).when(table).getName();
987 }
988
989
990
991
992
993 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
994 public void testColumnFamilySettings() throws Exception {
995 Configuration conf = new Configuration(this.util.getConfiguration());
996 RecordWriter<ImmutableBytesWritable, Cell> writer = null;
997 TaskAttemptContext context = null;
998 Path dir = util.getDataTestDir("testColumnFamilySettings");
999
1000
1001 Table table = Mockito.mock(Table.class);
1002 RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
1003 HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
1004 Mockito.doReturn(htd).when(table).getTableDescriptor();
1005 for (HColumnDescriptor hcd: HBaseTestingUtility.generateColumnDescriptors()) {
1006 htd.addFamily(hcd);
1007 }
1008
1009
1010 setupMockStartKeys(regionLocator);
1011
1012 try {
1013
1014
1015
1016 conf.set("io.seqfile.compression.type", "NONE");
1017 conf.set("hbase.fs.tmp.dir", dir.toString());
1018
1019 conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
1020 Job job = new Job(conf, "testLocalMRIncrementalLoad");
1021 job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
1022 setupRandomGeneratorMapper(job, false);
1023 HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
1024 FileOutputFormat.setOutputPath(job, dir);
1025 context = createTestTaskAttemptContext(job);
1026 HFileOutputFormat2 hof = new HFileOutputFormat2();
1027 writer = hof.getRecordWriter(context);
1028
1029
1030 writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
1031 writer.close(context);
1032
1033
1034 FileSystem fs = dir.getFileSystem(conf);
1035
1036
1037 hof.getOutputCommitter(context).commitTask(context);
1038 hof.getOutputCommitter(context).commitJob(context);
1039 FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
1040 assertEquals(htd.getFamilies().size(), families.length);
1041 for (FileStatus f : families) {
1042 String familyStr = f.getPath().getName();
1043 HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
1044
1045
1046 Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
1047 Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
1048 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
1049
1050 byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
1051 if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
1052 assertEquals("Incorrect bloom filter used for column family " + familyStr +
1053 "(reader: " + reader + ")",
1054 hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
1055 assertEquals("Incorrect compression used for column family " + familyStr +
1056 "(reader: " + reader + ")", hcd.getCompression(), reader.getFileContext().getCompression());
1057 }
1058 } finally {
1059 dir.getFileSystem(conf).delete(dir, true);
1060 }
1061 }
1062
1063
1064
1065
1066
1067 private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
1068 TaskAttemptContext context, Set<byte[]> families, int numRows)
1069 throws IOException, InterruptedException {
1070 byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
1071 int valLength = 10;
1072 byte valBytes[] = new byte[valLength];
1073
1074 int taskId = context.getTaskAttemptID().getTaskID().getId();
1075 assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
1076 final byte [] qualifier = Bytes.toBytes("data");
1077 Random random = new Random();
1078 for (int i = 0; i < numRows; i++) {
1079
1080 Bytes.putInt(keyBytes, 0, i);
1081 random.nextBytes(valBytes);
1082 ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
1083
1084 for (byte[] family : families) {
1085 Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
1086 writer.write(key, kv);
1087 }
1088 }
1089 }
1090
1091
1092
1093
1094
1095
1096
  /**
   * Test that HFiles bulk-loaded with the compaction-exclude flag set are
   * skipped by minor compaction (both store files remain) but are still
   * rewritten by a major compaction.
   */
  @Ignore ("Flakey: See HBASE-9051") @Test
  public void testExcludeAllFromMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    // Allow compaction to trigger with as few as two store files.
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);
    util.setJobWithoutMRCluster();
    util.startMiniCluster();
    try (Connection conn = createConnection();
        Admin admin = conn.getAdmin()) {
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      HTable table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // Store directory of the first region's first family; all store-file
      // counts below are asserted against this path.
      final Path storePath = new Path(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
              Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // Mark every HFile produced by the MR job as excluded from minor compaction.
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
          true);

      // Run the incremental load twice so each store ends with two files.
      for (int i = 0; i < 2; i++) {
        Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
        runIncrementalPELoad(conf, table.getTableDescriptor(), conn.getRegionLocator(TABLE_NAME),
            testDir, false);
        // Perform the actual bulk load of the generated HFiles.
        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
      }

      // Both load rounds must be visible in the table.
      int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows, util.countRows(table));

      // Both bulk-loaded files should still be present on disk.
      assertEquals(2, fs.listStatus(storePath).length);

      // Minor compaction: since both files are excluded, the store-file count
      // must NOT drop to one — quickPoll is expected to time out and fail().
      admin.compact(TABLE_NAME);
      try {
        quickPoll(new Callable<Boolean>() {
          @Override
          public Boolean call() throws Exception {
            List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAME);
            for (HRegion region : regions) {
              for (Store store : region.getStores()) {
                store.closeAndArchiveCompactedFiles();
              }
            }
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // Expected: the excluded files were not minor-compacted, so the
        // poll's fail() (an AssertionError) lands here. Swallowing it is
        // the success path of this phase.
      }

      // A major compaction ignores the exclude flag and must merge the store
      // down to a single file.
      admin.majorCompact(TABLE_NAME);
      quickPoll(new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAME);
          for (HRegion region : regions) {
            for (Store store : region.getStores()) {
              store.closeAndArchiveCompactedFiles();
            }
          }
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniCluster();
    }
  }
1176
  /**
   * Test that a single bulk-loaded HFile carrying the compaction-exclude flag
   * survives a minor compaction alongside a normally flushed store file, but
   * is merged away by a major compaction.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testExcludeMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    // Allow compaction to trigger with as few as two store files.
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);
    util.setJobWithoutMRCluster();
    util.startMiniCluster();
    try (Connection conn = createConnection(conf);
        Admin admin = conn.getAdmin()){
      Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      Table table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // Store directory of the first region's first family; all store-file
      // counts below are asserted against this path.
      final Path storePath = new Path(
          FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
              Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // Put one row and flush so the store holds one normal (non-excluded) file.
      Put p = new Put(Bytes.toBytes("test"));
      p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
      table.put(p);
      admin.flush(TABLE_NAME);
      assertEquals(1, util.countRows(table));
      quickPoll(new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

      // Generate an HFile marked as excluded from minor compaction.
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
          true);

      RegionLocator regionLocator = conn.getRegionLocator(TABLE_NAME);
      runIncrementalPELoad(conf, table.getTableDescriptor(), regionLocator, testDir, false);

      // Perform the actual bulk load of the generated HFile.
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, regionLocator);

      // Bulk-loaded rows plus the one flushed row must be visible.
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows + 1, util.countRows(table));

      // The flushed file plus the bulk-loaded one.
      assertEquals(2, fs.listStatus(storePath).length);

      // Minor compaction must skip the excluded file, so the store-file count
      // must NOT drop to one — quickPoll is expected to time out and fail().
      admin.compact(TABLE_NAME);
      try {
        quickPoll(new Callable<Boolean>() {
          @Override
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // Expected: the excluded file was not minor-compacted, so the
        // poll's fail() (an AssertionError) lands here. Swallowing it is
        // the success path of this phase.
      }

      // A major compaction ignores the exclude flag and merges everything
      // down to a single file.
      admin.majorCompact(TABLE_NAME);
      quickPoll(new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniCluster();
    }
  }
1256
1257 private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
1258 int sleepMs = 10;
1259 int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
1260 while (retries-- > 0) {
1261 if (c.call().booleanValue()) {
1262 return;
1263 }
1264 Thread.sleep(sleepMs);
1265 }
1266 fail();
1267 }
1268
  /** Command-line entry point; dispatches to {@link #manualTest(String[])}. */
  public static void main(String args[]) throws Exception {
    new TestHFileOutputFormat2().manualTest(args);
  }
1272
1273 public void manualTest(String args[]) throws Exception {
1274 Configuration conf = HBaseConfiguration.create();
1275 util = new HBaseTestingUtility(conf);
1276 if ("newtable".equals(args[0])) {
1277 TableName tname = TableName.valueOf(args[1]);
1278 byte[][] splitKeys = generateRandomSplitKeys(4);
1279 try (HTable table = util.createTable(tname, FAMILIES, splitKeys)) {
1280 }
1281 } else if ("incremental".equals(args[0])) {
1282 TableName tname = TableName.valueOf(args[1]);
1283 try(Connection c = createConnection(conf);
1284 Admin admin = c.getAdmin();
1285 RegionLocator regionLocator = c.getRegionLocator(tname)) {
1286 Path outDir = new Path("incremental-out");
1287 runIncrementalPELoad(conf, admin.getTableDescriptor(tname), regionLocator, outDir, false);
1288 }
1289 } else {
1290 throw new RuntimeException(
1291 "usage: TestHFileOutputFormat2 newtable | incremental");
1292 }
1293 }
1294
1295 @Test
1296 public void TestConfigureCompression() throws Exception {
1297 Configuration conf = new Configuration(this.util.getConfiguration());
1298 RecordWriter<ImmutableBytesWritable, Cell> writer = null;
1299 TaskAttemptContext context = null;
1300 Path dir = util.getDataTestDir("TestConfigureCompression");
1301 String hfileoutputformatCompression = "gz";
1302
1303 try {
1304 conf.set(HFileOutputFormat2.OUTPUT_TABLE_NAME_CONF_KEY, TABLE_NAME.getNameAsString());
1305 conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
1306
1307 conf.set(HFileOutputFormat2.COMPRESSION_OVERRIDE_CONF_KEY, hfileoutputformatCompression);
1308
1309 Job job = Job.getInstance(conf);
1310 FileOutputFormat.setOutputPath(job, dir);
1311 context = createTestTaskAttemptContext(job);
1312 HFileOutputFormat2 hof = new HFileOutputFormat2();
1313 writer = hof.getRecordWriter(context);
1314 final byte[] b = Bytes.toBytes("b");
1315
1316 KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b);
1317 writer.write(new ImmutableBytesWritable(), kv);
1318 writer.close(context);
1319 writer = null;
1320 FileSystem fs = dir.getFileSystem(conf);
1321 RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
1322 while (iterator.hasNext()) {
1323 LocatedFileStatus keyFileStatus = iterator.next();
1324 HFile.Reader reader =
1325 HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), conf);
1326 assertEquals(reader.getCompressionAlgorithm().getName(), hfileoutputformatCompression);
1327 }
1328 } finally {
1329 if (writer != null && context != null) {
1330 writer.close(context);
1331 }
1332 dir.getFileSystem(conf).delete(dir, true);
1333 }
1334
1335 }
1336
  /**
   * Test that configureIncrementalLoad against a table living on a remote
   * cluster (B) copies that cluster's ZooKeeper settings into the job
   * configuration of a job running on cluster A, and that every connection
   * the job opens actually uses cluster B's settings.
   */
  @Test
  public void testMRIncrementalLoadWithLocalityMultiCluster() throws Exception {
    // Start cluster A, which runs the MR job.
    util = new HBaseTestingUtility();
    Configuration confA = util.getConfiguration();
    int hostCount = 3;
    int regionNum = 20;
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
      hostnames[i] = "datanode_" + i;
    }
    util.setJobWithoutMRCluster();
    util.startMiniCluster(1, hostCount, hostnames);

    // Start cluster B, which owns the target table.
    HBaseTestingUtility utilB = new HBaseTestingUtility();
    Configuration confB = utilB.getConfiguration();
    utilB.startMiniCluster(1, hostCount, hostnames);

    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");

    byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
    TableName tableName = TableName.valueOf("table");
    // The table exists only on cluster B.
    try (Table table = utilB.createTable(tableName, FAMILIES, splitKeys);
        RegionLocator r = utilB.getConnection().getRegionLocator(tableName)) {

      // The job runs with cluster A's configuration; the capturing connection
      // impl records every Configuration used to open a connection under the
      // returned key so they can be inspected after the job.
      Job job = new Job(confA, "testLocalMRIncrementalLoad");
      Configuration jobConf = job.getConfiguration();
      final UUID key = ConfigurationCaptorConnection.configureConnectionImpl(jobConf);
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
      setupRandomGeneratorMapper(job, false);
      HFileOutputFormat2.configureIncrementalLoad(job, table, r);

      // Cluster B's ZK settings must have been copied into the job config
      // under the remote-cluster keys.
      assertEquals(confB.get(HConstants.ZOOKEEPER_QUORUM),
          jobConf.get(HFileOutputFormat2.REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY));
      assertEquals(confB.get(HConstants.ZOOKEEPER_CLIENT_PORT),
          jobConf.get(HFileOutputFormat2.REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY));
      assertEquals(confB.get(HConstants.ZOOKEEPER_ZNODE_PARENT),
          jobConf.get(HFileOutputFormat2.REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY));

      FileOutputFormat.setOutputPath(job, testDir);

      assertFalse(util.getTestFileSystem().exists(testDir));

      assertTrue(job.waitForCompletion(true));

      // Every connection the job opened must have pointed at cluster B.
      final List<Configuration> configs =
          ConfigurationCaptorConnection.getCapturedConfigarutions(key);

      assertFalse(configs.isEmpty());
      for (Configuration config : configs) {
        assertEquals(confB.get(HConstants.ZOOKEEPER_QUORUM),
            config.get(HConstants.ZOOKEEPER_QUORUM));
        assertEquals(confB.get(HConstants.ZOOKEEPER_CLIENT_PORT),
            config.get(HConstants.ZOOKEEPER_CLIENT_PORT));
        assertEquals(confB.get(HConstants.ZOOKEEPER_ZNODE_PARENT),
            config.get(HConstants.ZOOKEEPER_ZNODE_PARENT));
      }
    } finally {
      utilB.deleteTable(tableName);
      testDir.getFileSystem(confA).delete(testDir, true);
      util.shutdownMiniCluster();
      utilB.shutdownMiniCluster();
    }
  }
1405
1406 private static class ConfigurationCaptorConnection implements Connection {
1407 private static final String UUID_KEY = "ConfigurationCaptorConnection.uuid";
1408
1409 private static final Map<UUID, List<Configuration>> confs = new ConcurrentHashMap<>();
1410
1411 private final Connection delegate;
1412
1413 public ConfigurationCaptorConnection(
1414 Configuration conf, boolean managed, ExecutorService es, User user)
1415 throws IOException {
1416 Configuration confForDelegate = new Configuration(conf);
1417 confForDelegate.unset(HConnection.HBASE_CLIENT_CONNECTION_IMPL);
1418 delegate = createConnection(confForDelegate, es, user);
1419
1420 final String uuid = conf.get(UUID_KEY);
1421 if (uuid != null) {
1422 final UUID key = UUID.fromString(uuid);
1423 List<Configuration> configurations = confs.get(key);
1424 if (configurations == null) {
1425 configurations = new CopyOnWriteArrayList<>();
1426 confs.put(key, configurations);
1427 }
1428 configurations.add(conf);
1429 }
1430 }
1431
1432 static UUID configureConnectionImpl(Configuration conf) {
1433 conf.setClass(HConnection.HBASE_CLIENT_CONNECTION_IMPL,
1434 ConfigurationCaptorConnection.class, Connection.class);
1435
1436 final UUID uuid = UUID.randomUUID();
1437 conf.set(UUID_KEY, uuid.toString());
1438 return uuid;
1439 }
1440
1441 static List<Configuration> getCapturedConfigarutions(UUID key) {
1442 return confs.get(key);
1443 }
1444
1445 @Override
1446 public Configuration getConfiguration() {
1447 return delegate.getConfiguration();
1448 }
1449
1450 @Override
1451 public Table getTable(TableName tableName) throws IOException {
1452 return delegate.getTable(tableName);
1453 }
1454
1455 @Override
1456 public Table getTable(TableName tableName, ExecutorService pool) throws IOException {
1457 return delegate.getTable(tableName, pool);
1458 }
1459
1460 @Override
1461 public BufferedMutator getBufferedMutator(TableName tableName) throws IOException {
1462 return delegate.getBufferedMutator(tableName);
1463 }
1464
1465 @Override
1466 public BufferedMutator getBufferedMutator(BufferedMutatorParams params)
1467 throws IOException {
1468 return delegate.getBufferedMutator(params);
1469 }
1470
1471 @Override
1472 public RegionLocator getRegionLocator(TableName tableName) throws IOException {
1473 return delegate.getRegionLocator(tableName);
1474 }
1475
1476 @Override
1477 public Admin getAdmin() throws IOException {
1478 return delegate.getAdmin();
1479 }
1480
1481 @Override
1482 public String getClusterId() throws IOException {
1483 return delegate.getClusterId();
1484 }
1485
1486 @Override
1487 public void close() throws IOException {
1488 delegate.close();
1489 }
1490
1491 @Override
1492 public boolean isClosed() {
1493 return delegate.isClosed();
1494 }
1495
1496 @Override
1497 public void abort(String why, Throwable e) {
1498 delegate.abort(why, e);
1499 }
1500
1501 @Override
1502 public boolean isAborted() {
1503 return delegate.isAborted();
1504 }
1505 }
1506 }