View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import static org.apache.hadoop.hbase.client.ConnectionFactory.createConnection;
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.assertFalse;
24  import static org.junit.Assert.assertNotNull;
25  import static org.junit.Assert.assertNotSame;
26  import static org.junit.Assert.assertTrue;
27  import static org.junit.Assert.fail;
28  
29  import java.io.IOException;
30  import java.io.UnsupportedEncodingException;
31  import java.util.Arrays;
32  import java.util.HashMap;
33  import java.util.Iterator;
34  import java.util.List;
35  import java.util.Map;
36  import java.util.Map.Entry;
37  import java.util.Random;
38  import java.util.Set;
39  import java.util.UUID;
40  import java.util.concurrent.Callable;
41  import java.util.concurrent.ConcurrentHashMap;
42  import java.util.concurrent.CopyOnWriteArrayList;
43  import java.util.concurrent.ExecutorService;
44  
45  import org.apache.commons.logging.Log;
46  import org.apache.commons.logging.LogFactory;
47  import org.apache.hadoop.conf.Configuration;
48  import org.apache.hadoop.fs.FileStatus;
49  import org.apache.hadoop.fs.FileSystem;
50  import org.apache.hadoop.fs.LocatedFileStatus;
51  import org.apache.hadoop.fs.Path;
52  import org.apache.hadoop.fs.RemoteIterator;
53  import org.apache.hadoop.hbase.CategoryBasedTimeout;
54  import org.apache.hadoop.hbase.Cell;
55  import org.apache.hadoop.hbase.CellUtil;
56  import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
57  import org.apache.hadoop.hbase.HBaseConfiguration;
58  import org.apache.hadoop.hbase.HBaseTestingUtility;
59  import org.apache.hadoop.hbase.HColumnDescriptor;
60  import org.apache.hadoop.hbase.HConstants;
61  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
62  import org.apache.hadoop.hbase.HTableDescriptor;
63  import org.apache.hadoop.hbase.HadoopShims;
64  import org.apache.hadoop.hbase.KeyValue;
65  import org.apache.hadoop.hbase.PerformanceEvaluation;
66  import org.apache.hadoop.hbase.TableName;
67  import org.apache.hadoop.hbase.Tag;
68  import org.apache.hadoop.hbase.TagType;
69  import org.apache.hadoop.hbase.client.Admin;
70  import org.apache.hadoop.hbase.client.BufferedMutator;
71  import org.apache.hadoop.hbase.client.BufferedMutatorParams;
72  import org.apache.hadoop.hbase.client.Connection;
73  import org.apache.hadoop.hbase.client.HConnection;
74  import org.apache.hadoop.hbase.client.HTable;
75  import org.apache.hadoop.hbase.client.Put;
76  import org.apache.hadoop.hbase.client.RegionLocator;
77  import org.apache.hadoop.hbase.client.Result;
78  import org.apache.hadoop.hbase.client.ResultScanner;
79  import org.apache.hadoop.hbase.client.Scan;
80  import org.apache.hadoop.hbase.client.Table;
81  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
82  import org.apache.hadoop.hbase.io.compress.Compression;
83  import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
84  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
85  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
86  import org.apache.hadoop.hbase.io.hfile.HFile;
87  import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
88  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
89  import org.apache.hadoop.hbase.regionserver.BloomType;
90  import org.apache.hadoop.hbase.regionserver.HRegion;
91  import org.apache.hadoop.hbase.regionserver.Store;
92  import org.apache.hadoop.hbase.regionserver.StoreFile;
93  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
94  import org.apache.hadoop.hbase.security.User;
95  import org.apache.hadoop.hbase.testclassification.LargeTests;
96  import org.apache.hadoop.hbase.util.Bytes;
97  import org.apache.hadoop.hbase.util.FSUtils;
98  import org.apache.hadoop.hbase.util.Writables;
99  import org.apache.hadoop.io.NullWritable;
100 import org.apache.hadoop.mapreduce.Job;
101 import org.apache.hadoop.mapreduce.Mapper;
102 import org.apache.hadoop.mapreduce.RecordWriter;
103 import org.apache.hadoop.mapreduce.TaskAttemptContext;
104 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
105 import org.junit.Ignore;
106 import org.junit.Rule;
107 import org.junit.Test;
108 import org.junit.experimental.categories.Category;
109 import org.junit.rules.TestRule;
110 import org.mockito.Mockito;
111 
112 /**
113  * Simple test for {@link CellSortReducer} and {@link HFileOutputFormat2}.
114  * Sets up and runs a mapreduce job that writes hfile output.
115  * Creates a few inner classes to implement splits and an inputformat that
116  * emits keys and values like those of {@link PerformanceEvaluation}.
117  */
118 @Category(LargeTests.class)
119 public class TestHFileOutputFormat2  {
  // Per-test timeout sized for the LargeTests category; also watches for
  // stuck threads.
  @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
      withTimeout(this.getClass()).withLookingForStuckThread(true).build();
  // Number of rows each random-generating mapper emits per map() call.
  private final static int ROWSPERSPLIT = 1024;

  // Base family name; the test table uses the two derived families below.
  private static final byte[] FAMILY_NAME = Bytes.toBytes("info");
  private static final byte[][] FAMILIES = {
      Bytes.add(FAMILY_NAME, Bytes.toBytes("-A")),
      Bytes.add(FAMILY_NAME, Bytes.toBytes("-B"))
    };
  private static final TableName TABLE_NAME =
      TableName.valueOf("TestTable");

  // Recreated by doIncrementalLoadTest, hence not final.
  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static final Log LOG = LogFactory.getLog(TestHFileOutputFormat2.class);
135 
136   /**
137    * Simple mapper that makes KeyValue output.
138    */
139   static class RandomKVGeneratingMapper
140       extends Mapper<NullWritable, NullWritable,
141                  ImmutableBytesWritable, Cell> {
142 
143     private int keyLength;
144     private static final int KEYLEN_DEFAULT=10;
145     private static final String KEYLEN_CONF="randomkv.key.length";
146 
147     private int valLength;
148     private static final int VALLEN_DEFAULT=10;
149     private static final String VALLEN_CONF="randomkv.val.length";
150     private static final byte [] QUALIFIER = Bytes.toBytes("data");
151 
152     @Override
153     protected void setup(Context context) throws IOException,
154         InterruptedException {
155       super.setup(context);
156 
157       Configuration conf = context.getConfiguration();
158       keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
159       valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
160     }
161 
162     @Override
163     protected void map(
164         NullWritable n1, NullWritable n2,
165         Mapper<NullWritable, NullWritable,
166                ImmutableBytesWritable,Cell>.Context context)
167         throws java.io.IOException ,InterruptedException
168     {
169 
170       byte keyBytes[] = new byte[keyLength];
171       byte valBytes[] = new byte[valLength];
172 
173       int taskId = context.getTaskAttemptID().getTaskID().getId();
174       assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
175 
176       Random random = new Random();
177       for (int i = 0; i < ROWSPERSPLIT; i++) {
178 
179         random.nextBytes(keyBytes);
180         // Ensure that unique tasks generate unique keys
181         keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
182         random.nextBytes(valBytes);
183         ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
184 
185         for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
186           Cell kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes);
187           context.write(key, kv);
188         }
189       }
190     }
191   }
192 
193   /**
194    * Simple mapper that makes Put output.
195    */
196   static class RandomPutGeneratingMapper
197       extends Mapper<NullWritable, NullWritable,
198                  ImmutableBytesWritable, Put> {
199 
200     private int keyLength;
201     private static final int KEYLEN_DEFAULT=10;
202     private static final String KEYLEN_CONF="randomkv.key.length";
203 
204     private int valLength;
205     private static final int VALLEN_DEFAULT=10;
206     private static final String VALLEN_CONF="randomkv.val.length";
207     private static final byte [] QUALIFIER = Bytes.toBytes("data");
208 
209     @Override
210     protected void setup(Context context) throws IOException,
211         InterruptedException {
212       super.setup(context);
213 
214       Configuration conf = context.getConfiguration();
215       keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
216       valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
217     }
218 
219     @Override
220     protected void map(
221         NullWritable n1, NullWritable n2,
222         Mapper<NullWritable, NullWritable,
223                ImmutableBytesWritable,Put>.Context context)
224         throws java.io.IOException ,InterruptedException
225     {
226 
227       byte keyBytes[] = new byte[keyLength];
228       byte valBytes[] = new byte[valLength];
229 
230       int taskId = context.getTaskAttemptID().getTaskID().getId();
231       assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
232 
233       Random random = new Random();
234       for (int i = 0; i < ROWSPERSPLIT; i++) {
235 
236         random.nextBytes(keyBytes);
237         // Ensure that unique tasks generate unique keys
238         keyBytes[keyLength - 1] = (byte)(taskId & 0xFF);
239         random.nextBytes(valBytes);
240         ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
241 
242         for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
243           Put p = new Put(keyBytes);
244           p.addColumn(family, QUALIFIER, valBytes);
245           // set TTL to very low so that the scan does not return any value
246           p.setTTL(1l);
247           context.write(key, p);
248         }
249       }
250     }
251   }
252 
253   private void setupRandomGeneratorMapper(Job job, boolean putSortReducer) {
254     if (putSortReducer) {
255       job.setInputFormatClass(NMapInputFormat.class);
256       job.setMapperClass(RandomPutGeneratingMapper.class);
257       job.setMapOutputKeyClass(ImmutableBytesWritable.class);
258       job.setMapOutputValueClass(Put.class);
259     } else {
260       job.setInputFormatClass(NMapInputFormat.class);
261       job.setMapperClass(RandomKVGeneratingMapper.class);
262       job.setMapOutputKeyClass(ImmutableBytesWritable.class);
263       job.setMapOutputValueClass(KeyValue.class);
264     }
265   }
266 
267   /**
268    * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if
269    * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
270    * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
271    */
272   @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
273   public void test_LATEST_TIMESTAMP_isReplaced()
274   throws Exception {
275     Configuration conf = new Configuration(this.util.getConfiguration());
276     RecordWriter<ImmutableBytesWritable, Cell> writer = null;
277     TaskAttemptContext context = null;
278     Path dir =
279       util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
280     try {
281       Job job = new Job(conf);
282       FileOutputFormat.setOutputPath(job, dir);
283       context = createTestTaskAttemptContext(job);
284       HFileOutputFormat2 hof = new HFileOutputFormat2();
285       writer = hof.getRecordWriter(context);
286       final byte [] b = Bytes.toBytes("b");
287 
288       // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  It should be
289       // changed by call to write.  Check all in kv is same but ts.
290       KeyValue kv = new KeyValue(b, b, b);
291       KeyValue original = kv.clone();
292       writer.write(new ImmutableBytesWritable(), kv);
293       assertFalse(original.equals(kv));
294       assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
295       assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
296       assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
297       assertNotSame(original.getTimestamp(), kv.getTimestamp());
298       assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());
299 
300       // Test 2. Now test passing a kv that has explicit ts.  It should not be
301       // changed by call to record write.
302       kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
303       original = kv.clone();
304       writer.write(new ImmutableBytesWritable(), kv);
305       assertTrue(original.equals(kv));
306     } finally {
307       if (writer != null && context != null) writer.close(context);
308       dir.getFileSystem(conf).delete(dir, true);
309     }
310   }
311 
312   private TaskAttemptContext createTestTaskAttemptContext(final Job job)
313   throws Exception {
314     HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
315     TaskAttemptContext context = hadoop.createTestTaskAttemptContext(
316       job, "attempt_201402131733_0001_m_000000_0");
317     return context;
318   }
319 
320   /*
321    * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
322    * metadata used by time-restricted scans.
323    */
324   @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
325   public void test_TIMERANGE() throws Exception {
326     Configuration conf = new Configuration(this.util.getConfiguration());
327     RecordWriter<ImmutableBytesWritable, Cell> writer = null;
328     TaskAttemptContext context = null;
329     Path dir =
330       util.getDataTestDir("test_TIMERANGE_present");
331     LOG.info("Timerange dir writing to dir: " + dir);
332     try {
333       // build a record writer using HFileOutputFormat2
334       Job job = new Job(conf);
335       FileOutputFormat.setOutputPath(job, dir);
336       context = createTestTaskAttemptContext(job);
337       HFileOutputFormat2 hof = new HFileOutputFormat2();
338       writer = hof.getRecordWriter(context);
339 
340       // Pass two key values with explicit times stamps
341       final byte [] b = Bytes.toBytes("b");
342 
343       // value 1 with timestamp 2000
344       KeyValue kv = new KeyValue(b, b, b, 2000, b);
345       KeyValue original = kv.clone();
346       writer.write(new ImmutableBytesWritable(), kv);
347       assertEquals(original,kv);
348 
349       // value 2 with timestamp 1000
350       kv = new KeyValue(b, b, b, 1000, b);
351       original = kv.clone();
352       writer.write(new ImmutableBytesWritable(), kv);
353       assertEquals(original, kv);
354 
355       // verify that the file has the proper FileInfo.
356       writer.close(context);
357 
358       // the generated file lives 1 directory down from the attempt directory
359       // and is the only file, e.g.
360       // _attempt__0000_r_000000_0/b/1979617994050536795
361       FileSystem fs = FileSystem.get(conf);
362       Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
363       FileStatus[] sub1 = fs.listStatus(attemptDirectory);
364       FileStatus[] file = fs.listStatus(sub1[0].getPath());
365 
366       // open as HFile Reader and pull out TIMERANGE FileInfo.
367       HFile.Reader rd = HFile.createReader(fs, file[0].getPath(),
368           new CacheConfig(conf), conf);
369       Map<byte[],byte[]> finfo = rd.loadFileInfo();
370       byte[] range = finfo.get("TIMERANGE".getBytes());
371       assertNotNull(range);
372 
373       // unmarshall and check values.
374       TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
375       Writables.copyWritable(range, timeRangeTracker);
376       LOG.info(timeRangeTracker.getMin() +
377           "...." + timeRangeTracker.getMax());
378       assertEquals(1000, timeRangeTracker.getMin());
379       assertEquals(2000, timeRangeTracker.getMax());
380       rd.close();
381     } finally {
382       if (writer != null && context != null) writer.close(context);
383       dir.getFileSystem(conf).delete(dir, true);
384     }
385   }
386 
387   /**
388    * Run small MR job.
389    */
390   @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
391   public void testWritingPEData() throws Exception {
392     Configuration conf = util.getConfiguration();
393     Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
394     FileSystem fs = testDir.getFileSystem(conf);
395 
396     // Set down this value or we OOME in eclipse.
397     conf.setInt("mapreduce.task.io.sort.mb", 20);
398     // Write a few files.
399     conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);
400 
401     Job job = new Job(conf, "testWritingPEData");
402     setupRandomGeneratorMapper(job, false);
403     // This partitioner doesn't work well for number keys but using it anyways
404     // just to demonstrate how to configure it.
405     byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
406     byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
407 
408     Arrays.fill(startKey, (byte)0);
409     Arrays.fill(endKey, (byte)0xff);
410 
411     job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
412     // Set start and end rows for partitioner.
413     SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
414     SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
415     job.setReducerClass(KeyValueSortReducer.class);
416     job.setOutputFormatClass(HFileOutputFormat2.class);
417     job.setNumReduceTasks(4);
418     job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
419         MutationSerialization.class.getName(), ResultSerialization.class.getName(),
420         KeyValueSerialization.class.getName());
421 
422     FileOutputFormat.setOutputPath(job, testDir);
423     assertTrue(job.waitForCompletion(false));
424     FileStatus [] files = fs.listStatus(testDir);
425     assertTrue(files.length > 0);
426   }
427 
428   /**
429    * Test that {@link HFileOutputFormat2} RecordWriter writes tags such as ttl into
430    * hfile.
431    */
432   @Test
433   public void test_WritingTagData()
434       throws Exception {
435     Configuration conf = new Configuration(this.util.getConfiguration());
436     final String HFILE_FORMAT_VERSION_CONF_KEY = "hfile.format.version";
437     conf.setInt(HFILE_FORMAT_VERSION_CONF_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS);
438     RecordWriter<ImmutableBytesWritable, Cell> writer = null;
439     TaskAttemptContext context = null;
440     Path dir =
441         util.getDataTestDir("WritingTagData");
442     try {
443       Job job = new Job(conf);
444       FileOutputFormat.setOutputPath(job, dir);
445       context = createTestTaskAttemptContext(job);
446       HFileOutputFormat2 hof = new HFileOutputFormat2();
447       writer = hof.getRecordWriter(context);
448       final byte [] b = Bytes.toBytes("b");
449 
450       KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b, new Tag[] {
451           new Tag(TagType.TTL_TAG_TYPE, Bytes.toBytes(978670)) });
452       writer.write(new ImmutableBytesWritable(), kv);
453       writer.close(context);
454       writer = null;
455       FileSystem fs = dir.getFileSystem(conf);
456       RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
457       while(iterator.hasNext()) {
458         LocatedFileStatus keyFileStatus = iterator.next();
459         HFile.Reader reader = HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf),
460             conf);
461         HFileScanner scanner = reader.getScanner(false, false, false);
462         scanner.seekTo();
463         Cell cell = scanner.getKeyValue();
464 
465         Iterator<Tag> tagsIterator = CellUtil.tagsIterator(cell.getTagsArray(),
466             cell.getTagsOffset(), cell.getTagsLength());
467         assertTrue(tagsIterator.hasNext());
468         assertTrue(tagsIterator.next().getType() == TagType.TTL_TAG_TYPE);
469       }
470     } finally {
471       if (writer != null && context != null) writer.close(context);
472       dir.getFileSystem(conf).delete(dir, true);
473     }
474   }
475 
476   @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
477   public void testJobConfiguration() throws Exception {
478     Configuration conf = new Configuration(this.util.getConfiguration());
479     conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY, util.getDataTestDir("testJobConfiguration")
480         .toString());
481     Job job = new Job(conf);
482     job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
483     RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
484     setupMockStartKeys(regionLocator);
485     setupMockTableName(regionLocator);
486     HFileOutputFormat2.configureIncrementalLoad(job, new HTableDescriptor(), regionLocator);
487     assertEquals(job.getNumReduceTasks(), 4);
488   }
489 
490   private byte [][] generateRandomStartKeys(int numKeys) {
491     Random random = new Random();
492     byte[][] ret = new byte[numKeys][];
493     // first region start key is always empty
494     ret[0] = HConstants.EMPTY_BYTE_ARRAY;
495     for (int i = 1; i < numKeys; i++) {
496       ret[i] =
497         PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
498     }
499     return ret;
500   }
501 
502   private byte[][] generateRandomSplitKeys(int numKeys) {
503     Random random = new Random();
504     byte[][] ret = new byte[numKeys][];
505     for (int i = 0; i < numKeys; i++) {
506       ret[i] =
507           PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
508     }
509     return ret;
510   }
511 
  /** End-to-end incremental-load round trip with no region changes. */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testMRIncrementalLoad() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoad\n");
    doIncrementalLoadTest(false, false, false, "testMRIncrementalLoad");
  }
517 
  /** Incremental-load round trip that also re-splits the table mid-test. */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
    doIncrementalLoadTest(true, false, false, "testMRIncrementalLoadWithSplit");
  }
523 
524   /**
525    * Test for HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY = true This test could only check the
526    * correctness of original logic if LOCALITY_SENSITIVE_CONF_KEY is set to true. Because
527    * MiniHBaseCluster always run with single hostname (and different ports), it's not possible to
528    * check the region locality by comparing region locations and DN hostnames. When MiniHBaseCluster
529    * supports explicit hostnames parameter (just like MiniDFSCluster does), we could test region
530    * locality features more easily.
531    */
532   @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
533   public void testMRIncrementalLoadWithLocality() throws Exception {
534     LOG.info("\nStarting test testMRIncrementalLoadWithLocality\n");
535     doIncrementalLoadTest(false, true, false, "testMRIncrementalLoadWithLocality1");
536     doIncrementalLoadTest(true, true, false, "testMRIncrementalLoadWithLocality2");
537   }
538 
  /**
   * Incremental load through PutSortReducer; the Puts carry a very short TTL,
   * so the loaded table is expected to read back empty.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testMRIncrementalLoadWithPutSortReducer() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithPutSortReducer\n");
    doIncrementalLoadTest(false, false, true, "testMRIncrementalLoadWithPutSortReducer");
  }
544 
  /**
   * Core incremental-load round trip: start a mini cluster, run the random MR
   * job that writes hfiles, bulk-load them with LoadIncrementalHFiles, and
   * verify table contents (and optionally block locality).
   *
   * @param shouldChangeRegions re-create the table with different splits
   *          between hfile generation and the bulk load
   * @param shouldKeepLocality set LOCALITY_SENSITIVE_CONF_KEY and assert 100%
   *          locality for every datanode hostname
   * @param putSortReducer generate Puts (short TTL, so zero visible rows)
   *          instead of KeyValues
   * @param tableStr test label (used for naming by callers)
   */
  private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
      boolean putSortReducer, String tableStr) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
    int hostCount = 1;
    int regionNum = 5;
    if (shouldKeepLocality) {
      // We should change host count higher than hdfs replica count when MiniHBaseCluster supports
      // explicit hostnames parameter just like MiniDFSCluster does.
      hostCount = 3;
      regionNum = 20;
    }
    byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
      hostnames[i] = "datanode_" + i;
    }
    util.setJobWithoutMRCluster();
    util.startMiniCluster(1, hostCount, hostnames);

    // NOTE(review): table is reassigned below (shouldChangeRegions path) and
    // never close()d; cleanup relies on shutdownMiniCluster() in the finally.
    HTable table = util.createTable(TABLE_NAME, FAMILIES, splitKeys);
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    try (RegionLocator r = table.getRegionLocator(); Admin admin = table.getConnection().getAdmin()) {
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions = r.getStartKeys().length;
      assertEquals("Should make " + regionNum + " regions", numRegions, regionNum);

      // Generate the bulk load files
      runIncrementalPELoad(conf, table.getTableDescriptor(), table.getRegionLocator(), testDir,
        putSortReducer);
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table", 0, util.countRows(table));

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : testDir.getFileSystem(conf).listStatus(testDir)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);

      // handle the split case
      if (shouldChangeRegions) {
        LOG.info("Changing regions in table");
        admin.disableTable(table.getName());
        util.waitUntilNoRegionsInTransition();

        util.deleteTable(table.getName());
        byte[][] newSplitKeys = generateRandomSplitKeys(14);
        table = util.createTable(TABLE_NAME, FAMILIES, newSplitKeys);

        // 14 split keys => 15 regions; wait until all are assigned.
        while (table.getRegionLocator().getAllRegionLocations().size() != 15
            || !admin.isTableAvailable(table.getName())) {
          Thread.sleep(200);
          LOG.info("Waiting for new region assignment to happen");
        }
      }

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);

      int expectedRows = 0;
      if (putSortReducer) {
        // no rows should be extracted
        assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
          util.countRows(table));
      } else {
        // Ensure data shows up
        expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
        assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
          util.countRows(table));
        Scan scan = new Scan();
        ResultScanner results = table.getScanner(scan);
        for (Result res : results) {
          // Each row carries one cell per family, all with identical values
          // (the mapper wrote the same bytes to both families).
          assertEquals(FAMILIES.length, res.rawCells().length);
          Cell first = res.rawCells()[0];
          for (Cell kv : res.rawCells()) {
            assertTrue(CellUtil.matchingRow(first, kv));
            assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
          }
        }
        results.close();
      }
      String tableDigestBefore = util.checksumRows(table);

      // Check region locality
      HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
      for (HRegion region : util.getHBaseCluster().getRegions(TABLE_NAME)) {
        hbd.add(region.getHDFSBlocksDistribution());
      }
      for (String hostname : hostnames) {
        float locality = hbd.getBlockLocalityIndex(hostname);
        LOG.info("locality of [" + hostname + "]: " + locality);
        assertEquals(100, (int) (locality * 100));
      }

      // Cause regions to reopen
      admin.disableTable(TABLE_NAME);
      while (!admin.isTableDisabled(TABLE_NAME)) {
        Thread.sleep(200);
        LOG.info("Waiting for table to disable");
      }
      admin.enableTable(TABLE_NAME);
      util.waitTableAvailable(TABLE_NAME);
      // Data must survive a full region close/reopen cycle.
      assertEquals("Data should remain after reopening of regions", tableDigestBefore,
        util.checksumRows(table));
    } finally {
      testDir.getFileSystem(conf).delete(testDir, true);
      util.deleteTable(TABLE_NAME);
      util.shutdownMiniCluster();
    }
  }
661 
662   private void runIncrementalPELoad(Configuration conf, HTableDescriptor tableDescriptor,
663       RegionLocator regionLocator, Path outDir, boolean putSortReducer) throws IOException,
664       UnsupportedEncodingException, InterruptedException, ClassNotFoundException {
665     Job job = new Job(conf, "testLocalMRIncrementalLoad");
666     job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
667     job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
668         MutationSerialization.class.getName(), ResultSerialization.class.getName(),
669         KeyValueSerialization.class.getName());
670     setupRandomGeneratorMapper(job, putSortReducer);
671     HFileOutputFormat2.configureIncrementalLoad(job, tableDescriptor, regionLocator);
672     FileOutputFormat.setOutputPath(job, outDir);
673 
674     assertFalse(util.getTestFileSystem().exists(outDir)) ;
675 
676     assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
677 
678     assertTrue(job.waitForCompletion(true));
679   }
680 
681   /**
682    * Test for {@link HFileOutputFormat2#configureCompression(org.apache.hadoop.hbase.client.Table,
683    * Configuration)} and {@link HFileOutputFormat2#createFamilyCompressionMap
684    * (Configuration)}.
685    * Tests that the compression map is correctly serialized into
686    * and deserialized from configuration
687    *
688    * @throws IOException
689    */
690   @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
691   public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
692     for (int numCfs = 0; numCfs <= 3; numCfs++) {
693       Configuration conf = new Configuration(this.util.getConfiguration());
694       Map<String, Compression.Algorithm> familyToCompression =
695           getMockColumnFamiliesForCompression(numCfs);
696       Table table = Mockito.mock(HTable.class);
697       setupMockColumnFamiliesForCompression(table, familyToCompression);
698       HFileOutputFormat2.configureCompression(conf, table.getTableDescriptor());
699 
700       // read back family specific compression setting from the configuration
701       Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2
702           .createFamilyCompressionMap(conf);
703 
704       // test that we have a value for all column families that matches with the
705       // used mock values
706       for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
707         assertEquals("Compression configuration incorrect for column family:"
708             + entry.getKey(), entry.getValue(),
709             retrievedFamilyToCompressionMap.get(entry.getKey().getBytes()));
710       }
711     }
712   }
713 
714   private void setupMockColumnFamiliesForCompression(Table table,
715       Map<String, Compression.Algorithm> familyToCompression) throws IOException {
716     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
717     for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
718       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
719           .setMaxVersions(1)
720           .setCompressionType(entry.getValue())
721           .setBlockCacheEnabled(false)
722           .setTimeToLive(0));
723     }
724     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
725   }
726 
727   /**
728    * @return a map from column family names to compression algorithms for
729    *         testing column family compression. Column family names have special characters
730    */
731   private Map<String, Compression.Algorithm>
732       getMockColumnFamiliesForCompression (int numCfs) {
733     Map<String, Compression.Algorithm> familyToCompression
734       = new HashMap<String, Compression.Algorithm>();
735     // use column family names having special characters
736     if (numCfs-- > 0) {
737       familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
738     }
739     if (numCfs-- > 0) {
740       familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
741     }
742     if (numCfs-- > 0) {
743       familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
744     }
745     if (numCfs-- > 0) {
746       familyToCompression.put("Family3", Compression.Algorithm.NONE);
747     }
748     return familyToCompression;
749   }
750 
751 
752   /**
753    * Test for {@link HFileOutputFormat2#configureBloomType(org.apache.hadoop.hbase.client.Table,
754    * Configuration)} and {@link HFileOutputFormat2#createFamilyBloomTypeMap
755    * (Configuration)}.
756    * Tests that the compression map is correctly serialized into
757    * and deserialized from configuration
758    *
759    * @throws IOException
760    */
761   @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
762   public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
763     for (int numCfs = 0; numCfs <= 2; numCfs++) {
764       Configuration conf = new Configuration(this.util.getConfiguration());
765       Map<String, BloomType> familyToBloomType =
766           getMockColumnFamiliesForBloomType(numCfs);
767       Table table = Mockito.mock(HTable.class);
768       setupMockColumnFamiliesForBloomType(table,
769           familyToBloomType);
770       HFileOutputFormat2.configureBloomType(table.getTableDescriptor(), conf);
771 
772       // read back family specific data block encoding settings from the
773       // configuration
774       Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
775           HFileOutputFormat2
776               .createFamilyBloomTypeMap(conf);
777 
778       // test that we have a value for all column families that matches with the
779       // used mock values
780       for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
781         assertEquals("BloomType configuration incorrect for column family:"
782             + entry.getKey(), entry.getValue(),
783             retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes()));
784       }
785     }
786   }
787 
788   private void setupMockColumnFamiliesForBloomType(Table table,
789       Map<String, BloomType> familyToDataBlockEncoding) throws IOException {
790     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
791     for (Entry<String, BloomType> entry : familyToDataBlockEncoding.entrySet()) {
792       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
793           .setMaxVersions(1)
794           .setBloomFilterType(entry.getValue())
795           .setBlockCacheEnabled(false)
796           .setTimeToLive(0));
797     }
798     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
799   }
800 
801   /**
802    * @return a map from column family names to compression algorithms for
803    *         testing column family compression. Column family names have special characters
804    */
805   private Map<String, BloomType>
806   getMockColumnFamiliesForBloomType (int numCfs) {
807     Map<String, BloomType> familyToBloomType =
808         new HashMap<String, BloomType>();
809     // use column family names having special characters
810     if (numCfs-- > 0) {
811       familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
812     }
813     if (numCfs-- > 0) {
814       familyToBloomType.put("Family2=asdads&!AASD",
815           BloomType.ROWCOL);
816     }
817     if (numCfs-- > 0) {
818       familyToBloomType.put("Family3", BloomType.NONE);
819     }
820     return familyToBloomType;
821   }
822 
823   /**
824    * Test for {@link HFileOutputFormat2#configureBlockSize(org.apache.hadoop.hbase.client.Table,
825    * Configuration)} and {@link HFileOutputFormat2#createFamilyBlockSizeMap
826    * (Configuration)}.
827    * Tests that the compression map is correctly serialized into
828    * and deserialized from configuration
829    *
830    * @throws IOException
831    */
832   @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
833   public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
834     for (int numCfs = 0; numCfs <= 3; numCfs++) {
835       Configuration conf = new Configuration(this.util.getConfiguration());
836       Map<String, Integer> familyToBlockSize =
837           getMockColumnFamiliesForBlockSize(numCfs);
838       Table table = Mockito.mock(HTable.class);
839       setupMockColumnFamiliesForBlockSize(table,
840           familyToBlockSize);
841       HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf);
842 
843       // read back family specific data block encoding settings from the
844       // configuration
845       Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
846           HFileOutputFormat2
847               .createFamilyBlockSizeMap(conf);
848 
849       // test that we have a value for all column families that matches with the
850       // used mock values
851       for (Entry<String, Integer> entry : familyToBlockSize.entrySet()
852           ) {
853         assertEquals("BlockSize configuration incorrect for column family:"
854             + entry.getKey(), entry.getValue(),
855             retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes()));
856       }
857     }
858   }
859 
860   private void setupMockColumnFamiliesForBlockSize(Table table,
861       Map<String, Integer> familyToDataBlockEncoding) throws IOException {
862     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
863     for (Entry<String, Integer> entry : familyToDataBlockEncoding.entrySet()) {
864       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
865           .setMaxVersions(1)
866           .setBlocksize(entry.getValue())
867           .setBlockCacheEnabled(false)
868           .setTimeToLive(0));
869     }
870     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
871   }
872 
873   /**
874    * @return a map from column family names to compression algorithms for
875    *         testing column family compression. Column family names have special characters
876    */
877   private Map<String, Integer>
878   getMockColumnFamiliesForBlockSize (int numCfs) {
879     Map<String, Integer> familyToBlockSize =
880         new HashMap<String, Integer>();
881     // use column family names having special characters
882     if (numCfs-- > 0) {
883       familyToBlockSize.put("Family1!@#!@#&", 1234);
884     }
885     if (numCfs-- > 0) {
886       familyToBlockSize.put("Family2=asdads&!AASD",
887           Integer.MAX_VALUE);
888     }
889     if (numCfs-- > 0) {
890       familyToBlockSize.put("Family2=asdads&!AASD",
891           Integer.MAX_VALUE);
892     }
893     if (numCfs-- > 0) {
894       familyToBlockSize.put("Family3", 0);
895     }
896     return familyToBlockSize;
897   }
898 
899   /**
900    * Test for {@link HFileOutputFormat2#configureDataBlockEncoding(HTableDescriptor, Configuration)}
901    * and {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap(Configuration)}.
902    * Tests that the compression map is correctly serialized into
903    * and deserialized from configuration
904    *
905    * @throws IOException
906    */
907   @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
908   public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
909     for (int numCfs = 0; numCfs <= 3; numCfs++) {
910       Configuration conf = new Configuration(this.util.getConfiguration());
911       Map<String, DataBlockEncoding> familyToDataBlockEncoding =
912           getMockColumnFamiliesForDataBlockEncoding(numCfs);
913       Table table = Mockito.mock(HTable.class);
914       setupMockColumnFamiliesForDataBlockEncoding(table,
915           familyToDataBlockEncoding);
916       HTableDescriptor tableDescriptor = table.getTableDescriptor();
917       HFileOutputFormat2.configureDataBlockEncoding(tableDescriptor, conf);
918 
919       // read back family specific data block encoding settings from the
920       // configuration
921       Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
922           HFileOutputFormat2
923           .createFamilyDataBlockEncodingMap(conf);
924 
925       // test that we have a value for all column families that matches with the
926       // used mock values
927       for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
928         assertEquals("DataBlockEncoding configuration incorrect for column family:"
929             + entry.getKey(), entry.getValue(),
930             retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes()));
931       }
932     }
933   }
934 
935   private void setupMockColumnFamiliesForDataBlockEncoding(Table table,
936       Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
937     HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAME);
938     for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
939       mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
940           .setMaxVersions(1)
941           .setDataBlockEncoding(entry.getValue())
942           .setBlockCacheEnabled(false)
943           .setTimeToLive(0));
944     }
945     Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
946   }
947 
948   /**
949    * @return a map from column family names to compression algorithms for
950    *         testing column family compression. Column family names have special characters
951    */
952   private Map<String, DataBlockEncoding>
953       getMockColumnFamiliesForDataBlockEncoding (int numCfs) {
954     Map<String, DataBlockEncoding> familyToDataBlockEncoding =
955         new HashMap<String, DataBlockEncoding>();
956     // use column family names having special characters
957     if (numCfs-- > 0) {
958       familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
959     }
960     if (numCfs-- > 0) {
961       familyToDataBlockEncoding.put("Family2=asdads&!AASD",
962           DataBlockEncoding.FAST_DIFF);
963     }
964     if (numCfs-- > 0) {
965       familyToDataBlockEncoding.put("Family2=asdads&!AASD",
966           DataBlockEncoding.PREFIX);
967     }
968     if (numCfs-- > 0) {
969       familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
970     }
971     return familyToDataBlockEncoding;
972   }
973 
974   private void setupMockStartKeys(RegionLocator table) throws IOException {
975     byte[][] mockKeys = new byte[][] {
976         HConstants.EMPTY_BYTE_ARRAY,
977         Bytes.toBytes("aaa"),
978         Bytes.toBytes("ggg"),
979         Bytes.toBytes("zzz")
980     };
981     Mockito.doReturn(mockKeys).when(table).getStartKeys();
982   }
983 
984   private void setupMockTableName(RegionLocator table) throws IOException {
985     TableName mockTableName = TableName.valueOf("mock_table");
986     Mockito.doReturn(mockTableName).when(table).getName();
987   }
988 
989   /**
990    * Test that {@link HFileOutputFormat2} RecordWriter uses compression and
991    * bloom filter settings from the column family descriptor
992    */
993   @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
994   public void testColumnFamilySettings() throws Exception {
995     Configuration conf = new Configuration(this.util.getConfiguration());
996     RecordWriter<ImmutableBytesWritable, Cell> writer = null;
997     TaskAttemptContext context = null;
998     Path dir = util.getDataTestDir("testColumnFamilySettings");
999 
1000     // Setup table descriptor
1001     Table table = Mockito.mock(Table.class);
1002     RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
1003     HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
1004     Mockito.doReturn(htd).when(table).getTableDescriptor();
1005     for (HColumnDescriptor hcd: HBaseTestingUtility.generateColumnDescriptors()) {
1006       htd.addFamily(hcd);
1007     }
1008 
1009     // set up the table to return some mock keys
1010     setupMockStartKeys(regionLocator);
1011 
1012     try {
1013       // partial map red setup to get an operational writer for testing
1014       // We turn off the sequence file compression, because DefaultCodec
1015       // pollutes the GZip codec pool with an incompatible compressor.
1016       conf.set("io.seqfile.compression.type", "NONE");
1017       conf.set("hbase.fs.tmp.dir", dir.toString());
1018       // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
1019       conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
1020       Job job = new Job(conf, "testLocalMRIncrementalLoad");
1021       job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
1022       setupRandomGeneratorMapper(job, false);
1023       HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
1024       FileOutputFormat.setOutputPath(job, dir);
1025       context = createTestTaskAttemptContext(job);
1026       HFileOutputFormat2 hof = new HFileOutputFormat2();
1027       writer = hof.getRecordWriter(context);
1028 
1029       // write out random rows
1030       writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
1031       writer.close(context);
1032 
1033       // Make sure that a directory was created for every CF
1034       FileSystem fs = dir.getFileSystem(conf);
1035 
1036       // commit so that the filesystem has one directory per column family
1037       hof.getOutputCommitter(context).commitTask(context);
1038       hof.getOutputCommitter(context).commitJob(context);
1039       FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
1040       assertEquals(htd.getFamilies().size(), families.length);
1041       for (FileStatus f : families) {
1042         String familyStr = f.getPath().getName();
1043         HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
1044         // verify that the compression on this file matches the configured
1045         // compression
1046         Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
1047         Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), conf);
1048         Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
1049 
1050         byte[] bloomFilter = fileInfo.get(StoreFile.BLOOM_FILTER_TYPE_KEY);
1051         if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
1052         assertEquals("Incorrect bloom filter used for column family " + familyStr +
1053           "(reader: " + reader + ")",
1054           hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
1055         assertEquals("Incorrect compression used for column family " + familyStr +
1056           "(reader: " + reader + ")", hcd.getCompression(), reader.getFileContext().getCompression());
1057       }
1058     } finally {
1059       dir.getFileSystem(conf).delete(dir, true);
1060     }
1061   }
1062 
1063   /**
1064    * Write random values to the writer assuming a table created using
1065    * {@link #FAMILIES} as column family descriptors
1066    */
1067   private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
1068       TaskAttemptContext context, Set<byte[]> families, int numRows)
1069       throws IOException, InterruptedException {
1070     byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
1071     int valLength = 10;
1072     byte valBytes[] = new byte[valLength];
1073 
1074     int taskId = context.getTaskAttemptID().getTaskID().getId();
1075     assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
1076     final byte [] qualifier = Bytes.toBytes("data");
1077     Random random = new Random();
1078     for (int i = 0; i < numRows; i++) {
1079 
1080       Bytes.putInt(keyBytes, 0, i);
1081       random.nextBytes(valBytes);
1082       ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);
1083 
1084       for (byte[] family : families) {
1085         Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
1086         writer.write(key, kv);
1087       }
1088     }
1089   }
1090 
1091   /**
1092    * This test is to test the scenario happened in HBASE-6901.
1093    * All files are bulk loaded and excluded from minor compaction.
1094    * Without the fix of HBASE-6901, an ArrayIndexOutOfBoundsException
1095    * will be thrown.
1096    */
1097   @Ignore ("Flakey: See HBASE-9051") @Test
1098   public void testExcludeAllFromMinorCompaction() throws Exception {
1099     Configuration conf = util.getConfiguration();
1100     conf.setInt("hbase.hstore.compaction.min", 2);
1101     generateRandomStartKeys(5);
1102     util.setJobWithoutMRCluster();
1103     util.startMiniCluster();
1104     try (Connection conn = createConnection();
1105         Admin admin = conn.getAdmin()) {
1106       final FileSystem fs = util.getDFSCluster().getFileSystem();
1107       HTable table = util.createTable(TABLE_NAME, FAMILIES);
1108       assertEquals("Should start with empty table", 0, util.countRows(table));
1109 
1110       // deep inspection: get the StoreFile dir
1111       final Path storePath = new Path(
1112         FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
1113           new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
1114             Bytes.toString(FAMILIES[0])));
1115       assertEquals(0, fs.listStatus(storePath).length);
1116 
1117       // Generate two bulk load files
1118       conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
1119           true);
1120 
1121       for (int i = 0; i < 2; i++) {
1122         Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
1123         runIncrementalPELoad(conf, table.getTableDescriptor(), conn.getRegionLocator(TABLE_NAME),
1124             testDir, false);
1125         // Perform the actual load
1126         new LoadIncrementalHFiles(conf).doBulkLoad(testDir, table);
1127       }
1128 
1129       // Ensure data shows up
1130       int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
1131       assertEquals("LoadIncrementalHFiles should put expected data in table",
1132           expectedRows, util.countRows(table));
1133 
1134       // should have a second StoreFile now
1135       assertEquals(2, fs.listStatus(storePath).length);
1136 
1137       // minor compactions shouldn't get rid of the file
1138       admin.compact(TABLE_NAME);
1139       try {
1140         quickPoll(new Callable<Boolean>() {
1141           @Override
1142           public Boolean call() throws Exception {
1143             List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAME);
1144             for (HRegion region : regions) {
1145               for (Store store : region.getStores()) {
1146                 store.closeAndArchiveCompactedFiles();
1147               }
1148             }
1149             return fs.listStatus(storePath).length == 1;
1150           }
1151         }, 5000);
1152         throw new IOException("SF# = " + fs.listStatus(storePath).length);
1153       } catch (AssertionError ae) {
1154         // this is expected behavior
1155       }
1156 
1157       // a major compaction should work though
1158       admin.majorCompact(TABLE_NAME);
1159       quickPoll(new Callable<Boolean>() {
1160         @Override
1161         public Boolean call() throws Exception {
1162           List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAME);
1163           for (HRegion region : regions) {
1164             for (Store store : region.getStores()) {
1165               store.closeAndArchiveCompactedFiles();
1166             }
1167           }
1168           return fs.listStatus(storePath).length == 1;
1169         }
1170       }, 5000);
1171 
1172     } finally {
1173       util.shutdownMiniCluster();
1174     }
1175   }
1176 
  /**
   * Tests that a bulk-loaded file flagged with
   * "hbase.mapreduce.hfileoutputformat.compaction.exclude" survives minor
   * compactions (store file count stays at 2) but is merged by a major compaction.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testExcludeMinorCompaction() throws Exception {
    Configuration conf = util.getConfiguration();
    conf.setInt("hbase.hstore.compaction.min", 2);
    generateRandomStartKeys(5);
    util.setJobWithoutMRCluster();
    util.startMiniCluster();
    try (Connection conn = createConnection(conf);
        Admin admin = conn.getAdmin()){
      Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
      final FileSystem fs = util.getDFSCluster().getFileSystem();
      Table table = util.createTable(TABLE_NAME, FAMILIES);
      assertEquals("Should start with empty table", 0, util.countRows(table));

      // deep inspection: get the StoreFile dir
      final Path storePath = new Path(
        FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAME),
          new Path(admin.getTableRegions(TABLE_NAME).get(0).getEncodedName(),
            Bytes.toString(FAMILIES[0])));
      assertEquals(0, fs.listStatus(storePath).length);

      // put some data in it and flush to create a storefile
      Put p = new Put(Bytes.toBytes("test"));
      p.add(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
      table.put(p);
      admin.flush(TABLE_NAME);
      assertEquals(1, util.countRows(table));
      quickPoll(new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

      // Generate a bulk load file with more rows
      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
          true);

      RegionLocator regionLocator = conn.getRegionLocator(TABLE_NAME);
      runIncrementalPELoad(conf, table.getTableDescriptor(), regionLocator, testDir, false);

      // Perform the actual load
      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, regionLocator);

      // Ensure data shows up
      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
      assertEquals("LoadIncrementalHFiles should put expected data in table",
          expectedRows + 1, util.countRows(table));

      // should have a second StoreFile now
      assertEquals(2, fs.listStatus(storePath).length);

      // minor compactions shouldn't get rid of the file
      admin.compact(TABLE_NAME);
      try {
        // we EXPECT this poll to time out (AssertionError from fail()): the excluded
        // bulk-loaded file must not be merged by a minor compaction, so the count
        // never drops to 1. Reaching the IOException below signals a wrongful merge.
        quickPoll(new Callable<Boolean>() {
          @Override
          public Boolean call() throws Exception {
            return fs.listStatus(storePath).length == 1;
          }
        }, 5000);
        throw new IOException("SF# = " + fs.listStatus(storePath).length);
      } catch (AssertionError ae) {
        // this is expected behavior
      }

      // a major compaction should work though
      admin.majorCompact(TABLE_NAME);
      quickPoll(new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          return fs.listStatus(storePath).length == 1;
        }
      }, 5000);

    } finally {
      util.shutdownMiniCluster();
    }
  }
1256 
1257   private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
1258     int sleepMs = 10;
1259     int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
1260     while (retries-- > 0) {
1261       if (c.call().booleanValue()) {
1262         return;
1263       }
1264       Thread.sleep(sleepMs);
1265     }
1266     fail();
1267   }
1268 
  /** Command-line entry point; see {@link #manualTest(String[])} for accepted arguments. */
  public static void main(String args[]) throws Exception {
    new TestHFileOutputFormat2().manualTest(args);
  }
1272 
1273   public void manualTest(String args[]) throws Exception {
1274     Configuration conf = HBaseConfiguration.create();
1275     util = new HBaseTestingUtility(conf);
1276     if ("newtable".equals(args[0])) {
1277       TableName tname = TableName.valueOf(args[1]);
1278       byte[][] splitKeys = generateRandomSplitKeys(4);
1279       try (HTable table = util.createTable(tname, FAMILIES, splitKeys)) {
1280       }
1281     } else if ("incremental".equals(args[0])) {
1282       TableName tname = TableName.valueOf(args[1]);
1283       try(Connection c = createConnection(conf);
1284           Admin admin = c.getAdmin();
1285           RegionLocator regionLocator = c.getRegionLocator(tname)) {
1286         Path outDir = new Path("incremental-out");
1287         runIncrementalPELoad(conf, admin.getTableDescriptor(tname), regionLocator, outDir, false);
1288       }
1289     } else {
1290       throw new RuntimeException(
1291           "usage: TestHFileOutputFormat2 newtable | incremental");
1292     }
1293   }
1294 
1295   @Test
1296   public void TestConfigureCompression() throws Exception {
1297     Configuration conf = new Configuration(this.util.getConfiguration());
1298     RecordWriter<ImmutableBytesWritable, Cell> writer = null;
1299     TaskAttemptContext context = null;
1300     Path dir = util.getDataTestDir("TestConfigureCompression");
1301     String hfileoutputformatCompression = "gz";
1302 
1303     try {
1304       conf.set(HFileOutputFormat2.OUTPUT_TABLE_NAME_CONF_KEY, TABLE_NAME.getNameAsString());
1305       conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
1306 
1307       conf.set(HFileOutputFormat2.COMPRESSION_OVERRIDE_CONF_KEY, hfileoutputformatCompression);
1308 
1309       Job job = Job.getInstance(conf);
1310       FileOutputFormat.setOutputPath(job, dir);
1311       context = createTestTaskAttemptContext(job);
1312       HFileOutputFormat2 hof = new HFileOutputFormat2();
1313       writer = hof.getRecordWriter(context);
1314       final byte[] b = Bytes.toBytes("b");
1315 
1316       KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b);
1317       writer.write(new ImmutableBytesWritable(), kv);
1318       writer.close(context);
1319       writer = null;
1320       FileSystem fs = dir.getFileSystem(conf);
1321       RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
1322       while (iterator.hasNext()) {
1323         LocatedFileStatus keyFileStatus = iterator.next();
1324         HFile.Reader reader =
1325             HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), conf);
1326         assertEquals(reader.getCompressionAlgorithm().getName(), hfileoutputformatCompression);
1327       }
1328     } finally {
1329       if (writer != null && context != null) {
1330         writer.close(context);
1331       }
1332       dir.getFileSystem(conf).delete(dir, true);
1333     }
1334 
1335   }
1336 
  /**
   * Runs an incremental-load job whose configuration points at cluster A while
   * writing HFiles for a table on cluster B, and verifies that the remote-cluster
   * ZooKeeper settings (quorum/port/znode) are propagated into the job config and
   * into every Connection the job creates.
   */
  @Test
  public void testMRIncrementalLoadWithLocalityMultiCluster() throws Exception {
    // Start cluster A
    util = new HBaseTestingUtility();
    Configuration confA = util.getConfiguration();
    int hostCount = 3;
    int regionNum = 20;
    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
      hostnames[i] = "datanode_" + i;
    }
    util.setJobWithoutMRCluster();
    util.startMiniCluster(1, hostCount, hostnames);

    // Start cluster B (same datanode hostnames so locality can be computed)
    HBaseTestingUtility utilB = new HBaseTestingUtility();
    Configuration confB = utilB.getConfiguration();
    utilB.startMiniCluster(1, hostCount, hostnames);

    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");

    byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
    TableName tableName = TableName.valueOf("table");
    // Create table in cluster B
    try (Table table = utilB.createTable(tableName, FAMILIES, splitKeys);
      RegionLocator r = utilB.getConnection().getRegionLocator(tableName)) {
      // Generate the bulk load files
      // Job has zookeeper configuration for cluster A
      // Assume reading from cluster A by TableInputFormat and creating hfiles to cluster B
      Job job = new Job(confA, "testLocalMRIncrementalLoad");
      Configuration jobConf = job.getConfiguration();
      // capture the configurations of every Connection created during the job
      final UUID key = ConfigurationCaptorConnection.configureConnectionImpl(jobConf);
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
      setupRandomGeneratorMapper(job, false);
      HFileOutputFormat2.configureIncrementalLoad(job, table, r);

      // cluster B's ZK coordinates must have been written into the job config
      assertEquals(confB.get(HConstants.ZOOKEEPER_QUORUM),
        jobConf.get(HFileOutputFormat2.REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY));
      assertEquals(confB.get(HConstants.ZOOKEEPER_CLIENT_PORT),
        jobConf.get(HFileOutputFormat2.REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY));
      assertEquals(confB.get(HConstants.ZOOKEEPER_ZNODE_PARENT),
        jobConf.get(HFileOutputFormat2.REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY));

      FileOutputFormat.setOutputPath(job, testDir);

      assertFalse(util.getTestFileSystem().exists(testDir));

      assertTrue(job.waitForCompletion(true));

      final List<Configuration> configs =
        ConfigurationCaptorConnection.getCapturedConfigarutions(key);

      // every connection the job opened must have targeted cluster B's ZK
      assertFalse(configs.isEmpty());
      for (Configuration config : configs) {
        assertEquals(confB.get(HConstants.ZOOKEEPER_QUORUM),
          config.get(HConstants.ZOOKEEPER_QUORUM));
        assertEquals(confB.get(HConstants.ZOOKEEPER_CLIENT_PORT),
          config.get(HConstants.ZOOKEEPER_CLIENT_PORT));
        assertEquals(confB.get(HConstants.ZOOKEEPER_ZNODE_PARENT),
          config.get(HConstants.ZOOKEEPER_ZNODE_PARENT));
      }
    } finally {
      utilB.deleteTable(tableName);
      testDir.getFileSystem(confA).delete(testDir, true);
      util.shutdownMiniCluster();
      utilB.shutdownMiniCluster();
    }
  }
1405 
1406   private static class ConfigurationCaptorConnection implements Connection {
1407     private static final String UUID_KEY = "ConfigurationCaptorConnection.uuid";
1408 
1409     private static final Map<UUID, List<Configuration>> confs = new ConcurrentHashMap<>();
1410 
1411     private final Connection delegate;
1412 
1413     public ConfigurationCaptorConnection(
1414       Configuration conf, boolean managed, ExecutorService es, User user)
1415       throws IOException {
1416       Configuration confForDelegate = new Configuration(conf);
1417       confForDelegate.unset(HConnection.HBASE_CLIENT_CONNECTION_IMPL);
1418       delegate = createConnection(confForDelegate, es, user);
1419 
1420       final String uuid = conf.get(UUID_KEY);
1421       if (uuid != null) {
1422         final UUID key = UUID.fromString(uuid);
1423         List<Configuration> configurations = confs.get(key);
1424         if (configurations == null) {
1425           configurations = new CopyOnWriteArrayList<>();
1426           confs.put(key, configurations);
1427         }
1428         configurations.add(conf);
1429       }
1430     }
1431 
1432     static UUID configureConnectionImpl(Configuration conf) {
1433       conf.setClass(HConnection.HBASE_CLIENT_CONNECTION_IMPL,
1434         ConfigurationCaptorConnection.class, Connection.class);
1435 
1436       final UUID uuid = UUID.randomUUID();
1437       conf.set(UUID_KEY, uuid.toString());
1438       return uuid;
1439     }
1440 
1441     static List<Configuration> getCapturedConfigarutions(UUID key) {
1442       return confs.get(key);
1443     }
1444 
1445     @Override
1446     public Configuration getConfiguration() {
1447       return delegate.getConfiguration();
1448     }
1449 
1450     @Override
1451     public Table getTable(TableName tableName) throws IOException {
1452       return delegate.getTable(tableName);
1453     }
1454 
1455     @Override
1456     public Table getTable(TableName tableName, ExecutorService pool) throws IOException {
1457       return delegate.getTable(tableName, pool);
1458     }
1459 
1460     @Override
1461     public BufferedMutator getBufferedMutator(TableName tableName) throws IOException {
1462       return delegate.getBufferedMutator(tableName);
1463     }
1464 
1465     @Override
1466     public BufferedMutator getBufferedMutator(BufferedMutatorParams params)
1467       throws IOException {
1468       return delegate.getBufferedMutator(params);
1469     }
1470 
1471     @Override
1472     public RegionLocator getRegionLocator(TableName tableName) throws IOException {
1473       return delegate.getRegionLocator(tableName);
1474     }
1475 
1476     @Override
1477     public Admin getAdmin() throws IOException {
1478       return delegate.getAdmin();
1479     }
1480 
1481     @Override
1482     public String getClusterId() throws IOException {
1483       return delegate.getClusterId();
1484     }
1485 
1486     @Override
1487     public void close() throws IOException {
1488       delegate.close();
1489     }
1490 
1491     @Override
1492     public boolean isClosed() {
1493       return delegate.isClosed();
1494     }
1495 
1496     @Override
1497     public void abort(String why, Throwable e) {
1498       delegate.abort(why, e);
1499     }
1500 
1501     @Override
1502     public boolean isAborted() {
1503       return delegate.isAborted();
1504     }
1505   }
1506 }