View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import static org.mockito.Mockito.mock;
22  import static org.mockito.Mockito.when;
23  
24  import java.io.IOException;
25  import java.util.Arrays;
26  import java.util.List;
27  
28  import org.apache.commons.logging.Log;
29  import org.apache.commons.logging.LogFactory;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.CategoryBasedTimeout;
34  import org.apache.hadoop.hbase.HBaseTestingUtility;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
37  import org.apache.hadoop.hbase.HRegionInfo;
38  import org.apache.hadoop.hbase.client.HBaseAdmin;
39  import org.apache.hadoop.hbase.client.Table;
40  import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
41  import org.apache.hadoop.hbase.testclassification.LargeTests;
42  import org.apache.hadoop.hbase.TableName;
43  import org.apache.hadoop.hbase.client.Result;
44  import org.apache.hadoop.hbase.client.Scan;
45  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
46  import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.TableSnapshotRegionSplit;
47  import org.apache.hadoop.hbase.util.Bytes;
48  import org.apache.hadoop.hbase.util.FSUtils;
49  import org.apache.hadoop.hbase.util.RegionSplitter;
50  import org.apache.hadoop.io.NullWritable;
51  import org.apache.hadoop.mapreduce.InputSplit;
52  import org.apache.hadoop.mapreduce.Job;
53  import org.apache.hadoop.mapreduce.RecordReader;
54  import org.apache.hadoop.mapreduce.Reducer;
55  import org.apache.hadoop.mapreduce.TaskAttemptContext;
56  import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
57  import org.junit.After;
58  import org.junit.Assert;
59  import org.junit.Rule;
60  import org.junit.Test;
61  import org.junit.experimental.categories.Category;
62  import org.junit.rules.TestRule;
63  
64  import com.google.common.collect.Lists;
65  
/**
 * Tests for {@link TableSnapshotInputFormat}: split/location computation and reading a
 * table snapshot back through both mocked MapReduce plumbing and real MapReduce jobs.
 * Extends {@link TableSnapshotInputFormatTestBase}, which drives several of the
 * overridden test hooks below.
 */
@Category(LargeTests.class)
public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {
  private static final Log LOG = LogFactory.getLog(TestTableSnapshotInputFormat.class);
  // Per-test timeout derived from the test category; also dumps stuck threads on timeout.
  @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
      withTimeout(this.getClass()).withLookingForStuckThread(true).build();

  // Row-key bounds used throughout: [bbb, yyy) is the range loaded into the test table
  // (see getStartRow()/getEndRow()); [bbc, yya) is a strictly narrower sub-range used to
  // exercise scans whose bounds fall inside region boundaries.
  private static final byte[] bbb = Bytes.toBytes("bbb");
  private static final byte[] yyy = Bytes.toBytes("yyy");
  private static final byte[] bbc = Bytes.toBytes("bbc");
  private static final byte[] yya = Bytes.toBytes("yya");

  /** First row key loaded into the test table. */
  @Override
  protected byte[] getStartRow() {
    return bbb;
  }

  /** Exclusive upper row-key bound of the loaded data. */
  @Override
  protected byte[] getEndRow() {
    return yyy;
  }

  // Intentionally empty: each test performs its own cleanup (snapshot/table delete and
  // cluster teardown) in a finally block.
  @After
  public void tearDown() throws Exception {
  }

  /**
   * Verifies {@link TableSnapshotInputFormatImpl#getBestLocations(Configuration,
   * HDFSBlocksDistribution)}: hosts are returned ordered by descending block weight, and
   * hosts whose weight is low relative to the top host are excluded (e.g. h2 at weight 7
   * vs h1 at 10 is dropped, but h2 at 9 is kept).
   */
  @Test
  public void testGetBestLocations() throws IOException {
    Configuration conf = UTIL.getConfiguration();

    // Empty distribution -> no locations.
    HDFSBlocksDistribution blockDistribution = new HDFSBlocksDistribution();
    Assert.assertEquals(Lists.newArrayList(),
      TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"),
      TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"),
      TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));

    // h2 (weight 1) is far below h1 (weight 2) -> still only h1.
    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"),
      TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));

    // Fresh distribution with a clear leader.
    blockDistribution = new HDFSBlocksDistribution();
    blockDistribution.addHostsAndBlockWeight(new String[] {"h1"}, 10);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 7);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 5);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 1);
    Assert.assertEquals(Lists.newArrayList("h1"),
      TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));

    // h2 now at 9 of h1's 10 -> included after h1.
    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 2);
    Assert.assertEquals(Lists.newArrayList("h1", "h2"),
      TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));

    // h2 overtakes h1 (12 vs 10) -> ordering flips.
    blockDistribution.addHostsAndBlockWeight(new String[] {"h2"}, 3);
    Assert.assertEquals(Lists.newArrayList("h2", "h1"),
      TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));

    blockDistribution.addHostsAndBlockWeight(new String[] {"h3"}, 6);
    blockDistribution.addHostsAndBlockWeight(new String[] {"h4"}, 9);

    // Weights: h2=12, h3=11, h4=10, h1=10 -> all retained, sorted by weight.
    Assert.assertEquals(Lists.newArrayList("h2", "h3", "h4", "h1"),
      TableSnapshotInputFormatImpl.getBestLocations(conf, blockDistribution));
  }

  /** Counters reported by the MR job; only used to flag row-validation failures. */
  public static enum TestTableSnapshotCounters {
    VALIDATION_ERROR
  }

  /**
   * Mapper that validates each row read from the snapshot (via
   * {@code verifyRowFromMap}) and emits only the row key.
   */
  public static class TestTableSnapshotMapper
    extends TableMapper<ImmutableBytesWritable, NullWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value,
        Context context) throws IOException, InterruptedException {
      // Validate a single row coming from the snapshot, and emit the row key
      verifyRowFromMap(key, value);
      context.write(key, NullWritable.get());
    }
  }

  /**
   * Reducer that records every row key it receives and, in cleanup, asserts that the
   * whole expected range [bbb, yyy) was seen exactly.
   */
  public static class TestTableSnapshotReducer
    extends Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
    HBaseTestingUtility.SeenRowTracker rowTracker =
        new HBaseTestingUtility.SeenRowTracker(bbb, yyy);
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<NullWritable> values,
       Context context) throws IOException, InterruptedException {
      rowTracker.addRow(key.get());
    }

    @Override
    protected void cleanup(Context context) throws IOException,
        InterruptedException {
      // Fails the task if any expected row was missed or an unexpected row was seen.
      rowTracker.validate();
    }
  }

  /**
   * Checks that initTableSnapshotMapperJob configures the job to use the default
   * LruBlockCache and explicitly disables BucketCache (bucket cache size forced to 0).
   */
  @Test
  public void testInitTableSnapshotMapperJobConfig() throws Exception {
    setupCluster();
    TableName tableName = TableName.valueOf("testInitTableSnapshotMapperJobConfig");
    String snapshotName = "foo";

    try {
      createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
      Job job = new Job(UTIL.getConfiguration());
      Path tmpTableDir = UTIL.getRandomDir();

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
        new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, false, tmpTableDir);

      // TODO: would be better to examine directly the cache instance that results from this
      // config. Currently this is not possible because BlockCache initialization is static.
      Assert.assertEquals(
        "Snapshot job should be configured for default LruBlockCache.",
        HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
        job.getConfiguration().getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
      Assert.assertEquals(
        "Snapshot job should not use BucketCache.",
        0, job.getConfiguration().getFloat("hbase.bucketcache.size", -1), 0.01);
    } finally {
      UTIL.getHBaseAdmin().deleteSnapshot(snapshotName);
      UTIL.deleteTable(tableName);
      tearDownCluster();
    }
  }

  /**
   * Hook called by the base class's back-ref-link test: initializes a snapshot mapper
   * job against {@code tmpTableDir} (the restore target) so the base class can inspect
   * the restore side effects.
   */
  @Override
  public void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
      String snapshotName, Path tmpTableDir) throws Exception {
    Job job = new Job(UTIL.getConfiguration());
    TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
      new Scan(), TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
      NullWritable.class, job, false, tmpTableDir);
  }

  /**
   * Base-class hook: creates a snapshot with {@code numRegions} regions, configures the
   * snapshot input format (optionally with multiple splits per region), and verifies the
   * computed splits and record readers without running a real MR job.
   */
  @Override
  public void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
      int numRegions, int numSplitsPerRegion, int expectedNumSplits) throws Exception {
    setupCluster();
    TableName tableName = TableName.valueOf("testWithMockedMapReduce");
    try {
      createTableAndSnapshot(
        util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);

      Job job = new Job(util.getConfiguration());
      Path tmpTableDir = util.getRandomDir();
      Scan scan = new Scan(getStartRow(), getEndRow()); // limit the scan

      if (numSplitsPerRegion > 1) {
        TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
                scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
                NullWritable.class, job, false, tmpTableDir, new RegionSplitter.UniformSplit(),
                numSplitsPerRegion);
      } else {
        TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
                scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
                NullWritable.class, job, false, tmpTableDir);
      }

      verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());

    } finally {
      util.getHBaseAdmin().deleteSnapshot(snapshotName);
      util.deleteTable(tableName);
      tearDownCluster();
    }
  }

  /**
   * Polls (up to 100 x 1s) until the table has at least {@code expectedRegionSize}
   * regions. NOTE(review): silently returns without failing if the region count is never
   * reached — callers rely on later assertions to catch that case.
   */
  public static void blockUntilSplitFinished(HBaseTestingUtility util, TableName tableName,
      int expectedRegionSize) throws Exception {
    for (int i = 0; i < 100; i++) {
      List<HRegionInfo> hRegionInfoList = util.getHBaseAdmin().getTableRegions(tableName);
      if (hRegionInfoList.size() >= expectedRegionSize) {
        break;
      }
      Thread.sleep(1000);
    }
  }

  /**
   * 10 regions x 5 splits per region with a scan bounded inside the table's key range
   * [bbc, yya): expects 40 splits (splits wholly outside the scan range are pruned).
   */
  @Test
  public void testWithMockedMapReduceWithSplitsPerRegion() throws Exception {
    setupCluster();
    String snapshotName = "testWithMockedMapReduceWithSplitsPerRegion";
    final TableName tableName = TableName.valueOf(snapshotName);
    try {
      createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 10);

      Configuration conf = UTIL.getConfiguration();
      Job job = new Job(conf);
      Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
      // test scan with startRow and stopRow
      Scan scan = new Scan(bbc, yya);

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
        TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class, job, false,
        tmpTableDir, new RegionSplitter.UniformSplit(), 5);

      verifyWithMockedMapReduce(job, 10, 40, bbc, yya);
    } finally {
      UTIL.getHBaseAdmin().deleteSnapshot(snapshotName);
      UTIL.deleteTable(tableName);
      tearDownCluster();
    }
  }

  /**
   * 10 regions x 5 splits per region with an unbounded scan: expects all 50 splits, and
   * the verification range is the whole key space (empty start/stop rows).
   */
  @Test
  public void testWithMockedMapReduceWithNoStartRowStopRow() throws Exception {
    setupCluster();
    String snapshotName = "testWithMockedMapReduceWithNoStartRowStopRow";
    final TableName tableName = TableName.valueOf(snapshotName);
    try {
      createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 10);

      Configuration conf = UTIL.getConfiguration();
      Job job = new Job(conf);
      Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
      // test scan without startRow and stopRow
      Scan scan2 = new Scan();

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan2,
        TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class, job, false,
        tmpTableDir, new RegionSplitter.UniformSplit(), 5);

      verifyWithMockedMapReduce(job, 10, 50, HConstants.EMPTY_START_ROW,
        HConstants.EMPTY_START_ROW);

    } finally {
      UTIL.getHBaseAdmin().deleteSnapshot(snapshotName);
      UTIL.deleteTable(tableName);
      tearDownCluster();
    }
  }

  /**
   * Regression test: snapshot a table, split it to 2 regions, load different values and
   * flush AFTER the snapshot, then verify a snapshot scan sees each original row exactly
   * once (no duplicates from the post-snapshot files or the split daughters).
   */
  @Test
  public void testNoDuplicateResultsWhenSplitting() throws Exception {
    setupCluster();
    TableName tableName = TableName.valueOf("testNoDuplicateResultsWhenSplitting");
    String snapshotName = "testSnapshotBug";
    try {
      if (UTIL.getHBaseAdmin().tableExists(tableName)) {
        UTIL.deleteTable(tableName);
      }

      UTIL.createTable(tableName, FAMILIES);
      HBaseAdmin admin = UTIL.getHBaseAdmin();

      // put some stuff in the table
      Table table = UTIL.getConnection().getTable(tableName);
      UTIL.loadTable(table, FAMILIES);

      // split to 2 regions
      admin.split(tableName, Bytes.toBytes("eee"));
      blockUntilSplitFinished(UTIL, tableName, 2);

      Path rootDir = FSUtils.getRootDir(UTIL.getConfiguration());
      FileSystem fs = rootDir.getFileSystem(UTIL.getConfiguration());

      SnapshotTestingUtils.createSnapshotAndValidate(admin, tableName, Arrays.asList(FAMILIES),
        null, snapshotName, rootDir, fs, true);

      // load different values
      byte[] value = Bytes.toBytes("after_snapshot_value");
      UTIL.loadTable(table, FAMILIES, value);

      // cause flush to create new files in the region
      admin.flush(tableName);
      table.close();

      Job job = new Job(UTIL.getConfiguration());
      Path tmpTableDir = UTIL.getDataTestDirOnTestFS(snapshotName);
      // limit the scan
      Scan scan = new Scan().withStartRow(getStartRow()).withStopRow(getEndRow());

      TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName, scan,
        TestTableSnapshotMapper.class, ImmutableBytesWritable.class, NullWritable.class, job, false,
        tmpTableDir);

      verifyWithMockedMapReduce(job, 2, 2, getStartRow(), getEndRow());
    } finally {
      UTIL.getHBaseAdmin().deleteSnapshot(snapshotName);
      UTIL.deleteTable(tableName);
      tearDownCluster();
    }
  }

  /**
   * Drives the input format directly (no real MR job): asserts the split count, that each
   * split's scan bounds lie within [startRow, stopRow], then runs every record reader
   * (with a mocked TaskAttemptContext) and asserts all expected rows are read exactly.
   */
  private void verifyWithMockedMapReduce(Job job, int numRegions, int expectedNumSplits,
      byte[] startRow, byte[] stopRow)
      throws IOException, InterruptedException {
    TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
    List<InputSplit> splits = tsif.getSplits(job);

    Assert.assertEquals(expectedNumSplits, splits.size());

    // An empty stopRow means "to the end of the table"; use a sentinel past any real key.
    HBaseTestingUtility.SeenRowTracker rowTracker = new HBaseTestingUtility.SeenRowTracker(startRow,
        stopRow.length > 0 ? stopRow : Bytes.toBytes("\uffff"));

    for (int i = 0; i < splits.size(); i++) {
      // validate input split
      InputSplit split = splits.get(i);
      Assert.assertTrue(split instanceof TableSnapshotRegionSplit);
      TableSnapshotRegionSplit snapshotRegionSplit = (TableSnapshotRegionSplit) split;
      Scan scan =
          TableMapReduceUtil.convertStringToScan(snapshotRegionSplit.getDelegate().getScan());
      if (startRow.length > 0) {
        Assert.assertTrue(
          Bytes.toStringBinary(startRow) + " should <= " + Bytes.toStringBinary(scan.getStartRow()),
          Bytes.compareTo(startRow, scan.getStartRow()) <= 0);
      }
      if (stopRow.length > 0) {
        Assert.assertTrue(
          Bytes.toStringBinary(stopRow) + " should >= " + Bytes.toStringBinary(scan.getStopRow()),
          Bytes.compareTo(stopRow, scan.getStopRow()) >= 0);
      }
      Assert.assertTrue("startRow should < stopRow",
        Bytes.compareTo(scan.getStartRow(), scan.getStopRow()) < 0);

      // validate record reader
      TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
      when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
      RecordReader<ImmutableBytesWritable, Result> rr =
          tsif.createRecordReader(split, taskAttemptContext);
      rr.initialize(split, taskAttemptContext);

      // validate we can read all the data back
      while (rr.nextKeyValue()) {
        byte[] row = rr.getCurrentKey().get();
        verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
        rowTracker.addRow(row);
      }

      rr.close();
    }

    // validate all rows are seen
    rowTracker.validate();
  }

  /** Base-class hook: delegates to {@link #doTestWithMapReduce} with this test's key range. */
  @Override
  protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
      String snapshotName, Path tableDir, int numRegions, int numSplitsPerRegion, int expectedNumSplits,
      boolean shutdownCluster) throws Exception {
    doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
      numRegions, numSplitsPerRegion, expectedNumSplits, shutdownCluster);
  }

  /**
   * End-to-end check with a real (local) MapReduce job: create table + snapshot,
   * optionally shut down the HBase cluster (snapshot reads must not need it), then run a
   * map/reduce over the snapshot and assert the job succeeds. Cleanup is skipped when the
   * cluster was shut down, since the admin is no longer reachable.
   */
  // this is also called by the IntegrationTestTableSnapshotInputFormat
  public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
      String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
      int numSplitsPerRegion, int expectedNumSplits, boolean shutdownCluster) throws Exception {

    LOG.info("testing with MapReduce");

    LOG.info("create the table and snapshot");
    createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);

    if (shutdownCluster) {
      LOG.info("shutting down hbase cluster.");
      util.shutdownMiniHBaseCluster();
    }

    try {
      // create the job
      Job job = new Job(util.getConfiguration());
      Scan scan = new Scan(startRow, endRow); // limit the scan

      job.setJarByClass(util.getClass());
      TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
        TestTableSnapshotInputFormat.class);

      if (numSplitsPerRegion > 1) {
        TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
                scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
                NullWritable.class, job, true, tableDir, new RegionSplitter.UniformSplit(),
                numSplitsPerRegion);
      } else {
        TableMapReduceUtil.initTableSnapshotMapperJob(snapshotName,
                scan, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
                NullWritable.class, job, true, tableDir);
      }

      job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
      job.setNumReduceTasks(1);
      job.setOutputFormatClass(NullOutputFormat.class);

      Assert.assertTrue(job.waitForCompletion(true));
    } finally {
      if (!shutdownCluster) {
        util.getHBaseAdmin().deleteSnapshot(snapshotName);
        util.deleteTable(tableName);
      }
    }
  }
}