View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.mapred;
20  
21  import static org.mockito.Mockito.mock;
22  
23  import org.apache.hadoop.fs.Path;
24  import org.apache.hadoop.hbase.HBaseTestingUtility;
25  import org.apache.hadoop.hbase.HConstants;
26  import org.apache.hadoop.hbase.testclassification.LargeTests;
27  import org.apache.hadoop.hbase.TableName;
28  import org.apache.hadoop.hbase.client.Result;
29  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
30  import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatTestBase;
31  import org.apache.hadoop.hbase.util.Bytes;
32  import org.apache.hadoop.hbase.util.RegionSplitter;
33  import org.apache.hadoop.io.NullWritable;
34  import org.apache.hadoop.mapred.InputSplit;
35  import org.apache.hadoop.mapred.JobClient;
36  import org.apache.hadoop.mapred.JobConf;
37  import org.apache.hadoop.mapred.MapReduceBase;
38  import org.apache.hadoop.mapred.OutputCollector;
39  import org.apache.hadoop.mapred.RecordReader;
40  import org.apache.hadoop.mapred.Reducer;
41  import org.apache.hadoop.mapred.Reporter;
42  import org.apache.hadoop.mapred.RunningJob;
43  import org.apache.hadoop.mapred.lib.NullOutputFormat;
44  import org.junit.Assert;
45  import org.junit.Test;
46  import org.junit.experimental.categories.Category;
47  
48  import java.io.IOException;
49  import java.util.Iterator;
50  
/**
 * Tests for {@code TableSnapshotInputFormat} under the old ("mapred") MapReduce API.
 * Extends the shared {@code TableSnapshotInputFormatTestBase}, which supplies the
 * mini-cluster lifecycle helpers ({@code setupCluster}/{@code tearDownCluster}),
 * {@code createTableAndSnapshot}, {@code verifyRowFromMap}, and the {@code FAMILIES}
 * column families used below.
 */
@Category(LargeTests.class)
public class TestTableSnapshotInputFormat extends TableSnapshotInputFormatTestBase {

  // Inclusive start row of the generated test data.
  private static final byte[] aaa = Bytes.toBytes("aaa");
  // Exclusive end row: one past "zzz" in byte order. 'z' + 1 => '{', so "zz{" > "zzz...".
  private static final byte[] after_zzz = Bytes.toBytes("zz{"); // 'z' + 1 => '{'
  // Space-separated family list in the format expected by
  // TableMapReduceUtil.initTableSnapshotMapJob's columns argument.
  private static final String COLUMNS =
    Bytes.toString(FAMILIES[0]) + " " + Bytes.toString(FAMILIES[1]);

  /** @return first row of the test data range ("aaa"). */
  @Override
  protected byte[] getStartRow() {
    return aaa;
  }

  /** @return exclusive upper bound of the test data range ("zz{"). */
  @Override
  protected byte[] getEndRow() {
    return after_zzz;
  }

  /**
   * Identity-style mapper: validates each row read from the snapshot and emits its
   * key with a NullWritable value, so the reducer can track row coverage.
   */
  static class TestTableSnapshotMapper extends MapReduceBase
      implements  TableMap<ImmutableBytesWritable, NullWritable> {
    @Override
    public void map(ImmutableBytesWritable key, Result value,
        OutputCollector<ImmutableBytesWritable, NullWritable> collector, Reporter reporter)
        throws IOException {
      // Sanity-check the Result contents against the key before emitting.
      verifyRowFromMap(key, value);
      collector.collect(key, NullWritable.get());
    }
  }

  /**
   * Reducer that records every row key it receives and, when the task closes,
   * verifies that the full [aaa, zz{) range was seen exactly as expected.
   * Emits no output (paired with NullOutputFormat in doTestWithMapReduce).
   */
  public static class TestTableSnapshotReducer extends MapReduceBase
      implements Reducer<ImmutableBytesWritable, NullWritable, NullWritable, NullWritable> {
    HBaseTestingUtility.SeenRowTracker rowTracker =
      new HBaseTestingUtility.SeenRowTracker(aaa, after_zzz);

    @Override
    public void reduce(ImmutableBytesWritable key, Iterator<NullWritable> values,
        OutputCollector<NullWritable, NullWritable> collector, Reporter reporter)
        throws IOException {
      // Only track coverage; nothing is written to the collector.
      rowTracker.addRow(key.get());
    }

    @Override
    public void close() {
      // Throws if any expected row was missed or seen out of range.
      rowTracker.validate();
    }
  }

  /**
   * Verifies that initTableSnapshotMapJob configures the job to use the default
   * on-heap LruBlockCache and does not enable BucketCache (snapshot scans should
   * not pay for a bucket cache in the MR task JVMs).
   */
  @Test
  public void testInitTableSnapshotMapperJobConfig() throws Exception {
    setupCluster();
    TableName tableName = TableName.valueOf("testInitTableSnapshotMapperJobConfig");
    String snapshotName = "foo";

    try {
      createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
      JobConf job = new JobConf(UTIL.getConfiguration());
      Path tmpTableDir = UTIL.getRandomDir();

      TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
        COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
        NullWritable.class, job, false, tmpTableDir);

      // TODO: would be better to examine directly the cache instance that results from this
      // config. Currently this is not possible because BlockCache initialization is static.
      Assert.assertEquals(
        "Snapshot job should be configured for default LruBlockCache.",
        HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
        job.getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
      Assert.assertEquals(
        "Snapshot job should not use BucketCache.",
        0, job.getFloat("hbase.bucketcache.size", -1), 0.01);
    } finally {
      UTIL.getHBaseAdmin().deleteSnapshot(snapshotName);
      UTIL.deleteTable(tableName);
      tearDownCluster();
    }
  }

  // TODO: mapred does not support limiting input range by startrow, endrow.
  // Thus the following tests must override parameterverification.

  @Test
  @Override
  public void testWithMockedMapReduceMultiRegion() throws Exception {
    // 10 regions, 1 split per region => 10 expected splits.
    testWithMockedMapReduce(UTIL, "testWithMockedMapReduceMultiRegion", 10, 1, 10);
  }

  @Test
  @Override
  public void testWithMapReduceMultiRegion() throws Exception {
    testWithMapReduce(UTIL, "testWithMapReduceMultiRegion", 10, 1, 10, false);
  }

  @Test
  @Override
  // run the MR job while HBase is offline
  public void testWithMapReduceAndOfflineHBaseMultiRegion() throws Exception {
    // Final 'true' shuts down the mini HBase cluster before running the job,
    // exercising snapshot reads without a live cluster.
    testWithMapReduce(UTIL, "testWithMapReduceAndOfflineHBaseMultiRegion", 10, 1, 10, true);
  }

  /**
   * Hook for the base class's back-reference-link test: performs only the job
   * initialization step (which restores the snapshot into tmpTableDir).
   */
  @Override
  public void testRestoreSnapshotDoesNotCreateBackRefLinksInit(TableName tableName,
      String snapshotName, Path tmpTableDir) throws Exception {
    JobConf job = new JobConf(UTIL.getConfiguration());
    TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
      COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
      NullWritable.class, job, false, tmpTableDir);
  }

  /**
   * Configures a snapshot job and drives the input format directly (no real MR
   * job), verifying split count and that every row in [startRow, endRow) can be
   * read back through the record readers.
   *
   * NOTE(review): the table name is fixed to "testWithMockedMapReduce" regardless
   * of snapshotName; callers must not run two of these concurrently in one cluster.
   */
  @Override
  protected void testWithMockedMapReduce(HBaseTestingUtility util, String snapshotName,
      int numRegions, int numSplitsPerRegion, int expectedNumSplits) throws Exception {
    setupCluster();
    TableName tableName = TableName.valueOf("testWithMockedMapReduce");
    try {
      createTableAndSnapshot(
        util, tableName, snapshotName, getStartRow(), getEndRow(), numRegions);

      JobConf job = new JobConf(util.getConfiguration());
      Path tmpTableDir = util.getRandomDir();

      if (numSplitsPerRegion > 1) {
        // Overload taking a split algorithm fans each region out into N splits.
        TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
                COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
                NullWritable.class, job, false, tmpTableDir, new RegionSplitter.UniformSplit(),
                numSplitsPerRegion);
      } else {
        TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
                COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
                NullWritable.class, job, false, tmpTableDir);
      }

      // mapred doesn't support start and end keys? o.O
      verifyWithMockedMapReduce(job, numRegions, expectedNumSplits, getStartRow(), getEndRow());

    } finally {
      util.getHBaseAdmin().deleteSnapshot(snapshotName);
      util.deleteTable(tableName);
      tearDownCluster();
    }
  }

  /**
   * Pulls splits and record readers straight from TableSnapshotInputFormat
   * (collector/reporter are Mockito mocks, never invoked by the reader itself)
   * and asserts that the union of all splits covers exactly [startRow, stopRow).
   */
  private void verifyWithMockedMapReduce(JobConf job, int numRegions, int expectedNumSplits,
      byte[] startRow, byte[] stopRow) throws IOException, InterruptedException {
    TableSnapshotInputFormat tsif = new TableSnapshotInputFormat();
    InputSplit[] splits = tsif.getSplits(job, 0);

    Assert.assertEquals(expectedNumSplits, splits.length);

    HBaseTestingUtility.SeenRowTracker rowTracker =
      new HBaseTestingUtility.SeenRowTracker(startRow, stopRow);

    for (int i = 0; i < splits.length; i++) {
      // validate input split
      InputSplit split = splits[i];
      Assert.assertTrue(split instanceof TableSnapshotInputFormat.TableSnapshotRegionSplit);

      // validate record reader
      OutputCollector collector = mock(OutputCollector.class);
      Reporter reporter = mock(Reporter.class);
      RecordReader<ImmutableBytesWritable, Result> rr = tsif.getRecordReader(split, job, reporter);

      // validate we can read all the data back
      ImmutableBytesWritable key = rr.createKey();
      Result value = rr.createValue();
      while (rr.next(key, value)) {
        verifyRowFromMap(key, value);
        // copyBytes(): 'key' is reused by next(), so a defensive copy is required.
        rowTracker.addRow(key.copyBytes());
      }

      rr.close();
    }

    // validate all rows are seen
    rowTracker.validate();
  }

  /** Adapter wiring the base class's generic test into doTestWithMapReduce. */
  @Override
  protected void testWithMapReduceImpl(HBaseTestingUtility util, TableName tableName,
      String snapshotName, Path tableDir, int numRegions, int numSplitsPerRegion, int expectedNumSplits,
      boolean shutdownCluster) throws Exception {
    doTestWithMapReduce(util, tableName, snapshotName, getStartRow(), getEndRow(), tableDir,
      numRegions, numSplitsPerRegion, expectedNumSplits, shutdownCluster);
  }

  // this is also called by the IntegrationTestTableSnapshotInputFormat
  /**
   * End-to-end run: create table + snapshot, optionally shut down the HBase
   * cluster, then execute a real (local) MR job over the snapshot. Row coverage
   * is asserted by TestTableSnapshotReducer.close(); the job itself must succeed.
   * When shutdownCluster is true, snapshot/table cleanup is skipped because the
   * cluster is no longer running.
   */
  public static void doTestWithMapReduce(HBaseTestingUtility util, TableName tableName,
      String snapshotName, byte[] startRow, byte[] endRow, Path tableDir, int numRegions,
      int numSplitsPerRegion, int expectedNumSplits, boolean shutdownCluster) throws Exception {

    //create the table and snapshot
    createTableAndSnapshot(util, tableName, snapshotName, startRow, endRow, numRegions);

    if (shutdownCluster) {
      util.shutdownMiniHBaseCluster();
    }

    try {
      // create the job
      JobConf jobConf = new JobConf(util.getConfiguration());

      jobConf.setJarByClass(util.getClass());
      // Ship this test class (and its nested mapper/reducer) to the job's classpath.
      org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(jobConf,
        TestTableSnapshotInputFormat.class);

      if(numSplitsPerRegion > 1) {
        TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
                TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
                NullWritable.class, jobConf, true, tableDir, new RegionSplitter.UniformSplit(),
                numSplitsPerRegion);
      } else {
        TableMapReduceUtil.initTableSnapshotMapJob(snapshotName, COLUMNS,
                TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
                NullWritable.class, jobConf, true, tableDir);
      }

      // Single reducer so one SeenRowTracker observes every emitted row.
      jobConf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
      jobConf.setNumReduceTasks(1);
      jobConf.setOutputFormat(NullOutputFormat.class);

      // Blocks until the job completes.
      RunningJob job = JobClient.runJob(jobConf);
      Assert.assertTrue(job.isSuccessful());
    } finally {
      if (!shutdownCluster) {
        util.getHBaseAdmin().deleteSnapshot(snapshotName);
        util.deleteTable(tableName);
      }
    }
  }
}