View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertNotEquals;
23  import static org.junit.Assert.assertTrue;
24  
25  import java.io.IOException;
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.Locale;
29  import java.util.Map;
30  import java.util.NavigableMap;
31  
32  import org.apache.commons.logging.Log;
33  import org.apache.commons.logging.LogFactory;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.Path;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.TableName;
38  import org.apache.hadoop.hbase.client.HTable;
39  import org.apache.hadoop.hbase.client.Result;
40  import org.apache.hadoop.hbase.client.Scan;
41  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
42  import org.apache.hadoop.hbase.util.Bytes;
43  import org.apache.hadoop.io.NullWritable;
44  import org.apache.hadoop.mapreduce.InputSplit;
45  import org.apache.hadoop.mapreduce.Job;
46  import org.apache.hadoop.mapreduce.Reducer;
47  import org.apache.hadoop.mapreduce.TaskCounter;
48  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
49  import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
50  import org.junit.AfterClass;
51  import org.junit.Assert;
52  import org.junit.BeforeClass;
53  
54  
55  /**
56   * <p>
57   * Tests various scan start and stop row scenarios. This is set in a scan and
58   * tested in a MapReduce job to see if that is handed over and done properly
59   * too.
60   * </p>
61   * <p>
62   * This test is broken into two parts in order to side-step the test timeout
63   * period of 900, as documented in HBASE-8326.
64   * </p>
65   */
66  public abstract class TestTableInputFormatScanBase {
67  
68    private static final Log LOG = LogFactory.getLog(TestTableInputFormatScanBase.class);
69    static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
70  
71    static final byte[] TABLE_NAME = Bytes.toBytes("scantest");
72    static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
73    static final byte[][] INPUT_FAMILYS = {Bytes.toBytes("content1"), Bytes.toBytes("content2")};
74    static final String KEY_STARTROW = "startRow";
75    static final String KEY_LASTROW = "stpRow";
76  
77    private static HTable table = null;
78  
79    @BeforeClass
80    public static void setUpBeforeClass() throws Exception {
81      // test intermittently fails under hadoop2 (2.0.2-alpha) if shortcircuit-read (scr) is on.
82      // this turns it off for this test.  TODO: Figure out why scr breaks recovery. 
83      System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
84  
85      // switch TIF to log at DEBUG level
86      TEST_UTIL.enableDebug(TableInputFormat.class);
87      TEST_UTIL.enableDebug(TableInputFormatBase.class);
88      TEST_UTIL.setJobWithoutMRCluster();
89      // start mini hbase cluster
90      TEST_UTIL.startMiniCluster(3);
91      // create and fill table
92      table = TEST_UTIL.createMultiRegionTable(TableName.valueOf(TABLE_NAME), INPUT_FAMILY);
93      TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
94    }
95  
96    @AfterClass
97    public static void tearDownAfterClass() throws Exception {
98      TEST_UTIL.shutdownMiniCluster();
99    }
100 
101   /**
102    * Pass the key and value to reduce.
103    */
104   public static class ScanMapper
105   extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
106 
107     /**
108      * Pass the key and value to reduce.
109      *
110      * @param key  The key, here "aaa", "aab" etc.
111      * @param value  The value is the same as the key.
112      * @param context  The task context.
113      * @throws IOException When reading the rows fails.
114      */
115     @Override
116     public void map(ImmutableBytesWritable key, Result value,
117       Context context)
118     throws IOException, InterruptedException {
119       if (value.size() != 1) {
120         throw new IOException("There should only be one input column");
121       }
122       Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
123         cf = value.getMap();
124       if(!cf.containsKey(INPUT_FAMILY)) {
125         throw new IOException("Wrong input columns. Missing: '" +
126           Bytes.toString(INPUT_FAMILY) + "'.");
127       }
128       String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
129       LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
130         ", value -> " + val);
131       context.write(key, key);
132     }
133 
134   }
135 
136   /**
137    * Checks the last and first key seen against the scanner boundaries.
138    */
139   public static class ScanReducer
140   extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
141                   NullWritable, NullWritable> {
142 
143     private String first = null;
144     private String last = null;
145 
146     protected void reduce(ImmutableBytesWritable key,
147         Iterable<ImmutableBytesWritable> values, Context context)
148     throws IOException ,InterruptedException {
149       int count = 0;
150       for (ImmutableBytesWritable value : values) {
151         String val = Bytes.toStringBinary(value.get());
152         LOG.info("reduce: key[" + count + "] -> " +
153           Bytes.toStringBinary(key.get()) + ", value -> " + val);
154         if (first == null) first = val;
155         last = val;
156         count++;
157       }
158     }
159 
160     protected void cleanup(Context context)
161     throws IOException, InterruptedException {
162       Configuration c = context.getConfiguration();
163       String startRow = c.get(KEY_STARTROW);
164       String lastRow = c.get(KEY_LASTROW);
165       LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
166       LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
167       if (startRow != null && startRow.length() > 0) {
168         assertEquals(startRow, first);
169       }
170       if (lastRow != null && lastRow.length() > 0) {
171         assertEquals(lastRow, last);
172       }
173     }
174 
175   }
176 
177   /**
178    * Tests an MR Scan initialized from properties set in the Configuration.
179    * 
180    * @throws IOException
181    * @throws ClassNotFoundException
182    * @throws InterruptedException
183    */
184   protected void testScanFromConfiguration(String start, String stop, String last)
185   throws IOException, InterruptedException, ClassNotFoundException {
186     String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") +
187       "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
188     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
189     c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
190     c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
191     c.set(KEY_STARTROW, start != null ? start : "");
192     c.set(KEY_LASTROW, last != null ? last : "");
193 
194     if (start != null) {
195       c.set(TableInputFormat.SCAN_ROW_START, start);
196     }
197 
198     if (stop != null) {
199       c.set(TableInputFormat.SCAN_ROW_STOP, stop);
200     }
201 
202     Job job = new Job(c, jobName);
203     job.setMapperClass(ScanMapper.class);
204     job.setReducerClass(ScanReducer.class);
205     job.setMapOutputKeyClass(ImmutableBytesWritable.class);
206     job.setMapOutputValueClass(ImmutableBytesWritable.class);
207     job.setInputFormatClass(TableInputFormat.class);
208     job.setNumReduceTasks(1);
209     FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
210     TableMapReduceUtil.addDependencyJars(job);
211     assertTrue(job.waitForCompletion(true));
212   }
213 
214   /**
215    * Tests a MR scan using specific start and stop rows.
216    *
217    * @throws IOException
218    * @throws ClassNotFoundException
219    * @throws InterruptedException
220    */
221   protected void testScan(String start, String stop, String last)
222   throws IOException, InterruptedException, ClassNotFoundException {
223     String jobName = "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") +
224       "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
225     LOG.info("Before map/reduce startup - job " + jobName);
226     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
227     Scan scan = new Scan();
228     scan.addFamily(INPUT_FAMILY);
229     if (start != null) {
230       scan.setStartRow(Bytes.toBytes(start));
231     }
232     c.set(KEY_STARTROW, start != null ? start : "");
233     if (stop != null) {
234       scan.setStopRow(Bytes.toBytes(stop));
235     }
236     c.set(KEY_LASTROW, last != null ? last : "");
237     LOG.info("scan before: " + scan);
238     Job job = new Job(c, jobName);
239     TableMapReduceUtil.initTableMapperJob(
240       Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
241       ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
242     job.setReducerClass(ScanReducer.class);
243     job.setNumReduceTasks(1); // one to get final "first" and "last" key
244     FileOutputFormat.setOutputPath(job,
245         new Path(TEST_UTIL.getDataTestDir(), job.getJobName()));
246     LOG.info("Started " + job.getJobName());
247     assertTrue(job.waitForCompletion(true));
248     LOG.info("After map/reduce completion - job " + jobName);
249   }
250 
251 
252   /**
253    * Tests Number of inputSplits for MR job when specify number of mappers for TableInputFormatXXX
254    * This test does not run MR job
255    *
256    * @throws IOException
257    * @throws ClassNotFoundException
258    * @throws InterruptedException
259    */
260   public void testNumOfSplits(int splitsPerRegion, int expectedNumOfSplits) throws IOException,
261       InterruptedException,
262       ClassNotFoundException {
263     String jobName = "TestJobForNumOfSplits";
264     LOG.info("Before map/reduce startup - job " + jobName);
265     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
266     Scan scan = new Scan();
267     scan.addFamily(INPUT_FAMILY);
268     c.setInt("hbase.mapreduce.input.mappers.per.region", splitsPerRegion);
269     c.set(KEY_STARTROW, "");
270     c.set(KEY_LASTROW, "");
271     Job job = new Job(c, jobName);
272     TableMapReduceUtil.initTableMapperJob(TableName.valueOf(TABLE_NAME).getNameAsString(), scan, ScanMapper.class,
273         ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
274     TableInputFormat tif = new TableInputFormat();
275     tif.setConf(job.getConfiguration());
276     List<InputSplit> splits = tif.getSplits(job);
277     for (InputSplit split : splits) {
278       TableSplit tableSplit = (TableSplit) split;
279       // In table input format, we do no store the scanner at the split level
280       // because we use the scan object from the map-reduce job conf itself.
281       Assert.assertTrue(tableSplit.getScanAsString().isEmpty());
282     }
283     Assert.assertEquals(expectedNumOfSplits, splits.size());
284   }
285 
286   /**
287    * Run MR job to check the number of mapper = expectedNumOfSplits
288    * @throws IOException
289    * @throws InterruptedException
290    * @throws ClassNotFoundException
291    */
292   public void testNumOfSplitsMR(int splitsPerRegion, int expectedNumOfSplits) throws IOException,
293       InterruptedException,
294       ClassNotFoundException {
295     String jobName = "TestJobForNumOfSplits-MR";
296     LOG.info("Before map/reduce startup - job " + jobName);
297     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
298     Scan scan = new Scan();
299     scan.addFamily(INPUT_FAMILY);
300     c.setInt("hbase.mapreduce.input.mappers.per.region", splitsPerRegion);
301     Job job = new Job(c, jobName);
302     TableMapReduceUtil.initTableMapperJob(Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
303         ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
304     job.setReducerClass(ScanReducer.class);
305     job.setNumReduceTasks(1);
306     job.setOutputFormatClass(NullOutputFormat.class);
307     assertTrue("job failed!", job.waitForCompletion(true));
308     // for some reason, hbase does not expose JobCounter.TOTAL_LAUNCHED_MAPS,
309     // we use TaskCounter.SHUFFLED_MAPS to get total launched maps
310     assertEquals("Saw the wrong count of mappers per region", expectedNumOfSplits,
311         job.getCounters().findCounter(TaskCounter.SHUFFLED_MAPS).getValue());
312   }
313 
314   /**
315    * Run MR job to test autobalance for setting number of mappers for TIF
316    * This does not run real MR job
317    */
318   public void testAutobalanceNumOfSplit() throws IOException {
319     // set up splits for testing
320     List<InputSplit> splits = new ArrayList<>(5);
321     int[] regionLen = {100, 200, 200, 400, 600};
322     for (int i = 0; i < 5; i++) {
323       InputSplit split = new TableSplit(TableName.valueOf(TABLE_NAME), new Scan(),
324           Bytes.toBytes(i), Bytes.toBytes(i + 1), "", "", regionLen[i] * 1048576);
325       splits.add(split);
326     }
327     TableInputFormat tif = new TableInputFormat();
328     List<InputSplit> res = tif.calculateAutoBalancedSplits(splits, 1073741824);
329 
330     assertEquals("Saw the wrong number of splits", 5, res.size());
331     TableSplit ts1 = (TableSplit) res.get(0);
332     assertEquals("The first split end key should be", 2, Bytes.toInt(ts1.getEndRow()));
333     TableSplit ts2 = (TableSplit) res.get(1);
334     assertEquals("The second split regionsize should be", 200 * 1048576, ts2.getLength());
335     TableSplit ts3 = (TableSplit) res.get(2);
336     assertEquals("The third split start key should be", 3, Bytes.toInt(ts3.getStartRow()));
337     TableSplit ts4 = (TableSplit) res.get(4);
338     assertNotEquals("The seventh split start key should not be", 4, Bytes.toInt(ts4.getStartRow()));
339   }
340 }
341