View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import com.google.common.collect.Lists;
22  import org.apache.commons.logging.Log;
23  import org.apache.commons.logging.LogFactory;
24  import org.apache.hadoop.conf.Configuration;
25  import org.apache.hadoop.fs.FileUtil;
26  import org.apache.hadoop.fs.Path;
27  import org.apache.hadoop.hbase.CategoryBasedTimeout;
28  import org.apache.hadoop.hbase.HBaseTestingUtility;
29  import org.apache.hadoop.hbase.TableName;
30  import org.apache.hadoop.hbase.client.HTable;
31  import org.apache.hadoop.hbase.client.Result;
32  import org.apache.hadoop.hbase.client.Scan;
33  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
34  import org.apache.hadoop.hbase.util.Bytes;
35  import org.apache.hadoop.io.NullWritable;
36  import org.apache.hadoop.mapreduce.Job;
37  import org.apache.hadoop.mapreduce.Reducer;
38  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
39  import org.junit.After;
40  import org.junit.AfterClass;
41  import org.junit.BeforeClass;
42  import org.junit.Rule;
43  import org.junit.Test;
44  import org.junit.rules.TestRule;
45  
46  import java.io.File;
47  import java.io.IOException;
48  import java.util.ArrayList;
49  import java.util.List;
50  import java.util.Locale;
51  import java.util.Map;
52  import java.util.NavigableMap;
53  
54  import static org.junit.Assert.assertEquals;
55  import static org.junit.Assert.assertTrue;
56  
57  /**
58   * Base set of tests and setup for input formats touching multiple tables.
59   */
60  public abstract class MultiTableInputFormatTestBase {
61    static final Log LOG = LogFactory.getLog(MultiTableInputFormatTestBase.class);
62    @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
63        withTimeout(this.getClass()).withLookingForStuckThread(true).build();
64    public static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
65    static final String TABLE_NAME = "scantest";
66    static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
67    static final String KEY_STARTROW = "startRow";
68    static final String KEY_LASTROW = "stpRow";
69  
70    static List<String> TABLES = Lists.newArrayList();
71  
72    static {
73      for (int i = 0; i < 3; i++) {
74        TABLES.add(TABLE_NAME + String.valueOf(i));
75      }
76    }
77  
78    @BeforeClass
79    public static void setUpBeforeClass() throws Exception {
80      // switch TIF to log at DEBUG level
81      TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
82      TEST_UTIL.setJobWithoutMRCluster();
83      // start mini hbase cluster
84      TEST_UTIL.startMiniCluster(3);
85      // create and fill table
86      for (String tableName : TABLES) {
87        HTable table = null;
88        try {
89          table = TEST_UTIL.createMultiRegionTable(TableName.valueOf(tableName), INPUT_FAMILY, 4);
90          TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
91        } finally {
92            if (table != null) {
93              table.close();
94            }
95          }
96      }
97    }
98  
99    @AfterClass
100   public static void tearDownAfterClass() throws Exception {
101     TEST_UTIL.shutdownMiniCluster();
102   }
103 
104   @After
105   public void tearDown() throws Exception {
106     Configuration c = TEST_UTIL.getConfiguration();
107     FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
108   }
109 
110   /**
111    * Pass the key and value to reducer.
112    */
113   public static class ScanMapper extends
114       TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
115     /**
116      * Pass the key and value to reduce.
117      *
118      * @param key The key, here "aaa", "aab" etc.
119      * @param value The value is the same as the key.
120      * @param context The task context.
121      * @throws IOException When reading the rows fails.
122      */
123     @Override
124     public void map(ImmutableBytesWritable key, Result value, Context context)
125         throws IOException, InterruptedException {
126       makeAssertions(key, value);
127       context.write(key, key);
128     }
129 
130     public void makeAssertions(ImmutableBytesWritable key, Result value) throws IOException {
131       if (value.size() != 1) {
132         throw new IOException("There should only be one input column");
133       }
134       Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
135           value.getMap();
136       if (!cf.containsKey(INPUT_FAMILY)) {
137         throw new IOException("Wrong input columns. Missing: '" +
138             Bytes.toString(INPUT_FAMILY) + "'.");
139       }
140       String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
141       LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
142           ", value -> " + val);
143     }
144   }
145 
146   /**
147    * Checks the last and first keys seen against the scanner boundaries.
148    */
149   public static class ScanReducer
150       extends
151       Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
152           NullWritable, NullWritable> {
153     private String first = null;
154     private String last = null;
155 
156     @Override
157     protected void reduce(ImmutableBytesWritable key,
158         Iterable<ImmutableBytesWritable> values, Context context)
159         throws IOException, InterruptedException {
160       makeAssertions(key, values);
161     }
162 
163     protected void makeAssertions(ImmutableBytesWritable key,
164         Iterable<ImmutableBytesWritable> values) {
165       int count = 0;
166       for (ImmutableBytesWritable value : values) {
167         String val = Bytes.toStringBinary(value.get());
168         LOG.debug("reduce: key[" + count + "] -> " +
169             Bytes.toStringBinary(key.get()) + ", value -> " + val);
170         if (first == null) first = val;
171         last = val;
172         count++;
173       }
174       assertEquals(3, count);
175     }
176 
177     @Override
178     protected void cleanup(Context context) throws IOException,
179         InterruptedException {
180       Configuration c = context.getConfiguration();
181       cleanup(c);
182     }
183 
184     protected void cleanup(Configuration c) {
185       String startRow = c.get(KEY_STARTROW);
186       String lastRow = c.get(KEY_LASTROW);
187       LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
188           startRow + "\"");
189       LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
190           "\"");
191       if (startRow != null && startRow.length() > 0) {
192         assertEquals(startRow, first);
193       }
194       if (lastRow != null && lastRow.length() > 0) {
195         assertEquals(lastRow, last);
196       }
197     }
198   }
199 
200   @Test
201   public void testScanEmptyToEmpty() throws IOException, InterruptedException,
202       ClassNotFoundException {
203     testScan(null, null, null);
204   }
205 
206   @Test
207   public void testScanEmptyToAPP() throws IOException, InterruptedException,
208       ClassNotFoundException {
209     testScan(null, "app", "apo");
210   }
211 
212   @Test
213   public void testScanOBBToOPP() throws IOException, InterruptedException,
214       ClassNotFoundException {
215     testScan("obb", "opp", "opo");
216   }
217 
218   @Test
219   public void testScanYZYToEmpty() throws IOException, InterruptedException,
220       ClassNotFoundException {
221     testScan("yzy", null, "zzz");
222   }
223 
224   /**
225    * Tests a MR scan using specific start and stop rows.
226    *
227    * @throws IOException
228    * @throws ClassNotFoundException
229    * @throws InterruptedException
230    */
231   private void testScan(String start, String stop, String last)
232       throws IOException, InterruptedException, ClassNotFoundException {
233     String jobName =
234         "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") + "To" +
235             (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
236     LOG.info("Before map/reduce startup - job " + jobName);
237     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
238 
239     c.set(KEY_STARTROW, start != null ? start : "");
240     c.set(KEY_LASTROW, last != null ? last : "");
241 
242     List<Scan> scans = new ArrayList<Scan>();
243 
244     for (String tableName : TABLES) {
245       Scan scan = new Scan();
246 
247       scan.addFamily(INPUT_FAMILY);
248       scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
249 
250       if (start != null) {
251         scan.setStartRow(Bytes.toBytes(start));
252       }
253       if (stop != null) {
254         scan.setStopRow(Bytes.toBytes(stop));
255       }
256 
257       scans.add(scan);
258 
259       LOG.info("scan before: " + scan);
260     }
261 
262     runJob(jobName, c, scans);
263   }
264 
265   protected void runJob(String jobName, Configuration c, List<Scan> scans)
266       throws IOException, InterruptedException, ClassNotFoundException {
267     Job job = new Job(c, jobName);
268 
269     initJob(scans, job);
270     job.setReducerClass(ScanReducer.class);
271     job.setNumReduceTasks(1); // one to get final "first" and "last" key
272     FileOutputFormat.setOutputPath(job,
273       new Path(TEST_UTIL.getDataTestDirOnTestFS(), job.getJobName()));
274     LOG.info("Started " + job.getJobName());
275     job.waitForCompletion(true);
276     assertTrue(job.isSuccessful());
277     LOG.info("After map/reduce completion - job " + jobName);
278   }
279 
280   protected abstract void initJob(List<Scan> scans, Job job) throws IOException;
281 
282 
283 }