View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.mapreduce;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertTrue;
22  
23  import java.io.File;
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.List;
27  import java.util.Locale;
28  import java.util.Map;
29  import java.util.NavigableMap;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FileUtil;
35  import org.apache.hadoop.fs.Path;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.testclassification.LargeTests;
38  import org.apache.hadoop.hbase.TableName;
39  import org.apache.hadoop.hbase.client.HTable;
40  import org.apache.hadoop.hbase.client.Result;
41  import org.apache.hadoop.hbase.client.Scan;
42  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
43  import org.apache.hadoop.hbase.util.Bytes;
44  import org.apache.hadoop.io.NullWritable;
45  import org.apache.hadoop.mapreduce.Job;
46  import org.apache.hadoop.mapreduce.Reducer;
47  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
48  import org.junit.After;
49  import org.junit.AfterClass;
50  import org.junit.BeforeClass;
51  import org.junit.Test;
52  import org.junit.experimental.categories.Category;
53  
54  /**
55   * Tests various scan start and stop row scenarios. This is set in a scan and
56   * tested in a MapReduce job to see if that is handed over and done properly
57   * too.
58   */
59  @Category(LargeTests.class)
60  public class TestMultiTableInputFormat {
61  
62    private static final Log LOG = LogFactory.getLog(TestMultiTableInputFormat.class);
63    static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
64  
65    static final String TABLE_NAME = "scantest";
66    static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
67    static final String KEY_STARTROW = "startRow";
68    static final String KEY_LASTROW = "stpRow";
69  
70    @BeforeClass
71    public static void setUpBeforeClass() throws Exception {
72      // switch TIF to log at DEBUG level
73      TEST_UTIL.enableDebug(MultiTableInputFormat.class);
74      TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
75      TEST_UTIL.setJobWithoutMRCluster();
76      // start mini hbase cluster
77      TEST_UTIL.startMiniCluster(3);
78      // create and fill table
79      for (int i = 0; i < 3; i++) {
80        try (HTable table =
81            TEST_UTIL.createMultiRegionTable(TableName.valueOf(TABLE_NAME + String.valueOf(i)),
82              INPUT_FAMILY, 4)) {
83          TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
84        }
85      }
86    }
87  
88    @AfterClass
89    public static void tearDownAfterClass() throws Exception {
90      TEST_UTIL.shutdownMiniCluster();
91    }
92    
93    @After
94    public void tearDown() throws Exception {
95      Configuration c = TEST_UTIL.getConfiguration();
96      FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
97    }
98  
99    /**
100    * Pass the key and value to reducer.
101    */
102   public static class ScanMapper extends
103       TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
104     /**
105      * Pass the key and value to reduce.
106      *
107      * @param key The key, here "aaa", "aab" etc.
108      * @param value The value is the same as the key.
109      * @param context The task context.
110      * @throws IOException When reading the rows fails.
111      */
112     @Override
113     public void map(ImmutableBytesWritable key, Result value, Context context)
114         throws IOException, InterruptedException {
115       if (value.size() != 1) {
116         throw new IOException("There should only be one input column");
117       }
118       Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
119           value.getMap();
120       if (!cf.containsKey(INPUT_FAMILY)) {
121         throw new IOException("Wrong input columns. Missing: '" +
122             Bytes.toString(INPUT_FAMILY) + "'.");
123       }
124       String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
125       LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
126           ", value -> " + val);
127       context.write(key, key);
128     }
129   }
130 
131   /**
132    * Checks the last and first keys seen against the scanner boundaries.
133    */
134   public static class ScanReducer
135       extends
136       Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
137       NullWritable, NullWritable> {
138     private String first = null;
139     private String last = null;
140 
141     @Override
142     protected void reduce(ImmutableBytesWritable key,
143         Iterable<ImmutableBytesWritable> values, Context context)
144         throws IOException, InterruptedException {
145       int count = 0;
146       for (ImmutableBytesWritable value : values) {
147         String val = Bytes.toStringBinary(value.get());
148         LOG.debug("reduce: key[" + count + "] -> " +
149             Bytes.toStringBinary(key.get()) + ", value -> " + val);
150         if (first == null) first = val;
151         last = val;
152         count++;
153       }
154       assertEquals(3, count);
155     }
156 
157     @Override
158     protected void cleanup(Context context) throws IOException,
159         InterruptedException {
160       Configuration c = context.getConfiguration();
161       String startRow = c.get(KEY_STARTROW);
162       String lastRow = c.get(KEY_LASTROW);
163       LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
164           startRow + "\"");
165       LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
166           "\"");
167       if (startRow != null && startRow.length() > 0) {
168         assertEquals(startRow, first);
169       }
170       if (lastRow != null && lastRow.length() > 0) {
171         assertEquals(lastRow, last);
172       }
173     }
174   }
175 
176   @Test
177   public void testScanEmptyToEmpty() throws IOException, InterruptedException,
178       ClassNotFoundException {
179     testScan(null, null, null);
180   }
181   
182   @Test
183   public void testScanEmptyToAPP() throws IOException, InterruptedException,
184       ClassNotFoundException {
185     testScan(null, "app", "apo");
186   }
187 
188   @Test
189   public void testScanOBBToOPP() throws IOException, InterruptedException,
190       ClassNotFoundException {
191     testScan("obb", "opp", "opo");
192   }
193 
194   @Test
195   public void testScanYZYToEmpty() throws IOException, InterruptedException,
196       ClassNotFoundException {
197     testScan("yzy", null, "zzz");
198   }
199 
200   /**
201    * Tests a MR scan using specific start and stop rows.
202    *
203    * @throws IOException
204    * @throws ClassNotFoundException
205    * @throws InterruptedException
206    */
207   private void testScan(String start, String stop, String last)
208       throws IOException, InterruptedException, ClassNotFoundException {
209     String jobName =
210         "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") + "To" +
211             (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
212     LOG.info("Before map/reduce startup - job " + jobName);
213     Configuration c = new Configuration(TEST_UTIL.getConfiguration());
214     
215     c.set(KEY_STARTROW, start != null ? start : "");
216     c.set(KEY_LASTROW, last != null ? last : "");
217     
218     List<Scan> scans = new ArrayList<Scan>();
219     
220     for(int i=0; i<3; i++){
221       Scan scan = new Scan();
222       
223       scan.addFamily(INPUT_FAMILY);
224       scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(TABLE_NAME + i));
225       
226       if (start != null) {
227         scan.setStartRow(Bytes.toBytes(start));
228       }
229       if (stop != null) {
230         scan.setStopRow(Bytes.toBytes(stop));
231       }
232       
233       scans.add(scan);
234       
235       LOG.info("scan before: " + scan);
236     }
237     
238     Job job = new Job(c, jobName);
239 
240     TableMapReduceUtil.initTableMapperJob(scans, ScanMapper.class,
241         ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
242     job.setReducerClass(ScanReducer.class);
243     job.setNumReduceTasks(1); // one to get final "first" and "last" key
244     FileOutputFormat.setOutputPath(job,
245       new Path(TEST_UTIL.getDataTestDirOnTestFS(), job.getJobName()));
246     LOG.info("Started " + job.getJobName());
247     job.waitForCompletion(true);
248     assertTrue(job.isSuccessful());
249     LOG.info("After map/reduce completion - job " + jobName);
250   }
251 }