1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import com.google.common.collect.Lists;
22 import org.apache.commons.logging.Log;
23 import org.apache.commons.logging.LogFactory;
24 import org.apache.hadoop.conf.Configuration;
25 import org.apache.hadoop.fs.FileUtil;
26 import org.apache.hadoop.fs.Path;
27 import org.apache.hadoop.hbase.CategoryBasedTimeout;
28 import org.apache.hadoop.hbase.HBaseTestingUtility;
29 import org.apache.hadoop.hbase.TableName;
30 import org.apache.hadoop.hbase.client.HTable;
31 import org.apache.hadoop.hbase.client.Result;
32 import org.apache.hadoop.hbase.client.Scan;
33 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
34 import org.apache.hadoop.hbase.util.Bytes;
35 import org.apache.hadoop.io.NullWritable;
36 import org.apache.hadoop.mapreduce.Job;
37 import org.apache.hadoop.mapreduce.Reducer;
38 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
39 import org.junit.After;
40 import org.junit.AfterClass;
41 import org.junit.BeforeClass;
42 import org.junit.Rule;
43 import org.junit.Test;
44 import org.junit.rules.TestRule;
45
46 import java.io.File;
47 import java.io.IOException;
48 import java.util.ArrayList;
49 import java.util.List;
50 import java.util.Locale;
51 import java.util.Map;
52 import java.util.NavigableMap;
53
54 import static org.junit.Assert.assertEquals;
55 import static org.junit.Assert.assertTrue;
56
57
58
59
60 public abstract class MultiTableInputFormatTestBase {
61 static final Log LOG = LogFactory.getLog(MultiTableInputFormatTestBase.class);
62 @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
63 withTimeout(this.getClass()).withLookingForStuckThread(true).build();
64 public static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
65 static final String TABLE_NAME = "scantest";
66 static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
67 static final String KEY_STARTROW = "startRow";
68 static final String KEY_LASTROW = "stpRow";
69
70 static List<String> TABLES = Lists.newArrayList();
71
72 static {
73 for (int i = 0; i < 3; i++) {
74 TABLES.add(TABLE_NAME + String.valueOf(i));
75 }
76 }
77
78 @BeforeClass
79 public static void setUpBeforeClass() throws Exception {
80
81 TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
82 TEST_UTIL.setJobWithoutMRCluster();
83
84 TEST_UTIL.startMiniCluster(3);
85
86 for (String tableName : TABLES) {
87 HTable table = null;
88 try {
89 table = TEST_UTIL.createMultiRegionTable(TableName.valueOf(tableName), INPUT_FAMILY, 4);
90 TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
91 } finally {
92 if (table != null) {
93 table.close();
94 }
95 }
96 }
97 }
98
99 @AfterClass
100 public static void tearDownAfterClass() throws Exception {
101 TEST_UTIL.shutdownMiniCluster();
102 }
103
104 @After
105 public void tearDown() throws Exception {
106 Configuration c = TEST_UTIL.getConfiguration();
107 FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
108 }
109
110
111
112
113 public static class ScanMapper extends
114 TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
115
116
117
118
119
120
121
122
123 @Override
124 public void map(ImmutableBytesWritable key, Result value, Context context)
125 throws IOException, InterruptedException {
126 makeAssertions(key, value);
127 context.write(key, key);
128 }
129
130 public void makeAssertions(ImmutableBytesWritable key, Result value) throws IOException {
131 if (value.size() != 1) {
132 throw new IOException("There should only be one input column");
133 }
134 Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
135 value.getMap();
136 if (!cf.containsKey(INPUT_FAMILY)) {
137 throw new IOException("Wrong input columns. Missing: '" +
138 Bytes.toString(INPUT_FAMILY) + "'.");
139 }
140 String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
141 LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
142 ", value -> " + val);
143 }
144 }
145
146
147
148
149 public static class ScanReducer
150 extends
151 Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
152 NullWritable, NullWritable> {
153 private String first = null;
154 private String last = null;
155
156 @Override
157 protected void reduce(ImmutableBytesWritable key,
158 Iterable<ImmutableBytesWritable> values, Context context)
159 throws IOException, InterruptedException {
160 makeAssertions(key, values);
161 }
162
163 protected void makeAssertions(ImmutableBytesWritable key,
164 Iterable<ImmutableBytesWritable> values) {
165 int count = 0;
166 for (ImmutableBytesWritable value : values) {
167 String val = Bytes.toStringBinary(value.get());
168 LOG.debug("reduce: key[" + count + "] -> " +
169 Bytes.toStringBinary(key.get()) + ", value -> " + val);
170 if (first == null) first = val;
171 last = val;
172 count++;
173 }
174 assertEquals(3, count);
175 }
176
177 @Override
178 protected void cleanup(Context context) throws IOException,
179 InterruptedException {
180 Configuration c = context.getConfiguration();
181 cleanup(c);
182 }
183
184 protected void cleanup(Configuration c) {
185 String startRow = c.get(KEY_STARTROW);
186 String lastRow = c.get(KEY_LASTROW);
187 LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
188 startRow + "\"");
189 LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
190 "\"");
191 if (startRow != null && startRow.length() > 0) {
192 assertEquals(startRow, first);
193 }
194 if (lastRow != null && lastRow.length() > 0) {
195 assertEquals(lastRow, last);
196 }
197 }
198 }
199
200 @Test
201 public void testScanEmptyToEmpty() throws IOException, InterruptedException,
202 ClassNotFoundException {
203 testScan(null, null, null);
204 }
205
206 @Test
207 public void testScanEmptyToAPP() throws IOException, InterruptedException,
208 ClassNotFoundException {
209 testScan(null, "app", "apo");
210 }
211
212 @Test
213 public void testScanOBBToOPP() throws IOException, InterruptedException,
214 ClassNotFoundException {
215 testScan("obb", "opp", "opo");
216 }
217
218 @Test
219 public void testScanYZYToEmpty() throws IOException, InterruptedException,
220 ClassNotFoundException {
221 testScan("yzy", null, "zzz");
222 }
223
224
225
226
227
228
229
230
231 private void testScan(String start, String stop, String last)
232 throws IOException, InterruptedException, ClassNotFoundException {
233 String jobName =
234 "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") + "To" +
235 (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
236 LOG.info("Before map/reduce startup - job " + jobName);
237 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
238
239 c.set(KEY_STARTROW, start != null ? start : "");
240 c.set(KEY_LASTROW, last != null ? last : "");
241
242 List<Scan> scans = new ArrayList<Scan>();
243
244 for (String tableName : TABLES) {
245 Scan scan = new Scan();
246
247 scan.addFamily(INPUT_FAMILY);
248 scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));
249
250 if (start != null) {
251 scan.setStartRow(Bytes.toBytes(start));
252 }
253 if (stop != null) {
254 scan.setStopRow(Bytes.toBytes(stop));
255 }
256
257 scans.add(scan);
258
259 LOG.info("scan before: " + scan);
260 }
261
262 runJob(jobName, c, scans);
263 }
264
265 protected void runJob(String jobName, Configuration c, List<Scan> scans)
266 throws IOException, InterruptedException, ClassNotFoundException {
267 Job job = new Job(c, jobName);
268
269 initJob(scans, job);
270 job.setReducerClass(ScanReducer.class);
271 job.setNumReduceTasks(1);
272 FileOutputFormat.setOutputPath(job,
273 new Path(TEST_UTIL.getDataTestDirOnTestFS(), job.getJobName()));
274 LOG.info("Started " + job.getJobName());
275 job.waitForCompletion(true);
276 assertTrue(job.isSuccessful());
277 LOG.info("After map/reduce completion - job " + jobName);
278 }
279
280 protected abstract void initJob(List<Scan> scans, Job job) throws IOException;
281
282
283 }