/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertNotEquals;
23 import static org.junit.Assert.assertTrue;
24
25 import java.io.IOException;
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.Locale;
29 import java.util.Map;
30 import java.util.NavigableMap;
31
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.hadoop.conf.Configuration;
35 import org.apache.hadoop.fs.Path;
36 import org.apache.hadoop.hbase.HBaseTestingUtility;
37 import org.apache.hadoop.hbase.TableName;
38 import org.apache.hadoop.hbase.client.HTable;
39 import org.apache.hadoop.hbase.client.Result;
40 import org.apache.hadoop.hbase.client.Scan;
41 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
42 import org.apache.hadoop.hbase.util.Bytes;
43 import org.apache.hadoop.io.NullWritable;
44 import org.apache.hadoop.mapreduce.InputSplit;
45 import org.apache.hadoop.mapreduce.Job;
46 import org.apache.hadoop.mapreduce.Reducer;
47 import org.apache.hadoop.mapreduce.TaskCounter;
48 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
49 import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
50 import org.junit.AfterClass;
51 import org.junit.Assert;
52 import org.junit.BeforeClass;
53
54
55
56
57
58
59
60
61
62
63
64
65
66 public abstract class TestTableInputFormatScanBase {
67
68 private static final Log LOG = LogFactory.getLog(TestTableInputFormatScanBase.class);
69 static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
70
71 static final byte[] TABLE_NAME = Bytes.toBytes("scantest");
72 static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
73 static final byte[][] INPUT_FAMILYS = {Bytes.toBytes("content1"), Bytes.toBytes("content2")};
74 static final String KEY_STARTROW = "startRow";
75 static final String KEY_LASTROW = "stpRow";
76
77 private static HTable table = null;
78
79 @BeforeClass
80 public static void setUpBeforeClass() throws Exception {
81
82
83 System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
84
85
86 TEST_UTIL.enableDebug(TableInputFormat.class);
87 TEST_UTIL.enableDebug(TableInputFormatBase.class);
88 TEST_UTIL.setJobWithoutMRCluster();
89
90 TEST_UTIL.startMiniCluster(3);
91
92 table = TEST_UTIL.createMultiRegionTable(TableName.valueOf(TABLE_NAME), INPUT_FAMILY);
93 TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
94 }
95
96 @AfterClass
97 public static void tearDownAfterClass() throws Exception {
98 TEST_UTIL.shutdownMiniCluster();
99 }
100
101
102
103
104 public static class ScanMapper
105 extends TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
106
107
108
109
110
111
112
113
114
115 @Override
116 public void map(ImmutableBytesWritable key, Result value,
117 Context context)
118 throws IOException, InterruptedException {
119 if (value.size() != 1) {
120 throw new IOException("There should only be one input column");
121 }
122 Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
123 cf = value.getMap();
124 if(!cf.containsKey(INPUT_FAMILY)) {
125 throw new IOException("Wrong input columns. Missing: '" +
126 Bytes.toString(INPUT_FAMILY) + "'.");
127 }
128 String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
129 LOG.info("map: key -> " + Bytes.toStringBinary(key.get()) +
130 ", value -> " + val);
131 context.write(key, key);
132 }
133
134 }
135
136
137
138
139 public static class ScanReducer
140 extends Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
141 NullWritable, NullWritable> {
142
143 private String first = null;
144 private String last = null;
145
146 protected void reduce(ImmutableBytesWritable key,
147 Iterable<ImmutableBytesWritable> values, Context context)
148 throws IOException ,InterruptedException {
149 int count = 0;
150 for (ImmutableBytesWritable value : values) {
151 String val = Bytes.toStringBinary(value.get());
152 LOG.info("reduce: key[" + count + "] -> " +
153 Bytes.toStringBinary(key.get()) + ", value -> " + val);
154 if (first == null) first = val;
155 last = val;
156 count++;
157 }
158 }
159
160 protected void cleanup(Context context)
161 throws IOException, InterruptedException {
162 Configuration c = context.getConfiguration();
163 String startRow = c.get(KEY_STARTROW);
164 String lastRow = c.get(KEY_LASTROW);
165 LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" + startRow + "\"");
166 LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow + "\"");
167 if (startRow != null && startRow.length() > 0) {
168 assertEquals(startRow, first);
169 }
170 if (lastRow != null && lastRow.length() > 0) {
171 assertEquals(lastRow, last);
172 }
173 }
174
175 }
176
177
178
179
180
181
182
183
184 protected void testScanFromConfiguration(String start, String stop, String last)
185 throws IOException, InterruptedException, ClassNotFoundException {
186 String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") +
187 "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
188 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
189 c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
190 c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
191 c.set(KEY_STARTROW, start != null ? start : "");
192 c.set(KEY_LASTROW, last != null ? last : "");
193
194 if (start != null) {
195 c.set(TableInputFormat.SCAN_ROW_START, start);
196 }
197
198 if (stop != null) {
199 c.set(TableInputFormat.SCAN_ROW_STOP, stop);
200 }
201
202 Job job = new Job(c, jobName);
203 job.setMapperClass(ScanMapper.class);
204 job.setReducerClass(ScanReducer.class);
205 job.setMapOutputKeyClass(ImmutableBytesWritable.class);
206 job.setMapOutputValueClass(ImmutableBytesWritable.class);
207 job.setInputFormatClass(TableInputFormat.class);
208 job.setNumReduceTasks(1);
209 FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
210 TableMapReduceUtil.addDependencyJars(job);
211 assertTrue(job.waitForCompletion(true));
212 }
213
214
215
216
217
218
219
220
221 protected void testScan(String start, String stop, String last)
222 throws IOException, InterruptedException, ClassNotFoundException {
223 String jobName = "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") +
224 "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
225 LOG.info("Before map/reduce startup - job " + jobName);
226 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
227 Scan scan = new Scan();
228 scan.addFamily(INPUT_FAMILY);
229 if (start != null) {
230 scan.setStartRow(Bytes.toBytes(start));
231 }
232 c.set(KEY_STARTROW, start != null ? start : "");
233 if (stop != null) {
234 scan.setStopRow(Bytes.toBytes(stop));
235 }
236 c.set(KEY_LASTROW, last != null ? last : "");
237 LOG.info("scan before: " + scan);
238 Job job = new Job(c, jobName);
239 TableMapReduceUtil.initTableMapperJob(
240 Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
241 ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
242 job.setReducerClass(ScanReducer.class);
243 job.setNumReduceTasks(1);
244 FileOutputFormat.setOutputPath(job,
245 new Path(TEST_UTIL.getDataTestDir(), job.getJobName()));
246 LOG.info("Started " + job.getJobName());
247 assertTrue(job.waitForCompletion(true));
248 LOG.info("After map/reduce completion - job " + jobName);
249 }
250
251
252
253
254
255
256
257
258
259
260 public void testNumOfSplits(int splitsPerRegion, int expectedNumOfSplits) throws IOException,
261 InterruptedException,
262 ClassNotFoundException {
263 String jobName = "TestJobForNumOfSplits";
264 LOG.info("Before map/reduce startup - job " + jobName);
265 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
266 Scan scan = new Scan();
267 scan.addFamily(INPUT_FAMILY);
268 c.setInt("hbase.mapreduce.input.mappers.per.region", splitsPerRegion);
269 c.set(KEY_STARTROW, "");
270 c.set(KEY_LASTROW, "");
271 Job job = new Job(c, jobName);
272 TableMapReduceUtil.initTableMapperJob(TableName.valueOf(TABLE_NAME).getNameAsString(), scan, ScanMapper.class,
273 ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
274 TableInputFormat tif = new TableInputFormat();
275 tif.setConf(job.getConfiguration());
276 List<InputSplit> splits = tif.getSplits(job);
277 for (InputSplit split : splits) {
278 TableSplit tableSplit = (TableSplit) split;
279
280
281 Assert.assertTrue(tableSplit.getScanAsString().isEmpty());
282 }
283 Assert.assertEquals(expectedNumOfSplits, splits.size());
284 }
285
286
287
288
289
290
291
292 public void testNumOfSplitsMR(int splitsPerRegion, int expectedNumOfSplits) throws IOException,
293 InterruptedException,
294 ClassNotFoundException {
295 String jobName = "TestJobForNumOfSplits-MR";
296 LOG.info("Before map/reduce startup - job " + jobName);
297 Configuration c = new Configuration(TEST_UTIL.getConfiguration());
298 Scan scan = new Scan();
299 scan.addFamily(INPUT_FAMILY);
300 c.setInt("hbase.mapreduce.input.mappers.per.region", splitsPerRegion);
301 Job job = new Job(c, jobName);
302 TableMapReduceUtil.initTableMapperJob(Bytes.toString(TABLE_NAME), scan, ScanMapper.class,
303 ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
304 job.setReducerClass(ScanReducer.class);
305 job.setNumReduceTasks(1);
306 job.setOutputFormatClass(NullOutputFormat.class);
307 assertTrue("job failed!", job.waitForCompletion(true));
308
309
310 assertEquals("Saw the wrong count of mappers per region", expectedNumOfSplits,
311 job.getCounters().findCounter(TaskCounter.SHUFFLED_MAPS).getValue());
312 }
313
314
315
316
317
318 public void testAutobalanceNumOfSplit() throws IOException {
319
320 List<InputSplit> splits = new ArrayList<>(5);
321 int[] regionLen = {100, 200, 200, 400, 600};
322 for (int i = 0; i < 5; i++) {
323 InputSplit split = new TableSplit(TableName.valueOf(TABLE_NAME), new Scan(),
324 Bytes.toBytes(i), Bytes.toBytes(i + 1), "", "", regionLen[i] * 1048576);
325 splits.add(split);
326 }
327 TableInputFormat tif = new TableInputFormat();
328 List<InputSplit> res = tif.calculateAutoBalancedSplits(splits, 1073741824);
329
330 assertEquals("Saw the wrong number of splits", 5, res.size());
331 TableSplit ts1 = (TableSplit) res.get(0);
332 assertEquals("The first split end key should be", 2, Bytes.toInt(ts1.getEndRow()));
333 TableSplit ts2 = (TableSplit) res.get(1);
334 assertEquals("The second split regionsize should be", 200 * 1048576, ts2.getLength());
335 TableSplit ts3 = (TableSplit) res.get(2);
336 assertEquals("The third split start key should be", 3, Bytes.toInt(ts3.getStartRow()));
337 TableSplit ts4 = (TableSplit) res.get(4);
338 assertNotEquals("The seventh split start key should not be", 4, Bytes.toInt(ts4.getStartRow()));
339 }
340 }
341