1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.StatusReporter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.util.ReflectionUtils;
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60 public class MultithreadedTableMapper<K2, V2> extends TableMapper<K2, V2> {
61 private static final Log LOG = LogFactory.getLog(MultithreadedTableMapper.class);
62 private Class<? extends Mapper<ImmutableBytesWritable, Result,K2,V2>> mapClass;
63 private Context outer;
64 private ExecutorService executor;
65 public static final String NUMBER_OF_THREADS = "hbase.mapreduce.multithreadedmapper.threads";
66 public static final String MAPPER_CLASS = "hbase.mapreduce.multithreadedmapper.mapclass";
67
68
69
70
71
72
73 public static int getNumberOfThreads(JobContext job) {
74 return job.getConfiguration().
75 getInt(NUMBER_OF_THREADS, 10);
76 }
77
78
79
80
81
82
83 public static void setNumberOfThreads(Job job, int threads) {
84 job.getConfiguration().setInt(NUMBER_OF_THREADS,
85 threads);
86 }
87
88
89
90
91
92
93
94
95 @SuppressWarnings("unchecked")
96 public static <K2,V2>
97 Class<Mapper<ImmutableBytesWritable, Result,K2,V2>> getMapperClass(JobContext job) {
98 return (Class<Mapper<ImmutableBytesWritable, Result,K2,V2>>)
99 job.getConfiguration().getClass( MAPPER_CLASS,
100 Mapper.class);
101 }
102
103
104
105
106
107
108
109
110 public static <K2,V2>
111 void setMapperClass(Job job,
112 Class<? extends Mapper<ImmutableBytesWritable, Result,K2,V2>> cls) {
113 if (MultithreadedTableMapper.class.isAssignableFrom(cls)) {
114 throw new IllegalArgumentException("Can't have recursive " +
115 "MultithreadedTableMapper instances.");
116 }
117 job.getConfiguration().setClass(MAPPER_CLASS,
118 cls, Mapper.class);
119 }
120
121
122
123
124 @Override
125 public void run(Context context) throws IOException, InterruptedException {
126 outer = context;
127 int numberOfThreads = getNumberOfThreads(context);
128 mapClass = getMapperClass(context);
129 if (LOG.isDebugEnabled()) {
130 LOG.debug("Configuring multithread runner to use " + numberOfThreads +
131 " threads");
132 }
133 executor = Executors.newFixedThreadPool(numberOfThreads);
134 for(int i=0; i < numberOfThreads; ++i) {
135 MapRunner thread = new MapRunner(context);
136 executor.execute(thread);
137 }
138 executor.shutdown();
139 while (!executor.isTerminated()) {
140
141 Thread.sleep(1000);
142 }
143 }
144
145 private class SubMapRecordReader
146 extends RecordReader<ImmutableBytesWritable, Result> {
147 private ImmutableBytesWritable key;
148 private Result value;
149 private Configuration conf;
150
151 @Override
152 public void close() throws IOException {
153 }
154
155 @Override
156 public float getProgress() throws IOException, InterruptedException {
157 return 0;
158 }
159
160 @Override
161 public void initialize(InputSplit split,
162 TaskAttemptContext context
163 ) throws IOException, InterruptedException {
164 conf = context.getConfiguration();
165 }
166
167 @Override
168 public boolean nextKeyValue() throws IOException, InterruptedException {
169 synchronized (outer) {
170 if (!outer.nextKeyValue()) {
171 return false;
172 }
173 key = ReflectionUtils.copy(outer.getConfiguration(),
174 outer.getCurrentKey(), key);
175 value = ReflectionUtils.copy(conf, outer.getCurrentValue(), value);
176 return true;
177 }
178 }
179
180 @Override
181 public ImmutableBytesWritable getCurrentKey() {
182 return key;
183 }
184
185 @Override
186 public Result getCurrentValue() {
187 return value;
188 }
189 }
190
191 private class SubMapRecordWriter extends RecordWriter<K2,V2> {
192
193 @Override
194 public void close(TaskAttemptContext context) throws IOException,
195 InterruptedException {
196 }
197
198 @Override
199 public void write(K2 key, V2 value) throws IOException,
200 InterruptedException {
201 synchronized (outer) {
202 outer.write(key, value);
203 }
204 }
205 }
206
207 private class SubMapStatusReporter extends StatusReporter {
208
209 @Override
210 public Counter getCounter(Enum<?> name) {
211 return outer.getCounter(name);
212 }
213
214 @Override
215 public Counter getCounter(String group, String name) {
216 return outer.getCounter(group, name);
217 }
218
219 @Override
220 public void progress() {
221 outer.progress();
222 }
223
224 @Override
225 public void setStatus(String status) {
226 outer.setStatus(status);
227 }
228
229 @Override
230 public float getProgress() {
231 return 0;
232 }
233 }
234
235 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
236 justification="Don't understand why FB is complaining about this one. We do throw exception")
237 private class MapRunner implements Runnable {
238 private Mapper<ImmutableBytesWritable, Result, K2,V2> mapper;
239 private Context subcontext;
240
241 @SuppressWarnings({ "rawtypes", "unchecked" })
242 MapRunner(Context context) throws IOException, InterruptedException {
243 mapper = ReflectionUtils.newInstance(mapClass,
244 context.getConfiguration());
245 try {
246 Constructor c = context.getClass().getConstructor(
247 Mapper.class,
248 Configuration.class,
249 TaskAttemptID.class,
250 RecordReader.class,
251 RecordWriter.class,
252 OutputCommitter.class,
253 StatusReporter.class,
254 InputSplit.class);
255 c.setAccessible(true);
256 subcontext = (Context) c.newInstance(
257 mapper,
258 outer.getConfiguration(),
259 outer.getTaskAttemptID(),
260 new SubMapRecordReader(),
261 new SubMapRecordWriter(),
262 context.getOutputCommitter(),
263 new SubMapStatusReporter(),
264 outer.getInputSplit());
265 } catch (Exception e) {
266 try {
267 Constructor c = Class.forName("org.apache.hadoop.mapreduce.task.MapContextImpl").getConstructor(
268 Configuration.class,
269 TaskAttemptID.class,
270 RecordReader.class,
271 RecordWriter.class,
272 OutputCommitter.class,
273 StatusReporter.class,
274 InputSplit.class);
275 c.setAccessible(true);
276 MapContext mc = (MapContext) c.newInstance(
277 outer.getConfiguration(),
278 outer.getTaskAttemptID(),
279 new SubMapRecordReader(),
280 new SubMapRecordWriter(),
281 context.getOutputCommitter(),
282 new SubMapStatusReporter(),
283 outer.getInputSplit());
284 Class<?> wrappedMapperClass = Class.forName("org.apache.hadoop.mapreduce.lib.map.WrappedMapper");
285 Method getMapContext = wrappedMapperClass.getMethod("getMapContext", MapContext.class);
286 subcontext = (Context) getMapContext.invoke(
287 wrappedMapperClass.getDeclaredConstructor().newInstance(),
288 mc);
289 } catch (Exception ee) {
290
291 throw new IOException(e);
292 }
293 }
294 }
295
296 @Override
297 public void run() {
298 try {
299 mapper.run(subcontext);
300 } catch (Throwable ie) {
301 LOG.error("Problem in running map.", ie);
302 }
303 }
304 }
305 }