1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import java.io.IOException;
22 import java.util.List;
23 import java.util.ArrayList;
24
25 import org.apache.commons.lang.StringUtils;
26 import org.apache.hadoop.hbase.HConstants;
27 import org.apache.hadoop.hbase.classification.InterfaceAudience;
28 import org.apache.hadoop.hbase.classification.InterfaceStability;
29 import org.apache.hadoop.conf.Configuration;
30 import org.apache.hadoop.hbase.HBaseConfiguration;
31 import org.apache.hadoop.hbase.client.Result;
32 import org.apache.hadoop.hbase.client.Scan;
33 import org.apache.hadoop.hbase.filter.FilterBase;
34 import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
35 import org.apache.hadoop.hbase.filter.MultiRowRangeFilter;
36 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
37 import org.apache.hadoop.hbase.util.Bytes;
38 import org.apache.hadoop.mapreduce.Job;
39 import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
40 import org.apache.hadoop.util.GenericOptionsParser;
41
42
43
44
45
46 @InterfaceAudience.Public
47 @InterfaceStability.Stable
48 public class RowCounter {
49
50
51 static final String NAME = "rowcounter";
52
53
54
55
56 static class RowCounterMapper
57 extends TableMapper<ImmutableBytesWritable, Result> {
58
59
60 public static enum Counters {ROWS}
61
62
63
64
65
66
67
68
69
70
71
72 @Override
73 public void map(ImmutableBytesWritable row, Result values,
74 Context context)
75 throws IOException {
76
77 context.getCounter(Counters.ROWS).increment(1);
78 }
79 }
80
81
82
83
84
85
86
87
88
89 public static Job createSubmittableJob(Configuration conf, String[] args)
90 throws IOException {
91 String tableName = args[0];
92 List<MultiRowRangeFilter.RowRange> rowRangeList = null;
93 long startTime = 0;
94 long endTime = 0;
95
96 StringBuilder sb = new StringBuilder();
97
98 final String rangeSwitch = "--range=";
99 final String startTimeArgKey = "--starttime=";
100 final String endTimeArgKey = "--endtime=";
101
102
103 for (int i = 1; i < args.length; i++) {
104 if (args[i].startsWith(rangeSwitch)) {
105 try {
106 rowRangeList = parseRowRangeParameter(args[i], rangeSwitch);
107 } catch (IllegalArgumentException e) {
108 return null;
109 }
110 continue;
111 }
112 if (startTime < endTime) {
113 printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime);
114 return null;
115 }
116 if (args[i].startsWith(startTimeArgKey)) {
117 startTime = Long.parseLong(args[i].substring(startTimeArgKey.length()));
118 continue;
119 }
120 if (args[i].startsWith(endTimeArgKey)) {
121 endTime = Long.parseLong(args[i].substring(endTimeArgKey.length()));
122 continue;
123 }
124 else {
125
126 sb.append(args[i]);
127 sb.append(" ");
128 }
129 }
130
131 Job job = new Job(conf, NAME + "_" + tableName);
132 job.setJarByClass(RowCounter.class);
133 Scan scan = new Scan();
134 scan.setCacheBlocks(false);
135 setScanFilter(scan, rowRangeList);
136 if (sb.length() > 0) {
137 for (String columnName : sb.toString().trim().split(" ")) {
138 String family = StringUtils.substringBefore(columnName, ":");
139 String qualifier = StringUtils.substringAfter(columnName, ":");
140
141 if (StringUtils.isBlank(qualifier)) {
142 scan.addFamily(Bytes.toBytes(family));
143 }
144 else {
145 scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier));
146 }
147 }
148 }
149 scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
150 job.setOutputFormatClass(NullOutputFormat.class);
151 TableMapReduceUtil.initTableMapperJob(tableName, scan,
152 RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
153 job.setNumReduceTasks(0);
154 return job;
155 }
156
157 private static List<MultiRowRangeFilter.RowRange> parseRowRangeParameter(
158 String arg, String rangeSwitch) {
159 final String[] ranges = arg.substring(rangeSwitch.length()).split(";");
160 final List<MultiRowRangeFilter.RowRange> rangeList = new ArrayList<>();
161 for (String range : ranges) {
162 String[] startEnd = range.split(",", 2);
163 if (startEnd.length != 2 || startEnd[1].contains(",")) {
164 printUsage("Please specify range in such format as \"--range=a,b\" " +
165 "or, with only one boundary, \"--range=,b\" or \"--range=a,\"");
166 throw new IllegalArgumentException("Wrong range specification: " + range);
167 }
168 String startKey = startEnd[0];
169 String endKey = startEnd[1];
170 rangeList.add(new MultiRowRangeFilter.RowRange(
171 Bytes.toBytesBinary(startKey), true,
172 Bytes.toBytesBinary(endKey), false));
173 }
174 return rangeList;
175 }
176
177
178
179
180
181
182
183
184
185
186 private static void setScanFilter(Scan scan, List<MultiRowRangeFilter.RowRange> rowRangeList) {
187 final int size = rowRangeList == null ? 0 : rowRangeList.size();
188 if (size <= 1) {
189 scan.setFilter(new FirstKeyOnlyFilter());
190 }
191 if (size == 1) {
192 MultiRowRangeFilter.RowRange range = rowRangeList.get(0);
193 scan.setStartRow(range.getStartRow());
194 scan.setStopRow(range.getStopRow());
195 } else if (size > 1) {
196 try {
197 scan.setFilter(new MultiRowRangeFilter(rowRangeList));
198 } catch (IOException e) {
199
200 throw new RuntimeException("Cannot instantiate MultiRowRangeFilter");
201 }
202 }
203 }
204
205
206
207
208 private static void printUsage(String errorMessage) {
209 System.err.println("ERROR: " + errorMessage);
210 printUsage();
211 }
212
213
214
215
216 private static void printUsage() {
217 System.err.println("Usage: RowCounter [options] <tablename> " +
218 "[--starttime=[start] --endtime=[end] " +
219 "[--range=[startKey],[endKey][;[startKey],[endKey]...]] [<column1> <column2>...]");
220 System.err.println("For performance consider the following options:\n"
221 + "-Dhbase.client.scanner.caching=100\n"
222 + "-Dmapreduce.map.speculative=false");
223 }
224
225
226
227
228
229
230
231 public static void main(String[] args) throws Exception {
232 Configuration conf = HBaseConfiguration.create();
233 String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
234 if (otherArgs.length < 1) {
235 printUsage("Wrong number of parameters: " + args.length);
236 System.exit(-1);
237 }
238 Job job = createSubmittableJob(conf, otherArgs);
239 if (job == null) {
240 System.exit(-1);
241 }
242 System.exit(job.waitForCompletion(true) ? 0 : 1);
243 }
244 }