/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 package org.apache.hadoop.hbase.regionserver;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.HashSet;
24 import java.util.LinkedList;
25 import java.util.List;
26 import java.util.Set;
27
28 import org.apache.commons.logging.Log;
29 import org.apache.commons.logging.LogFactory;
30 import org.apache.hadoop.hbase.classification.InterfaceAudience;
31 import org.apache.hadoop.conf.Configuration;
32 import org.apache.hadoop.conf.Configured;
33 import org.apache.hadoop.fs.FileStatus;
34 import org.apache.hadoop.fs.FileSystem;
35 import org.apache.hadoop.fs.FSDataOutputStream;
36 import org.apache.hadoop.fs.Path;
37 import org.apache.hadoop.io.LongWritable;
38 import org.apache.hadoop.io.NullWritable;
39 import org.apache.hadoop.io.Text;
40 import org.apache.hadoop.util.LineReader;
41 import org.apache.hadoop.util.Tool;
42 import org.apache.hadoop.util.ToolRunner;
43 import org.apache.hadoop.mapreduce.InputSplit;
44 import org.apache.hadoop.mapreduce.Job;
45 import org.apache.hadoop.mapreduce.JobContext;
46 import org.apache.hadoop.mapreduce.Mapper;
47 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
48 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
49 import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
50 import org.apache.hadoop.hbase.HBaseConfiguration;
51 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
52 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
53 import org.apache.hadoop.hbase.HTableDescriptor;
54 import org.apache.hadoop.hbase.HRegionInfo;
55 import org.apache.hadoop.hbase.regionserver.HRegion;
56 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
57 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
58 import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
59 import org.apache.hadoop.hbase.mapreduce.JobUtil;
60 import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
61 import org.apache.hadoop.hbase.util.Bytes;
62 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
63 import org.apache.hadoop.hbase.util.FSTableDescriptors;
64 import org.apache.hadoop.hbase.util.FSUtils;
65
66
67
68
69
70
71
72
73
/**
 * Tool that runs compactions on HBase store files directly against the
 * filesystem, either in this client process or as a MapReduce job.
 * Input paths may point at a table, a region, or a single column-family dir.
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
public class CompactionTool extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(CompactionTool.class);

  // Directory used for temporary compaction output.
  private final static String CONF_TMP_DIR = "hbase.tmp.dir";
  // When true, run a single compaction pass instead of looping while needed.
  private final static String CONF_COMPACT_ONCE = "hbase.compactiontool.compact.once";
  // When true, request a major compaction.
  private final static String CONF_COMPACT_MAJOR = "hbase.compactiontool.compact.major";
  // When true, the tool deletes the original (compacted-away) store files.
  private final static String CONF_DELETE_COMPACTED = "hbase.compactiontool.delete";
  // Standard HStore setting: move compaction output into place when done.
  private final static String CONF_COMPLETE_COMPACTION = "hbase.hstore.compaction.complete";
83
84
85
86
87
  /**
   * Class responsible for executing the compaction on a specified path.
   * The path can be a table, region or family directory.
   */
  private static class CompactionWorker {
    private final boolean keepCompactedFiles;  // true when compaction output is NOT moved into place
    private final boolean deleteCompacted;     // delete the original store files after compaction
    private final Configuration conf;
    private final FileSystem fs;
    private final Path tmpDir;                 // temporary dir used for compaction output
94
95 public CompactionWorker(final FileSystem fs, final Configuration conf) {
96 this.conf = conf;
97 this.keepCompactedFiles = !conf.getBoolean(CONF_COMPLETE_COMPACTION, true);
98 this.deleteCompacted = conf.getBoolean(CONF_DELETE_COMPACTED, false);
99 this.tmpDir = new Path(conf.get(CONF_TMP_DIR));
100 this.fs = fs;
101 }
102
103
104
105
106
107
108
109
110 public void compact(final Path path, final boolean compactOnce, final boolean major) throws IOException {
111 if (isFamilyDir(fs, path)) {
112 Path regionDir = path.getParent();
113 Path tableDir = regionDir.getParent();
114 HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
115 HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
116 compactStoreFiles(tableDir, htd, hri, path.getName(), compactOnce, major);
117 } else if (isRegionDir(fs, path)) {
118 Path tableDir = path.getParent();
119 HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
120 compactRegion(tableDir, htd, path, compactOnce, major);
121 } else if (isTableDir(fs, path)) {
122 compactTable(path, compactOnce, major);
123 } else {
124 throw new IOException(
125 "Specified path is not a table, region or family directory. path=" + path);
126 }
127 }
128
129 private void compactTable(final Path tableDir, final boolean compactOnce, final boolean major)
130 throws IOException {
131 HTableDescriptor htd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
132 for (Path regionDir: FSUtils.getRegionDirs(fs, tableDir)) {
133 compactRegion(tableDir, htd, regionDir, compactOnce, major);
134 }
135 }
136
137 private void compactRegion(final Path tableDir, final HTableDescriptor htd,
138 final Path regionDir, final boolean compactOnce, final boolean major)
139 throws IOException {
140 HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
141 for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
142 compactStoreFiles(tableDir, htd, hri, familyDir.getName(), compactOnce, major);
143 }
144 }
145
146
147
148
149
150
151 private void compactStoreFiles(final Path tableDir, final HTableDescriptor htd,
152 final HRegionInfo hri, final String familyName, final boolean compactOnce,
153 final boolean major) throws IOException {
154 HStore store = getStore(conf, fs, tableDir, htd, hri, familyName, tmpDir);
155 LOG.info("Compact table=" + htd.getTableName() +
156 " region=" + hri.getRegionNameAsString() +
157 " family=" + familyName);
158 if (major) {
159 store.triggerMajorCompaction();
160 }
161 do {
162 CompactionContext compaction = store.requestCompaction(Store.PRIORITY_USER, null);
163 if (compaction == null) break;
164 List<StoreFile> storeFiles =
165 store.compact(compaction, NoLimitThroughputController.INSTANCE);
166 if (storeFiles != null && !storeFiles.isEmpty()) {
167 if (keepCompactedFiles && deleteCompacted) {
168 for (StoreFile storeFile: storeFiles) {
169 fs.delete(storeFile.getPath(), false);
170 }
171 }
172 }
173 } while (store.needsCompaction() && !compactOnce);
174
175 store.close();
176 }
177
178
179
180
181
    /**
     * Create an HStore for the named family, backed by the on-disk store dir,
     * with its temporary output redirected to the user-provided tmp dir.
     */
    private static HStore getStore(final Configuration conf, final FileSystem fs,
        final Path tableDir, final HTableDescriptor htd, final HRegionInfo hri,
        final String familyName, final Path tempDir) throws IOException {
      // Anonymous subclass overrides getTempDir() so compaction output goes
      // under tempDir instead of the region's default .tmp directory.
      HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, hri) {
        @Override
        public Path getTempDir() {
          return tempDir;
        }
      };
      // NOTE(review): HRegion is constructed with nulls for the WAL and
      // region-server services — looks intended for offline use only; confirm
      // before reusing this helper in an online context.
      HRegion region = new HRegion(regionFs, null, conf, htd, null);
      return new HStore(region, htd.getFamily(Bytes.toBytes(familyName)), conf);
    }
194 }
195
196 private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
197 Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
198 return fs.exists(regionInfo);
199 }
200
201 private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
202 return FSTableDescriptors.getTableInfoPath(fs, path) != null;
203 }
204
205 private static boolean isFamilyDir(final FileSystem fs, final Path path) throws IOException {
206 return isRegionDir(fs, path.getParent());
207 }
208
209 private static class CompactionMapper
210 extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
211 private CompactionWorker compactor = null;
212 private boolean compactOnce = false;
213 private boolean major = false;
214
215 @Override
216 public void setup(Context context) {
217 Configuration conf = context.getConfiguration();
218 compactOnce = conf.getBoolean(CONF_COMPACT_ONCE, false);
219 major = conf.getBoolean(CONF_COMPACT_MAJOR, false);
220
221 try {
222 FileSystem fs = FileSystem.get(conf);
223 this.compactor = new CompactionWorker(fs, conf);
224 } catch (IOException e) {
225 throw new RuntimeException("Could not get the input FileSystem", e);
226 }
227 }
228
229 @Override
230 public void map(LongWritable key, Text value, Context context)
231 throws InterruptedException, IOException {
232 Path path = new Path(value.toString());
233 this.compactor.compact(path, compactOnce, major);
234 }
235 }
236
237
238
239
  /**
   * Input format whose splits are the individual lines of the generated input
   * file, each carrying locality hints for the store directory it names.
   */
  private static class CompactionInputFormat extends TextInputFormat {
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
      // Splitting is done per line by getSplits() below.
      return true;
    }
245
246
247
248
249
250 @Override
251 public List<InputSplit> getSplits(JobContext job) throws IOException {
252 List<InputSplit> splits = new ArrayList<InputSplit>();
253 List<FileStatus> files = listStatus(job);
254
255 Text key = new Text();
256 for (FileStatus file: files) {
257 Path path = file.getPath();
258 FileSystem fs = path.getFileSystem(job.getConfiguration());
259 LineReader reader = new LineReader(fs.open(path));
260 long pos = 0;
261 int n;
262 try {
263 while ((n = reader.readLine(key)) > 0) {
264 String[] hosts = getStoreDirHosts(fs, path);
265 splits.add(new FileSplit(path, pos, n, hosts));
266 pos += n;
267 }
268 } finally {
269 reader.close();
270 }
271 }
272
273 return splits;
274 }
275
276
277
278
279 private static String[] getStoreDirHosts(final FileSystem fs, final Path path)
280 throws IOException {
281 FileStatus[] files = FSUtils.listStatus(fs, path);
282 if (files == null) {
283 return new String[] {};
284 }
285
286 HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
287 for (FileStatus hfileStatus: files) {
288 HDFSBlocksDistribution storeFileBlocksDistribution =
289 FSUtils.computeHDFSBlocksDistribution(fs, hfileStatus, 0, hfileStatus.getLen());
290 hdfsBlocksDistribution.add(storeFileBlocksDistribution);
291 }
292
293 List<String> hosts = hdfsBlocksDistribution.getTopHosts();
294 return hosts.toArray(new String[hosts.size()]);
295 }
296
297
298
299
300
301
302 public static void createInputFile(final FileSystem fs, final Path path,
303 final Set<Path> toCompactDirs) throws IOException {
304
305 List<Path> storeDirs = new LinkedList<Path>();
306 for (Path compactDir: toCompactDirs) {
307 if (isFamilyDir(fs, compactDir)) {
308 storeDirs.add(compactDir);
309 } else if (isRegionDir(fs, compactDir)) {
310 for (Path familyDir: FSUtils.getFamilyDirs(fs, compactDir)) {
311 storeDirs.add(familyDir);
312 }
313 } else if (isTableDir(fs, compactDir)) {
314
315 for (Path regionDir: FSUtils.getRegionDirs(fs, compactDir)) {
316 for (Path familyDir: FSUtils.getFamilyDirs(fs, regionDir)) {
317 storeDirs.add(familyDir);
318 }
319 }
320 } else {
321 throw new IOException(
322 "Specified path is not a table, region or family directory. path=" + compactDir);
323 }
324 }
325
326
327 FSDataOutputStream stream = fs.create(path);
328 LOG.info("Create input file=" + path + " with " + storeDirs.size() + " dirs to compact.");
329 try {
330 final byte[] newLine = Bytes.toBytes("\n");
331 for (Path storeDir: storeDirs) {
332 stream.write(Bytes.toBytes(storeDir.toString()));
333 stream.write(newLine);
334 }
335 } finally {
336 stream.close();
337 }
338 }
339 }
340
341
342
343
344 private int doMapReduce(final FileSystem fs, final Set<Path> toCompactDirs,
345 final boolean compactOnce, final boolean major) throws Exception {
346 Configuration conf = getConf();
347 conf.setBoolean(CONF_COMPACT_ONCE, compactOnce);
348 conf.setBoolean(CONF_COMPACT_MAJOR, major);
349
350 Job job = new Job(conf);
351 job.setJobName("CompactionTool");
352 job.setJarByClass(CompactionTool.class);
353 job.setMapperClass(CompactionMapper.class);
354 job.setInputFormatClass(CompactionInputFormat.class);
355 job.setOutputFormatClass(NullOutputFormat.class);
356 job.setMapSpeculativeExecution(false);
357 job.setNumReduceTasks(0);
358
359
360 TableMapReduceUtil.addDependencyJars(job);
361
362 Path stagingDir = JobUtil.getStagingDir(conf);
363 try {
364
365 Path inputPath = new Path(stagingDir, "compact-"+ EnvironmentEdgeManager.currentTime());
366 CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
367 CompactionInputFormat.addInputPath(job, inputPath);
368
369
370 TableMapReduceUtil.initCredentials(job);
371
372
373 return job.waitForCompletion(true) ? 0 : 1;
374 } finally {
375 fs.delete(stagingDir, true);
376 }
377 }
378
379
380
381
382 private int doClient(final FileSystem fs, final Set<Path> toCompactDirs,
383 final boolean compactOnce, final boolean major) throws IOException {
384 CompactionWorker worker = new CompactionWorker(fs, getConf());
385 for (Path path: toCompactDirs) {
386 worker.compact(path, compactOnce, major);
387 }
388 return 0;
389 }
390
391 @Override
392 public int run(String[] args) throws Exception {
393 Set<Path> toCompactDirs = new HashSet<Path>();
394 boolean compactOnce = false;
395 boolean major = false;
396 boolean mapred = false;
397
398 Configuration conf = getConf();
399 FileSystem fs = FileSystem.get(conf);
400
401 try {
402 for (int i = 0; i < args.length; ++i) {
403 String opt = args[i];
404 if (opt.equals("-compactOnce")) {
405 compactOnce = true;
406 } else if (opt.equals("-major")) {
407 major = true;
408 } else if (opt.equals("-mapred")) {
409 mapred = true;
410 } else if (!opt.startsWith("-")) {
411 Path path = new Path(opt);
412 FileStatus status = fs.getFileStatus(path);
413 if (!status.isDirectory()) {
414 printUsage("Specified path is not a directory. path=" + path);
415 return 1;
416 }
417 toCompactDirs.add(path);
418 } else {
419 printUsage();
420 }
421 }
422 } catch (Exception e) {
423 printUsage(e.getMessage());
424 return 1;
425 }
426
427 if (toCompactDirs.size() == 0) {
428 printUsage("No directories to compact specified.");
429 return 1;
430 }
431
432
433 if (mapred) {
434 return doMapReduce(fs, toCompactDirs, compactOnce, major);
435 } else {
436 return doClient(fs, toCompactDirs, compactOnce, major);
437 }
438 }
439
  // Print usage with no leading error message.
  private void printUsage() {
    printUsage(null);
  }
443
444 private void printUsage(final String message) {
445 if (message != null && message.length() > 0) {
446 System.err.println(message);
447 }
448 System.err.println("Usage: java " + this.getClass().getName() + " \\");
449 System.err.println(" [-compactOnce] [-major] [-mapred] [-D<property=value>]* files...");
450 System.err.println();
451 System.err.println("Options:");
452 System.err.println(" mapred Use MapReduce to run compaction.");
453 System.err.println(" compactOnce Execute just one compaction step. (default: while needed)");
454 System.err.println(" major Trigger major compaction.");
455 System.err.println();
456 System.err.println("Note: -D properties will be applied to the conf used. ");
457 System.err.println("For example: ");
458 System.err.println(" To preserve input files, pass -D"+CONF_COMPLETE_COMPACTION+"=false");
459 System.err.println(" To stop delete of compacted file, pass -D"+CONF_DELETE_COMPACTED+"=false");
460 System.err.println(" To set tmp dir, pass -D"+CONF_TMP_DIR+"=ALTERNATE_DIR");
461 System.err.println();
462 System.err.println("Examples:");
463 System.err.println(" To compact the full 'TestTable' using MapReduce:");
464 System.err.println(" $ bin/hbase " + this.getClass().getName() + " -mapred hdfs:///hbase/data/default/TestTable");
465 System.err.println();
466 System.err.println(" To compact column family 'x' of the table 'TestTable' region 'abc':");
467 System.err.println(" $ bin/hbase " + this.getClass().getName() + " hdfs:///hbase/data/default/TestTable/abc/x");
468 }
469
470 public static void main(String[] args) throws Exception {
471 System.exit(ToolRunner.run(HBaseConfiguration.create(), new CompactionTool(), args));
472 }
473 }