1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.mapreduce;
19
20 import java.io.IOException;
21 import java.io.UnsupportedEncodingException;
22 import java.net.InetSocketAddress;
23 import java.net.URLDecoder;
24 import java.net.URLEncoder;
25 import java.util.ArrayList;
26 import java.util.Collection;
27 import java.util.List;
28 import java.util.Map;
29 import java.util.TreeMap;
30 import java.util.TreeSet;
31 import java.util.UUID;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.FileSystem;
37 import org.apache.hadoop.fs.Path;
38 import org.apache.hadoop.hbase.Cell;
39 import org.apache.hadoop.hbase.CellUtil;
40 import org.apache.hadoop.hbase.HColumnDescriptor;
41 import org.apache.hadoop.hbase.HConstants;
42 import org.apache.hadoop.hbase.HRegionLocation;
43 import org.apache.hadoop.hbase.HTableDescriptor;
44 import org.apache.hadoop.hbase.KeyValue;
45 import org.apache.hadoop.hbase.KeyValueUtil;
46 import org.apache.hadoop.hbase.TableName;
47 import org.apache.hadoop.hbase.classification.InterfaceAudience;
48 import org.apache.hadoop.hbase.classification.InterfaceStability;
49 import org.apache.hadoop.hbase.client.Connection;
50 import org.apache.hadoop.hbase.client.ConnectionFactory;
51 import org.apache.hadoop.hbase.client.HTable;
52 import org.apache.hadoop.hbase.client.Put;
53 import org.apache.hadoop.hbase.client.RegionLocator;
54 import org.apache.hadoop.hbase.client.Table;
55 import org.apache.hadoop.hbase.fs.HFileSystem;
56 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
57 import org.apache.hadoop.hbase.io.compress.Compression;
58 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
59 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
60 import org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter;
61 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
62 import org.apache.hadoop.hbase.io.hfile.HFile;
63 import org.apache.hadoop.hbase.io.hfile.HFileContext;
64 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
65 import org.apache.hadoop.hbase.regionserver.BloomType;
66 import org.apache.hadoop.hbase.regionserver.HStore;
67 import org.apache.hadoop.hbase.regionserver.StoreFile;
68 import org.apache.hadoop.hbase.util.Bytes;
69 import org.apache.hadoop.hbase.util.FSUtils;
70 import org.apache.hadoop.io.NullWritable;
71 import org.apache.hadoop.io.SequenceFile;
72 import org.apache.hadoop.io.Text;
73 import org.apache.hadoop.mapreduce.Job;
74 import org.apache.hadoop.mapreduce.OutputCommitter;
75 import org.apache.hadoop.mapreduce.OutputFormat;
76 import org.apache.hadoop.mapreduce.RecordWriter;
77 import org.apache.hadoop.mapreduce.TaskAttemptContext;
78 import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
79 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
80 import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
81
82
83
84
85
86
87
88
89
90
91 @InterfaceAudience.Public
92 @InterfaceStability.Evolving
93 public class HFileOutputFormat2
94 extends FileOutputFormat<ImmutableBytesWritable, Cell> {
  private static final Log LOG = LogFactory.getLog(HFileOutputFormat2.class);

  // Conf keys under which the configure* methods below serialize the per-column-
  // family settings ("family1=value1&family2=value2", URL-encoded), read back by
  // the createFamily*Map helpers in the record writer.
  private static final String COMPRESSION_FAMILIES_CONF_KEY =
      "hbase.hfileoutputformat.families.compression";
  private static final String BLOOM_TYPE_FAMILIES_CONF_KEY =
      "hbase.hfileoutputformat.families.bloomtype";
  private static final String BLOCK_SIZE_FAMILIES_CONF_KEY =
      "hbase.mapreduce.hfileoutputformat.blocksize";
  private static final String DATABLOCK_ENCODING_FAMILIES_CONF_KEY =
      "hbase.mapreduce.hfileoutputformat.families.datablock.encoding";

  // Optional overrides that take precedence over the per-family settings above
  // when creating writers (see getNewWriter in createRecordWriter).
  public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
      "hbase.mapreduce.hfileoutputformat.datablock.encoding";
  public static final String COMPRESSION_OVERRIDE_CONF_KEY =
      "hbase.mapreduce.hfileoutputformat.compression";

  // When locality-sensitive bulkload is enabled (default), the record writer
  // looks up the region location of each first row and asks HDFS to place the
  // HFile blocks on that region server (favored nodes).
  public static final String LOCALITY_SENSITIVE_CONF_KEY =
      "hbase.bulkload.locality.sensitive.enabled";
  private static final boolean DEFAULT_LOCALITY_SENSITIVE = true;
  public static final String OUTPUT_TABLE_NAME_CONF_KEY =
      "hbase.mapreduce.hfileoutputformat.table.name";

  // ZooKeeper coordinates of a remote cluster, used when bulkloading into a
  // cluster other than the one the job reads from (see configureRemoteCluster).
  public static final String REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY =
      "hbase.hfileoutputformat.remote.cluster.zookeeper.quorum";
  public static final String REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY =
      "hbase.hfileoutputformat.remote.cluster.zookeeper." + HConstants.CLIENT_PORT_STR;
  public static final String REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY =
      "hbase.hfileoutputformat.remote.cluster." + HConstants.ZOOKEEPER_ZNODE_PARENT;

  // Storage policy applied to each family's output directory; a per-family
  // policy may be set via STORAGE_POLICY_PROPERTY_CF_PREFIX + familyName.
  public static final String STORAGE_POLICY_PROPERTY = HStore.BLOCK_STORAGE_POLICY_KEY;
  public static final String STORAGE_POLICY_PROPERTY_CF_PREFIX = STORAGE_POLICY_PROPERTY + ".";
137
  /**
   * Returns the record writer that stores the incoming cells as HFiles under
   * this task attempt's work path, one sub-directory per column family.
   * Delegates to {@link #createRecordWriter(TaskAttemptContext, OutputCommitter)}.
   */
  @Override
  public RecordWriter<ImmutableBytesWritable, Cell> getRecordWriter(
      final TaskAttemptContext context) throws IOException, InterruptedException {
    return createRecordWriter(context, this.getOutputCommitter(context));
  }
143
  /**
   * Creates the record writer. Each distinct column family gets its own
   * sub-directory under the committer's work path and its own HFile writer;
   * a writer is rolled (closed and replaced) once its file grows past the
   * configured maximum region size, but only at a row boundary so that a row
   * never spans two HFiles.
   *
   * @param context   task context supplying the job {@link Configuration}
   * @param committer must be a {@link FileOutputCommitter}; its work path
   *                  becomes the output directory
   * @return a writer mapping (row, cell) pairs to per-family HFiles
   * @throws IOException if the output filesystem cannot be reached
   */
  static <V extends Cell> RecordWriter<ImmutableBytesWritable, V>
      createRecordWriter(final TaskAttemptContext context, final OutputCommitter committer)
      throws IOException {

    // Get the path of the temporary output file.
    final Path outputdir = ((FileOutputCommitter) committer).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);
    // Roll an HFile once it grows past the maximum region size.
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE,
        HConstants.DEFAULT_MAX_FILE_SIZE);
    // Default compression for families without an explicit setting.
    final String defaultCompressionStr = conf.get("hfile.compression",
        Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = AbstractHFileWriter
        .compressionByName(defaultCompressionStr);
    // Optional job-wide override that beats any per-family compression.
    String compressionStr = conf.get(COMPRESSION_OVERRIDE_CONF_KEY);
    final Algorithm overriddenCompression;
    if (compressionStr != null) {
      overriddenCompression = Compression.getCompressionAlgorithmByName(compressionStr);
    } else {
      overriddenCompression = null;
    }
    // Whether the produced HFiles should be excluded from minor compactions.
    final boolean compactionExclude = conf.getBoolean(
        "hbase.mapreduce.hfileoutputformat.compaction.exclude", false);

    // Per-family settings previously serialized into the conf by configure*.
    final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
    final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
    final Map<byte[], DataBlockEncoding> datablockEncodingMap
        = createFamilyDataBlockEncodingMap(conf);
    // Optional job-wide override that beats any per-family data block encoding.
    final DataBlockEncoding overriddenEncoding;
    if (dataBlockEncodingStr != null) {
      overriddenEncoding = DataBlockEncoding.valueOf(dataBlockEncodingStr);
    } else {
      overriddenEncoding = null;
    }

    return new RecordWriter<ImmutableBytesWritable, V>() {
      // Map of column family name -> (open writer, bytes written so far).
      private final Map<byte [], WriterLength> writers =
          new TreeMap<byte [], WriterLength>(Bytes.BYTES_COMPARATOR);
      private byte [] previousRow = HConstants.EMPTY_BYTE_ARRAY;
      // Timestamp substituted for LATEST_TIMESTAMP on incoming cells.
      private final byte [] now = Bytes.toBytes(System.currentTimeMillis());
      // Set when a writer hits maxsize; the actual roll waits for a row boundary.
      private boolean rollRequested = false;

      @Override
      public void write(ImmutableBytesWritable row, V cell)
          throws IOException {
        KeyValue kv = KeyValueUtil.ensureKeyValue(cell);

        // A null row+cell pair is the caller's signal to flush and roll writers.
        if (row == null && kv == null) {
          rollWriters();
          return;
        }

        byte [] rowKey = CellUtil.cloneRow(kv);
        long length = kv.getLength();
        byte [] family = CellUtil.cloneFamily(kv);
        WriterLength wl = this.writers.get(family);

        // First cell for this family: create its directory and set storage policy.
        if (wl == null) {
          Path cfPath = new Path(outputdir, Bytes.toString(family));
          fs.mkdirs(cfPath);
          configureStoragePolicy(conf, fs, family, cfPath);
        }

        // If this cell would push the file past maxsize, request a roll.
        if (wl != null && wl.written + length >= maxsize) {
          this.rollRequested = true;
        }

        // Perform a requested roll only once the current row is finished.
        if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
          rollWriters();
        }

        // Create a new HFile writer for this family, if necessary.
        if (wl == null || wl.writer == null) {
          if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
            // Best-effort lookup of the region serving this row so HFile blocks
            // can be placed on that region server (favored nodes).
            HRegionLocation loc = null;
            String tableName = conf.get(OUTPUT_TABLE_NAME_CONF_KEY);
            if (tableName != null) {
              try (Connection connection = ConnectionFactory.createConnection(
                     createRemoteClusterConf(conf));
                   RegionLocator locator =
                     connection.getRegionLocator(TableName.valueOf(tableName))) {
                loc = locator.getRegionLocation(rowKey);
              } catch (Throwable e) {
                // Any failure here falls back to a writer without favored nodes.
                LOG.warn("there's something wrong when locating rowkey: " +
                    Bytes.toString(rowKey), e);
                loc = null;
              }
            }

            if (null == loc) {
              if (LOG.isTraceEnabled()) {
                LOG.trace("failed to get region location, so use default writer: "
                    + Bytes.toString(rowKey));
              }
              wl = getNewWriter(family, conf, null);
            } else {
              if (LOG.isDebugEnabled()) {
                LOG.debug("first rowkey: [" + Bytes.toString(rowKey) + "]");
              }
              InetSocketAddress initialIsa =
                  new InetSocketAddress(loc.getHostname(), loc.getPort());
              if (initialIsa.isUnresolved()) {
                if (LOG.isTraceEnabled()) {
                  LOG.trace("failed to resolve bind address: " + loc.getHostname() + ":"
                      + loc.getPort() + ", so use default writer");
                }
                wl = getNewWriter(family, conf, null);
              } else {
                if (LOG.isDebugEnabled()) {
                  LOG.debug("use favored nodes writer: " + initialIsa.getHostString());
                }
                wl = getNewWriter(family, conf, new InetSocketAddress[] { initialIsa });
              }
            }
          } else {
            wl = getNewWriter(family, conf, null);
          }
        }

        // Rewrite LATEST_TIMESTAMP to the task start time, then append.
        kv.updateLatestStamp(this.now);
        wl.writer.append(kv);
        wl.written += length;

        // Remember the row so the next write can detect a row boundary.
        this.previousRow = rowKey;
      }

      /** Closes every open writer and resets byte counters and the roll flag. */
      private void rollWriters() throws IOException {
        for (WriterLength wl : this.writers.values()) {
          if (wl.writer != null) {
            LOG.info("Writer=" + wl.writer.getPath() +
                ((wl.written == 0)? "": ", wrote=" + wl.written));
            close(wl.writer);
          }
          wl.writer = null;
          wl.written = 0;
        }
        this.rollRequested = false;
      }

      /**
       * Returns a copy of the job conf retargeted at the remote cluster's
       * ZooKeeper when all three remote-cluster keys are set; otherwise the
       * copy is returned unchanged (local cluster).
       */
      private Configuration createRemoteClusterConf(Configuration conf) {
        final Configuration newConf = new Configuration(conf);

        final String quorum = conf.get(REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY);
        final String clientPort = conf.get(REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY);
        final String parent = conf.get(REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY);

        if (quorum != null && clientPort != null && parent != null) {
          newConf.set(HConstants.ZOOKEEPER_QUORUM, quorum);
          newConf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.parseInt(clientPort));
          newConf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parent);
        }

        return newConf;
      }

      /**
       * Opens a new StoreFile writer for the given family, resolving
       * compression, bloom type, block size and encoding with the precedence
       * override > per-family setting > default, and registers it in
       * {@code writers}.
       *
       * @param favoredNodes when non-null, HFile blocks are preferentially
       *        placed on these nodes (locality-sensitive bulkload)
       */
      @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="BX_UNBOXING_IMMEDIATELY_REBOXED",
          justification="Not important")
      private WriterLength getNewWriter(byte[] family, Configuration conf,
          InetSocketAddress[] favoredNodes)
          throws IOException {
        WriterLength wl = new WriterLength();
        Path familydir = new Path(outputdir, Bytes.toString(family));
        Algorithm compression = overriddenCompression;
        compression = compression == null ? compressionMap.get(family) : compression;
        compression = compression == null ? defaultCompression : compression;
        BloomType bloomType = bloomTypeMap.get(family);
        bloomType = bloomType == null ? BloomType.NONE : bloomType;
        Integer blockSize = blockSizeMap.get(family);
        blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
        DataBlockEncoding encoding = overriddenEncoding;
        encoding = encoding == null ? datablockEncodingMap.get(family) : encoding;
        encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
        // Disable the block cache on the writer's conf copy; this task only
        // writes, so caching would waste heap.
        Configuration tempConf = new Configuration(conf);
        tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
        HFileContextBuilder contextBuilder = new HFileContextBuilder()
            .withCompression(compression)
            .withChecksumType(HStore.getChecksumType(conf))
            .withBytesPerCheckSum(HStore.getBytesPerChecksum(conf))
            .withBlockSize(blockSize)
            .withColumnFamily(family);
        if (HFile.getFormatVersion(conf) >= HFile.MIN_FORMAT_VERSION_WITH_TAGS) {
          contextBuilder.withIncludesTags(true);
        }

        contextBuilder.withDataBlockEncoding(encoding);
        HFileContext hFileContext = contextBuilder.build();

        if (null == favoredNodes) {
          wl.writer =
              new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                  .withOutputDir(familydir).withBloomType(bloomType)
                  .withComparator(KeyValue.COMPARATOR).withFileContext(hFileContext).build();
        } else {
          // HFileSystem layers favored-node block placement over the raw fs.
          wl.writer =
              new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), new HFileSystem(fs))
                  .withOutputDir(familydir).withBloomType(bloomType)
                  .withComparator(KeyValue.COMPARATOR).withFileContext(hFileContext)
                  .withFavoredNodes(favoredNodes).build();
        }

        this.writers.put(family, wl);
        return wl;
      }

      /** Appends bulkload metadata (time, task id, compaction flags) and closes. */
      private void close(final StoreFile.Writer w) throws IOException {
        if (w != null) {
          w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
              Bytes.toBytes(System.currentTimeMillis()));
          w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
              Bytes.toBytes(context.getTaskAttemptID().toString()));
          // Mark as major-compacted so readers treat the file as complete.
          w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY,
              Bytes.toBytes(true));
          w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
              Bytes.toBytes(compactionExclude));
          w.appendTrackedTimestampsToMetadata();
          w.close();
        }
      }

      @Override
      public void close(TaskAttemptContext c)
          throws IOException, InterruptedException {
        for (WriterLength wl: this.writers.values()) {
          close(wl.writer);
        }
      }
    };
  }
391
392
393
394
395 static void configureStoragePolicy(final Configuration conf, final FileSystem fs,
396 byte[] family, Path cfPath) {
397 if (null == conf || null == fs || null == family || null == cfPath) {
398 return;
399 }
400 String policy =
401 conf.get(STORAGE_POLICY_PROPERTY_CF_PREFIX + Bytes.toString(family),
402 conf.get(STORAGE_POLICY_PROPERTY));
403
404 FSUtils.setStoragePolicy(fs, cfPath, policy);
405 }
406
407
408
409
  /**
   * Pairs an open {@link StoreFile.Writer} with the number of bytes written to
   * it, used by the record writer to decide when to roll the HFile.
   */
  static class WriterLength {
    long written = 0;               // bytes appended to the current HFile
    StoreFile.Writer writer = null; // null until the family's first cell arrives
  }
414
415
416
417
418
419 private static List<ImmutableBytesWritable> getRegionStartKeys(RegionLocator table)
420 throws IOException {
421 byte[][] byteKeys = table.getStartKeys();
422 ArrayList<ImmutableBytesWritable> ret =
423 new ArrayList<ImmutableBytesWritable>(byteKeys.length);
424 for (byte[] byteKey : byteKeys) {
425 ret.add(new ImmutableBytesWritable(byteKey));
426 }
427 return ret;
428 }
429
430
431
432
433
434 @SuppressWarnings("deprecation")
435 private static void writePartitions(Configuration conf, Path partitionsPath,
436 List<ImmutableBytesWritable> startKeys) throws IOException {
437 LOG.info("Writing partition information to " + partitionsPath);
438 if (startKeys.isEmpty()) {
439 throw new IllegalArgumentException("No regions passed");
440 }
441
442
443
444
445
446 TreeSet<ImmutableBytesWritable> sorted =
447 new TreeSet<ImmutableBytesWritable>(startKeys);
448
449 ImmutableBytesWritable first = sorted.first();
450 if (!Bytes.equals(first.get(), HConstants.EMPTY_BYTE_ARRAY)) {
451 throw new IllegalArgumentException(
452 "First region of table should have empty start key. Instead has: "
453 + Bytes.toStringBinary(first.get()));
454 }
455 sorted.remove(first);
456
457
458 FileSystem fs = partitionsPath.getFileSystem(conf);
459 SequenceFile.Writer writer = SequenceFile.createWriter(
460 fs, conf, partitionsPath, ImmutableBytesWritable.class,
461 NullWritable.class);
462
463 try {
464 for (ImmutableBytesWritable startKey : sorted) {
465 writer.append(startKey, NullWritable.get());
466 }
467 } finally {
468 writer.close();
469 }
470 }
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
  /**
   * Configures the job for an incremental (bulk) load into the given table.
   *
   * @deprecated use {@link #configureIncrementalLoad(Job, Table, RegionLocator)}
   *             instead; this overload is kept for source compatibility with
   *             the deprecated {@link HTable} API.
   */
  @Deprecated
  public static void configureIncrementalLoad(Job job, HTable table)
      throws IOException {
    configureIncrementalLoad(job, table.getTableDescriptor(), table.getRegionLocator());
  }
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
  /**
   * Configures the job for an incremental (bulk) load into the given table:
   * output classes, sort reducer, total-order partitioner and per-family HFile
   * settings. Also copies the table's ZooKeeper coordinates into the job via
   * {@link #configureRemoteCluster(Job, Configuration)} so locality-sensitive
   * bulkloads can locate regions even on a remote cluster.
   */
  public static void configureIncrementalLoad(Job job, Table table, RegionLocator regionLocator)
      throws IOException {
    configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
    configureRemoteCluster(job, table.getConfiguration());
  }
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
  /**
   * Configures the job for an incremental (bulk) load using the given table
   * descriptor and region locator, with {@link HFileOutputFormat2} as the
   * output format.
   */
  public static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor,
      RegionLocator regionLocator) throws IOException {
    configureIncrementalLoad(job, tableDescriptor, regionLocator, HFileOutputFormat2.class);
  }
533
  /**
   * Worker for the public {@code configureIncrementalLoad} overloads: wires up
   * the output key/value classes, a sorting reducer chosen from the map output
   * value class, the total-order partitioner (one reducer per current region),
   * and serializes the per-family HFile settings into the job configuration.
   */
  static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor,
      RegionLocator regionLocator, Class<? extends OutputFormat<?, ?>> cls) throws IOException,
      UnsupportedEncodingException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setOutputFormatClass(cls);

    // Pick a sort reducer matching the map output value class so cells reach
    // HFileOutputFormat2 in HFile (sorted) order.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(TextSortReducer.class);
    } else {
      LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    // Register the HBase serializations alongside whatever is already set.
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
      // Record the table name so the record writer can look up region locations.
      LOG.info("bulkload locality sensitive enabled");
      conf.set(OUTPUT_TABLE_NAME_CONF_KEY, regionLocator.getName().getNameAsString());
    }

    // One reducer per region: partition on the current region start keys.
    LOG.info("Looking up current regions for table " + tableDescriptor.getTableName());
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " +
        "to match current region count");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys);
    // Serialize per-family settings for the record writer to read back.
    configureCompression(conf, tableDescriptor);
    configureBloomType(tableDescriptor, conf);
    configureBlockSize(tableDescriptor, conf);
    configureDataBlockEncoding(tableDescriptor, conf);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
  }
583
584 public static void configureIncrementalLoadMap(Job job, Table table) throws IOException {
585 Configuration conf = job.getConfiguration();
586
587 job.setOutputKeyClass(ImmutableBytesWritable.class);
588 job.setOutputValueClass(KeyValue.class);
589 job.setOutputFormatClass(HFileOutputFormat2.class);
590
591
592 configureCompression(conf, table.getTableDescriptor());
593 configureBloomType(table.getTableDescriptor(), conf);
594 configureBlockSize(table.getTableDescriptor(), conf);
595 HTableDescriptor tableDescriptor = table.getTableDescriptor();
596 configureDataBlockEncoding(tableDescriptor, conf);
597
598 TableMapReduceUtil.addDependencyJars(job);
599 TableMapReduceUtil.initCredentials(job);
600 LOG.info("Incremental table " + table.getName() + " output configured.");
601 }
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626 public static void configureRemoteCluster(Job job, Configuration clusterConf) {
627 Configuration conf = job.getConfiguration();
628
629 if (!conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
630 return;
631 }
632
633 final String quorum = clusterConf.get(HConstants.ZOOKEEPER_QUORUM);
634 final int clientPort = clusterConf.getInt(
635 HConstants.ZOOKEEPER_CLIENT_PORT, HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT);
636 final String parent = clusterConf.get(
637 HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
638
639 conf.set(REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY, quorum);
640 conf.setInt(REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY, clientPort);
641 conf.set(REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY, parent);
642
643 LOG.info("ZK configs for remote cluster of bulkload is configured: " +
644 quorum + ":" + clientPort + "/" + parent);
645 }
646
647
648
649
650
651
652
653
654 @InterfaceAudience.Private
655 static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration
656 conf) {
657 Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
658 COMPRESSION_FAMILIES_CONF_KEY);
659 Map<byte[], Algorithm> compressionMap = new TreeMap<byte[],
660 Algorithm>(Bytes.BYTES_COMPARATOR);
661 for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
662 Algorithm algorithm = AbstractHFileWriter.compressionByName
663 (e.getValue());
664 compressionMap.put(e.getKey(), algorithm);
665 }
666 return compressionMap;
667 }
668
669
670
671
672
673
674
675
676 @InterfaceAudience.Private
677 static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
678 Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
679 BLOOM_TYPE_FAMILIES_CONF_KEY);
680 Map<byte[], BloomType> bloomTypeMap = new TreeMap<byte[],
681 BloomType>(Bytes.BYTES_COMPARATOR);
682 for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
683 BloomType bloomType = BloomType.valueOf(e.getValue());
684 bloomTypeMap.put(e.getKey(), bloomType);
685 }
686 return bloomTypeMap;
687 }
688
689
690
691
692
693
694
695
696 @InterfaceAudience.Private
697 static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
698 Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
699 BLOCK_SIZE_FAMILIES_CONF_KEY);
700 Map<byte[], Integer> blockSizeMap = new TreeMap<byte[],
701 Integer>(Bytes.BYTES_COMPARATOR);
702 for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
703 Integer blockSize = Integer.parseInt(e.getValue());
704 blockSizeMap.put(e.getKey(), blockSize);
705 }
706 return blockSizeMap;
707 }
708
709
710
711
712
713
714
715
716
717 @InterfaceAudience.Private
718 static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
719 Configuration conf) {
720 Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
721 DATABLOCK_ENCODING_FAMILIES_CONF_KEY);
722 Map<byte[], DataBlockEncoding> encoderMap = new TreeMap<byte[],
723 DataBlockEncoding>(Bytes.BYTES_COMPARATOR);
724 for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
725 encoderMap.put(e.getKey(), DataBlockEncoding.valueOf((e.getValue())));
726 }
727 return encoderMap;
728 }
729
730
731
732
733
734
735
736
737
738 private static Map<byte[], String> createFamilyConfValueMap(
739 Configuration conf, String confName) {
740 Map<byte[], String> confValMap = new TreeMap<byte[], String>(Bytes.BYTES_COMPARATOR);
741 String confVal = conf.get(confName, "");
742 for (String familyConf : confVal.split("&")) {
743 String[] familySplit = familyConf.split("=");
744 if (familySplit.length != 2) {
745 continue;
746 }
747 try {
748 confValMap.put(Bytes.toBytes(URLDecoder.decode(familySplit[0], "UTF-8")),
749 URLDecoder.decode(familySplit[1], "UTF-8"));
750 } catch (UnsupportedEncodingException e) {
751
752 throw new AssertionError(e);
753 }
754 }
755 return confValMap;
756 }
757
758
759
760
761
762 static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
763 throws IOException {
764 Configuration conf = job.getConfiguration();
765
766 FileSystem fs = FileSystem.get(conf);
767 String hbaseTmpFsDir =
768 conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
769 HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
770 Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
771 fs.makeQualified(partitionsPath);
772 writePartitions(conf, partitionsPath, splitPoints);
773 fs.deleteOnExit(partitionsPath);
774
775
776 job.setPartitionerClass(TotalOrderPartitioner.class);
777 TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
778 }
779
780
781
782
783
784
785
786
787
788
789 @edu.umd.cs.findbugs.annotations.SuppressWarnings(
790 value="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
791 @InterfaceAudience.Private
792 static void configureCompression(Configuration conf, HTableDescriptor tableDescriptor)
793 throws UnsupportedEncodingException {
794 StringBuilder compressionConfigValue = new StringBuilder();
795 if(tableDescriptor == null){
796
797 return;
798 }
799 Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
800 int i = 0;
801 for (HColumnDescriptor familyDescriptor : families) {
802 if (i++ > 0) {
803 compressionConfigValue.append('&');
804 }
805 compressionConfigValue.append(URLEncoder.encode(
806 familyDescriptor.getNameAsString(), "UTF-8"));
807 compressionConfigValue.append('=');
808 compressionConfigValue.append(URLEncoder.encode(
809 familyDescriptor.getCompression().getName(), "UTF-8"));
810 }
811
812 conf.set(COMPRESSION_FAMILIES_CONF_KEY, compressionConfigValue.toString());
813 }
814
815
816
817
818
819
820
821
822
823
824 @InterfaceAudience.Private
825 static void configureBlockSize(HTableDescriptor tableDescriptor, Configuration conf)
826 throws UnsupportedEncodingException {
827 StringBuilder blockSizeConfigValue = new StringBuilder();
828 if (tableDescriptor == null) {
829
830 return;
831 }
832 Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
833 int i = 0;
834 for (HColumnDescriptor familyDescriptor : families) {
835 if (i++ > 0) {
836 blockSizeConfigValue.append('&');
837 }
838 blockSizeConfigValue.append(URLEncoder.encode(
839 familyDescriptor.getNameAsString(), "UTF-8"));
840 blockSizeConfigValue.append('=');
841 blockSizeConfigValue.append(URLEncoder.encode(
842 String.valueOf(familyDescriptor.getBlocksize()), "UTF-8"));
843 }
844
845 conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY, blockSizeConfigValue.toString());
846 }
847
848
849
850
851
852
853
854
855
856
857 @InterfaceAudience.Private
858 static void configureBloomType(HTableDescriptor tableDescriptor, Configuration conf)
859 throws UnsupportedEncodingException {
860 if (tableDescriptor == null) {
861
862 return;
863 }
864 StringBuilder bloomTypeConfigValue = new StringBuilder();
865 Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
866 int i = 0;
867 for (HColumnDescriptor familyDescriptor : families) {
868 if (i++ > 0) {
869 bloomTypeConfigValue.append('&');
870 }
871 bloomTypeConfigValue.append(URLEncoder.encode(
872 familyDescriptor.getNameAsString(), "UTF-8"));
873 bloomTypeConfigValue.append('=');
874 String bloomType = familyDescriptor.getBloomFilterType().toString();
875 if (bloomType == null) {
876 bloomType = HColumnDescriptor.DEFAULT_BLOOMFILTER;
877 }
878 bloomTypeConfigValue.append(URLEncoder.encode(bloomType, "UTF-8"));
879 }
880 conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY, bloomTypeConfigValue.toString());
881 }
882
883
884
885
886
887
888
889
890
891
892 @InterfaceAudience.Private
893 static void configureDataBlockEncoding(HTableDescriptor tableDescriptor,
894 Configuration conf) throws UnsupportedEncodingException {
895 if (tableDescriptor == null) {
896
897 return;
898 }
899 StringBuilder dataBlockEncodingConfigValue = new StringBuilder();
900 Collection<HColumnDescriptor> families = tableDescriptor.getFamilies();
901 int i = 0;
902 for (HColumnDescriptor familyDescriptor : families) {
903 if (i++ > 0) {
904 dataBlockEncodingConfigValue.append('&');
905 }
906 dataBlockEncodingConfigValue.append(
907 URLEncoder.encode(familyDescriptor.getNameAsString(), "UTF-8"));
908 dataBlockEncodingConfigValue.append('=');
909 DataBlockEncoding encoding = familyDescriptor.getDataBlockEncoding();
910 if (encoding == null) {
911 encoding = DataBlockEncoding.NONE;
912 }
913 dataBlockEncodingConfigValue.append(URLEncoder.encode(encoding.toString(),
914 "UTF-8"));
915 }
916 conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
917 dataBlockEncodingConfigValue.toString());
918 }
919 }