001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mapreduce; 019 020import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TASK_KEY; 021import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TIME_KEY; 022import static org.apache.hadoop.hbase.regionserver.HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY; 023import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAJOR_COMPACTION_KEY; 024 025import java.io.IOException; 026import java.io.UnsupportedEncodingException; 027import java.net.InetSocketAddress; 028import java.net.URLDecoder; 029import java.net.URLEncoder; 030import java.util.ArrayList; 031import java.util.Arrays; 032import java.util.List; 033import java.util.Map; 034import java.util.Map.Entry; 035import java.util.Set; 036import java.util.TreeMap; 037import java.util.TreeSet; 038import java.util.UUID; 039import java.util.function.Function; 040import java.util.stream.Collectors; 041import org.apache.commons.lang3.StringUtils; 042import org.apache.hadoop.conf.Configuration; 043import org.apache.hadoop.fs.FileSystem; 044import org.apache.hadoop.fs.Path; 045import org.apache.hadoop.hbase.Cell; 046import org.apache.hadoop.hbase.CellUtil; 047import org.apache.hadoop.hbase.HConstants; 048import org.apache.hadoop.hbase.HRegionLocation; 049import org.apache.hadoop.hbase.HTableDescriptor; 050import org.apache.hadoop.hbase.KeyValue; 051import org.apache.hadoop.hbase.PrivateCellUtil; 052import org.apache.hadoop.hbase.TableName; 053import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 054import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 055import org.apache.hadoop.hbase.client.Connection; 056import org.apache.hadoop.hbase.client.ConnectionFactory; 057import org.apache.hadoop.hbase.client.Put; 058import org.apache.hadoop.hbase.client.RegionLocator; 059import org.apache.hadoop.hbase.client.Table; 060import org.apache.hadoop.hbase.client.TableDescriptor; 061import org.apache.hadoop.hbase.fs.HFileSystem; 062import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 063import org.apache.hadoop.hbase.io.compress.Compression; 064import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; 065import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 066import org.apache.hadoop.hbase.io.hfile.CacheConfig; 067import org.apache.hadoop.hbase.io.hfile.HFile; 068import org.apache.hadoop.hbase.io.hfile.HFileContext; 069import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; 070import org.apache.hadoop.hbase.io.hfile.HFileWriterImpl; 071import org.apache.hadoop.hbase.regionserver.BloomType; 072import org.apache.hadoop.hbase.regionserver.HStore; 073import org.apache.hadoop.hbase.regionserver.StoreFileWriter; 074import org.apache.hadoop.hbase.regionserver.StoreUtils; 075import org.apache.hadoop.hbase.util.BloomFilterUtil; 076import org.apache.hadoop.hbase.util.Bytes; 077import org.apache.hadoop.hbase.util.CommonFSUtils; 078import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 079import org.apache.hadoop.hbase.util.MapReduceExtendedCell; 080import org.apache.hadoop.io.NullWritable; 081import org.apache.hadoop.io.SequenceFile; 082import org.apache.hadoop.io.Text; 083import org.apache.hadoop.mapreduce.Job; 084import org.apache.hadoop.mapreduce.OutputCommitter; 085import org.apache.hadoop.mapreduce.OutputFormat; 086import org.apache.hadoop.mapreduce.RecordWriter; 087import org.apache.hadoop.mapreduce.TaskAttemptContext; 088import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; 089import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 090import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner; 091import org.apache.yetus.audience.InterfaceAudience; 092import org.slf4j.Logger; 093import org.slf4j.LoggerFactory; 094 095/** 096 * Writes HFiles. Passed Cells must arrive in order. Writes current time as the sequence id for the 097 * file. Sets the major compacted attribute on created {@link HFile}s. Calling write(null,null) will 098 * forcibly roll all HFiles being written. 099 * <p> 100 * Using this class as part of a MapReduce job is best done using 101 * {@link #configureIncrementalLoad(Job, TableDescriptor, RegionLocator)}. 102 */ 103@InterfaceAudience.Public 104public class HFileOutputFormat2 extends FileOutputFormat<ImmutableBytesWritable, Cell> { 105 private static final Logger LOG = LoggerFactory.getLogger(HFileOutputFormat2.class); 106 107 static class TableInfo { 108 private TableDescriptor tableDesctiptor; 109 private RegionLocator regionLocator; 110 111 public TableInfo(TableDescriptor tableDesctiptor, RegionLocator regionLocator) { 112 this.tableDesctiptor = tableDesctiptor; 113 this.regionLocator = regionLocator; 114 } 115 116 /** 117 * The modification for the returned HTD doesn't affect the inner TD. 118 * @return A clone of inner table descriptor 119 * @deprecated since 2.0.0 and will be removed in 3.0.0. Use {@link #getTableDescriptor()} 120 * instead. 121 * @see #getTableDescriptor() 122 * @see <a href="https://issues.apache.org/jira/browse/HBASE-18241">HBASE-18241</a> 123 */ 124 @Deprecated 125 public HTableDescriptor getHTableDescriptor() { 126 return new HTableDescriptor(tableDesctiptor); 127 } 128 129 public TableDescriptor getTableDescriptor() { 130 return tableDesctiptor; 131 } 132 133 public RegionLocator getRegionLocator() { 134 return regionLocator; 135 } 136 } 137 138 protected static final byte[] tableSeparator = Bytes.toBytes(";"); 139 140 protected static byte[] combineTableNameSuffix(byte[] tableName, byte[] suffix) { 141 return Bytes.add(tableName, tableSeparator, suffix); 142 } 143 144 // The following constants are private since these are used by 145 // HFileOutputFormat2 to internally transfer data between job setup and 146 // reducer run using conf. 147 // These should not be changed by the client. 148 static final String COMPRESSION_FAMILIES_CONF_KEY = 149 "hbase.hfileoutputformat.families.compression"; 150 static final String BLOOM_TYPE_FAMILIES_CONF_KEY = "hbase.hfileoutputformat.families.bloomtype"; 151 static final String BLOOM_PARAM_FAMILIES_CONF_KEY = "hbase.hfileoutputformat.families.bloomparam"; 152 static final String BLOCK_SIZE_FAMILIES_CONF_KEY = "hbase.mapreduce.hfileoutputformat.blocksize"; 153 static final String DATABLOCK_ENCODING_FAMILIES_CONF_KEY = 154 "hbase.mapreduce.hfileoutputformat.families.datablock.encoding"; 155 156 // This constant is public since the client can modify this when setting 157 // up their conf object and thus refer to this symbol. 158 // It is present for backwards compatibility reasons. Use it only to 159 // override the auto-detection of datablock encoding and compression. 160 public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY = 161 "hbase.mapreduce.hfileoutputformat.datablock.encoding"; 162 public static final String COMPRESSION_OVERRIDE_CONF_KEY = 163 "hbase.mapreduce.hfileoutputformat.compression"; 164 165 /** 166 * Keep locality while generating HFiles for bulkload. See HBASE-12596 167 */ 168 public static final String LOCALITY_SENSITIVE_CONF_KEY = 169 "hbase.bulkload.locality.sensitive.enabled"; 170 private static final boolean DEFAULT_LOCALITY_SENSITIVE = true; 171 static final String OUTPUT_TABLE_NAME_CONF_KEY = "hbase.mapreduce.hfileoutputformat.table.name"; 172 static final String MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY = 173 "hbase.mapreduce.use.multi.table.hfileoutputformat"; 174 175 public static final String REMOTE_CLUSTER_CONF_PREFIX = "hbase.hfileoutputformat.remote.cluster."; 176 public static final String REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY = 177 REMOTE_CLUSTER_CONF_PREFIX + "zookeeper.quorum"; 178 public static final String REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY = 179 REMOTE_CLUSTER_CONF_PREFIX + "zookeeper." + HConstants.CLIENT_PORT_STR; 180 public static final String REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY = 181 REMOTE_CLUSTER_CONF_PREFIX + HConstants.ZOOKEEPER_ZNODE_PARENT; 182 183 public static final String STORAGE_POLICY_PROPERTY = HStore.BLOCK_STORAGE_POLICY_KEY; 184 public static final String STORAGE_POLICY_PROPERTY_CF_PREFIX = STORAGE_POLICY_PROPERTY + "."; 185 186 @Override 187 public RecordWriter<ImmutableBytesWritable, Cell> 188 getRecordWriter(final TaskAttemptContext context) throws IOException, InterruptedException { 189 return createRecordWriter(context, this.getOutputCommitter(context)); 190 } 191 192 protected static byte[] getTableNameSuffixedWithFamily(byte[] tableName, byte[] family) { 193 return combineTableNameSuffix(tableName, family); 194 } 195 196 static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter( 197 final TaskAttemptContext context, final OutputCommitter committer) throws IOException { 198 199 // Get the path of the temporary output file 200 final Path outputDir = ((FileOutputCommitter) committer).getWorkPath(); 201 final Configuration conf = context.getConfiguration(); 202 final boolean writeMultipleTables = 203 conf.getBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, false); 204 final String writeTableNames = conf.get(OUTPUT_TABLE_NAME_CONF_KEY); 205 if (writeTableNames == null || writeTableNames.isEmpty()) { 206 throw new IllegalArgumentException("" + OUTPUT_TABLE_NAME_CONF_KEY + " cannot be empty"); 207 } 208 final FileSystem fs = outputDir.getFileSystem(conf); 209 // These configs. are from hbase-*.xml 210 final long maxsize = 211 conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE); 212 // Invented config. Add to hbase-*.xml if other than default compression. 213 final String defaultCompressionStr = 214 conf.get("hfile.compression", Compression.Algorithm.NONE.getName()); 215 final Algorithm defaultCompression = HFileWriterImpl.compressionByName(defaultCompressionStr); 216 String compressionStr = conf.get(COMPRESSION_OVERRIDE_CONF_KEY); 217 final Algorithm overriddenCompression = 218 compressionStr != null ? Compression.getCompressionAlgorithmByName(compressionStr) : null; 219 final boolean compactionExclude = 220 conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false); 221 final Set<String> allTableNames = Arrays 222 .stream(writeTableNames.split(Bytes.toString(tableSeparator))).collect(Collectors.toSet()); 223 224 // create a map from column family to the compression algorithm 225 final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf); 226 final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf); 227 final Map<byte[], String> bloomParamMap = createFamilyBloomParamMap(conf); 228 final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf); 229 230 String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY); 231 final Map<byte[], DataBlockEncoding> datablockEncodingMap = 232 createFamilyDataBlockEncodingMap(conf); 233 final DataBlockEncoding overriddenEncoding = 234 dataBlockEncodingStr != null ? DataBlockEncoding.valueOf(dataBlockEncodingStr) : null; 235 236 return new RecordWriter<ImmutableBytesWritable, V>() { 237 // Map of families to writers and how much has been output on the writer. 238 private final Map<byte[], WriterLength> writers = new TreeMap<>(Bytes.BYTES_COMPARATOR); 239 private final Map<byte[], byte[]> previousRows = new TreeMap<>(Bytes.BYTES_COMPARATOR); 240 private final long now = EnvironmentEdgeManager.currentTime(); 241 242 @Override 243 public void write(ImmutableBytesWritable row, V cell) throws IOException { 244 Cell kv = cell; 245 // null input == user explicitly wants to flush 246 if (row == null && kv == null) { 247 rollWriters(null); 248 return; 249 } 250 251 byte[] rowKey = CellUtil.cloneRow(kv); 252 int length = (PrivateCellUtil.estimatedSerializedSizeOf(kv)) - Bytes.SIZEOF_INT; 253 byte[] family = CellUtil.cloneFamily(kv); 254 byte[] tableNameBytes = null; 255 if (writeMultipleTables) { 256 tableNameBytes = MultiTableHFileOutputFormat.getTableName(row.get()); 257 tableNameBytes = TableName.valueOf(tableNameBytes).toBytes(); 258 if (!allTableNames.contains(Bytes.toString(tableNameBytes))) { 259 throw new IllegalArgumentException( 260 "TableName " + Bytes.toString(tableNameBytes) + " not expected"); 261 } 262 } else { 263 tableNameBytes = Bytes.toBytes(writeTableNames); 264 } 265 Path tableRelPath = getTableRelativePath(tableNameBytes); 266 byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableNameBytes, family); 267 WriterLength wl = this.writers.get(tableAndFamily); 268 269 // If this is a new column family, verify that the directory exists 270 if (wl == null) { 271 Path writerPath = null; 272 if (writeMultipleTables) { 273 writerPath = new Path(outputDir, new Path(tableRelPath, Bytes.toString(family))); 274 } else { 275 writerPath = new Path(outputDir, Bytes.toString(family)); 276 } 277 fs.mkdirs(writerPath); 278 configureStoragePolicy(conf, fs, tableAndFamily, writerPath); 279 } 280 281 // This can only happen once a row is finished though 282 if ( 283 wl != null && wl.written + length >= maxsize 284 && Bytes.compareTo(this.previousRows.get(family), rowKey) != 0 285 ) { 286 rollWriters(wl); 287 } 288 289 // create a new WAL writer, if necessary 290 if (wl == null || wl.writer == null) { 291 if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) { 292 HRegionLocation loc = null; 293 294 String tableName = Bytes.toString(tableNameBytes); 295 if (tableName != null) { 296 try ( 297 Connection connection = 298 ConnectionFactory.createConnection(createRemoteClusterConf(conf)); 299 RegionLocator locator = connection.getRegionLocator(TableName.valueOf(tableName))) { 300 loc = locator.getRegionLocation(rowKey); 301 } catch (Throwable e) { 302 LOG.warn("Something wrong locating rowkey {} in {}", Bytes.toString(rowKey), 303 tableName, e); 304 loc = null; 305 } 306 } 307 308 if (null == loc) { 309 LOG.trace("Failed get of location, use default writer {}", Bytes.toString(rowKey)); 310 wl = getNewWriter(tableNameBytes, family, conf, null); 311 } else { 312 LOG.debug("First rowkey: [{}]", Bytes.toString(rowKey)); 313 InetSocketAddress initialIsa = 314 new InetSocketAddress(loc.getHostname(), loc.getPort()); 315 if (initialIsa.isUnresolved()) { 316 LOG.trace("Failed resolve address {}, use default writer", loc.getHostnamePort()); 317 wl = getNewWriter(tableNameBytes, family, conf, null); 318 } else { 319 LOG.debug("Use favored nodes writer: {}", initialIsa.getHostString()); 320 wl = getNewWriter(tableNameBytes, family, conf, 321 new InetSocketAddress[] { initialIsa }); 322 } 323 } 324 } else { 325 wl = getNewWriter(tableNameBytes, family, conf, null); 326 } 327 } 328 329 // we now have the proper WAL writer. full steam ahead 330 PrivateCellUtil.updateLatestStamp(cell, this.now); 331 wl.writer.append(kv); 332 wl.written += length; 333 334 // Copy the row so we know when a row transition. 335 this.previousRows.put(family, rowKey); 336 } 337 338 private Path getTableRelativePath(byte[] tableNameBytes) { 339 String tableName = Bytes.toString(tableNameBytes); 340 String[] tableNameParts = tableName.split(":"); 341 Path tableRelPath = new Path(tableName.split(":")[0]); 342 if (tableNameParts.length > 1) { 343 tableRelPath = new Path(tableRelPath, tableName.split(":")[1]); 344 } 345 return tableRelPath; 346 } 347 348 private void rollWriters(WriterLength writerLength) throws IOException { 349 if (writerLength != null) { 350 closeWriter(writerLength); 351 } else { 352 for (WriterLength wl : this.writers.values()) { 353 closeWriter(wl); 354 } 355 } 356 } 357 358 private void closeWriter(WriterLength wl) throws IOException { 359 if (wl.writer != null) { 360 LOG.info( 361 "Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written)); 362 close(wl.writer); 363 wl.writer = null; 364 } 365 wl.written = 0; 366 } 367 368 private Configuration createRemoteClusterConf(Configuration conf) { 369 final Configuration newConf = new Configuration(conf); 370 371 final String quorum = conf.get(REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY); 372 final String clientPort = conf.get(REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY); 373 final String parent = conf.get(REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY); 374 375 if (quorum != null && clientPort != null && parent != null) { 376 newConf.set(HConstants.ZOOKEEPER_QUORUM, quorum); 377 newConf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.parseInt(clientPort)); 378 newConf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parent); 379 } 380 381 for (Entry<String, String> entry : conf) { 382 String key = entry.getKey(); 383 if ( 384 REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY.equals(key) 385 || REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY.equals(key) 386 || REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY.equals(key) 387 ) { 388 // Handled them above 389 continue; 390 } 391 392 if (entry.getKey().startsWith(REMOTE_CLUSTER_CONF_PREFIX)) { 393 String originalKey = entry.getKey().substring(REMOTE_CLUSTER_CONF_PREFIX.length()); 394 if (!originalKey.isEmpty()) { 395 newConf.set(originalKey, entry.getValue()); 396 } 397 } 398 } 399 400 return newConf; 401 } 402 403 /* 404 * Create a new StoreFile.Writer. 405 * @return A WriterLength, containing a new StoreFile.Writer. 406 */ 407 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED", 408 justification = "Not important") 409 private WriterLength getNewWriter(byte[] tableName, byte[] family, Configuration conf, 410 InetSocketAddress[] favoredNodes) throws IOException { 411 byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableName, family); 412 Path familydir = new Path(outputDir, Bytes.toString(family)); 413 if (writeMultipleTables) { 414 familydir = 415 new Path(outputDir, new Path(getTableRelativePath(tableName), Bytes.toString(family))); 416 } 417 WriterLength wl = new WriterLength(); 418 Algorithm compression = overriddenCompression; 419 compression = compression == null ? compressionMap.get(tableAndFamily) : compression; 420 compression = compression == null ? defaultCompression : compression; 421 BloomType bloomType = bloomTypeMap.get(tableAndFamily); 422 bloomType = bloomType == null ? BloomType.NONE : bloomType; 423 String bloomParam = bloomParamMap.get(tableAndFamily); 424 if (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH) { 425 conf.set(BloomFilterUtil.PREFIX_LENGTH_KEY, bloomParam); 426 } 427 Integer blockSize = blockSizeMap.get(tableAndFamily); 428 blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize; 429 DataBlockEncoding encoding = overriddenEncoding; 430 encoding = encoding == null ? datablockEncodingMap.get(tableAndFamily) : encoding; 431 encoding = encoding == null ? DataBlockEncoding.NONE : encoding; 432 HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression) 433 .withDataBlockEncoding(encoding).withChecksumType(StoreUtils.getChecksumType(conf)) 434 .withBytesPerCheckSum(StoreUtils.getBytesPerChecksum(conf)).withBlockSize(blockSize) 435 .withColumnFamily(family).withTableName(tableName) 436 .withCreateTime(EnvironmentEdgeManager.currentTime()); 437 438 if (HFile.getFormatVersion(conf) >= HFile.MIN_FORMAT_VERSION_WITH_TAGS) { 439 contextBuilder.withIncludesTags(true); 440 } 441 442 HFileContext hFileContext = contextBuilder.build(); 443 if (null == favoredNodes) { 444 wl.writer = 445 new StoreFileWriter.Builder(conf, CacheConfig.DISABLED, fs).withOutputDir(familydir) 446 .withBloomType(bloomType).withFileContext(hFileContext).build(); 447 } else { 448 wl.writer = new StoreFileWriter.Builder(conf, CacheConfig.DISABLED, new HFileSystem(fs)) 449 .withOutputDir(familydir).withBloomType(bloomType).withFileContext(hFileContext) 450 .withFavoredNodes(favoredNodes).build(); 451 } 452 453 this.writers.put(tableAndFamily, wl); 454 return wl; 455 } 456 457 private void close(final StoreFileWriter w) throws IOException { 458 if (w != null) { 459 w.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis())); 460 w.appendFileInfo(BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString())); 461 w.appendFileInfo(MAJOR_COMPACTION_KEY, Bytes.toBytes(true)); 462 w.appendFileInfo(EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude)); 463 w.appendTrackedTimestampsToMetadata(); 464 w.close(); 465 } 466 } 467 468 @Override 469 public void close(TaskAttemptContext c) throws IOException, InterruptedException { 470 for (WriterLength wl : this.writers.values()) { 471 close(wl.writer); 472 } 473 } 474 }; 475 } 476 477 /** 478 * Configure block storage policy for CF after the directory is created. 479 */ 480 static void configureStoragePolicy(final Configuration conf, final FileSystem fs, 481 byte[] tableAndFamily, Path cfPath) { 482 if (null == conf || null == fs || null == tableAndFamily || null == cfPath) { 483 return; 484 } 485 486 String policy = conf.get(STORAGE_POLICY_PROPERTY_CF_PREFIX + Bytes.toString(tableAndFamily), 487 conf.get(STORAGE_POLICY_PROPERTY)); 488 CommonFSUtils.setStoragePolicy(fs, cfPath, policy); 489 } 490 491 /* 492 * Data structure to hold a Writer and amount of data written on it. 493 */ 494 static class WriterLength { 495 long written = 0; 496 StoreFileWriter writer = null; 497 } 498 499 /** 500 * Return the start keys of all of the regions in this table, as a list of ImmutableBytesWritable. 501 */ 502 private static List<ImmutableBytesWritable> getRegionStartKeys(List<RegionLocator> regionLocators, 503 boolean writeMultipleTables) throws IOException { 504 505 ArrayList<ImmutableBytesWritable> ret = new ArrayList<>(); 506 for (RegionLocator regionLocator : regionLocators) { 507 TableName tableName = regionLocator.getName(); 508 LOG.info("Looking up current regions for table " + tableName); 509 byte[][] byteKeys = regionLocator.getStartKeys(); 510 for (byte[] byteKey : byteKeys) { 511 byte[] fullKey = byteKey; // HFileOutputFormat2 use case 512 if (writeMultipleTables) { 513 // MultiTableHFileOutputFormat use case 514 fullKey = combineTableNameSuffix(tableName.getName(), byteKey); 515 } 516 if (LOG.isDebugEnabled()) { 517 LOG.debug("SplitPoint startkey for " + tableName + ": " + Bytes.toStringBinary(fullKey)); 518 } 519 ret.add(new ImmutableBytesWritable(fullKey)); 520 } 521 } 522 return ret; 523 } 524 525 /** 526 * Write out a {@link SequenceFile} that can be read by {@link TotalOrderPartitioner} that 527 * contains the split points in startKeys. 528 */ 529 @SuppressWarnings("deprecation") 530 private static void writePartitions(Configuration conf, Path partitionsPath, 531 List<ImmutableBytesWritable> startKeys, boolean writeMultipleTables) throws IOException { 532 LOG.info("Writing partition information to " + partitionsPath); 533 if (startKeys.isEmpty()) { 534 throw new IllegalArgumentException("No regions passed"); 535 } 536 537 // We're generating a list of split points, and we don't ever 538 // have keys < the first region (which has an empty start key) 539 // so we need to remove it. Otherwise we would end up with an 540 // empty reducer with index 0 541 TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys); 542 ImmutableBytesWritable first = sorted.first(); 543 if (writeMultipleTables) { 544 first = 545 new ImmutableBytesWritable(MultiTableHFileOutputFormat.getSuffix(sorted.first().get())); 546 } 547 if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) { 548 throw new IllegalArgumentException( 549 "First region of table should have empty start key. Instead has: " 550 + Bytes.toStringBinary(first.get())); 551 } 552 sorted.remove(sorted.first()); 553 554 // Write the actual file 555 FileSystem fs = partitionsPath.getFileSystem(conf); 556 SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath, 557 ImmutableBytesWritable.class, NullWritable.class); 558 559 try { 560 for (ImmutableBytesWritable startKey : sorted) { 561 writer.append(startKey, NullWritable.get()); 562 } 563 } finally { 564 writer.close(); 565 } 566 } 567 568 /** 569 * Configure a MapReduce Job to perform an incremental load into the given table. This 570 * <ul> 571 * <li>Inspects the table to configure a total order partitioner</li> 572 * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li> 573 * <li>Sets the number of reduce tasks to match the current number of regions</li> 574 * <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li> 575 * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or 576 * PutSortReducer)</li> 577 * <li>Sets the HBase cluster key to load region locations for locality-sensitive</li> 578 * </ul> 579 * The user should be sure to set the map output value class to either KeyValue or Put before 580 * running this function. 581 */ 582 public static void configureIncrementalLoad(Job job, Table table, RegionLocator regionLocator) 583 throws IOException { 584 configureIncrementalLoad(job, table.getDescriptor(), regionLocator); 585 configureRemoteCluster(job, table.getConfiguration()); 586 } 587 588 /** 589 * Configure a MapReduce Job to perform an incremental load into the given table. This 590 * <ul> 591 * <li>Inspects the table to configure a total order partitioner</li> 592 * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li> 593 * <li>Sets the number of reduce tasks to match the current number of regions</li> 594 * <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li> 595 * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or 596 * PutSortReducer)</li> 597 * </ul> 598 * The user should be sure to set the map output value class to either KeyValue or Put before 599 * running this function. 600 */ 601 public static void configureIncrementalLoad(Job job, TableDescriptor tableDescriptor, 602 RegionLocator regionLocator) throws IOException { 603 ArrayList<TableInfo> singleTableInfo = new ArrayList<>(); 604 singleTableInfo.add(new TableInfo(tableDescriptor, regionLocator)); 605 configureIncrementalLoad(job, singleTableInfo, HFileOutputFormat2.class); 606 } 607 608 static void configureIncrementalLoad(Job job, List<TableInfo> multiTableInfo, 609 Class<? extends OutputFormat<?, ?>> cls) throws IOException { 610 Configuration conf = job.getConfiguration(); 611 job.setOutputKeyClass(ImmutableBytesWritable.class); 612 job.setOutputValueClass(MapReduceExtendedCell.class); 613 job.setOutputFormatClass(cls); 614 615 if (multiTableInfo.stream().distinct().count() != multiTableInfo.size()) { 616 throw new IllegalArgumentException("Duplicate entries found in TableInfo argument"); 617 } 618 boolean writeMultipleTables = false; 619 if (MultiTableHFileOutputFormat.class.equals(cls)) { 620 writeMultipleTables = true; 621 conf.setBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, true); 622 } 623 // Based on the configured map output class, set the correct reducer to properly 624 // sort the incoming values. 625 // TODO it would be nice to pick one or the other of these formats. 626 if ( 627 KeyValue.class.equals(job.getMapOutputValueClass()) 628 || MapReduceExtendedCell.class.equals(job.getMapOutputValueClass()) 629 ) { 630 job.setReducerClass(CellSortReducer.class); 631 } else if (Put.class.equals(job.getMapOutputValueClass())) { 632 job.setReducerClass(PutSortReducer.class); 633 } else if (Text.class.equals(job.getMapOutputValueClass())) { 634 job.setReducerClass(TextSortReducer.class); 635 } else { 636 LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass()); 637 } 638 639 conf.setStrings("io.serializations", conf.get("io.serializations"), 640 MutationSerialization.class.getName(), ResultSerialization.class.getName(), 641 CellSerialization.class.getName()); 642 643 if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) { 644 LOG.info("bulkload locality sensitive enabled"); 645 } 646 647 /* Now get the region start keys for every table required */ 648 List<String> allTableNames = new ArrayList<>(multiTableInfo.size()); 649 List<RegionLocator> regionLocators = new ArrayList<>(multiTableInfo.size()); 650 List<TableDescriptor> tableDescriptors = new ArrayList<>(multiTableInfo.size()); 651 652 for (TableInfo tableInfo : multiTableInfo) { 653 regionLocators.add(tableInfo.getRegionLocator()); 654 allTableNames.add(tableInfo.getRegionLocator().getName().getNameAsString()); 655 tableDescriptors.add(tableInfo.getTableDescriptor()); 656 } 657 // Record tablenames for creating writer by favored nodes, and decoding compression, 658 // block size and other attributes of columnfamily per table 659 conf.set(OUTPUT_TABLE_NAME_CONF_KEY, 660 StringUtils.join(allTableNames, Bytes.toString(tableSeparator))); 661 List<ImmutableBytesWritable> startKeys = 662 getRegionStartKeys(regionLocators, writeMultipleTables); 663 // Use table's region boundaries for TOP split points. 664 LOG.info("Configuring " + startKeys.size() + " reduce partitions " 665 + "to match current region count for all tables"); 666 job.setNumReduceTasks(startKeys.size()); 667 668 configurePartitioner(job, startKeys, writeMultipleTables); 669 // Set compression algorithms based on column families 670 671 conf.set(COMPRESSION_FAMILIES_CONF_KEY, 672 serializeColumnFamilyAttribute(compressionDetails, tableDescriptors)); 673 conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY, 674 serializeColumnFamilyAttribute(blockSizeDetails, tableDescriptors)); 675 conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY, 676 serializeColumnFamilyAttribute(bloomTypeDetails, tableDescriptors)); 677 conf.set(BLOOM_PARAM_FAMILIES_CONF_KEY, 678 serializeColumnFamilyAttribute(bloomParamDetails, tableDescriptors)); 679 conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY, 680 serializeColumnFamilyAttribute(dataBlockEncodingDetails, tableDescriptors)); 681 682 TableMapReduceUtil.addDependencyJars(job); 683 TableMapReduceUtil.initCredentials(job); 684 LOG.info("Incremental output configured for tables: " + StringUtils.join(allTableNames, ",")); 685 } 686 687 public static void configureIncrementalLoadMap(Job job, TableDescriptor tableDescriptor) 688 throws IOException { 689 Configuration conf = job.getConfiguration(); 690 691 job.setOutputKeyClass(ImmutableBytesWritable.class); 692 job.setOutputValueClass(MapReduceExtendedCell.class); 693 job.setOutputFormatClass(HFileOutputFormat2.class); 694 695 ArrayList<TableDescriptor> singleTableDescriptor = new ArrayList<>(1); 696 singleTableDescriptor.add(tableDescriptor); 697 698 conf.set(OUTPUT_TABLE_NAME_CONF_KEY, tableDescriptor.getTableName().getNameAsString()); 699 // Set compression algorithms based on column families 700 conf.set(COMPRESSION_FAMILIES_CONF_KEY, 701 serializeColumnFamilyAttribute(compressionDetails, singleTableDescriptor)); 702 conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY, 703 serializeColumnFamilyAttribute(blockSizeDetails, singleTableDescriptor)); 704 conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY, 705 serializeColumnFamilyAttribute(bloomTypeDetails, singleTableDescriptor)); 706 conf.set(BLOOM_PARAM_FAMILIES_CONF_KEY, 707 serializeColumnFamilyAttribute(bloomParamDetails, singleTableDescriptor)); 708 conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY, 709 serializeColumnFamilyAttribute(dataBlockEncodingDetails, singleTableDescriptor)); 710 711 TableMapReduceUtil.addDependencyJars(job); 712 TableMapReduceUtil.initCredentials(job); 713 LOG.info("Incremental table " + tableDescriptor.getTableName() + " output configured."); 714 } 715 716 /** 717 * Configure HBase cluster key for remote cluster to load region location for locality-sensitive 718 * if it's enabled. It's not necessary to call this method explicitly when the cluster key for 719 * HBase cluster to be used to load region location is configured in the job configuration. Call 720 * this method when another HBase cluster key is configured in the job configuration. For example, 721 * you should call when you load data from HBase cluster A using {@link TableInputFormat} and 722 * generate hfiles for HBase cluster B. Otherwise, HFileOutputFormat2 fetch location from cluster 723 * A and locality-sensitive won't working correctly. 724 * {@link #configureIncrementalLoad(Job, Table, RegionLocator)} calls this method using 725 * {@link Table#getConfiguration} as clusterConf. See HBASE-25608. 726 * @param job which has configuration to be updated 727 * @param clusterConf which contains cluster key of the HBase cluster to be locality-sensitive 728 * @see #configureIncrementalLoad(Job, Table, RegionLocator) 729 * @see #LOCALITY_SENSITIVE_CONF_KEY 730 * @see #REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY 731 * @see #REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY 732 * @see #REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY 733 */ 734 public static void configureRemoteCluster(Job job, Configuration clusterConf) { 735 Configuration conf = job.getConfiguration(); 736 737 if (!conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) { 738 return; 739 } 740 741 final String quorum = clusterConf.get(HConstants.ZOOKEEPER_QUORUM); 742 final int clientPort = clusterConf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, 743 HConstants.DEFAULT_ZOOKEEPER_CLIENT_PORT); 744 final String parent = 745 clusterConf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT); 746 747 conf.set(REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY, quorum); 748 conf.setInt(REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY, clientPort); 749 conf.set(REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY, parent); 750 751 LOG.info("ZK configs for remote cluster of bulkload is configured: " + quorum + ":" + clientPort 752 + "/" + parent); 753 } 754 755 /** 756 * Runs inside the task to deserialize column family to compression algorithm map from the 757 * configuration. 758 * @param conf to read the serialized values from 759 * @return a map from column family to the configured compression algorithm 760 */ 761 @InterfaceAudience.Private 762 static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration conf) { 763 Map<byte[], String> stringMap = createFamilyConfValueMap(conf, COMPRESSION_FAMILIES_CONF_KEY); 764 Map<byte[], Algorithm> compressionMap = new TreeMap<>(Bytes.BYTES_COMPARATOR); 765 for (Map.Entry<byte[], String> e : stringMap.entrySet()) { 766 Algorithm algorithm = HFileWriterImpl.compressionByName(e.getValue()); 767 compressionMap.put(e.getKey(), algorithm); 768 } 769 return compressionMap; 770 } 771 772 /** 773 * Runs inside the task to deserialize column family to bloom filter type map from the 774 * configuration. 775 * @param conf to read the serialized values from 776 * @return a map from column family to the the configured bloom filter type 777 */ 778 @InterfaceAudience.Private 779 static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) { 780 Map<byte[], String> stringMap = createFamilyConfValueMap(conf, BLOOM_TYPE_FAMILIES_CONF_KEY); 781 Map<byte[], BloomType> bloomTypeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR); 782 for (Map.Entry<byte[], String> e : stringMap.entrySet()) { 783 BloomType bloomType = BloomType.valueOf(e.getValue()); 784 bloomTypeMap.put(e.getKey(), bloomType); 785 } 786 return bloomTypeMap; 787 } 788 789 /** 790 * Runs inside the task to deserialize column family to bloom filter param map from the 791 * configuration. 792 * @param conf to read the serialized values from 793 * @return a map from column family to the the configured bloom filter param 794 */ 795 @InterfaceAudience.Private 796 static Map<byte[], String> createFamilyBloomParamMap(Configuration conf) { 797 return createFamilyConfValueMap(conf, BLOOM_PARAM_FAMILIES_CONF_KEY); 798 } 799 800 /** 801 * Runs inside the task to deserialize column family to block size map from the configuration. 802 * @param conf to read the serialized values from 803 * @return a map from column family to the configured block size 804 */ 805 @InterfaceAudience.Private 806 static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) { 807 Map<byte[], String> stringMap = createFamilyConfValueMap(conf, BLOCK_SIZE_FAMILIES_CONF_KEY); 808 Map<byte[], Integer> blockSizeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR); 809 for (Map.Entry<byte[], String> e : stringMap.entrySet()) { 810 Integer blockSize = Integer.parseInt(e.getValue()); 811 blockSizeMap.put(e.getKey(), blockSize); 812 } 813 return blockSizeMap; 814 } 815 816 /** 817 * Runs inside the task to deserialize column family to data block encoding type map from the 818 * configuration. 819 * @param conf to read the serialized values from 820 * @return a map from column family to HFileDataBlockEncoder for the configured data block type 821 * for the family 822 */ 823 @InterfaceAudience.Private 824 static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(Configuration conf) { 825 Map<byte[], String> stringMap = 826 createFamilyConfValueMap(conf, DATABLOCK_ENCODING_FAMILIES_CONF_KEY); 827 Map<byte[], DataBlockEncoding> encoderMap = new TreeMap<>(Bytes.BYTES_COMPARATOR); 828 for (Map.Entry<byte[], String> e : stringMap.entrySet()) { 829 encoderMap.put(e.getKey(), DataBlockEncoding.valueOf((e.getValue()))); 830 } 831 return encoderMap; 832 } 833 834 /** 835 * Run inside the task to deserialize column family to given conf value map. 836 * @param conf to read the serialized values from 837 * @param confName conf key to read from the configuration 838 * @return a map of column family to the given configuration value 839 */ 840 private static Map<byte[], String> createFamilyConfValueMap(Configuration conf, String confName) { 841 Map<byte[], String> confValMap = new TreeMap<>(Bytes.BYTES_COMPARATOR); 842 String confVal = conf.get(confName, ""); 843 for (String familyConf : confVal.split("&")) { 844 String[] familySplit = familyConf.split("="); 845 if (familySplit.length != 2) { 846 continue; 847 } 848 try { 849 confValMap.put(Bytes.toBytes(URLDecoder.decode(familySplit[0], "UTF-8")), 850 URLDecoder.decode(familySplit[1], "UTF-8")); 851 } catch (UnsupportedEncodingException e) { 852 // will not happen with UTF-8 encoding 853 throw new AssertionError(e); 854 } 855 } 856 return confValMap; 857 } 858 859 /** 860 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against 861 * <code>splitPoints</code>. Cleans up the partitions file after job exists. 862 */ 863 static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints, 864 boolean writeMultipleTables) throws IOException { 865 Configuration conf = job.getConfiguration(); 866 // create the partitions file 867 FileSystem fs = FileSystem.get(conf); 868 String hbaseTmpFsDir = 869 conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY, HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY); 870 Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID()); 871 fs.makeQualified(partitionsPath); 872 writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables); 873 fs.deleteOnExit(partitionsPath); 874 875 // configure job to use it 876 job.setPartitionerClass(TotalOrderPartitioner.class); 877 TotalOrderPartitioner.setPartitionFile(conf, partitionsPath); 878 } 879 880 @edu.umd.cs.findbugs.annotations.SuppressWarnings( 881 value = "RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE") 882 @InterfaceAudience.Private 883 static String serializeColumnFamilyAttribute(Function<ColumnFamilyDescriptor, String> fn, 884 List<TableDescriptor> allTables) throws UnsupportedEncodingException { 885 StringBuilder attributeValue = new StringBuilder(); 886 int i = 0; 887 for (TableDescriptor tableDescriptor : allTables) { 888 if (tableDescriptor == null) { 889 // could happen with mock table instance 890 // CODEREVIEW: Can I set an empty string in conf if mock table instance? 891 return ""; 892 } 893 for (ColumnFamilyDescriptor familyDescriptor : tableDescriptor.getColumnFamilies()) { 894 if (i++ > 0) { 895 attributeValue.append('&'); 896 } 897 attributeValue.append(URLEncoder 898 .encode(Bytes.toString(combineTableNameSuffix(tableDescriptor.getTableName().getName(), 899 familyDescriptor.getName())), "UTF-8")); 900 attributeValue.append('='); 901 attributeValue.append(URLEncoder.encode(fn.apply(familyDescriptor), "UTF-8")); 902 } 903 } 904 // Get rid of the last ampersand 905 return attributeValue.toString(); 906 } 907 908 /** 909 * Serialize column family to compression algorithm map to configuration. Invoked while 910 * configuring the MR job for incremental load. 911 */ 912 @InterfaceAudience.Private 913 static Function<ColumnFamilyDescriptor, String> compressionDetails = 914 familyDescriptor -> familyDescriptor.getCompressionType().getName(); 915 916 /** 917 * Serialize column family to block size map to configuration. Invoked while configuring the MR 918 * job for incremental load. 919 */ 920 @InterfaceAudience.Private 921 static Function<ColumnFamilyDescriptor, String> blockSizeDetails = 922 familyDescriptor -> String.valueOf(familyDescriptor.getBlocksize()); 923 924 /** 925 * Serialize column family to bloom type map to configuration. Invoked while configuring the MR 926 * job for incremental load. 927 */ 928 @InterfaceAudience.Private 929 static Function<ColumnFamilyDescriptor, String> bloomTypeDetails = familyDescriptor -> { 930 String bloomType = familyDescriptor.getBloomFilterType().toString(); 931 if (bloomType == null) { 932 bloomType = ColumnFamilyDescriptorBuilder.DEFAULT_BLOOMFILTER.name(); 933 } 934 return bloomType; 935 }; 936 937 /** 938 * Serialize column family to bloom param map to configuration. Invoked while configuring the MR 939 * job for incremental load. 940 */ 941 @InterfaceAudience.Private 942 static Function<ColumnFamilyDescriptor, String> bloomParamDetails = familyDescriptor -> { 943 BloomType bloomType = familyDescriptor.getBloomFilterType(); 944 String bloomParam = ""; 945 if (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH) { 946 bloomParam = familyDescriptor.getConfigurationValue(BloomFilterUtil.PREFIX_LENGTH_KEY); 947 } 948 return bloomParam; 949 }; 950 951 /** 952 * Serialize column family to data block encoding map to configuration. Invoked while configuring 953 * the MR job for incremental load. 954 */ 955 @InterfaceAudience.Private 956 static Function<ColumnFamilyDescriptor, String> dataBlockEncodingDetails = familyDescriptor -> { 957 DataBlockEncoding encoding = familyDescriptor.getDataBlockEncoding(); 958 if (encoding == null) { 959 encoding = DataBlockEncoding.NONE; 960 } 961 return encoding.toString(); 962 }; 963 964}