001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mapreduce; 019 020import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TASK_KEY; 021import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TIME_KEY; 022import static org.apache.hadoop.hbase.regionserver.HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY; 023import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAJOR_COMPACTION_KEY; 024 025import java.io.IOException; 026import java.io.UnsupportedEncodingException; 027import java.net.InetSocketAddress; 028import java.net.URLDecoder; 029import java.net.URLEncoder; 030import java.util.ArrayList; 031import java.util.Arrays; 032import java.util.Collections; 033import java.util.List; 034import java.util.Map; 035import java.util.Map.Entry; 036import java.util.Set; 037import java.util.TreeMap; 038import java.util.TreeSet; 039import java.util.UUID; 040import java.util.function.Function; 041import java.util.stream.Collectors; 042import org.apache.commons.lang3.StringUtils; 043import org.apache.hadoop.conf.Configuration; 044import org.apache.hadoop.fs.FileSystem; 045import org.apache.hadoop.fs.Path; 046import 
org.apache.hadoop.hbase.Cell; 047import org.apache.hadoop.hbase.CellUtil; 048import org.apache.hadoop.hbase.HConstants; 049import org.apache.hadoop.hbase.HRegionLocation; 050import org.apache.hadoop.hbase.HTableDescriptor; 051import org.apache.hadoop.hbase.KeyValue; 052import org.apache.hadoop.hbase.PrivateCellUtil; 053import org.apache.hadoop.hbase.TableName; 054import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 055import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 056import org.apache.hadoop.hbase.client.Connection; 057import org.apache.hadoop.hbase.client.ConnectionFactory; 058import org.apache.hadoop.hbase.client.Put; 059import org.apache.hadoop.hbase.client.RegionLocator; 060import org.apache.hadoop.hbase.client.Table; 061import org.apache.hadoop.hbase.client.TableDescriptor; 062import org.apache.hadoop.hbase.fs.HFileSystem; 063import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 064import org.apache.hadoop.hbase.io.compress.Compression; 065import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; 066import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 067import org.apache.hadoop.hbase.io.hfile.CacheConfig; 068import org.apache.hadoop.hbase.io.hfile.HFile; 069import org.apache.hadoop.hbase.io.hfile.HFileContext; 070import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; 071import org.apache.hadoop.hbase.io.hfile.HFileWriterImpl; 072import org.apache.hadoop.hbase.regionserver.BloomType; 073import org.apache.hadoop.hbase.regionserver.HStore; 074import org.apache.hadoop.hbase.regionserver.StoreFileWriter; 075import org.apache.hadoop.hbase.regionserver.StoreUtils; 076import org.apache.hadoop.hbase.util.BloomFilterUtil; 077import org.apache.hadoop.hbase.util.Bytes; 078import org.apache.hadoop.hbase.util.CommonFSUtils; 079import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 080import org.apache.hadoop.hbase.util.MapReduceExtendedCell; 081import org.apache.hadoop.io.NullWritable; 082import 
org.apache.hadoop.io.SequenceFile; 083import org.apache.hadoop.io.Text; 084import org.apache.hadoop.mapreduce.Job; 085import org.apache.hadoop.mapreduce.OutputCommitter; 086import org.apache.hadoop.mapreduce.OutputFormat; 087import org.apache.hadoop.mapreduce.RecordWriter; 088import org.apache.hadoop.mapreduce.TaskAttemptContext; 089import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; 090import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 091import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner; 092import org.apache.yetus.audience.InterfaceAudience; 093import org.slf4j.Logger; 094import org.slf4j.LoggerFactory; 095 096/** 097 * Writes HFiles. Passed Cells must arrive in order. Writes current time as the sequence id for the 098 * file. Sets the major compacted attribute on created {@link HFile}s. Calling write(null,null) will 099 * forcibly roll all HFiles being written. 100 * <p> 101 * Using this class as part of a MapReduce job is best done using 102 * {@link #configureIncrementalLoad(Job, TableDescriptor, RegionLocator)}. 103 */ 104@InterfaceAudience.Public 105public class HFileOutputFormat2 extends FileOutputFormat<ImmutableBytesWritable, Cell> { 106 private static final Logger LOG = LoggerFactory.getLogger(HFileOutputFormat2.class); 107 108 static class TableInfo { 109 private TableDescriptor tableDesctiptor; 110 private RegionLocator regionLocator; 111 112 public TableInfo(TableDescriptor tableDesctiptor, RegionLocator regionLocator) { 113 this.tableDesctiptor = tableDesctiptor; 114 this.regionLocator = regionLocator; 115 } 116 117 /** 118 * The modification for the returned HTD doesn't affect the inner TD. 119 * @return A clone of inner table descriptor 120 * @deprecated since 2.0.0 and will be removed in 3.0.0. Use {@link #getTableDescriptor()} 121 * instead. 
122 * @see #getTableDescriptor() 123 * @see <a href="https://issues.apache.org/jira/browse/HBASE-18241">HBASE-18241</a> 124 */ 125 @Deprecated 126 public HTableDescriptor getHTableDescriptor() { 127 return new HTableDescriptor(tableDesctiptor); 128 } 129 130 public TableDescriptor getTableDescriptor() { 131 return tableDesctiptor; 132 } 133 134 public RegionLocator getRegionLocator() { 135 return regionLocator; 136 } 137 } 138 139 protected static final byte[] tableSeparator = Bytes.toBytes(";"); 140 141 protected static byte[] combineTableNameSuffix(byte[] tableName, byte[] suffix) { 142 return Bytes.add(tableName, tableSeparator, suffix); 143 } 144 145 // The following constants are private since these are used by 146 // HFileOutputFormat2 to internally transfer data between job setup and 147 // reducer run using conf. 148 // These should not be changed by the client. 149 static final String COMPRESSION_FAMILIES_CONF_KEY = 150 "hbase.hfileoutputformat.families.compression"; 151 static final String BLOOM_TYPE_FAMILIES_CONF_KEY = "hbase.hfileoutputformat.families.bloomtype"; 152 static final String BLOOM_PARAM_FAMILIES_CONF_KEY = "hbase.hfileoutputformat.families.bloomparam"; 153 static final String BLOCK_SIZE_FAMILIES_CONF_KEY = "hbase.mapreduce.hfileoutputformat.blocksize"; 154 static final String DATABLOCK_ENCODING_FAMILIES_CONF_KEY = 155 "hbase.mapreduce.hfileoutputformat.families.datablock.encoding"; 156 157 // This constant is public since the client can modify this when setting 158 // up their conf object and thus refer to this symbol. 159 // It is present for backwards compatibility reasons. Use it only to 160 // override the auto-detection of datablock encoding and compression. 
161 public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY = 162 "hbase.mapreduce.hfileoutputformat.datablock.encoding"; 163 public static final String COMPRESSION_OVERRIDE_CONF_KEY = 164 "hbase.mapreduce.hfileoutputformat.compression"; 165 166 /** 167 * Keep locality while generating HFiles for bulkload. See HBASE-12596 168 */ 169 public static final String LOCALITY_SENSITIVE_CONF_KEY = 170 "hbase.bulkload.locality.sensitive.enabled"; 171 private static final boolean DEFAULT_LOCALITY_SENSITIVE = true; 172 static final String OUTPUT_TABLE_NAME_CONF_KEY = "hbase.mapreduce.hfileoutputformat.table.name"; 173 static final String MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY = 174 "hbase.mapreduce.use.multi.table.hfileoutputformat"; 175 176 /** 177 * ExtendedCell and ExtendedCellSerialization are InterfaceAudience.Private. We expose this config 178 * package-private for internal usage for jobs like WALPlayer which need to use features of 179 * ExtendedCell. 180 */ 181 static final String EXTENDED_CELL_SERIALIZATION_ENABLED_KEY = 182 "hbase.mapreduce.hfileoutputformat.extendedcell.enabled"; 183 static final boolean EXTENDED_CELL_SERIALIZATION_ENABLED_DEFULT = false; 184 185 public static final String REMOTE_CLUSTER_CONF_PREFIX = "hbase.hfileoutputformat.remote.cluster."; 186 public static final String REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY = 187 REMOTE_CLUSTER_CONF_PREFIX + "zookeeper.quorum"; 188 public static final String REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY = 189 REMOTE_CLUSTER_CONF_PREFIX + "zookeeper." 
+ HConstants.CLIENT_PORT_STR; 190 public static final String REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY = 191 REMOTE_CLUSTER_CONF_PREFIX + HConstants.ZOOKEEPER_ZNODE_PARENT; 192 193 public static final String STORAGE_POLICY_PROPERTY = HStore.BLOCK_STORAGE_POLICY_KEY; 194 public static final String STORAGE_POLICY_PROPERTY_CF_PREFIX = STORAGE_POLICY_PROPERTY + "."; 195 196 @Override 197 public RecordWriter<ImmutableBytesWritable, Cell> 198 getRecordWriter(final TaskAttemptContext context) throws IOException, InterruptedException { 199 return createRecordWriter(context, this.getOutputCommitter(context)); 200 } 201 202 protected static byte[] getTableNameSuffixedWithFamily(byte[] tableName, byte[] family) { 203 return combineTableNameSuffix(tableName, family); 204 } 205 206 static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter( 207 final TaskAttemptContext context, final OutputCommitter committer) throws IOException { 208 209 // Get the path of the temporary output file 210 final Path outputDir = ((FileOutputCommitter) committer).getWorkPath(); 211 final Configuration conf = context.getConfiguration(); 212 final boolean writeMultipleTables = 213 conf.getBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, false); 214 final String writeTableNames = conf.get(OUTPUT_TABLE_NAME_CONF_KEY); 215 if (writeTableNames == null || writeTableNames.isEmpty()) { 216 throw new IllegalArgumentException("" + OUTPUT_TABLE_NAME_CONF_KEY + " cannot be empty"); 217 } 218 final FileSystem fs = outputDir.getFileSystem(conf); 219 // These configs. are from hbase-*.xml 220 final long maxsize = 221 conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE); 222 // Invented config. Add to hbase-*.xml if other than default compression. 
    final String defaultCompressionStr =
      conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = HFileWriterImpl.compressionByName(defaultCompressionStr);
    String compressionStr = conf.get(COMPRESSION_OVERRIDE_CONF_KEY);
    // Explicit override beats both per-family settings and the hfile.compression default.
    final Algorithm overriddenCompression =
      compressionStr != null ? Compression.getCompressionAlgorithmByName(compressionStr) : null;
    final boolean compactionExclude =
      conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
    final Set<String> allTableNames = Arrays
      .stream(writeTableNames.split(Bytes.toString(tableSeparator))).collect(Collectors.toSet());

    // create a map from column family to the compression algorithm
    final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
    final Map<byte[], String> bloomParamMap = createFamilyBloomParamMap(conf);
    final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
    final Map<byte[], DataBlockEncoding> datablockEncodingMap =
      createFamilyDataBlockEncodingMap(conf);
    final DataBlockEncoding overriddenEncoding =
      dataBlockEncodingStr != null ? DataBlockEncoding.valueOf(dataBlockEncodingStr) : null;

    return new RecordWriter<ImmutableBytesWritable, V>() {
      // Map of families to writers and how much has been output on the writer.
      // Keyed by tableName + tableSeparator + family (see getTableNameSuffixedWithFamily).
      private final Map<byte[], WriterLength> writers = new TreeMap<>(Bytes.BYTES_COMPARATOR);
      // Last row written per family; used to avoid rolling a writer mid-row.
      private final Map<byte[], byte[]> previousRows = new TreeMap<>(Bytes.BYTES_COMPARATOR);
      // Single timestamp stamped onto every cell written by this task (see write()).
      private final long now = EnvironmentEdgeManager.currentTime();

      /**
       * Writes one cell to the HFile writer for its (table, family), creating or rolling
       * writers as needed. write(null, null) forces a roll of all open writers.
       */
      @Override
      public void write(ImmutableBytesWritable row, V cell) throws IOException {
        Cell kv = cell;
        // null input == user explicitly wants to flush
        if (row == null && kv == null) {
          rollWriters(null);
          return;
        }

        byte[] rowKey = CellUtil.cloneRow(kv);
        // Accumulated toward maxsize for roll decisions; the int subtracted here is
        // presumably the serialized length prefix — NOTE(review): confirm against
        // PrivateCellUtil.estimatedSerializedSizeOf.
        int length = (PrivateCellUtil.estimatedSerializedSizeOf(kv)) - Bytes.SIZEOF_INT;
        byte[] family = CellUtil.cloneFamily(kv);
        byte[] tableNameBytes = null;
        if (writeMultipleTables) {
          // Multi-table mode: the row key carries a table-name prefix; extract and validate it.
          tableNameBytes = MultiTableHFileOutputFormat.getTableName(row.get());
          tableNameBytes = TableName.valueOf(tableNameBytes).toBytes();
          if (!allTableNames.contains(Bytes.toString(tableNameBytes))) {
            throw new IllegalArgumentException(
              "TableName " + Bytes.toString(tableNameBytes) + " not expected");
          }
        } else {
          tableNameBytes = Bytes.toBytes(writeTableNames);
        }
        Path tableRelPath = getTableRelativePath(tableNameBytes);
        byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableNameBytes, family);
        WriterLength wl = this.writers.get(tableAndFamily);

        // If this is a new column family, verify that the directory exists
        if (wl == null) {
          Path writerPath = null;
          if (writeMultipleTables) {
            writerPath = new Path(outputDir, new Path(tableRelPath, Bytes.toString(family)));
          } else {
            writerPath = new Path(outputDir, Bytes.toString(family));
          }
          fs.mkdirs(writerPath);
          configureStoragePolicy(conf, fs, tableAndFamily, writerPath);
        }

        // Roll when over maxsize, but only on a row boundary so a row never spans files.
        // This can only happen once a row is finished though
        if (
          wl != null && wl.written + length >= maxsize
            && Bytes.compareTo(this.previousRows.get(family), rowKey) != 0
        ) {
          rollWriters(wl);
        }

        // create a new HFile writer, if necessary (original comment said "WAL writer",
        // but these are StoreFileWriters producing HFiles)
        if (wl == null || wl.writer == null) {
          if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
            HRegionLocation loc = null;

            String tableName = Bytes.toString(tableNameBytes);
            if (tableName != null) {
              // Look up the region serving this row so the HFile blocks can be placed on
              // that region server (favored nodes). Failure is non-fatal: fall back to a
              // default writer. Connection is opened/closed per lookup.
              try (
                Connection connection =
                  ConnectionFactory.createConnection(createRemoteClusterConf(conf));
                RegionLocator locator = connection.getRegionLocator(TableName.valueOf(tableName))) {
                loc = locator.getRegionLocation(rowKey);
              } catch (Throwable e) {
                LOG.warn("Something wrong locating rowkey {} in {}", Bytes.toString(rowKey),
                  tableName, e);
                loc = null;
              }
            }

            if (null == loc) {
              LOG.trace("Failed get of location, use default writer {}", Bytes.toString(rowKey));
              wl = getNewWriter(tableNameBytes, family, conf, null);
            } else {
              LOG.debug("First rowkey: [{}]", Bytes.toString(rowKey));
              InetSocketAddress initialIsa =
                new InetSocketAddress(loc.getHostname(), loc.getPort());
              if (initialIsa.isUnresolved()) {
                LOG.trace("Failed resolve address {}, use default writer", loc.getHostnamePort());
                wl = getNewWriter(tableNameBytes, family, conf, null);
              } else {
                LOG.debug("Use favored nodes writer: {}", initialIsa.getHostString());
                wl = getNewWriter(tableNameBytes, family, conf,
                  new InetSocketAddress[] { initialIsa });
              }
            }
          } else {
            wl = getNewWriter(tableNameBytes, family, conf, null);
          }
        }

        // we now have the proper HFile writer. full steam ahead
        PrivateCellUtil.updateLatestStamp(cell, this.now);
        wl.writer.append(kv);
        wl.written += length;

        // Copy the row so we know when a row transition.
        this.previousRows.put(family, rowKey);
      }

      /**
       * Converts a (possibly namespaced) table name into a relative path:
       * "ns:table" -> ns/table, "table" -> table.
       */
      private Path getTableRelativePath(byte[] tableNameBytes) {
        String tableName = Bytes.toString(tableNameBytes);
        String[] tableNameParts = tableName.split(":");
        Path tableRelPath = new Path(tableName.split(":")[0]);
        if (tableNameParts.length > 1) {
          tableRelPath = new Path(tableRelPath, tableName.split(":")[1]);
        }
        return tableRelPath;
      }

      /** Closes one writer (when given) or all open writers (when null). */
      private void rollWriters(WriterLength writerLength) throws IOException {
        if (writerLength != null) {
          closeWriter(writerLength);
        } else {
          for (WriterLength wl : this.writers.values()) {
            closeWriter(wl);
          }
        }
      }

      /** Finalizes and closes a writer if open, and resets its byte counter. */
      private void closeWriter(WriterLength wl) throws IOException {
        if (wl.writer != null) {
          LOG.info(
            "Writer=" + wl.writer.getPath() + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
          close(wl.writer);
          wl.writer = null;
        }
        wl.written = 0;
      }

      /**
       * Builds a Configuration pointing at the remote cluster, if one is configured via the
       * hbase.hfileoutputformat.remote.cluster.* keys; otherwise returns a copy of conf
       * unchanged. Any other key under the prefix is copied over with the prefix stripped.
       */
      private Configuration createRemoteClusterConf(Configuration conf) {
        final Configuration newConf = new Configuration(conf);

        final String quorum = conf.get(REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY);
        final String clientPort = conf.get(REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY);
        final String parent = conf.get(REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY);

        // Only redirect to the remote cluster when all three ZK settings are present.
        if (quorum != null && clientPort != null && parent != null) {
          newConf.set(HConstants.ZOOKEEPER_QUORUM, quorum);
          newConf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.parseInt(clientPort));
          newConf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parent);
        }

        for (Entry<String, String> entry : conf) {
          String key = entry.getKey();
          if (
            REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY.equals(key)
              || REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY.equals(key)
              || REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY.equals(key)
          ) {
            // Handled them above
            continue;
          }

          if (entry.getKey().startsWith(REMOTE_CLUSTER_CONF_PREFIX)) {
            String originalKey = entry.getKey().substring(REMOTE_CLUSTER_CONF_PREFIX.length());
            if (!originalKey.isEmpty()) {
              newConf.set(originalKey, entry.getValue());
            }
          }
        }

        return newConf;
      }

      /*
       * Create a new StoreFile.Writer.
       * Resolves compression/bloom/block-size/encoding for the family, preferring explicit
       * overrides, then per-family conf values, then defaults. Registers the writer in
       * this.writers under tableAndFamily.
       * @return A WriterLength, containing a new StoreFile.Writer.
       */
      @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "BX_UNBOXING_IMMEDIATELY_REBOXED",
          justification = "Not important")
      private WriterLength getNewWriter(byte[] tableName, byte[] family, Configuration conf,
        InetSocketAddress[] favoredNodes) throws IOException {
        byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableName, family);
        Path familydir = new Path(outputDir, Bytes.toString(family));
        if (writeMultipleTables) {
          familydir =
            new Path(outputDir, new Path(getTableRelativePath(tableName), Bytes.toString(family)));
        }
        WriterLength wl = new WriterLength();
        Algorithm compression = overriddenCompression;
        compression = compression == null ? compressionMap.get(tableAndFamily) : compression;
        compression = compression == null ? defaultCompression : compression;
        BloomType bloomType = bloomTypeMap.get(tableAndFamily);
        bloomType = bloomType == null ? BloomType.NONE : bloomType;
        String bloomParam = bloomParamMap.get(tableAndFamily);
        if (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH) {
          conf.set(BloomFilterUtil.PREFIX_LENGTH_KEY, bloomParam);
        }
        Integer blockSize = blockSizeMap.get(tableAndFamily);
        blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
        DataBlockEncoding encoding = overriddenEncoding;
        encoding = encoding == null ? datablockEncodingMap.get(tableAndFamily) : encoding;
        encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
        HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression)
          .withDataBlockEncoding(encoding).withChecksumType(StoreUtils.getChecksumType(conf))
          .withBytesPerCheckSum(StoreUtils.getBytesPerChecksum(conf)).withBlockSize(blockSize)
          .withColumnFamily(family).withTableName(tableName)
          .withCreateTime(EnvironmentEdgeManager.currentTime());

        if (HFile.getFormatVersion(conf) >= HFile.MIN_FORMAT_VERSION_WITH_TAGS) {
          contextBuilder.withIncludesTags(true);
        }

        HFileContext hFileContext = contextBuilder.build();
        if (null == favoredNodes) {
          wl.writer =
            new StoreFileWriter.Builder(conf, CacheConfig.DISABLED, fs).withOutputDir(familydir)
              .withBloomType(bloomType).withFileContext(hFileContext).build();
        } else {
          // Favored-nodes path: wrap fs in HFileSystem so block placement can be hinted.
          wl.writer = new StoreFileWriter.Builder(conf, CacheConfig.DISABLED, new HFileSystem(fs))
            .withOutputDir(familydir).withBloomType(bloomType).withFileContext(hFileContext)
            .withFavoredNodes(favoredNodes).build();
        }

        this.writers.put(tableAndFamily, wl);
        return wl;
      }

      /** Stamps bulkload metadata into the file info block and closes the writer. */
      private void close(final StoreFileWriter w) throws IOException {
        if (w != null) {
          w.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(EnvironmentEdgeManager.currentTime()));
          w.appendFileInfo(BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
          w.appendFileInfo(MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
          w.appendFileInfo(EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
          w.appendTrackedTimestampsToMetadata();
          w.close();
        }
      }

      @Override
      public void close(TaskAttemptContext c) throws IOException, InterruptedException {
        for (WriterLength wl : this.writers.values()) {
          close(wl.writer);
        }
      }
    };
  }

  /**
   * Configure block storage policy for CF after the directory is created.
489 */ 490 static void configureStoragePolicy(final Configuration conf, final FileSystem fs, 491 byte[] tableAndFamily, Path cfPath) { 492 if (null == conf || null == fs || null == tableAndFamily || null == cfPath) { 493 return; 494 } 495 496 String policy = conf.get(STORAGE_POLICY_PROPERTY_CF_PREFIX + Bytes.toString(tableAndFamily), 497 conf.get(STORAGE_POLICY_PROPERTY)); 498 CommonFSUtils.setStoragePolicy(fs, cfPath, policy); 499 } 500 501 /* 502 * Data structure to hold a Writer and amount of data written on it. 503 */ 504 static class WriterLength { 505 long written = 0; 506 StoreFileWriter writer = null; 507 } 508 509 /** 510 * Return the start keys of all of the regions in this table, as a list of ImmutableBytesWritable. 511 */ 512 private static List<ImmutableBytesWritable> getRegionStartKeys(List<RegionLocator> regionLocators, 513 boolean writeMultipleTables) throws IOException { 514 515 ArrayList<ImmutableBytesWritable> ret = new ArrayList<>(); 516 for (RegionLocator regionLocator : regionLocators) { 517 TableName tableName = regionLocator.getName(); 518 LOG.info("Looking up current regions for table " + tableName); 519 byte[][] byteKeys = regionLocator.getStartKeys(); 520 for (byte[] byteKey : byteKeys) { 521 byte[] fullKey = byteKey; // HFileOutputFormat2 use case 522 if (writeMultipleTables) { 523 // MultiTableHFileOutputFormat use case 524 fullKey = combineTableNameSuffix(tableName.getName(), byteKey); 525 } 526 if (LOG.isDebugEnabled()) { 527 LOG.debug("SplitPoint startkey for " + tableName + ": " + Bytes.toStringBinary(fullKey)); 528 } 529 ret.add(new ImmutableBytesWritable(fullKey)); 530 } 531 } 532 return ret; 533 } 534 535 /** 536 * Write out a {@link SequenceFile} that can be read by {@link TotalOrderPartitioner} that 537 * contains the split points in startKeys. 
538 */ 539 @SuppressWarnings("deprecation") 540 private static void writePartitions(Configuration conf, Path partitionsPath, 541 List<ImmutableBytesWritable> startKeys, boolean writeMultipleTables) throws IOException { 542 LOG.info("Writing partition information to " + partitionsPath); 543 if (startKeys.isEmpty()) { 544 throw new IllegalArgumentException("No regions passed"); 545 } 546 547 // We're generating a list of split points, and we don't ever 548 // have keys < the first region (which has an empty start key) 549 // so we need to remove it. Otherwise we would end up with an 550 // empty reducer with index 0 551 TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys); 552 ImmutableBytesWritable first = sorted.first(); 553 if (writeMultipleTables) { 554 first = 555 new ImmutableBytesWritable(MultiTableHFileOutputFormat.getSuffix(sorted.first().get())); 556 } 557 if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) { 558 throw new IllegalArgumentException( 559 "First region of table should have empty start key. Instead has: " 560 + Bytes.toStringBinary(first.get())); 561 } 562 sorted.remove(sorted.first()); 563 564 // Write the actual file 565 FileSystem fs = partitionsPath.getFileSystem(conf); 566 SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath, 567 ImmutableBytesWritable.class, NullWritable.class); 568 569 try { 570 for (ImmutableBytesWritable startKey : sorted) { 571 writer.append(startKey, NullWritable.get()); 572 } 573 } finally { 574 writer.close(); 575 } 576 } 577 578 /** 579 * Configure a MapReduce Job to perform an incremental load into the given table. 
This
   * <ul>
   * <li>Inspects the table to configure a total order partitioner</li>
   * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
   * <li>Sets the number of reduce tasks to match the current number of regions</li>
   * <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
   * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
   * PutSortReducer)</li>
   * <li>Sets the HBase cluster key to load region locations for locality-sensitive</li>
   * </ul>
   * The user should be sure to set the map output value class to either KeyValue or Put before
   * running this function.
   */
  public static void configureIncrementalLoad(Job job, Table table, RegionLocator regionLocator)
    throws IOException {
    configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
    // Use the table's own cluster for region-location lookups (HBASE-25608).
    configureRemoteCluster(job, table.getConfiguration());
  }

  /**
   * Configure a MapReduce Job to perform an incremental load into the given table. This
   * <ul>
   * <li>Inspects the table to configure a total order partitioner</li>
   * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
   * <li>Sets the number of reduce tasks to match the current number of regions</li>
   * <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
   * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
   * PutSortReducer)</li>
   * </ul>
   * The user should be sure to set the map output value class to either KeyValue or Put before
   * running this function.
   */
  public static void configureIncrementalLoad(Job job, TableDescriptor tableDescriptor,
    RegionLocator regionLocator) throws IOException {
    ArrayList<TableInfo> singleTableInfo = new ArrayList<>();
    singleTableInfo.add(new TableInfo(tableDescriptor, regionLocator));
    configureIncrementalLoad(job, singleTableInfo, HFileOutputFormat2.class);
  }

  /**
   * Core incremental-load setup shared by the single- and multi-table output formats.
   * Chooses the sort reducer from the map output value class, configures the total order
   * partitioner from region start keys, and serializes per-family attributes into the conf.
   */
  static void configureIncrementalLoad(Job job, List<TableInfo> multiTableInfo,
    Class<? extends OutputFormat<?, ?>> cls) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(MapReduceExtendedCell.class);
    job.setOutputFormatClass(cls);

    if (multiTableInfo.stream().distinct().count() != multiTableInfo.size()) {
      throw new IllegalArgumentException("Duplicate entries found in TableInfo argument");
    }
    boolean writeMultipleTables = false;
    if (MultiTableHFileOutputFormat.class.equals(cls)) {
      writeMultipleTables = true;
      conf.setBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, true);
    }
    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (
      KeyValue.class.equals(job.getMapOutputValueClass())
        || MapReduceExtendedCell.class.equals(job.getMapOutputValueClass())
    ) {
      job.setReducerClass(CellSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(TextSortReducer.class);
    } else {
      LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    mergeSerializations(conf);

    if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
      LOG.info("bulkload locality sensitive enabled");
    }

    /* Now get the region start keys for every table required */
    List<String> allTableNames = new ArrayList<>(multiTableInfo.size());
    List<RegionLocator> regionLocators = new ArrayList<>(multiTableInfo.size());
    List<TableDescriptor> tableDescriptors = new ArrayList<>(multiTableInfo.size());

    for (TableInfo tableInfo : multiTableInfo) {
      regionLocators.add(tableInfo.getRegionLocator());
      allTableNames.add(tableInfo.getRegionLocator().getName().getNameAsString());
      tableDescriptors.add(tableInfo.getTableDescriptor());
    }
    // Record tablenames for creating writer by favored nodes, and decoding compression,
    // block size and other attributes of columnfamily per table
    conf.set(OUTPUT_TABLE_NAME_CONF_KEY,
      StringUtils.join(allTableNames, Bytes.toString(tableSeparator)));
    List<ImmutableBytesWritable> startKeys =
      getRegionStartKeys(regionLocators, writeMultipleTables);
    // Use table's region boundaries for TOP split points.
    LOG.info("Configuring " + startKeys.size() + " reduce partitions "
      + "to match current region count for all tables");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys, writeMultipleTables);
    // Set compression algorithms based on column families

    conf.set(COMPRESSION_FAMILIES_CONF_KEY,
      serializeColumnFamilyAttribute(compressionDetails, tableDescriptors));
    conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY,
      serializeColumnFamilyAttribute(blockSizeDetails, tableDescriptors));
    conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY,
      serializeColumnFamilyAttribute(bloomTypeDetails, tableDescriptors));
    conf.set(BLOOM_PARAM_FAMILIES_CONF_KEY,
      serializeColumnFamilyAttribute(bloomParamDetails, tableDescriptors));
    conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
      serializeColumnFamilyAttribute(dataBlockEncodingDetails, tableDescriptors));

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental output configured for tables: " + StringUtils.join(allTableNames, ","));
  }

  /**
   * Registers the HBase serializations into "io.serializations", preserving any values
   * already configured by the user.
   */
  private static void mergeSerializations(Configuration conf) {
    List<String> serializations = new ArrayList<>();

    // add any existing values that have been set
    String[] existing = conf.getStrings("io.serializations");
    if (existing != null) {
      Collections.addAll(serializations, existing);
    }

    serializations.add(MutationSerialization.class.getName());
    serializations.add(ResultSerialization.class.getName());

    // Add ExtendedCellSerialization, if configured. Order matters here. Hadoop's
    // SerializationFactory runs through serializations in the order they are registered.
    // We want to register ExtendedCellSerialization before CellSerialization because both
    // work for ExtendedCells but only ExtendedCellSerialization handles them properly.
    if (
      conf.getBoolean(EXTENDED_CELL_SERIALIZATION_ENABLED_KEY,
        EXTENDED_CELL_SERIALIZATION_ENABLED_DEFULT)
    ) {
      serializations.add(ExtendedCellSerialization.class.getName());
    }
    serializations.add(CellSerialization.class.getName());

    conf.setStrings("io.serializations", serializations.toArray(new String[0]));
  }

  /**
   * Configure a job whose output is HFiles for a single table, without setting up a
   * partitioner/reducer: sets output classes, records the table name, and serializes the
   * per-family attributes (compression, block size, bloom, encoding) into the job conf.
   */
  public static void configureIncrementalLoadMap(Job job, TableDescriptor tableDescriptor)
    throws IOException {
    Configuration conf = job.getConfiguration();

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(MapReduceExtendedCell.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);

    ArrayList<TableDescriptor> singleTableDescriptor = new ArrayList<>(1);
    singleTableDescriptor.add(tableDescriptor);

    conf.set(OUTPUT_TABLE_NAME_CONF_KEY, tableDescriptor.getTableName().getNameAsString());
    // Set compression algorithms based on column families
    conf.set(COMPRESSION_FAMILIES_CONF_KEY,
      serializeColumnFamilyAttribute(compressionDetails, singleTableDescriptor));
    conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY,
      serializeColumnFamilyAttribute(blockSizeDetails, singleTableDescriptor));
    conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY,
      serializeColumnFamilyAttribute(bloomTypeDetails, singleTableDescriptor));
    conf.set(BLOOM_PARAM_FAMILIES_CONF_KEY,
      serializeColumnFamilyAttribute(bloomParamDetails, singleTableDescriptor));
    conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
      serializeColumnFamilyAttribute(dataBlockEncodingDetails, singleTableDescriptor));

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental table " + tableDescriptor.getTableName() + " output configured.");
  }

  /**
   * Configure HBase cluster key for remote cluster to load region location for locality-sensitive
   * if it's enabled.
 * It's not necessary to call this method explicitly when the cluster key for the
   * HBase cluster to be used to load region location is configured in the job configuration. Call
   * this method when another HBase cluster key is configured in the job configuration. For
   * example, you should call when you load data from HBase cluster A using
   * {@link TableInputFormat} and generate hfiles for HBase cluster B. Otherwise, HFileOutputFormat2
   * fetches locations from cluster A and locality-sensitive won't work correctly.
   * {@link #configureIncrementalLoad(Job, Table, RegionLocator)} calls this method using
   * {@link Table#getConfiguration} as clusterConf. See HBASE-25608.
   * @param job which has configuration to be updated
   * @param clusterConf which contains cluster key of the HBase cluster to be locality-sensitive
   * @see #configureIncrementalLoad(Job, Table, RegionLocator)
   * @see #LOCALITY_SENSITIVE_CONF_KEY
   * @see #REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY
   * @see #REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY
   * @see #REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY
   */
  public static void configureRemoteCluster(Job job, Configuration clusterConf) {
    Configuration conf = job.getConfiguration();

    // Locality-sensitive writing is opt-in; nothing to copy over when it is disabled.
    if (!conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
      return;
    }

    // Copy the remote cluster's ZooKeeper coordinates into dedicated "remote cluster" keys so
    // they do not clash with this job's own ZK configuration.
    final String quorum = clusterConf.get(HConstants.ZOOKEEPER_QUORUM);
    final int clientPort = clusterConf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT,
      HConstants.DEFAULT_ZOOKEEPER_CLIENT_PORT);
    final String parent =
      clusterConf.get(HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);

    conf.set(REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY, quorum);
    conf.setInt(REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY, clientPort);
    conf.set(REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY, parent);

    LOG.info("ZK configs for remote cluster of bulkload is configured: " + quorum + ":"
      + clientPort + "/" + parent);
  }

  /**
   * Runs inside the task to deserialize column family to compression algorithm map from the
   * configuration.
   * @param conf to read the serialized values from
   * @return a map from column family to the configured compression algorithm
   */
  @InterfaceAudience.Private
  static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration conf) {
    Map<byte[], String> stringMap = createFamilyConfValueMap(conf, COMPRESSION_FAMILIES_CONF_KEY);
    Map<byte[], Algorithm> compressionMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
      Algorithm algorithm = HFileWriterImpl.compressionByName(e.getValue());
      compressionMap.put(e.getKey(), algorithm);
    }
    return compressionMap;
  }

  /**
   * Runs inside the task to deserialize column family to bloom filter type map from the
   * configuration.
   * @param conf to read the serialized values from
   * @return a map from column family to the configured bloom filter type
   */
  @InterfaceAudience.Private
  static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
    Map<byte[], String> stringMap = createFamilyConfValueMap(conf, BLOOM_TYPE_FAMILIES_CONF_KEY);
    Map<byte[], BloomType> bloomTypeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
      BloomType bloomType = BloomType.valueOf(e.getValue());
      bloomTypeMap.put(e.getKey(), bloomType);
    }
    return bloomTypeMap;
  }

  /**
   * Runs inside the task to deserialize column family to bloom filter param map from the
   * configuration.
827 * @param conf to read the serialized values from 828 * @return a map from column family to the the configured bloom filter param 829 */ 830 @InterfaceAudience.Private 831 static Map<byte[], String> createFamilyBloomParamMap(Configuration conf) { 832 return createFamilyConfValueMap(conf, BLOOM_PARAM_FAMILIES_CONF_KEY); 833 } 834 835 /** 836 * Runs inside the task to deserialize column family to block size map from the configuration. 837 * @param conf to read the serialized values from 838 * @return a map from column family to the configured block size 839 */ 840 @InterfaceAudience.Private 841 static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) { 842 Map<byte[], String> stringMap = createFamilyConfValueMap(conf, BLOCK_SIZE_FAMILIES_CONF_KEY); 843 Map<byte[], Integer> blockSizeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR); 844 for (Map.Entry<byte[], String> e : stringMap.entrySet()) { 845 Integer blockSize = Integer.parseInt(e.getValue()); 846 blockSizeMap.put(e.getKey(), blockSize); 847 } 848 return blockSizeMap; 849 } 850 851 /** 852 * Runs inside the task to deserialize column family to data block encoding type map from the 853 * configuration. 854 * @param conf to read the serialized values from 855 * @return a map from column family to HFileDataBlockEncoder for the configured data block type 856 * for the family 857 */ 858 @InterfaceAudience.Private 859 static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(Configuration conf) { 860 Map<byte[], String> stringMap = 861 createFamilyConfValueMap(conf, DATABLOCK_ENCODING_FAMILIES_CONF_KEY); 862 Map<byte[], DataBlockEncoding> encoderMap = new TreeMap<>(Bytes.BYTES_COMPARATOR); 863 for (Map.Entry<byte[], String> e : stringMap.entrySet()) { 864 encoderMap.put(e.getKey(), DataBlockEncoding.valueOf((e.getValue()))); 865 } 866 return encoderMap; 867 } 868 869 /** 870 * Run inside the task to deserialize column family to given conf value map. 
871 * @param conf to read the serialized values from 872 * @param confName conf key to read from the configuration 873 * @return a map of column family to the given configuration value 874 */ 875 private static Map<byte[], String> createFamilyConfValueMap(Configuration conf, String confName) { 876 Map<byte[], String> confValMap = new TreeMap<>(Bytes.BYTES_COMPARATOR); 877 String confVal = conf.get(confName, ""); 878 for (String familyConf : confVal.split("&")) { 879 String[] familySplit = familyConf.split("="); 880 if (familySplit.length != 2) { 881 continue; 882 } 883 try { 884 confValMap.put(Bytes.toBytes(URLDecoder.decode(familySplit[0], "UTF-8")), 885 URLDecoder.decode(familySplit[1], "UTF-8")); 886 } catch (UnsupportedEncodingException e) { 887 // will not happen with UTF-8 encoding 888 throw new AssertionError(e); 889 } 890 } 891 return confValMap; 892 } 893 894 /** 895 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against 896 * <code>splitPoints</code>. Cleans up the partitions file after job exists. 
897 */ 898 static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints, 899 boolean writeMultipleTables) throws IOException { 900 Configuration conf = job.getConfiguration(); 901 // create the partitions file 902 FileSystem fs = FileSystem.get(conf); 903 String hbaseTmpFsDir = 904 conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY, HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY); 905 Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID()); 906 fs.makeQualified(partitionsPath); 907 writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables); 908 fs.deleteOnExit(partitionsPath); 909 910 // configure job to use it 911 job.setPartitionerClass(TotalOrderPartitioner.class); 912 TotalOrderPartitioner.setPartitionFile(conf, partitionsPath); 913 } 914 915 @edu.umd.cs.findbugs.annotations.SuppressWarnings( 916 value = "RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE") 917 @InterfaceAudience.Private 918 static String serializeColumnFamilyAttribute(Function<ColumnFamilyDescriptor, String> fn, 919 List<TableDescriptor> allTables) throws UnsupportedEncodingException { 920 StringBuilder attributeValue = new StringBuilder(); 921 int i = 0; 922 for (TableDescriptor tableDescriptor : allTables) { 923 if (tableDescriptor == null) { 924 // could happen with mock table instance 925 // CODEREVIEW: Can I set an empty string in conf if mock table instance? 
926 return ""; 927 } 928 for (ColumnFamilyDescriptor familyDescriptor : tableDescriptor.getColumnFamilies()) { 929 if (i++ > 0) { 930 attributeValue.append('&'); 931 } 932 attributeValue.append(URLEncoder 933 .encode(Bytes.toString(combineTableNameSuffix(tableDescriptor.getTableName().getName(), 934 familyDescriptor.getName())), "UTF-8")); 935 attributeValue.append('='); 936 attributeValue.append(URLEncoder.encode(fn.apply(familyDescriptor), "UTF-8")); 937 } 938 } 939 // Get rid of the last ampersand 940 return attributeValue.toString(); 941 } 942 943 /** 944 * Serialize column family to compression algorithm map to configuration. Invoked while 945 * configuring the MR job for incremental load. 946 */ 947 @InterfaceAudience.Private 948 static Function<ColumnFamilyDescriptor, String> compressionDetails = 949 familyDescriptor -> familyDescriptor.getCompressionType().getName(); 950 951 /** 952 * Serialize column family to block size map to configuration. Invoked while configuring the MR 953 * job for incremental load. 954 */ 955 @InterfaceAudience.Private 956 static Function<ColumnFamilyDescriptor, String> blockSizeDetails = 957 familyDescriptor -> String.valueOf(familyDescriptor.getBlocksize()); 958 959 /** 960 * Serialize column family to bloom type map to configuration. Invoked while configuring the MR 961 * job for incremental load. 962 */ 963 @InterfaceAudience.Private 964 static Function<ColumnFamilyDescriptor, String> bloomTypeDetails = familyDescriptor -> { 965 String bloomType = familyDescriptor.getBloomFilterType().toString(); 966 if (bloomType == null) { 967 bloomType = ColumnFamilyDescriptorBuilder.DEFAULT_BLOOMFILTER.name(); 968 } 969 return bloomType; 970 }; 971 972 /** 973 * Serialize column family to bloom param map to configuration. Invoked while configuring the MR 974 * job for incremental load. 
975 */ 976 @InterfaceAudience.Private 977 static Function<ColumnFamilyDescriptor, String> bloomParamDetails = familyDescriptor -> { 978 BloomType bloomType = familyDescriptor.getBloomFilterType(); 979 String bloomParam = ""; 980 if (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH) { 981 bloomParam = familyDescriptor.getConfigurationValue(BloomFilterUtil.PREFIX_LENGTH_KEY); 982 } 983 return bloomParam; 984 }; 985 986 /** 987 * Serialize column family to data block encoding map to configuration. Invoked while configuring 988 * the MR job for incremental load. 989 */ 990 @InterfaceAudience.Private 991 static Function<ColumnFamilyDescriptor, String> dataBlockEncodingDetails = familyDescriptor -> { 992 DataBlockEncoding encoding = familyDescriptor.getDataBlockEncoding(); 993 if (encoding == null) { 994 encoding = DataBlockEncoding.NONE; 995 } 996 return encoding.toString(); 997 }; 998 999}