001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mapreduce;
019
020import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TASK_KEY;
021import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TIME_KEY;
022import static org.apache.hadoop.hbase.regionserver.HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY;
023import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAJOR_COMPACTION_KEY;
024
025import java.io.IOException;
026import java.io.UnsupportedEncodingException;
027import java.net.InetSocketAddress;
028import java.net.URLDecoder;
029import java.net.URLEncoder;
030import java.util.ArrayList;
031import java.util.Arrays;
032import java.util.List;
033import java.util.Map;
034import java.util.Map.Entry;
035import java.util.Set;
036import java.util.TreeMap;
037import java.util.TreeSet;
038import java.util.UUID;
039import java.util.function.Function;
040import java.util.stream.Collectors;
041import org.apache.commons.lang3.StringUtils;
042import org.apache.hadoop.conf.Configuration;
043import org.apache.hadoop.fs.FileSystem;
044import org.apache.hadoop.fs.Path;
045import org.apache.hadoop.hbase.Cell;
046import org.apache.hadoop.hbase.CellUtil;
047import org.apache.hadoop.hbase.HConstants;
048import org.apache.hadoop.hbase.HRegionLocation;
049import org.apache.hadoop.hbase.HTableDescriptor;
050import org.apache.hadoop.hbase.KeyValue;
051import org.apache.hadoop.hbase.PrivateCellUtil;
052import org.apache.hadoop.hbase.TableName;
053import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
054import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
055import org.apache.hadoop.hbase.client.Connection;
056import org.apache.hadoop.hbase.client.ConnectionFactory;
057import org.apache.hadoop.hbase.client.Put;
058import org.apache.hadoop.hbase.client.RegionLocator;
059import org.apache.hadoop.hbase.client.Table;
060import org.apache.hadoop.hbase.client.TableDescriptor;
061import org.apache.hadoop.hbase.fs.HFileSystem;
062import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
063import org.apache.hadoop.hbase.io.compress.Compression;
064import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
065import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
066import org.apache.hadoop.hbase.io.hfile.CacheConfig;
067import org.apache.hadoop.hbase.io.hfile.HFile;
068import org.apache.hadoop.hbase.io.hfile.HFileContext;
069import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
070import org.apache.hadoop.hbase.io.hfile.HFileWriterImpl;
071import org.apache.hadoop.hbase.regionserver.BloomType;
072import org.apache.hadoop.hbase.regionserver.HStore;
073import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
074import org.apache.hadoop.hbase.regionserver.StoreUtils;
075import org.apache.hadoop.hbase.util.BloomFilterUtil;
076import org.apache.hadoop.hbase.util.Bytes;
077import org.apache.hadoop.hbase.util.CommonFSUtils;
078import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
079import org.apache.hadoop.hbase.util.MapReduceExtendedCell;
080import org.apache.hadoop.io.NullWritable;
081import org.apache.hadoop.io.SequenceFile;
082import org.apache.hadoop.io.Text;
083import org.apache.hadoop.mapreduce.Job;
084import org.apache.hadoop.mapreduce.OutputCommitter;
085import org.apache.hadoop.mapreduce.OutputFormat;
086import org.apache.hadoop.mapreduce.RecordWriter;
087import org.apache.hadoop.mapreduce.TaskAttemptContext;
088import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
089import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
090import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
091import org.apache.yetus.audience.InterfaceAudience;
092import org.slf4j.Logger;
093import org.slf4j.LoggerFactory;
094
095/**
096 * Writes HFiles. Passed Cells must arrive in order.
097 * Writes current time as the sequence id for the file. Sets the major compacted
 * attribute on created {@link HFile}s. Calling write(null,null) will forcibly roll
099 * all HFiles being written.
100 * <p>
101 * Using this class as part of a MapReduce job is best done
102 * using {@link #configureIncrementalLoad(Job, TableDescriptor, RegionLocator)}.
103 */
104@InterfaceAudience.Public
105public class HFileOutputFormat2
106    extends FileOutputFormat<ImmutableBytesWritable, Cell> {
107  private static final Logger LOG = LoggerFactory.getLogger(HFileOutputFormat2.class);
108  static class TableInfo {
109    private TableDescriptor tableDesctiptor;
110    private RegionLocator regionLocator;
111
112    public TableInfo(TableDescriptor tableDesctiptor, RegionLocator regionLocator) {
113      this.tableDesctiptor = tableDesctiptor;
114      this.regionLocator = regionLocator;
115    }
116
117    /**
118     * The modification for the returned HTD doesn't affect the inner TD.
119     * @return A clone of inner table descriptor
120     * @deprecated since 2.0.0 and will be removed in 3.0.0. Use {@link #getTableDescriptor()}
121     *   instead.
122     * @see #getTableDescriptor()
123     * @see <a href="https://issues.apache.org/jira/browse/HBASE-18241">HBASE-18241</a>
124     */
125    @Deprecated
126    public HTableDescriptor getHTableDescriptor() {
127      return new HTableDescriptor(tableDesctiptor);
128    }
129
130    public TableDescriptor getTableDescriptor() {
131      return tableDesctiptor;
132    }
133
134    public RegionLocator getRegionLocator() {
135      return regionLocator;
136    }
137  }
138
139  protected static final byte[] tableSeparator = Bytes.toBytes(";");
140
141  protected static byte[] combineTableNameSuffix(byte[] tableName, byte[] suffix) {
142    return Bytes.add(tableName, tableSeparator, suffix);
143  }
144
145  // The following constants are private since these are used by
146  // HFileOutputFormat2 to internally transfer data between job setup and
147  // reducer run using conf.
148  // These should not be changed by the client.
149  static final String COMPRESSION_FAMILIES_CONF_KEY =
150      "hbase.hfileoutputformat.families.compression";
151  static final String BLOOM_TYPE_FAMILIES_CONF_KEY =
152      "hbase.hfileoutputformat.families.bloomtype";
153  static final String BLOOM_PARAM_FAMILIES_CONF_KEY =
154      "hbase.hfileoutputformat.families.bloomparam";
155  static final String BLOCK_SIZE_FAMILIES_CONF_KEY =
156      "hbase.mapreduce.hfileoutputformat.blocksize";
157  static final String DATABLOCK_ENCODING_FAMILIES_CONF_KEY =
158      "hbase.mapreduce.hfileoutputformat.families.datablock.encoding";
159
160  // This constant is public since the client can modify this when setting
161  // up their conf object and thus refer to this symbol.
162  // It is present for backwards compatibility reasons. Use it only to
163  // override the auto-detection of datablock encoding and compression.
164  public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
165      "hbase.mapreduce.hfileoutputformat.datablock.encoding";
166  public static final String COMPRESSION_OVERRIDE_CONF_KEY =
167      "hbase.mapreduce.hfileoutputformat.compression";
168
169  /**
170   * Keep locality while generating HFiles for bulkload. See HBASE-12596
171   */
172  public static final String LOCALITY_SENSITIVE_CONF_KEY =
173      "hbase.bulkload.locality.sensitive.enabled";
174  private static final boolean DEFAULT_LOCALITY_SENSITIVE = true;
175  static final String OUTPUT_TABLE_NAME_CONF_KEY =
176      "hbase.mapreduce.hfileoutputformat.table.name";
177  static final String MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY =
178          "hbase.mapreduce.use.multi.table.hfileoutputformat";
179
180  public static final String REMOTE_CLUSTER_CONF_PREFIX =
181    "hbase.hfileoutputformat.remote.cluster.";
182  public static final String REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY =
183    REMOTE_CLUSTER_CONF_PREFIX + "zookeeper.quorum";
184  public static final String REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY =
185    REMOTE_CLUSTER_CONF_PREFIX + "zookeeper." + HConstants.CLIENT_PORT_STR;
186  public static final String REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY =
187    REMOTE_CLUSTER_CONF_PREFIX + HConstants.ZOOKEEPER_ZNODE_PARENT;
188
189  public static final String STORAGE_POLICY_PROPERTY = HStore.BLOCK_STORAGE_POLICY_KEY;
190  public static final String STORAGE_POLICY_PROPERTY_CF_PREFIX = STORAGE_POLICY_PROPERTY + ".";
191
192  @Override
193  public RecordWriter<ImmutableBytesWritable, Cell> getRecordWriter(
194      final TaskAttemptContext context) throws IOException, InterruptedException {
195    return createRecordWriter(context, this.getOutputCommitter(context));
196  }
197
198  protected static byte[] getTableNameSuffixedWithFamily(byte[] tableName, byte[] family) {
199    return combineTableNameSuffix(tableName, family);
200  }
201
  /**
   * Creates the RecordWriter that streams cells into per-(table, family) HFiles under the
   * task attempt's work directory. A writer is rolled once its file grows past the
   * configured max region size — but only on a row boundary, so a single row never spans
   * two files. When locality is enabled, the region location of the first row of a file is
   * used to choose favored nodes for block placement.
   */
  static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter(
    final TaskAttemptContext context, final OutputCommitter committer) throws IOException {

    // Get the path of the temporary output file
    final Path outputDir = ((FileOutputCommitter)committer).getWorkPath();
    final Configuration conf = context.getConfiguration();
    final boolean writeMultipleTables =
      conf.getBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, false);
    final String writeTableNames = conf.get(OUTPUT_TABLE_NAME_CONF_KEY);
    if (writeTableNames == null || writeTableNames.isEmpty()) {
      throw new IllegalArgumentException("" + OUTPUT_TABLE_NAME_CONF_KEY + " cannot be empty");
    }
    final FileSystem fs = outputDir.getFileSystem(conf);
    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE,
        HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config.  Add to hbase-*.xml if other than default compression.
    final String defaultCompressionStr = conf.get("hfile.compression",
        Compression.Algorithm.NONE.getName());
    final Algorithm defaultCompression = HFileWriterImpl.compressionByName(defaultCompressionStr);
    // A job-wide compression override takes precedence over per-family settings
    // (see getNewWriter below).
    String compressionStr = conf.get(COMPRESSION_OVERRIDE_CONF_KEY);
    final Algorithm overriddenCompression = compressionStr != null ?
      Compression.getCompressionAlgorithmByName(compressionStr): null;
    final boolean compactionExclude = conf.getBoolean(
        "hbase.mapreduce.hfileoutputformat.compaction.exclude", false);
    // Table names were joined with tableSeparator at job-setup time; split them back out.
    final Set<String> allTableNames = Arrays.stream(writeTableNames.split(
            Bytes.toString(tableSeparator))).collect(Collectors.toSet());

    // create a map from column family to the compression algorithm
    final Map<byte[], Algorithm> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], BloomType> bloomTypeMap = createFamilyBloomTypeMap(conf);
    final Map<byte[], String> bloomParamMap = createFamilyBloomParamMap(conf);
    final Map<byte[], Integer> blockSizeMap = createFamilyBlockSizeMap(conf);

    // A job-wide data block encoding override, again beating the per-family settings.
    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY);
    final Map<byte[], DataBlockEncoding> datablockEncodingMap
        = createFamilyDataBlockEncodingMap(conf);
    final DataBlockEncoding overriddenEncoding = dataBlockEncodingStr != null ?
      DataBlockEncoding.valueOf(dataBlockEncodingStr) : null;

    return new RecordWriter<ImmutableBytesWritable, V>() {
      // Map of families to writers and how much has been output on the writer.
      // Keyed by table-name + separator + family (see getTableNameSuffixedWithFamily).
      private final Map<byte[], WriterLength> writers = new TreeMap<>(Bytes.BYTES_COMPARATOR);
      // Last row key written per family; used to detect row boundaries for safe rolling.
      private final Map<byte[], byte[]> previousRows = new TreeMap<>(Bytes.BYTES_COMPARATOR);
      // Timestamp substituted into cells carrying LATEST_TIMESTAMP (see updateLatestStamp).
      private final long now = EnvironmentEdgeManager.currentTime();

      /**
       * Writes one cell, lazily creating/rolling the writer for its (table, family).
       * Calling write(null, null) forcibly rolls all open writers.
       */
      @Override
      public void write(ImmutableBytesWritable row, V cell) throws IOException {
        Cell kv = cell;
        // null input == user explicitly wants to flush
        if (row == null && kv == null) {
          rollWriters(null);
          return;
        }

        byte[] rowKey = CellUtil.cloneRow(kv);
        int length = (PrivateCellUtil.estimatedSerializedSizeOf(kv)) - Bytes.SIZEOF_INT;
        byte[] family = CellUtil.cloneFamily(kv);
        byte[] tableNameBytes = null;
        if (writeMultipleTables) {
          // Multi-table mode: the table name is encoded as a prefix of the row key and
          // must be one of the tables declared at job setup.
          tableNameBytes = MultiTableHFileOutputFormat.getTableName(row.get());
          tableNameBytes = TableName.valueOf(tableNameBytes).toBytes();
          if (!allTableNames.contains(Bytes.toString(tableNameBytes))) {
            throw new IllegalArgumentException("TableName " + Bytes.toString(tableNameBytes) +
              " not expected");
          }
        } else {
          tableNameBytes = Bytes.toBytes(writeTableNames);
        }
        Path tableRelPath = getTableRelativePath(tableNameBytes);
        byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableNameBytes, family);
        WriterLength wl = this.writers.get(tableAndFamily);

        // First cell for this (table, family): create its output directory and apply the
        // configured storage policy. The writer itself is created lazily further below.
        if (wl == null) {
          Path writerPath = null;
          if (writeMultipleTables) {
            writerPath = new Path(outputDir,new Path(tableRelPath, Bytes.toString(family)));
          }
          else {
            writerPath = new Path(outputDir, Bytes.toString(family));
          }
          fs.mkdirs(writerPath);
          configureStoragePolicy(conf, fs, tableAndFamily, writerPath);
        }

        // Roll the writer once it exceeds maxsize, but only when the row changes so a
        // single row is never split across HFiles.
        if (wl != null && wl.written + length >= maxsize
                && Bytes.compareTo(this.previousRows.get(family), rowKey) != 0) {
          rollWriters(wl);
        }

        // create a new HFile writer, if necessary
        if (wl == null || wl.writer == null) {
          if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
            HRegionLocation loc = null;

            String tableName = Bytes.toString(tableNameBytes);
            if (tableName != null) {
              // Look up where this row's region lives (possibly on a remote cluster,
              // see createRemoteClusterConf) to drive favored-nodes block placement.
              try (Connection connection = ConnectionFactory.createConnection(
                createRemoteClusterConf(conf));
                     RegionLocator locator =
                       connection.getRegionLocator(TableName.valueOf(tableName))) {
                loc = locator.getRegionLocation(rowKey);
              } catch (Throwable e) {
                // Location lookup is best effort; fall back to a default writer below.
                LOG.warn("Something wrong locating rowkey {} in {}",
                  Bytes.toString(rowKey), tableName, e);
                loc = null;
              } }

            if (null == loc) {
              LOG.trace("Failed get of location, use default writer {}", Bytes.toString(rowKey));
              wl = getNewWriter(tableNameBytes, family, conf, null);
            } else {
              LOG.debug("First rowkey: [{}]", Bytes.toString(rowKey));
              InetSocketAddress initialIsa =
                  new InetSocketAddress(loc.getHostname(), loc.getPort());
              if (initialIsa.isUnresolved()) {
                LOG.trace("Failed resolve address {}, use default writer", loc.getHostnamePort());
                wl = getNewWriter(tableNameBytes, family, conf, null);
              } else {
                LOG.debug("Use favored nodes writer: {}", initialIsa.getHostString());
                wl = getNewWriter(tableNameBytes, family, conf, new InetSocketAddress[] { initialIsa
                });
              }
            }
          } else {
            wl = getNewWriter(tableNameBytes, family, conf, null);
          }
        }

        // we now have the proper HFile writer. full steam ahead
        PrivateCellUtil.updateLatestStamp(cell, this.now);
        wl.writer.append(kv);
        wl.written += length;

        // Copy the row so we know when a row transition.
        this.previousRows.put(family, rowKey);
      }

      // Maps a table name like "ns:table" to the relative path "ns/table"; a table in the
      // default namespace (no ':') maps to a single path component.
      private Path getTableRelativePath(byte[] tableNameBytes) {
        String tableName = Bytes.toString(tableNameBytes);
        String[] tableNameParts = tableName.split(":");
        Path tableRelPath = new Path(tableName.split(":")[0]);
        if (tableNameParts.length > 1) {
          tableRelPath = new Path(tableRelPath, tableName.split(":")[1]);
        }
        return tableRelPath;
      }

      // Closes the given writer, or every open writer when null is passed.
      private void rollWriters(WriterLength writerLength) throws IOException {
        if (writerLength != null) {
          closeWriter(writerLength);
        } else {
          for (WriterLength wl : this.writers.values()) {
            closeWriter(wl);
          }
        }
      }

      // Closes the underlying store file writer (if any) and resets the byte counter so
      // the next write opens a fresh file.
      private void closeWriter(WriterLength wl) throws IOException {
        if (wl.writer != null) {
          LOG.info("Writer=" + wl.writer.getPath() +
            ((wl.written == 0)? "": ", wrote=" + wl.written));
          close(wl.writer);
          wl.writer = null;
        }
        wl.written = 0;
      }

      // Builds the configuration used for region location lookups. When all three
      // remote-cluster ZooKeeper keys are present they replace the local connection keys;
      // any other key carrying the remote-cluster prefix overrides the same key with the
      // prefix stripped.
      private Configuration createRemoteClusterConf(Configuration conf) {
        final Configuration newConf = new Configuration(conf);

        final String quorum = conf.get(REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY);
        final String clientPort = conf.get(REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY);
        final String parent = conf.get(REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY);

        if (quorum != null && clientPort != null && parent != null) {
          newConf.set(HConstants.ZOOKEEPER_QUORUM, quorum);
          newConf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.parseInt(clientPort));
          newConf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parent);
        }

        for (Entry<String, String> entry : conf) {
          String key = entry.getKey();
          if (REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY.equals(key) ||
              REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY.equals(key) ||
              REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY.equals(key)) {
            // Handled them above
            continue;
          }

          if (entry.getKey().startsWith(REMOTE_CLUSTER_CONF_PREFIX)) {
            String originalKey = entry.getKey().substring(REMOTE_CLUSTER_CONF_PREFIX.length());
            if (!originalKey.isEmpty()) {
              newConf.set(originalKey, entry.getValue());
            }
          }
        }

        return newConf;
      }

      /*
       * Create a new StoreFile.Writer.
       * Compression and encoding are resolved in order: job-wide override, then the
       * per-family value serialized at job setup, then the default.
       * @return A WriterLength, containing a new StoreFile.Writer.
       */
      @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="BX_UNBOXING_IMMEDIATELY_REBOXED",
          justification="Not important")
      private WriterLength getNewWriter(byte[] tableName, byte[] family, Configuration conf,
          InetSocketAddress[] favoredNodes) throws IOException {
        byte[] tableAndFamily = getTableNameSuffixedWithFamily(tableName, family);
        Path familydir = new Path(outputDir, Bytes.toString(family));
        if (writeMultipleTables) {
          familydir = new Path(outputDir,
            new Path(getTableRelativePath(tableName), Bytes.toString(family)));
        }
        WriterLength wl = new WriterLength();
        Algorithm compression = overriddenCompression;
        compression = compression == null ? compressionMap.get(tableAndFamily) : compression;
        compression = compression == null ? defaultCompression : compression;
        BloomType bloomType = bloomTypeMap.get(tableAndFamily);
        bloomType = bloomType == null ? BloomType.NONE : bloomType;
        String bloomParam = bloomParamMap.get(tableAndFamily);
        if (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH) {
          conf.set(BloomFilterUtil.PREFIX_LENGTH_KEY, bloomParam);
        }
        Integer blockSize = blockSizeMap.get(tableAndFamily);
        blockSize = blockSize == null ? HConstants.DEFAULT_BLOCKSIZE : blockSize;
        DataBlockEncoding encoding = overriddenEncoding;
        encoding = encoding == null ? datablockEncodingMap.get(tableAndFamily) : encoding;
        encoding = encoding == null ? DataBlockEncoding.NONE : encoding;
        HFileContextBuilder contextBuilder = new HFileContextBuilder().withCompression(compression)
          .withDataBlockEncoding(encoding).withChecksumType(StoreUtils.getChecksumType(conf))
          .withBytesPerCheckSum(StoreUtils.getBytesPerChecksum(conf)).withBlockSize(blockSize)
          .withColumnFamily(family).withTableName(tableName);

        if (HFile.getFormatVersion(conf) >= HFile.MIN_FORMAT_VERSION_WITH_TAGS) {
          contextBuilder.withIncludesTags(true);
        }

        HFileContext hFileContext = contextBuilder.build();
        if (null == favoredNodes) {
          wl.writer = new StoreFileWriter.Builder(conf, CacheConfig.DISABLED, fs)
            .withOutputDir(familydir).withBloomType(bloomType)
            .withFileContext(hFileContext).build();
        } else {
          // HFileSystem wrapper is used on the favored-nodes path; presumably needed to
          // pass placement hints to the DFS client — confirm against HFileSystem docs.
          wl.writer = new StoreFileWriter.Builder(conf, CacheConfig.DISABLED, new HFileSystem(fs))
            .withOutputDir(familydir).withBloomType(bloomType)
            .withFileContext(hFileContext).withFavoredNodes(favoredNodes).build();
        }

        this.writers.put(tableAndFamily, wl);
        return wl;
      }

      // Stamps bulk-load metadata into the file info block before closing the writer.
      private void close(final StoreFileWriter w) throws IOException {
        if (w != null) {
          w.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
          w.appendFileInfo(BULKLOAD_TASK_KEY, Bytes.toBytes(context.getTaskAttemptID().toString()));
          w.appendFileInfo(MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
          w.appendFileInfo(EXCLUDE_FROM_MINOR_COMPACTION_KEY, Bytes.toBytes(compactionExclude));
          w.appendTrackedTimestampsToMetadata();
          w.close();
        }
      }

      @Override
      public void close(TaskAttemptContext c) throws IOException, InterruptedException {
        for (WriterLength wl: this.writers.values()) {
          close(wl.writer);
        }
      }
    };
  }
477
478  /**
479   * Configure block storage policy for CF after the directory is created.
480   */
481  static void configureStoragePolicy(final Configuration conf, final FileSystem fs,
482      byte[] tableAndFamily, Path cfPath) {
483    if (null == conf || null == fs || null == tableAndFamily || null == cfPath) {
484      return;
485    }
486
487    String policy =
488        conf.get(STORAGE_POLICY_PROPERTY_CF_PREFIX + Bytes.toString(tableAndFamily),
489          conf.get(STORAGE_POLICY_PROPERTY));
490    CommonFSUtils.setStoragePolicy(fs, cfPath, policy);
491  }
492
493  /*
494   * Data structure to hold a Writer and amount of data written on it.
495   */
496  static class WriterLength {
497    long written = 0;
498    StoreFileWriter writer = null;
499  }
500
501  /**
502   * Return the start keys of all of the regions in this table,
503   * as a list of ImmutableBytesWritable.
504   */
505  private static List<ImmutableBytesWritable> getRegionStartKeys(List<RegionLocator> regionLocators,
506                                                                 boolean writeMultipleTables)
507          throws IOException {
508
509    ArrayList<ImmutableBytesWritable> ret = new ArrayList<>();
510    for(RegionLocator regionLocator : regionLocators) {
511      TableName tableName = regionLocator.getName();
512      LOG.info("Looking up current regions for table " + tableName);
513      byte[][] byteKeys = regionLocator.getStartKeys();
514      for (byte[] byteKey : byteKeys) {
515        byte[] fullKey = byteKey; //HFileOutputFormat2 use case
516        if (writeMultipleTables) {
517          //MultiTableHFileOutputFormat use case
518          fullKey = combineTableNameSuffix(tableName.getName(), byteKey);
519        }
520        if (LOG.isDebugEnabled()) {
521          LOG.debug("SplitPoint startkey for " + tableName + ": " + Bytes.toStringBinary(fullKey));
522        }
523        ret.add(new ImmutableBytesWritable(fullKey));
524      }
525    }
526    return ret;
527  }
528
529  /**
530   * Write out a {@link SequenceFile} that can be read by
531   * {@link TotalOrderPartitioner} that contains the split points in startKeys.
532   */
533  @SuppressWarnings("deprecation")
534  private static void writePartitions(Configuration conf, Path partitionsPath,
535      List<ImmutableBytesWritable> startKeys, boolean writeMultipleTables) throws IOException {
536    LOG.info("Writing partition information to " + partitionsPath);
537    if (startKeys.isEmpty()) {
538      throw new IllegalArgumentException("No regions passed");
539    }
540
541    // We're generating a list of split points, and we don't ever
542    // have keys < the first region (which has an empty start key)
543    // so we need to remove it. Otherwise we would end up with an
544    // empty reducer with index 0
545    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys);
546    ImmutableBytesWritable first = sorted.first();
547    if (writeMultipleTables) {
548      first =
549        new ImmutableBytesWritable(MultiTableHFileOutputFormat.getSuffix(sorted.first().get()));
550    }
551    if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
552      throw new IllegalArgumentException(
553          "First region of table should have empty start key. Instead has: "
554          + Bytes.toStringBinary(first.get()));
555    }
556    sorted.remove(sorted.first());
557
558    // Write the actual file
559    FileSystem fs = partitionsPath.getFileSystem(conf);
560    SequenceFile.Writer writer = SequenceFile.createWriter(
561      fs, conf, partitionsPath, ImmutableBytesWritable.class,
562      NullWritable.class);
563
564    try {
565      for (ImmutableBytesWritable startKey : sorted) {
566        writer.append(startKey, NullWritable.get());
567      }
568    } finally {
569      writer.close();
570    }
571  }
572
573  /**
574   * Configure a MapReduce Job to perform an incremental load into the given
575   * table. This
576   * <ul>
577   *   <li>Inspects the table to configure a total order partitioner</li>
578   *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
579   *   <li>Sets the number of reduce tasks to match the current number of regions</li>
580   *   <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
581   *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
582   *     PutSortReducer)</li>
583   *   <li>Sets the HBase cluster key to load region locations for locality-sensitive</li>
584   * </ul>
585   * The user should be sure to set the map output value class to either KeyValue or Put before
586   * running this function.
587   */
588  public static void configureIncrementalLoad(Job job, Table table, RegionLocator regionLocator)
589      throws IOException {
590    configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
591    configureRemoteCluster(job, table.getConfiguration());
592  }
593
594  /**
595   * Configure a MapReduce Job to perform an incremental load into the given
596   * table. This
597   * <ul>
598   *   <li>Inspects the table to configure a total order partitioner</li>
599   *   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
600   *   <li>Sets the number of reduce tasks to match the current number of regions</li>
601   *   <li>Sets the output key/value class to match HFileOutputFormat2's requirements</li>
602   *   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
603   *     PutSortReducer)</li>
604   * </ul>
605   * The user should be sure to set the map output value class to either KeyValue or Put before
606   * running this function.
607   */
608  public static void configureIncrementalLoad(Job job, TableDescriptor tableDescriptor,
609      RegionLocator regionLocator) throws IOException {
610    ArrayList<TableInfo> singleTableInfo = new ArrayList<>();
611    singleTableInfo.add(new TableInfo(tableDescriptor, regionLocator));
612    configureIncrementalLoad(job, singleTableInfo, HFileOutputFormat2.class);
613  }
614
  /**
   * Core job setup shared by the single- and multi-table entry points: wires the output
   * and reducer classes, records the target table names and per-family attributes in the
   * job conf, and configures the total order partitioner from the tables' region start
   * keys.
   */
  static void configureIncrementalLoad(Job job, List<TableInfo> multiTableInfo,
      Class<? extends OutputFormat<?, ?>> cls) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(MapReduceExtendedCell.class);
    job.setOutputFormatClass(cls);

    if (multiTableInfo.stream().distinct().count() != multiTableInfo.size()) {
      throw new IllegalArgumentException("Duplicate entries found in TableInfo argument");
    }
    boolean writeMultipleTables = false;
    if (MultiTableHFileOutputFormat.class.equals(cls)) {
      writeMultipleTables = true;
      conf.setBoolean(MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, true);
    }
    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())
        || MapReduceExtendedCell.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(CellSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(PutSortReducer.class);
    } else if (Text.class.equals(job.getMapOutputValueClass())) {
      job.setReducerClass(TextSortReducer.class);
    } else {
      LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    // Register the HBase serializations alongside whatever is already configured.
    conf.setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        CellSerialization.class.getName());

    if (conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
      LOG.info("bulkload locality sensitive enabled");
    }

    /* Now get the region start keys for every table required */
    List<String> allTableNames = new ArrayList<>(multiTableInfo.size());
    List<RegionLocator> regionLocators = new ArrayList<>(multiTableInfo.size());
    List<TableDescriptor> tableDescriptors = new ArrayList<>(multiTableInfo.size());

    for(TableInfo tableInfo : multiTableInfo) {
      regionLocators.add(tableInfo.getRegionLocator());
      allTableNames.add(tableInfo.getRegionLocator().getName().getNameAsString());
      tableDescriptors.add(tableInfo.getTableDescriptor());
    }
    // Record tablenames for creating writer by favored nodes, and decoding compression,
    // block size and other attributes of columnfamily per table
    conf.set(OUTPUT_TABLE_NAME_CONF_KEY, StringUtils.join(allTableNames, Bytes
            .toString(tableSeparator)));
    List<ImmutableBytesWritable> startKeys =
      getRegionStartKeys(regionLocators, writeMultipleTables);
    // Use table's region boundaries for TOP split points.
    LOG.info("Configuring " + startKeys.size() + " reduce partitions " +
        "to match current region count for all tables");
    job.setNumReduceTasks(startKeys.size());

    configurePartitioner(job, startKeys, writeMultipleTables);
    // Set compression algorithms based on column families

    // Serialize per-family attributes into the conf so the reducer-side record writer can
    // decode them (consumed by the createFamily*Map helpers in createRecordWriter).
    conf.set(COMPRESSION_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(compressionDetails,
            tableDescriptors));
    conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(blockSizeDetails,
            tableDescriptors));
    conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(bloomTypeDetails,
            tableDescriptors));
    conf.set(BLOOM_PARAM_FAMILIES_CONF_KEY, serializeColumnFamilyAttribute(bloomParamDetails,
        tableDescriptors));
    conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
            serializeColumnFamilyAttribute(dataBlockEncodingDetails, tableDescriptors));

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    LOG.info("Incremental output configured for tables: " + StringUtils.join(allTableNames, ","));
  }
691
692  public static void configureIncrementalLoadMap(Job job, TableDescriptor tableDescriptor) throws
693      IOException {
694    Configuration conf = job.getConfiguration();
695
696    job.setOutputKeyClass(ImmutableBytesWritable.class);
697    job.setOutputValueClass(MapReduceExtendedCell.class);
698    job.setOutputFormatClass(HFileOutputFormat2.class);
699
700    ArrayList<TableDescriptor> singleTableDescriptor = new ArrayList<>(1);
701    singleTableDescriptor.add(tableDescriptor);
702
703    conf.set(OUTPUT_TABLE_NAME_CONF_KEY, tableDescriptor.getTableName().getNameAsString());
704    // Set compression algorithms based on column families
705    conf.set(COMPRESSION_FAMILIES_CONF_KEY,
706        serializeColumnFamilyAttribute(compressionDetails, singleTableDescriptor));
707    conf.set(BLOCK_SIZE_FAMILIES_CONF_KEY,
708        serializeColumnFamilyAttribute(blockSizeDetails, singleTableDescriptor));
709    conf.set(BLOOM_TYPE_FAMILIES_CONF_KEY,
710        serializeColumnFamilyAttribute(bloomTypeDetails, singleTableDescriptor));
711    conf.set(BLOOM_PARAM_FAMILIES_CONF_KEY,
712        serializeColumnFamilyAttribute(bloomParamDetails, singleTableDescriptor));
713    conf.set(DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
714        serializeColumnFamilyAttribute(dataBlockEncodingDetails, singleTableDescriptor));
715
716    TableMapReduceUtil.addDependencyJars(job);
717    TableMapReduceUtil.initCredentials(job);
718    LOG.info("Incremental table " + tableDescriptor.getTableName() + " output configured.");
719  }
720
721  /**
722   * Configure HBase cluster key for remote cluster to load region location for locality-sensitive
723   * if it's enabled.
724   * It's not necessary to call this method explicitly when the cluster key for HBase cluster to be
725   * used to load region location is configured in the job configuration.
726   * Call this method when another HBase cluster key is configured in the job configuration.
727   * For example, you should call when you load data from HBase cluster A using
728   * {@link TableInputFormat} and generate hfiles for HBase cluster B.
729   * Otherwise, HFileOutputFormat2 fetch location from cluster A and locality-sensitive won't
730   * working correctly.
731   * {@link #configureIncrementalLoad(Job, Table, RegionLocator)} calls this method using
732   * {@link Table#getConfiguration} as clusterConf.
733   * See HBASE-25608.
734   *
735   * @param job which has configuration to be updated
736   * @param clusterConf which contains cluster key of the HBase cluster to be locality-sensitive
737   *
738   * @see #configureIncrementalLoad(Job, Table, RegionLocator)
739   * @see #LOCALITY_SENSITIVE_CONF_KEY
740   * @see #REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY
741   * @see #REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY
742   * @see #REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY
743   */
744  public static void configureRemoteCluster(Job job, Configuration clusterConf) {
745    Configuration conf = job.getConfiguration();
746
747    if (!conf.getBoolean(LOCALITY_SENSITIVE_CONF_KEY, DEFAULT_LOCALITY_SENSITIVE)) {
748      return;
749    }
750
751    final String quorum = clusterConf.get(HConstants.ZOOKEEPER_QUORUM);
752    final int clientPort = clusterConf.getInt(
753      HConstants.ZOOKEEPER_CLIENT_PORT, HConstants.DEFAULT_ZOOKEEPER_CLIENT_PORT);
754    final String parent = clusterConf.get(
755      HConstants.ZOOKEEPER_ZNODE_PARENT, HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
756
757    conf.set(REMOTE_CLUSTER_ZOOKEEPER_QUORUM_CONF_KEY, quorum);
758    conf.setInt(REMOTE_CLUSTER_ZOOKEEPER_CLIENT_PORT_CONF_KEY, clientPort);
759    conf.set(REMOTE_CLUSTER_ZOOKEEPER_ZNODE_PARENT_CONF_KEY, parent);
760
761    LOG.info("ZK configs for remote cluster of bulkload is configured: " +
762      quorum + ":" + clientPort + "/" + parent);
763  }
764
765  /**
766   * Runs inside the task to deserialize column family to compression algorithm
767   * map from the configuration.
768   *
769   * @param conf to read the serialized values from
770   * @return a map from column family to the configured compression algorithm
771   */
772  @InterfaceAudience.Private
773  static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration
774      conf) {
775    Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
776        COMPRESSION_FAMILIES_CONF_KEY);
777    Map<byte[], Algorithm> compressionMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
778    for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
779      Algorithm algorithm = HFileWriterImpl.compressionByName(e.getValue());
780      compressionMap.put(e.getKey(), algorithm);
781    }
782    return compressionMap;
783  }
784
785  /**
786   * Runs inside the task to deserialize column family to bloom filter type
787   * map from the configuration.
788   *
789   * @param conf to read the serialized values from
790   * @return a map from column family to the the configured bloom filter type
791   */
792  @InterfaceAudience.Private
793  static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
794    Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
795        BLOOM_TYPE_FAMILIES_CONF_KEY);
796    Map<byte[], BloomType> bloomTypeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
797    for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
798      BloomType bloomType = BloomType.valueOf(e.getValue());
799      bloomTypeMap.put(e.getKey(), bloomType);
800    }
801    return bloomTypeMap;
802  }
803
804  /**
805   * Runs inside the task to deserialize column family to bloom filter param
806   * map from the configuration.
807   *
808   * @param conf to read the serialized values from
809   * @return a map from column family to the the configured bloom filter param
810   */
811  @InterfaceAudience.Private
812  static Map<byte[], String> createFamilyBloomParamMap(Configuration conf) {
813    return createFamilyConfValueMap(conf, BLOOM_PARAM_FAMILIES_CONF_KEY);
814  }
815
816
817  /**
818   * Runs inside the task to deserialize column family to block size
819   * map from the configuration.
820   *
821   * @param conf to read the serialized values from
822   * @return a map from column family to the configured block size
823   */
824  @InterfaceAudience.Private
825  static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
826    Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
827        BLOCK_SIZE_FAMILIES_CONF_KEY);
828    Map<byte[], Integer> blockSizeMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
829    for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
830      Integer blockSize = Integer.parseInt(e.getValue());
831      blockSizeMap.put(e.getKey(), blockSize);
832    }
833    return blockSizeMap;
834  }
835
836  /**
837   * Runs inside the task to deserialize column family to data block encoding
838   * type map from the configuration.
839   *
840   * @param conf to read the serialized values from
841   * @return a map from column family to HFileDataBlockEncoder for the
842   *         configured data block type for the family
843   */
844  @InterfaceAudience.Private
845  static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(
846      Configuration conf) {
847    Map<byte[], String> stringMap = createFamilyConfValueMap(conf,
848        DATABLOCK_ENCODING_FAMILIES_CONF_KEY);
849    Map<byte[], DataBlockEncoding> encoderMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
850    for (Map.Entry<byte[], String> e : stringMap.entrySet()) {
851      encoderMap.put(e.getKey(), DataBlockEncoding.valueOf((e.getValue())));
852    }
853    return encoderMap;
854  }
855
856
857  /**
858   * Run inside the task to deserialize column family to given conf value map.
859   *
860   * @param conf to read the serialized values from
861   * @param confName conf key to read from the configuration
862   * @return a map of column family to the given configuration value
863   */
864  private static Map<byte[], String> createFamilyConfValueMap(
865      Configuration conf, String confName) {
866    Map<byte[], String> confValMap = new TreeMap<>(Bytes.BYTES_COMPARATOR);
867    String confVal = conf.get(confName, "");
868    for (String familyConf : confVal.split("&")) {
869      String[] familySplit = familyConf.split("=");
870      if (familySplit.length != 2) {
871        continue;
872      }
873      try {
874        confValMap.put(Bytes.toBytes(URLDecoder.decode(familySplit[0], "UTF-8")),
875            URLDecoder.decode(familySplit[1], "UTF-8"));
876      } catch (UnsupportedEncodingException e) {
877        // will not happen with UTF-8 encoding
878        throw new AssertionError(e);
879      }
880    }
881    return confValMap;
882  }
883
884  /**
885   * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
886   * <code>splitPoints</code>. Cleans up the partitions file after job exists.
887   */
888  static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints, boolean
889          writeMultipleTables)
890      throws IOException {
891    Configuration conf = job.getConfiguration();
892    // create the partitions file
893    FileSystem fs = FileSystem.get(conf);
894    String hbaseTmpFsDir =
895        conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
896          HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
897    Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
898    fs.makeQualified(partitionsPath);
899    writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables);
900    fs.deleteOnExit(partitionsPath);
901
902    // configure job to use it
903    job.setPartitionerClass(TotalOrderPartitioner.class);
904    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
905  }
906
907  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value =
908    "RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
909  @InterfaceAudience.Private
910  static String serializeColumnFamilyAttribute(Function<ColumnFamilyDescriptor, String> fn,
911        List<TableDescriptor> allTables)
912      throws UnsupportedEncodingException {
913    StringBuilder attributeValue = new StringBuilder();
914    int i = 0;
915    for (TableDescriptor tableDescriptor : allTables) {
916      if (tableDescriptor == null) {
917        // could happen with mock table instance
918        // CODEREVIEW: Can I set an empty string in conf if mock table instance?
919        return "";
920      }
921      for (ColumnFamilyDescriptor familyDescriptor : tableDescriptor.getColumnFamilies()) {
922        if (i++ > 0) {
923          attributeValue.append('&');
924        }
925        attributeValue.append(URLEncoder.encode(
926          Bytes.toString(combineTableNameSuffix(tableDescriptor.getTableName().getName(),
927            familyDescriptor.getName())), "UTF-8"));
928        attributeValue.append('=');
929        attributeValue.append(URLEncoder.encode(fn.apply(familyDescriptor), "UTF-8"));
930      }
931    }
932    // Get rid of the last ampersand
933    return attributeValue.toString();
934  }
935
936  /**
937   * Serialize column family to compression algorithm map to configuration.
938   * Invoked while configuring the MR job for incremental load.
939   */
940  @InterfaceAudience.Private
941  static Function<ColumnFamilyDescriptor, String> compressionDetails = familyDescriptor ->
942          familyDescriptor.getCompressionType().getName();
943
944  /**
945   * Serialize column family to block size map to configuration. Invoked while
946   * configuring the MR job for incremental load.
947   */
948  @InterfaceAudience.Private
949  static Function<ColumnFamilyDescriptor, String> blockSizeDetails = familyDescriptor -> String
950          .valueOf(familyDescriptor.getBlocksize());
951
952  /**
953   * Serialize column family to bloom type map to configuration. Invoked while
954   * configuring the MR job for incremental load.
955   */
956  @InterfaceAudience.Private
957  static Function<ColumnFamilyDescriptor, String> bloomTypeDetails = familyDescriptor -> {
958    String bloomType = familyDescriptor.getBloomFilterType().toString();
959    if (bloomType == null) {
960      bloomType = ColumnFamilyDescriptorBuilder.DEFAULT_BLOOMFILTER.name();
961    }
962    return bloomType;
963  };
964
965  /**
966   * Serialize column family to bloom param map to configuration. Invoked while
967   * configuring the MR job for incremental load.
968   */
969  @InterfaceAudience.Private
970  static Function<ColumnFamilyDescriptor, String> bloomParamDetails = familyDescriptor -> {
971    BloomType bloomType = familyDescriptor.getBloomFilterType();
972    String bloomParam = "";
973    if (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH) {
974      bloomParam = familyDescriptor.getConfigurationValue(BloomFilterUtil.PREFIX_LENGTH_KEY);
975    }
976    return bloomParam;
977  };
978
979  /**
980   * Serialize column family to data block encoding map to configuration.
981   * Invoked while configuring the MR job for incremental load.
982   */
983  @InterfaceAudience.Private
984  static Function<ColumnFamilyDescriptor, String> dataBlockEncodingDetails = familyDescriptor -> {
985    DataBlockEncoding encoding = familyDescriptor.getDataBlockEncoding();
986    if (encoding == null) {
987      encoding = DataBlockEncoding.NONE;
988    }
989    return encoding.toString();
990  };
991
992}