View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Iterator;
24  import java.util.List;
25  import java.util.TreeSet;
26  
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.hbase.classification.InterfaceAudience;
29  import org.apache.hadoop.hbase.classification.InterfaceStability;
30  import org.apache.hadoop.hbase.Cell;
31  import org.apache.hadoop.hbase.KeyValue;
32  import org.apache.hadoop.hbase.KeyValueUtil;
33  import org.apache.hadoop.hbase.Tag;
34  import org.apache.hadoop.hbase.TagType;
35  import org.apache.hadoop.hbase.client.Put;
36  import org.apache.hadoop.hbase.exceptions.DeserializationException;
37  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
38  import org.apache.hadoop.hbase.security.visibility.CellVisibility;
39  import org.apache.hadoop.hbase.util.Bytes;
40  import org.apache.hadoop.mapreduce.Reducer;
41  import org.apache.hadoop.util.StringUtils;
42  
43  /**
44   * Emits sorted Puts.
45   * Reads in all Puts from passed Iterator, sorts them, then emits
46   * Puts in sorted order.  If lots of columns per row, it will use lots of
47   * memory sorting.
48   * @see HFileOutputFormat
49   * @see KeyValueSortReducer
50   */
51  @InterfaceAudience.Public
52  @InterfaceStability.Stable
53  public class PutSortReducer extends
54      Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue> {
55    // the cell creator
56    private CellCreator kvCreator;
57  
58    @Override
59    protected void
60        setup(Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue>.Context context)
61            throws IOException, InterruptedException {
62      Configuration conf = context.getConfiguration();
63      this.kvCreator = new CellCreator(conf);
64    }
65  
66    @Override
67    protected void reduce(
68        ImmutableBytesWritable row,
69        java.lang.Iterable<Put> puts,
70        Reducer<ImmutableBytesWritable, Put,
71                ImmutableBytesWritable, KeyValue>.Context context)
72        throws java.io.IOException, InterruptedException
73    {
74      // although reduce() is called per-row, handle pathological case
75      long threshold = context.getConfiguration().getLong(
76          "putsortreducer.row.threshold", 1L * (1<<30));
77      Iterator<Put> iter = puts.iterator();
78      while (iter.hasNext()) {
79        TreeSet<KeyValue> map = new TreeSet<KeyValue>(KeyValue.COMPARATOR);
80        long curSize = 0;
81        // stop at the end or the RAM threshold
82        List<Tag> tags = new ArrayList<Tag>();
83        while (iter.hasNext() && curSize < threshold) {
84          // clear the tags
85          tags.clear();
86          Put p = iter.next();
87          long t = p.getTTL();
88          if (t != Long.MAX_VALUE) {
89            // add TTL tag if found
90            tags.add(new Tag(TagType.TTL_TAG_TYPE, Bytes.toBytes(t)));
91          }
92          byte[] acl = p.getACL();
93          if (acl != null) {
94            // add ACL tag if found
95            tags.add(new Tag(TagType.ACL_TAG_TYPE, acl));
96          }
97          try {
98            CellVisibility cellVisibility = p.getCellVisibility();
99            if (cellVisibility != null) {
100             // add the visibility labels if any
101             tags.addAll(kvCreator.getVisibilityExpressionResolver()
102                 .createVisibilityExpTags(cellVisibility.getExpression()));
103           }
104         } catch (DeserializationException e) {
105           // We just throw exception here. Should we allow other mutations to proceed by
106           // just ignoring the bad one?
107           throw new IOException("Invalid visibility expression found in mutation " + p, e);
108         }
109         for (List<Cell> cells: p.getFamilyCellMap().values()) {
110           for (Cell cell: cells) {
111             // Creating the KV which needs to be directly written to HFiles. Using the Facade
112             // KVCreator for creation of kvs.
113             KeyValue kv = null;
114             Tag.carryForwardTags(tags, cell);
115             if (!tags.isEmpty()) {
116               kv = (KeyValue) kvCreator.create(cell.getRowArray(), cell.getRowOffset(),
117                 cell.getRowLength(), cell.getFamilyArray(), cell.getFamilyOffset(),
118                 cell.getFamilyLength(), cell.getQualifierArray(), cell.getQualifierOffset(),
119                 cell.getQualifierLength(), cell.getTimestamp(), cell.getValueArray(),
120                 cell.getValueOffset(), cell.getValueLength(), tags);
121             } else {
122               kv = KeyValueUtil.ensureKeyValueTypeForMR(cell);
123             }
124             if (map.add(kv)) {// don't count duplicated kv into size
125               curSize += kv.heapSize();
126             }
127           }
128         }
129       }
130       context.setStatus("Read " + map.size() + " entries of " + map.getClass()
131           + "(" + StringUtils.humanReadableInt(curSize) + ")");
132       int index = 0;
133       for (KeyValue kv : map) {
134         context.write(row, kv);
135         if (++index % 100 == 0)
136           context.setStatus("Wrote " + index);
137       }
138 
139       // if we have more entries to process
140       if (iter.hasNext()) {
141         // force flush because we cannot guarantee intra-row sorted order
142         context.write(null, null);
143       }
144     }
145   }
146 }