1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.mapreduce;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.Iterator;
24 import java.util.List;
25 import java.util.TreeSet;
26
27 import org.apache.hadoop.conf.Configuration;
28 import org.apache.hadoop.hbase.classification.InterfaceAudience;
29 import org.apache.hadoop.hbase.classification.InterfaceStability;
30 import org.apache.hadoop.hbase.Cell;
31 import org.apache.hadoop.hbase.KeyValue;
32 import org.apache.hadoop.hbase.KeyValueUtil;
33 import org.apache.hadoop.hbase.Tag;
34 import org.apache.hadoop.hbase.TagType;
35 import org.apache.hadoop.hbase.client.Put;
36 import org.apache.hadoop.hbase.exceptions.DeserializationException;
37 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
38 import org.apache.hadoop.hbase.security.visibility.CellVisibility;
39 import org.apache.hadoop.hbase.util.Bytes;
40 import org.apache.hadoop.mapreduce.Reducer;
41 import org.apache.hadoop.util.StringUtils;
42
43
44
45
46
47
48
49
50
51 @InterfaceAudience.Public
52 @InterfaceStability.Stable
53 public class PutSortReducer extends
54 Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue> {
55
56 private CellCreator kvCreator;
57
58 @Override
59 protected void
60 setup(Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue>.Context context)
61 throws IOException, InterruptedException {
62 Configuration conf = context.getConfiguration();
63 this.kvCreator = new CellCreator(conf);
64 }
65
66 @Override
67 protected void reduce(
68 ImmutableBytesWritable row,
69 java.lang.Iterable<Put> puts,
70 Reducer<ImmutableBytesWritable, Put,
71 ImmutableBytesWritable, KeyValue>.Context context)
72 throws java.io.IOException, InterruptedException
73 {
74
75 long threshold = context.getConfiguration().getLong(
76 "putsortreducer.row.threshold", 1L * (1<<30));
77 Iterator<Put> iter = puts.iterator();
78 while (iter.hasNext()) {
79 TreeSet<KeyValue> map = new TreeSet<KeyValue>(KeyValue.COMPARATOR);
80 long curSize = 0;
81
82 List<Tag> tags = new ArrayList<Tag>();
83 while (iter.hasNext() && curSize < threshold) {
84
85 tags.clear();
86 Put p = iter.next();
87 long t = p.getTTL();
88 if (t != Long.MAX_VALUE) {
89
90 tags.add(new Tag(TagType.TTL_TAG_TYPE, Bytes.toBytes(t)));
91 }
92 byte[] acl = p.getACL();
93 if (acl != null) {
94
95 tags.add(new Tag(TagType.ACL_TAG_TYPE, acl));
96 }
97 try {
98 CellVisibility cellVisibility = p.getCellVisibility();
99 if (cellVisibility != null) {
100
101 tags.addAll(kvCreator.getVisibilityExpressionResolver()
102 .createVisibilityExpTags(cellVisibility.getExpression()));
103 }
104 } catch (DeserializationException e) {
105
106
107 throw new IOException("Invalid visibility expression found in mutation " + p, e);
108 }
109 for (List<Cell> cells: p.getFamilyCellMap().values()) {
110 for (Cell cell: cells) {
111
112
113 KeyValue kv = null;
114 Tag.carryForwardTags(tags, cell);
115 if (!tags.isEmpty()) {
116 kv = (KeyValue) kvCreator.create(cell.getRowArray(), cell.getRowOffset(),
117 cell.getRowLength(), cell.getFamilyArray(), cell.getFamilyOffset(),
118 cell.getFamilyLength(), cell.getQualifierArray(), cell.getQualifierOffset(),
119 cell.getQualifierLength(), cell.getTimestamp(), cell.getValueArray(),
120 cell.getValueOffset(), cell.getValueLength(), tags);
121 } else {
122 kv = KeyValueUtil.ensureKeyValueTypeForMR(cell);
123 }
124 if (map.add(kv)) {
125 curSize += kv.heapSize();
126 }
127 }
128 }
129 }
130 context.setStatus("Read " + map.size() + " entries of " + map.getClass()
131 + "(" + StringUtils.humanReadableInt(curSize) + ")");
132 int index = 0;
133 for (KeyValue kv : map) {
134 context.write(row, kv);
135 if (++index % 100 == 0)
136 context.setStatus("Wrote " + index);
137 }
138
139
140 if (iter.hasNext()) {
141
142 context.write(null, null);
143 }
144 }
145 }
146 }