1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.hadoop.hbase.util;
18
19 import java.nio.ByteBuffer;
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Collections;
23 import java.util.HashMap;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.Random;
27
28 import org.apache.hadoop.hbase.classification.InterfaceAudience;
29 import org.apache.hadoop.hbase.KeyValue;
30 import org.apache.hadoop.hbase.Tag;
31 import org.apache.hadoop.io.WritableUtils;
32
33 import com.google.common.primitives.Bytes;
34
35
36
37
38
39 @edu.umd.cs.findbugs.annotations.SuppressWarnings(
40 value="RV_ABSOLUTE_VALUE_OF_RANDOM_INT",
41 justification="Should probably fix")
42 @InterfaceAudience.Private
43 public class RedundantKVGenerator {
44
45 static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
46 static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
47 static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
48 static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
49 static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
50 static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
51 static int DEFAULT_NUMBER_OF_ROW = 500;
52
53
54 static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
55 static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
56 static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
57 static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;
58
59 static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
60 static int DEFAULT_VALUE_LENGTH = 8;
61 static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;
62
63 static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
64 static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;
65
66
67
68
69 public RedundantKVGenerator() {
70 this(new Random(42L),
71 DEFAULT_NUMBER_OF_ROW_PREFIXES,
72 DEFAULT_AVERAGE_PREFIX_LENGTH,
73 DEFAULT_PREFIX_LENGTH_VARIANCE,
74 DEFAULT_AVERAGE_SUFFIX_LENGTH,
75 DEFAULT_SUFFIX_LENGTH_VARIANCE,
76 DEFAULT_NUMBER_OF_ROW,
77
78 DEFAULT_CHANCE_FOR_SAME_QUALIFIER,
79 DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
80 DEFAULT_AVERAGE_QUALIFIER_LENGTH,
81 DEFAULT_QUALIFIER_LENGTH_VARIANCE,
82
83 DEFAULT_COLUMN_FAMILY_LENGTH,
84 DEFAULT_VALUE_LENGTH,
85 DEFAULT_CHANCE_FOR_ZERO_VALUE,
86
87 DEFAULT_BASE_TIMESTAMP_DIVIDE,
88 DEFAULT_TIMESTAMP_DIFF_SIZE
89 );
90 }
91
92
93
94
95
96 public RedundantKVGenerator(Random randomizer,
97 int numberOfRowPrefixes,
98 int averagePrefixLength,
99 int prefixLengthVariance,
100 int averageSuffixLength,
101 int suffixLengthVariance,
102 int numberOfRows,
103
104 float chanceForSameQualifier,
105 float chanceForSimiliarQualifier,
106 int averageQualifierLength,
107 int qualifierLengthVariance,
108
109 int columnFamilyLength,
110 int valueLength,
111 float chanceForZeroValue,
112
113 int baseTimestampDivide,
114 int timestampDiffSize) {
115 this.randomizer = randomizer;
116
117 this.commonPrefix = DEFAULT_COMMON_PREFIX;
118 this.numberOfRowPrefixes = numberOfRowPrefixes;
119 this.averagePrefixLength = averagePrefixLength;
120 this.prefixLengthVariance = prefixLengthVariance;
121 this.averageSuffixLength = averageSuffixLength;
122 this.suffixLengthVariance = suffixLengthVariance;
123 this.numberOfRows = numberOfRows;
124
125 this.chanceForSameQualifier = chanceForSameQualifier;
126 this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
127 this.averageQualifierLength = averageQualifierLength;
128 this.qualifierLengthVariance = qualifierLengthVariance;
129
130 this.columnFamilyLength = columnFamilyLength;
131 this.valueLength = valueLength;
132 this.chanceForZeroValue = chanceForZeroValue;
133
134 this.baseTimestampDivide = baseTimestampDivide;
135 this.timestampDiffSize = timestampDiffSize;
136 }
137
138
139 private Random randomizer;
140
141
142 private byte[] commonPrefix;
143 private int numberOfRowPrefixes;
144 private int averagePrefixLength;
145 private int prefixLengthVariance;
146 private int averageSuffixLength;
147 private int suffixLengthVariance;
148 private int numberOfRows;
149
150
151 private byte[] family;
152
153
154 private float chanceForSameQualifier;
155 private float chanceForSimilarQualifier;
156 private int averageQualifierLength;
157 private int qualifierLengthVariance;
158
159 private int columnFamilyLength;
160 private int valueLength;
161 private float chanceForZeroValue;
162
163 private int baseTimestampDivide;
164 private int timestampDiffSize;
165
166 private List<byte[]> generateRows() {
167
168 List<byte[]> prefixes = new ArrayList<>();
169 prefixes.add(new byte[0]);
170 for (int i = 1; i < numberOfRowPrefixes; ++i) {
171 int prefixLength = averagePrefixLength;
172 prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) -
173 prefixLengthVariance;
174 byte[] newPrefix = new byte[prefixLength];
175 randomizer.nextBytes(newPrefix);
176 prefixes.add(newPrefix);
177 }
178
179
180 List<byte[]> rows = new ArrayList<>();
181 for (int i = 0; i < numberOfRows; ++i) {
182 int suffixLength = averageSuffixLength;
183 suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) -
184 suffixLengthVariance;
185 int randomPrefix = randomizer.nextInt(prefixes.size());
186 byte[] row = new byte[prefixes.get(randomPrefix).length +
187 suffixLength];
188 byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row);
189 rows.add(rowWithCommonPrefix);
190 }
191
192 return rows;
193 }
194
195
196
197
198
199
200 public List<KeyValue> generateTestKeyValues(int howMany) {
201 return generateTestKeyValues(howMany, false);
202 }
203
204
205
206
207
208
209 public List<KeyValue> generateTestKeyValues(int howMany, boolean useTags) {
210 List<KeyValue> result = new ArrayList<>();
211
212 List<byte[]> rows = generateRows();
213 Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<>();
214
215 if(family==null){
216 family = new byte[columnFamilyLength];
217 randomizer.nextBytes(family);
218 }
219
220 long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide;
221
222 byte[] value = new byte[valueLength];
223
224 for (int i = 0; i < howMany; ++i) {
225 long timestamp = baseTimestamp;
226 if(timestampDiffSize > 0){
227 timestamp += randomizer.nextInt(timestampDiffSize);
228 }
229 Integer rowId = randomizer.nextInt(rows.size());
230 byte[] row = rows.get(rowId);
231
232
233
234 byte[] qualifier;
235 float qualifierChance = randomizer.nextFloat();
236 if (!rowsToQualifier.containsKey(rowId)
237 || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
238 int qualifierLength = averageQualifierLength;
239 qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
240 - qualifierLengthVariance;
241 qualifier = new byte[qualifierLength];
242 randomizer.nextBytes(qualifier);
243
244
245 if (!rowsToQualifier.containsKey(rowId)) {
246 rowsToQualifier.put(rowId, new ArrayList<byte[]>());
247 }
248 rowsToQualifier.get(rowId).add(qualifier);
249 } else if (qualifierChance > chanceForSameQualifier) {
250
251 List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
252 byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
253 .size()));
254
255 qualifier = new byte[originalQualifier.length];
256 int commonPrefix = randomizer.nextInt(qualifier.length);
257 System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
258 for (int j = commonPrefix; j < qualifier.length; ++j) {
259 qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
260 }
261
262 rowsToQualifier.get(rowId).add(qualifier);
263 } else {
264
265 List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
266 qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
267 }
268
269 if (randomizer.nextFloat() < chanceForZeroValue) {
270 Arrays.fill(value, (byte) 0);
271 } else {
272 randomizer.nextBytes(value);
273 }
274
275 if (useTags) {
276 result.add(new KeyValue(row, family, qualifier, timestamp, value, new Tag[] {
277 new Tag((byte) 1, "value1") }));
278 } else {
279 result.add(new KeyValue(row, family, qualifier, timestamp, value));
280 }
281 }
282
283 Collections.sort(result, KeyValue.COMPARATOR);
284
285 return result;
286 }
287
288
289
290
291
292
293 public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
294 boolean includesMemstoreTS) {
295 int totalSize = 0;
296 for (KeyValue kv : keyValues) {
297 totalSize += kv.getLength();
298 if (includesMemstoreTS) {
299 totalSize += WritableUtils.getVIntSize(kv.getMvccVersion());
300 }
301 }
302
303 ByteBuffer result = ByteBuffer.allocate(totalSize);
304 for (KeyValue kv : keyValues) {
305 result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
306 if (includesMemstoreTS) {
307 ByteBufferUtils.writeVLong(result, kv.getMvccVersion());
308 }
309 }
310 return result;
311 }
312
313 public RedundantKVGenerator setFamily(byte[] family) {
314 this.family = family;
315 this.columnFamilyLength = family.length;
316 return this;
317 }
318 }