View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations
15   * under the License.
16   */
17  package org.apache.hadoop.hbase.util;
18  
19  import java.nio.ByteBuffer;
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Collections;
23  import java.util.HashMap;
24  import java.util.List;
25  import java.util.Map;
26  import java.util.Random;
27  
28  import org.apache.hadoop.hbase.classification.InterfaceAudience;
29  import org.apache.hadoop.hbase.KeyValue;
30  import org.apache.hadoop.hbase.Tag;
31  import org.apache.hadoop.io.WritableUtils;
32  
33  import com.google.common.primitives.Bytes;
34  
35  /**
36   * Generate list of key values which are very useful to test data block encoding
37   * and compression.
38   */
39  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
40      value="RV_ABSOLUTE_VALUE_OF_RANDOM_INT",
41      justification="Should probably fix")
42  @InterfaceAudience.Private
43  public class RedundantKVGenerator {
44    // row settings
45    static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
46    static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
47    static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
48    static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
49    static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
50    static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
51    static int DEFAULT_NUMBER_OF_ROW = 500;
52  
53    // qualifier
54    static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
55    static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
56    static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
57    static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;
58  
59    static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
60    static int DEFAULT_VALUE_LENGTH = 8;
61    static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;
62  
63    static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
64    static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;
65  
66    /**
67     * Default constructor, assumes all parameters from class constants.
68     */
69    public RedundantKVGenerator() {
70      this(new Random(42L),
71          DEFAULT_NUMBER_OF_ROW_PREFIXES,
72          DEFAULT_AVERAGE_PREFIX_LENGTH,
73          DEFAULT_PREFIX_LENGTH_VARIANCE,
74          DEFAULT_AVERAGE_SUFFIX_LENGTH,
75          DEFAULT_SUFFIX_LENGTH_VARIANCE,
76          DEFAULT_NUMBER_OF_ROW,
77  
78          DEFAULT_CHANCE_FOR_SAME_QUALIFIER,
79          DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
80          DEFAULT_AVERAGE_QUALIFIER_LENGTH,
81          DEFAULT_QUALIFIER_LENGTH_VARIANCE,
82  
83          DEFAULT_COLUMN_FAMILY_LENGTH,
84          DEFAULT_VALUE_LENGTH,
85          DEFAULT_CHANCE_FOR_ZERO_VALUE,
86  
87          DEFAULT_BASE_TIMESTAMP_DIVIDE,
88          DEFAULT_TIMESTAMP_DIFF_SIZE
89      );
90    }
91  
92    /**
93     * Various configuration options for generating key values
94     * @param randomizer pick things by random
95     */
96    public RedundantKVGenerator(Random randomizer,
97        int numberOfRowPrefixes,
98        int averagePrefixLength,
99        int prefixLengthVariance,
100       int averageSuffixLength,
101       int suffixLengthVariance,
102       int numberOfRows,
103 
104       float chanceForSameQualifier,
105       float chanceForSimiliarQualifier,
106       int averageQualifierLength,
107       int qualifierLengthVariance,
108 
109       int columnFamilyLength,
110       int valueLength,
111       float chanceForZeroValue,
112 
113       int baseTimestampDivide,
114       int timestampDiffSize) {
115     this.randomizer = randomizer;
116 
117     this.commonPrefix = DEFAULT_COMMON_PREFIX;
118     this.numberOfRowPrefixes = numberOfRowPrefixes;
119     this.averagePrefixLength = averagePrefixLength;
120     this.prefixLengthVariance = prefixLengthVariance;
121     this.averageSuffixLength = averageSuffixLength;
122     this.suffixLengthVariance = suffixLengthVariance;
123     this.numberOfRows = numberOfRows;
124 
125     this.chanceForSameQualifier = chanceForSameQualifier;
126     this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
127     this.averageQualifierLength = averageQualifierLength;
128     this.qualifierLengthVariance = qualifierLengthVariance;
129 
130     this.columnFamilyLength = columnFamilyLength;
131     this.valueLength = valueLength;
132     this.chanceForZeroValue = chanceForZeroValue;
133 
134     this.baseTimestampDivide = baseTimestampDivide;
135     this.timestampDiffSize = timestampDiffSize;
136   }
137 
138   /** Used to generate dataset */
139   private Random randomizer;
140 
141   // row settings
142   private byte[] commonPrefix; //global prefix before rowPrefixes
143   private int numberOfRowPrefixes;
144   private int averagePrefixLength;
145   private int prefixLengthVariance;
146   private int averageSuffixLength;
147   private int suffixLengthVariance;
148   private int numberOfRows;
149 
150   // family
151   private byte[] family;
152 
153   // qualifier
154   private float chanceForSameQualifier;
155   private float chanceForSimilarQualifier;
156   private int averageQualifierLength;
157   private int qualifierLengthVariance;
158 
159   private int columnFamilyLength;
160   private int valueLength;
161   private float chanceForZeroValue;
162 
163   private int baseTimestampDivide;
164   private int timestampDiffSize;
165 
166   private List<byte[]> generateRows() {
167     // generate prefixes
168     List<byte[]> prefixes = new ArrayList<>();
169     prefixes.add(new byte[0]);
170     for (int i = 1; i < numberOfRowPrefixes; ++i) {
171       int prefixLength = averagePrefixLength;
172       prefixLength += randomizer.nextInt(2 * prefixLengthVariance + 1) -
173           prefixLengthVariance;
174       byte[] newPrefix = new byte[prefixLength];
175       randomizer.nextBytes(newPrefix);
176       prefixes.add(newPrefix);
177     }
178 
179     // generate rest of the row
180     List<byte[]> rows = new ArrayList<>();
181     for (int i = 0; i < numberOfRows; ++i) {
182       int suffixLength = averageSuffixLength;
183       suffixLength += randomizer.nextInt(2 * suffixLengthVariance + 1) -
184           suffixLengthVariance;
185       int randomPrefix = randomizer.nextInt(prefixes.size());
186       byte[] row = new byte[prefixes.get(randomPrefix).length +
187                             suffixLength];
188       byte[] rowWithCommonPrefix = Bytes.concat(commonPrefix, row);
189       rows.add(rowWithCommonPrefix);
190     }
191 
192     return rows;
193   }
194 
195   /**
196    * Generate test data useful to test encoders.
197    * @param howMany How many Key values should be generated.
198    * @return sorted list of key values
199    */
200   public List<KeyValue> generateTestKeyValues(int howMany) {
201     return generateTestKeyValues(howMany, false);
202   }
203 
204   /**
205    * Generate test data useful to test encoders.
206    * @param howMany How many Key values should be generated.
207    * @return sorted list of key values
208    */
209   public List<KeyValue> generateTestKeyValues(int howMany, boolean useTags) {
210     List<KeyValue> result = new ArrayList<>();
211 
212     List<byte[]> rows = generateRows();
213     Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<>();
214 
215     if(family==null){
216       family = new byte[columnFamilyLength];
217       randomizer.nextBytes(family);
218     }
219 
220     long baseTimestamp = Math.abs(randomizer.nextInt()) / baseTimestampDivide;
221 
222     byte[] value = new byte[valueLength];
223 
224     for (int i = 0; i < howMany; ++i) {
225       long timestamp = baseTimestamp;
226       if(timestampDiffSize > 0){
227         timestamp += randomizer.nextInt(timestampDiffSize);
228       }
229       Integer rowId = randomizer.nextInt(rows.size());
230       byte[] row = rows.get(rowId);
231 
232       // generate qualifier, sometimes it is same, sometimes similar,
233       // occasionally completely different
234       byte[] qualifier;
235       float qualifierChance = randomizer.nextFloat();
236       if (!rowsToQualifier.containsKey(rowId)
237           || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier) {
238         int qualifierLength = averageQualifierLength;
239         qualifierLength += randomizer.nextInt(2 * qualifierLengthVariance + 1)
240             - qualifierLengthVariance;
241         qualifier = new byte[qualifierLength];
242         randomizer.nextBytes(qualifier);
243 
244         // add it to map
245         if (!rowsToQualifier.containsKey(rowId)) {
246           rowsToQualifier.put(rowId, new ArrayList<byte[]>());
247         }
248         rowsToQualifier.get(rowId).add(qualifier);
249       } else if (qualifierChance > chanceForSameQualifier) {
250         // similar qualifier
251         List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
252         byte[] originalQualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers
253             .size()));
254 
255         qualifier = new byte[originalQualifier.length];
256         int commonPrefix = randomizer.nextInt(qualifier.length);
257         System.arraycopy(originalQualifier, 0, qualifier, 0, commonPrefix);
258         for (int j = commonPrefix; j < qualifier.length; ++j) {
259           qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
260         }
261 
262         rowsToQualifier.get(rowId).add(qualifier);
263       } else {
264         // same qualifier
265         List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);
266         qualifier = previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
267       }
268 
269       if (randomizer.nextFloat() < chanceForZeroValue) {
270         Arrays.fill(value, (byte) 0);
271       } else {
272         randomizer.nextBytes(value);
273       }
274 
275       if (useTags) {
276         result.add(new KeyValue(row, family, qualifier, timestamp, value, new Tag[] {
277           new Tag((byte) 1, "value1") }));
278       } else {
279         result.add(new KeyValue(row, family, qualifier, timestamp, value));
280       }
281     }
282 
283     Collections.sort(result, KeyValue.COMPARATOR);
284 
285     return result;
286   }
287 
288   /**
289    * Convert list of KeyValues to byte buffer.
290    * @param keyValues list of KeyValues to be converted.
291    * @return buffer with content from key values
292    */
293   public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
294       boolean includesMemstoreTS) {
295     int totalSize = 0;
296     for (KeyValue kv : keyValues) {
297       totalSize += kv.getLength();
298       if (includesMemstoreTS) {
299         totalSize += WritableUtils.getVIntSize(kv.getMvccVersion());
300       }
301     }
302 
303     ByteBuffer result = ByteBuffer.allocate(totalSize);
304     for (KeyValue kv : keyValues) {
305       result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
306       if (includesMemstoreTS) {
307         ByteBufferUtils.writeVLong(result, kv.getMvccVersion());
308       }
309     }
310     return result;
311   }
312 
313   public RedundantKVGenerator setFamily(byte[] family) {
314     this.family = family;
315     this.columnFamilyLength = family.length;
316     return this;
317   }
318 }