View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase;
20  
21  import java.io.IOException;
22  import java.util.Locale;
23  import java.util.Set;
24  
25  import org.apache.commons.cli.CommandLine;
26  import org.apache.commons.lang.StringUtils;
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy;
32  import org.apache.hadoop.hbase.regionserver.HStore;
33  import org.apache.hadoop.hbase.regionserver.StoreEngine;
34  import org.apache.hadoop.hbase.regionserver.StripeStoreConfig;
35  import org.apache.hadoop.hbase.regionserver.StripeStoreEngine;
36  import org.apache.hadoop.hbase.util.AbstractHBaseTool;
37  import org.apache.hadoop.hbase.util.Bytes;
38  import org.apache.hadoop.hbase.util.MultiThreadedAction;
39  import org.apache.hadoop.hbase.util.MultiThreadedReader;
40  import org.apache.hadoop.hbase.util.MultiThreadedWriter;
41  import org.apache.hadoop.hbase.util.RegionSplitter;
42  import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator;
43  import org.apache.hadoop.hbase.util.LoadTestKVGenerator;
44  import org.junit.Assert;
45  
46  
47  /**
48   * A perf test which does large data ingestion using stripe compactions and regular compactions.
49   */
50  @InterfaceAudience.Private
51  public class StripeCompactionsPerformanceEvaluation extends AbstractHBaseTool {
52    private static final Log LOG = LogFactory.getLog(StripeCompactionsPerformanceEvaluation.class);
53    private static final TableName TABLE_NAME =
54      TableName.valueOf(StripeCompactionsPerformanceEvaluation.class.getSimpleName());
55    private static final byte[] COLUMN_FAMILY = Bytes.toBytes("CF");
56    private static final int MIN_NUM_SERVERS = 1;
57  
58    // Option names.
59    private static final String DATAGEN_KEY = "datagen";
60    private static final String ITERATIONS_KEY = "iters";
61    private static final String PRELOAD_COUNT_KEY = "pwk";
62    private static final String WRITE_COUNT_KEY = "wk";
63    private static final String WRITE_THREADS_KEY = "wt";
64    private static final String READ_THREADS_KEY = "rt";
65    private static final String INITIAL_STRIPE_COUNT_KEY = "initstripes";
66    private static final String SPLIT_SIZE_KEY = "splitsize";
67    private static final String SPLIT_PARTS_KEY = "splitparts";
68    private static final String VALUE_SIZE_KEY = "valsize";
69    private static final String SEQ_SHARDS_PER_SERVER_KEY = "seqshards";
70  
71    // Option values.
72    private LoadTestDataGenerator dataGen;
73    private int iterationCount;
74    private long preloadKeys;
75    private long writeKeys;
76    private int writeThreads;
77    private int readThreads;
78    private Long initialStripeCount;
79    private Long splitSize;
80    private Long splitParts;
81  
82    private static final String VALUE_SIZE_DEFAULT = "512:4096";
83  
84    protected IntegrationTestingUtility util = new IntegrationTestingUtility();
85  
86    @Override
87    protected void addOptions() {
88      addOptWithArg(DATAGEN_KEY, "Type of data generator to use (default or sequential)");
89      addOptWithArg(SEQ_SHARDS_PER_SERVER_KEY, "Sequential generator will shard the data into many"
90          + " sequences. The number of such shards per server is specified (default is 1).");
91      addOptWithArg(ITERATIONS_KEY, "Number of iterations to run to compare");
92      addOptWithArg(PRELOAD_COUNT_KEY, "Number of keys to preload, per server");
93      addOptWithArg(WRITE_COUNT_KEY, "Number of keys to write, per server");
94      addOptWithArg(WRITE_THREADS_KEY, "Number of threads to use for writing");
95      addOptWithArg(READ_THREADS_KEY, "Number of threads to use for reading");
96      addOptWithArg(INITIAL_STRIPE_COUNT_KEY, "Number of stripes to split regions into initially");
97      addOptWithArg(SPLIT_SIZE_KEY, "Size at which a stripe will split into more stripes");
98      addOptWithArg(SPLIT_PARTS_KEY, "Number of stripes to split a stripe into when it splits");
99      addOptWithArg(VALUE_SIZE_KEY, "Value size; either a number, or a colon-separated range;"
100         + " default " + VALUE_SIZE_DEFAULT);
101   }
102 
103   @Override
104   protected void processOptions(CommandLine cmd) {
105     int minValueSize = 0, maxValueSize = 0;
106     String valueSize = cmd.getOptionValue(VALUE_SIZE_KEY, VALUE_SIZE_DEFAULT);
107     if (valueSize.contains(":")) {
108       String[] valueSizes = valueSize.split(":");
109       if (valueSize.length() != 2) throw new RuntimeException("Invalid value size: " + valueSize);
110       minValueSize = Integer.parseInt(valueSizes[0]);
111       maxValueSize = Integer.parseInt(valueSizes[1]);
112     } else {
113       minValueSize = maxValueSize = Integer.parseInt(valueSize);
114     }
115     String datagen = cmd.getOptionValue(DATAGEN_KEY, "default").toLowerCase(Locale.ROOT);
116     if ("default".equals(datagen)) {
117       dataGen = new MultiThreadedAction.DefaultDataGenerator(
118           minValueSize, maxValueSize, 1, 1, new byte[][] { COLUMN_FAMILY });
119     } else if ("sequential".equals(datagen)) {
120       int shards = Integer.parseInt(cmd.getOptionValue(SEQ_SHARDS_PER_SERVER_KEY, "1"));
121       dataGen = new SeqShardedDataGenerator(minValueSize, maxValueSize, shards);
122     } else {
123       throw new RuntimeException("Unknown " + DATAGEN_KEY + ": " + datagen);
124     }
125     iterationCount = Integer.parseInt(cmd.getOptionValue(ITERATIONS_KEY, "1"));
126     preloadKeys = Long.parseLong(cmd.getOptionValue(PRELOAD_COUNT_KEY, "1000000"));
127     writeKeys = Long.parseLong(cmd.getOptionValue(WRITE_COUNT_KEY, "1000000"));
128     writeThreads = Integer.parseInt(cmd.getOptionValue(WRITE_THREADS_KEY, "10"));
129     readThreads = Integer.parseInt(cmd.getOptionValue(READ_THREADS_KEY, "20"));
130     initialStripeCount = getLongOrNull(cmd, INITIAL_STRIPE_COUNT_KEY);
131     splitSize = getLongOrNull(cmd, SPLIT_SIZE_KEY);
132     splitParts = getLongOrNull(cmd, SPLIT_PARTS_KEY);
133   }
134 
135   private Long getLongOrNull(CommandLine cmd, String option) {
136     if (!cmd.hasOption(option)) return null;
137     return Long.parseLong(cmd.getOptionValue(option));
138   }
139 
140   @Override
141   public Configuration getConf() {
142     Configuration c = super.getConf();
143     if (c == null && util != null) {
144       conf = util.getConfiguration();
145       c = conf;
146     }
147     return c;
148   }
149 
150   @Override
151   protected int doWork() throws Exception {
152     setUp();
153     try {
154       boolean isStripe = true;
155       for (int i = 0; i < iterationCount * 2; ++i) {
156         createTable(isStripe);
157         runOneTest((isStripe ? "Stripe" : "Default") + i, conf);
158         isStripe = !isStripe;
159       }
160       return 0;
161     } finally {
162       tearDown();
163     }
164   }
165 
166 
167   private void setUp() throws Exception {
168     this.util = new IntegrationTestingUtility();
169     LOG.debug("Initializing/checking cluster has " + MIN_NUM_SERVERS + " servers");
170     util.initializeCluster(MIN_NUM_SERVERS);
171     LOG.debug("Done initializing/checking cluster");
172   }
173 
174   protected void deleteTable() throws Exception {
175     if (util.getHBaseAdmin().tableExists(TABLE_NAME)) {
176       LOG.info("Deleting table");
177       if (!util.getHBaseAdmin().isTableDisabled(TABLE_NAME)) {
178         util.getHBaseAdmin().disableTable(TABLE_NAME);
179       }
180       util.getHBaseAdmin().deleteTable(TABLE_NAME);
181       LOG.info("Deleted table");
182     }
183   }
184 
185   private void createTable(boolean isStripe) throws Exception {
186     createTable(createHtd(isStripe));
187   }
188 
189   private void tearDown() throws Exception {
190     deleteTable();
191     LOG.info("Restoring the cluster");
192     util.restoreCluster();
193     LOG.info("Done restoring the cluster");
194   }
195 
196   private void runOneTest(String description, Configuration conf) throws Exception {
197     int numServers = util.getHBaseClusterInterface().getClusterStatus().getServersSize();
198     long startKey = (long)preloadKeys * numServers;
199     long endKey = startKey + (long)writeKeys * numServers;
200     status(String.format("%s test starting on %d servers; preloading 0 to %d and writing to %d",
201         description, numServers, startKey, endKey));
202 
203     if (preloadKeys > 0) {
204       MultiThreadedWriter preloader = new MultiThreadedWriter(dataGen, conf, TABLE_NAME);
205       long time = System.currentTimeMillis();
206       preloader.start(0, startKey, writeThreads);
207       preloader.waitForFinish();
208       if (preloader.getNumWriteFailures() > 0) {
209         throw new IOException("Preload failed");
210       }
211       int waitTime = (int)Math.min(preloadKeys / 100, 30000); // arbitrary
212       status(description + " preload took " + (System.currentTimeMillis()-time)/1000
213           + "sec; sleeping for " + waitTime/1000 + "sec for store to stabilize");
214       Thread.sleep(waitTime);
215     }
216 
217     MultiThreadedWriter writer = new MultiThreadedWriter(dataGen, conf, TABLE_NAME);
218     MultiThreadedReader reader = new MultiThreadedReader(dataGen, conf, TABLE_NAME, 100);
219     // reader.getMetrics().enable();
220     reader.linkToWriter(writer);
221 
222     long testStartTime = System.currentTimeMillis();
223     writer.start(startKey, endKey, writeThreads);
224     reader.start(startKey, endKey, readThreads);
225     writer.waitForFinish();
226     reader.waitForFinish();
227     // reader.waitForVerification(300000);
228     // reader.abortAndWaitForFinish();
229     status("Readers and writers stopped for test " + description);
230 
231     boolean success = writer.getNumWriteFailures() == 0;
232     if (!success) {
233       LOG.error("Write failed");
234     } else {
235       success = reader.getNumReadErrors() == 0 && reader.getNumReadFailures() == 0;
236       if (!success) {
237         LOG.error("Read failed");
238       }
239     }
240 
241     // Dump perf regardless of the result.
242     /*StringBuilder perfDump = new StringBuilder();
243     for (Pair<Long, Long> pt : reader.getMetrics().getCombinedCdf()) {
244       perfDump.append(String.format(
245           "csvread,%s,%d,%d%n", description, pt.getFirst(), pt.getSecond()));
246     }
247     if (dumpTimePerf) {
248       Iterator<Triple<Long, Double, Long>> timePerf = reader.getMetrics().getCombinedTimeSeries();
249       while (timePerf.hasNext()) {
250         Triple<Long, Double, Long> pt = timePerf.next();
251         perfDump.append(String.format("csvtime,%s,%d,%d,%.4f%n",
252             description, pt.getFirst(), pt.getThird(), pt.getSecond()));
253       }
254     }
255     LOG.info("Performance data dump for " + description + " test: \n" + perfDump.toString());*/
256     status(description + " test took " + (System.currentTimeMillis()-testStartTime)/1000 + "sec");
257     Assert.assertTrue(success);
258   }
259 
260   private static void status(String s) {
261     LOG.info("STATUS " + s);
262     System.out.println(s);
263   }
264 
265   private HTableDescriptor createHtd(boolean isStripe) throws Exception {
266     HTableDescriptor htd = new HTableDescriptor(TABLE_NAME);
267     htd.addFamily(new HColumnDescriptor(COLUMN_FAMILY));
268     String noSplitsPolicy = DisabledRegionSplitPolicy.class.getName();
269     htd.setConfiguration(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, noSplitsPolicy);
270     if (isStripe) {
271       htd.setConfiguration(StoreEngine.STORE_ENGINE_CLASS_KEY, StripeStoreEngine.class.getName());
272       if (initialStripeCount != null) {
273         htd.setConfiguration(
274             StripeStoreConfig.INITIAL_STRIPE_COUNT_KEY, initialStripeCount.toString());
275         htd.setConfiguration(
276             HStore.BLOCKING_STOREFILES_KEY, Long.toString(10 * initialStripeCount));
277       } else {
278         htd.setConfiguration(HStore.BLOCKING_STOREFILES_KEY, "500");
279       }
280       if (splitSize != null) {
281         htd.setConfiguration(StripeStoreConfig.SIZE_TO_SPLIT_KEY, splitSize.toString());
282       }
283       if (splitParts != null) {
284         htd.setConfiguration(StripeStoreConfig.SPLIT_PARTS_KEY, splitParts.toString());
285       }
286     } else {
287       htd.setConfiguration(HStore.BLOCKING_STOREFILES_KEY, "10"); // default
288     }
289     return htd;
290   }
291 
292   protected void createTable(HTableDescriptor htd) throws Exception {
293     deleteTable();
294     if (util.getHBaseClusterInterface() instanceof MiniHBaseCluster) {
295       LOG.warn("Test does not make a lot of sense for minicluster. Will set flush size low.");
296       htd.setConfiguration(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, "1048576");
297     }
298     byte[][] splits = new RegionSplitter.HexStringSplit().split(
299         util.getHBaseClusterInterface().getClusterStatus().getServersSize());
300     util.getHBaseAdmin().createTable(htd, splits);
301   }
302 
303   public static class SeqShardedDataGenerator extends LoadTestDataGenerator {
304     private static final byte[][] COLUMN_NAMES = new byte[][] { Bytes.toBytes("col1") };
305     private static final int PAD_TO = 10;
306     private static final int PREFIX_PAD_TO = 7;
307 
308     private final int numPartitions;
309 
310     public SeqShardedDataGenerator(int minValueSize, int maxValueSize, int numPartitions) {
311       super(minValueSize, maxValueSize);
312       this.numPartitions = numPartitions;
313     }
314 
315     @Override
316     public byte[] getDeterministicUniqueKey(long keyBase) {
317       String num = StringUtils.leftPad(String.valueOf(keyBase), PAD_TO, "0");
318       return Bytes.toBytes(getPrefix(keyBase) + num);
319     }
320 
321     private String getPrefix(long i) {
322       return StringUtils.leftPad(String.valueOf((int)(i % numPartitions)), PREFIX_PAD_TO, "0");
323     }
324 
325     @Override
326     public byte[][] getColumnFamilies() {
327       return new byte[][] { COLUMN_FAMILY };
328     }
329 
330     @Override
331     public byte[][] generateColumnsForCf(byte[] rowKey, byte[] cf) {
332       return COLUMN_NAMES;
333     }
334 
335     @Override
336     public byte[] generateValue(byte[] rowKey, byte[] cf, byte[] column) {
337       return kvGenerator.generateRandomSizeValue(rowKey, cf, column);
338     }
339 
340     @Override
341     public boolean verify(byte[] rowKey, byte[] cf, byte[] column, byte[] value) {
342       return LoadTestKVGenerator.verify(value, rowKey, cf, column);
343     }
344 
345     @Override
346     public boolean verify(byte[] rowKey, byte[] cf, Set<byte[]> columnSet) {
347       return true;
348     }
349   };
350 }