1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase;
20
21 import com.google.common.base.Objects;
22 import com.google.common.collect.Sets;
23 import com.yammer.metrics.core.Histogram;
24 import org.apache.commons.cli.CommandLine;
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27 import org.apache.hadoop.conf.Configuration;
28 import org.apache.hadoop.hbase.chaos.actions.MoveRandomRegionOfTableAction;
29 import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsExceptMetaAction;
30 import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
31 import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;
32 import org.apache.hadoop.hbase.chaos.policies.Policy;
33 import org.apache.hadoop.hbase.client.Admin;
34 import org.apache.hadoop.hbase.ipc.RpcClient;
35 import org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy;
36 import org.apache.hadoop.hbase.testclassification.IntegrationTests;
37 import org.apache.hadoop.hbase.util.Bytes;
38 import org.apache.hadoop.hbase.util.YammerHistogramUtils;
39 import org.apache.hadoop.mapreduce.Counters;
40 import org.apache.hadoop.mapreduce.Job;
41 import org.apache.hadoop.util.ToolRunner;
42 import org.junit.experimental.categories.Category;
43
44 import java.util.*;
45 import java.util.concurrent.Callable;
46
47 import static java.lang.String.format;
48 import static org.junit.Assert.assertEquals;
49 import static org.junit.Assert.assertNotNull;
50 import static org.junit.Assert.assertTrue;
51
52
53
54
55
56
57
58
59 @Category(IntegrationTests.class)
60 public class IntegrationTestRegionReplicaPerf extends IntegrationTestBase {
61
  private static final Log LOG = LogFactory.getLog(IntegrationTestRegionReplicaPerf.class);

  /** Single column family used by the PerformanceEvaluation workloads. */
  private static final byte[] FAMILY_NAME = Bytes.toBytes("info");

  // Command-line option names and their defaults. Defaults are kept as Strings
  // so they can be passed straight through commons-cli getOptionValue fallbacks.

  /** Monkey sleep between chaos actions, in milliseconds (default: 10s). */
  private static final String SLEEP_TIME_KEY = "sleeptime";
  private static final String SLEEP_TIME_DEFAULT = "" + (10 * 1000l);
  /** Name of the table under test. */
  private static final String TABLE_NAME_KEY = "tableName";
  private static final String TABLE_NAME_DEFAULT = "IntegrationTestRegionReplicaPerf";
  /** Region replica count applied partway through the test. */
  private static final String REPLICA_COUNT_KEY = "replicas";
  private static final String REPLICA_COUNT_DEFAULT = "" + 3;
  /** Override for hbase.client.primaryCallTimeout.{get,multiget}, in microseconds. */
  private static final String PRIMARY_TIMEOUT_KEY = "timeout";
  private static final String PRIMARY_TIMEOUT_DEFAULT = "" + 10 * 1000;
  /** Number of RegionServers to initialize the cluster with. */
  private static final String NUM_RS_KEY = "numRs";
  private static final String NUM_RS_DEFAULT = "" + 3;
77
  /**
   * Statistics drawn from a yammer {@link Histogram} of request latencies.
   */
  private enum Stat {
    /** Standard deviation of the recorded values. */
    STDEV {
      @Override
      double apply(Histogram hist) {
        return hist.stdDev();
      }
    },
    /** 99.99th percentile of the recorded values. */
    FOUR_9S {
      @Override
      double apply(Histogram hist) {
        return hist.getSnapshot().getValue(0.9999);
      }
    };

    /** Extract this statistic from {@code hist}. */
    abstract double apply(Histogram hist);
  }
94
  // Test parameters, populated from the command line in processOptions().
  private TableName tableName;   // table under test
  private long sleepTime;        // monkey sleep between actions, ms
  private int replicaCount;      // region replica count applied mid-test
  private int primaryTimeout;    // primary call timeout override (see setUp)
  private int clusterSize;       // number of RegionServers to initialize
100
101
102
103
104 static class PerfEvalCallable implements Callable<TimingResult> {
105 private final Queue<String> argv = new LinkedList<String>();
106 private final Admin admin;
107
108 public PerfEvalCallable(Admin admin, String argv) {
109
110 this.admin = admin;
111 this.argv.addAll(Arrays.asList(argv.split(" ")));
112 LOG.debug("Created PerformanceEvaluationCallable with args: " + argv);
113 }
114
115 @Override
116 public TimingResult call() throws Exception {
117 PerformanceEvaluation.TestOptions opts = PerformanceEvaluation.parseOpts(argv);
118 PerformanceEvaluation.checkTable(admin, opts);
119 PerformanceEvaluation.RunResult results[] = null;
120 long numRows = opts.totalRows;
121 long elapsedTime = 0;
122 if (opts.nomapred) {
123 results = PerformanceEvaluation.doLocalClients(opts, admin.getConfiguration());
124 for (PerformanceEvaluation.RunResult r : results) {
125 elapsedTime = Math.max(elapsedTime, r.duration);
126 }
127 } else {
128 Job job = PerformanceEvaluation.doMapReduce(opts, admin.getConfiguration());
129 Counters counters = job.getCounters();
130 numRows = counters.findCounter(PerformanceEvaluation.Counter.ROWS).getValue();
131 elapsedTime = counters.findCounter(PerformanceEvaluation.Counter.ELAPSED_TIME).getValue();
132 }
133 return new TimingResult(numRows, elapsedTime, results);
134 }
135 }
136
137
138
139
140 static class TimingResult {
141 public final long numRows;
142 public final long elapsedTime;
143 public final PerformanceEvaluation.RunResult results[];
144
145 public TimingResult(long numRows, long elapsedTime, PerformanceEvaluation.RunResult results[]) {
146 this.numRows = numRows;
147 this.elapsedTime = elapsedTime;
148 this.results = results;
149 }
150
151 @Override
152 public String toString() {
153 return Objects.toStringHelper(this)
154 .add("numRows", numRows)
155 .add("elapsedTime", elapsedTime)
156 .toString();
157 }
158 }
159
160 @Override
161 public void setUp() throws Exception {
162 super.setUp();
163 Configuration conf = util.getConfiguration();
164
165
166
167 assertEquals("Master must be configured with StochasticLoadBalancer",
168 "org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer",
169 conf.get("hbase.master.loadbalancer.class"));
170
171 assertTrue("hbase.regionserver.storefile.refresh.period must be greater than zero.",
172 conf.getLong("hbase.regionserver.storefile.refresh.period", 0) > 0);
173
174
175 conf.setBoolean(RpcClient.SPECIFIC_WRITE_THREAD, true);
176
177 conf.setLong("hbase.client.primaryCallTimeout.get", primaryTimeout);
178 conf.setLong("hbase.client.primaryCallTimeout.multiget", primaryTimeout);
179 }
180
  /**
   * Initializes (or connects to) a cluster sized to the configured number of
   * RegionServers.
   */
  @Override
  public void setUpCluster() throws Exception {
    util = getTestingUtil(getConf());
    util.initializeCluster(clusterSize);
  }
186
187 @Override
188 public void setUpMonkey() throws Exception {
189 Policy p = new PeriodicRandomActionPolicy(sleepTime,
190 new RestartRandomRsExceptMetaAction(sleepTime),
191 new MoveRandomRegionOfTableAction(tableName));
192 this.monkey = new PolicyBasedChaosMonkey(util, p);
193
194 }
195
196 @Override
197 protected void addOptions() {
198 addOptWithArg(TABLE_NAME_KEY, "Alternate table name. Default: '"
199 + TABLE_NAME_DEFAULT + "'");
200 addOptWithArg(SLEEP_TIME_KEY, "How long the monkey sleeps between actions. Default: "
201 + SLEEP_TIME_DEFAULT);
202 addOptWithArg(REPLICA_COUNT_KEY, "Number of region replicas. Default: "
203 + REPLICA_COUNT_DEFAULT);
204 addOptWithArg(PRIMARY_TIMEOUT_KEY, "Overrides hbase.client.primaryCallTimeout. Default: "
205 + PRIMARY_TIMEOUT_DEFAULT + " (10ms)");
206 addOptWithArg(NUM_RS_KEY, "Specify the number of RegionServers to use. Default: "
207 + NUM_RS_DEFAULT);
208 }
209
210 @Override
211 protected void processOptions(CommandLine cmd) {
212 tableName = TableName.valueOf(cmd.getOptionValue(TABLE_NAME_KEY, TABLE_NAME_DEFAULT));
213 sleepTime = Long.parseLong(cmd.getOptionValue(SLEEP_TIME_KEY, SLEEP_TIME_DEFAULT));
214 replicaCount = Integer.parseInt(cmd.getOptionValue(REPLICA_COUNT_KEY, REPLICA_COUNT_DEFAULT));
215 primaryTimeout =
216 Integer.parseInt(cmd.getOptionValue(PRIMARY_TIMEOUT_KEY, PRIMARY_TIMEOUT_DEFAULT));
217 clusterSize = Integer.parseInt(cmd.getOptionValue(NUM_RS_KEY, NUM_RS_DEFAULT));
218 LOG.debug(Objects.toStringHelper("Parsed Options")
219 .add(TABLE_NAME_KEY, tableName)
220 .add(SLEEP_TIME_KEY, sleepTime)
221 .add(REPLICA_COUNT_KEY, replicaCount)
222 .add(PRIMARY_TIMEOUT_KEY, primaryTimeout)
223 .add(NUM_RS_KEY, clusterSize)
224 .toString());
225 }
226
  /**
   * Tool entry point: runs the full perf comparison.
   * @return 0 on completion; assertion failures propagate as exceptions.
   */
  @Override
  public int runTestFromCommandLine() throws Exception {
    test();
    return 0;
  }
232
  /** @return the table under test, as set via the 'tableName' option (or its default). */
  @Override
  public TableName getTablename() {
    return tableName;
  }
237
  /** @return the single column family this test reads and writes. */
  @Override
  protected Set<String> getColumnFamilies() {
    return Sets.newHashSet(Bytes.toString(FAMILY_NAME));
  }
242
243
244 private static double calcMean(String desc, Stat stat, List<TimingResult> results) {
245 double sum = 0;
246 int count = 0;
247
248 for (TimingResult tr : results) {
249 for (PerformanceEvaluation.RunResult r : tr.results) {
250 assertNotNull("One of the run results is missing detailed run data.", r.hist);
251 sum += stat.apply(r.hist);
252 count += 1;
253 LOG.debug(desc + "{" + YammerHistogramUtils.getHistogramReport(r.hist) + "}");
254 }
255 }
256 return sum / count;
257 }
258
  /**
   * Runs PerformanceEvaluation read workloads against the table — first without
   * and then with region replicas — while the chaos monkey disturbs the cluster,
   * then asserts that replicas reduce both latency variance (stdev) and the
   * 99.99th-percentile latency.
   */
  public void test() throws Exception {
    int maxIters = 3;
    String replicas = "--replicas=" + replicaCount;
    // Disable splitting so the region layout stays at the presplit count;
    // verified by the assertEquals below after the table is created.
    String splitPolicy = "--splitPolicy=" + DisabledRegionSplitPolicy.class.getName();
    String writeOpts = format("%s --nomapred --table=%s --presplit=16 sequentialWrite 4",
      splitPolicy, tableName);
    String readOpts =
      format("--nomapred --table=%s --latency --sampleRate=0.1 randomRead 4", tableName);
    String replicaReadOpts = format("%s %s", replicas, readOpts);

    ArrayList<TimingResult> resultsWithoutReplicas = new ArrayList<TimingResult>(maxIters);
    ArrayList<TimingResult> resultsWithReplicas = new ArrayList<TimingResult>(maxIters);

    // Populate the table (this also creates it) before any chaos starts.
    LOG.debug("Populating table.");
    new PerfEvalCallable(util.getHBaseAdmin(), writeOpts).call();

    // Confirm the split policy actually took effect, then start the monkey.
    assertEquals("Table must be created with DisabledRegionSplitPolicy. Broken test.",
      DisabledRegionSplitPolicy.class.getName(),
      util.getHBaseAdmin().getTableDescriptor(tableName).getRegionSplitPolicyClassName());
    startMonkey();

    // Baseline: read workload iterations without region replicas.
    for (int i = 0; i < maxIters; i++) {
      LOG.debug("Launching non-replica job " + (i + 1) + "/" + maxIters);
      resultsWithoutReplicas.add(new PerfEvalCallable(util.getHBaseAdmin(), readOpts).call());
      // Brief pause between iterations.
      Thread.sleep(5000l);
    }

    // Pause the chaos while the table is altered to add replicas, then resume.
    cleanUpMonkey("Altering table.");
    LOG.debug("Altering " + tableName + " replica count to " + replicaCount);
    IntegrationTestingUtility.setReplicas(util.getHBaseAdmin(), tableName, replicaCount);
    setUpMonkey();
    startMonkey();

    // Measured run: same read workload with replica reads enabled.
    for (int i = 0; i < maxIters; i++) {
      LOG.debug("Launching replica job " + (i + 1) + "/" + maxIters);
      resultsWithReplicas.add(new PerfEvalCallable(util.getHBaseAdmin(), replicaReadOpts).call());
      // Brief pause between iterations.
      Thread.sleep(5000l);
    }

    // Average the latency statistics over all iterations of each phase.
    double withoutReplicasStdevMean =
      calcMean("withoutReplicas", Stat.STDEV, resultsWithoutReplicas);
    double withoutReplicas9999Mean =
      calcMean("withoutReplicas", Stat.FOUR_9S, resultsWithoutReplicas);
    double withReplicasStdevMean =
      calcMean("withReplicas", Stat.STDEV, resultsWithReplicas);
    double withReplicas9999Mean =
      calcMean("withReplicas", Stat.FOUR_9S, resultsWithReplicas);

    LOG.info(Objects.toStringHelper(this)
      .add("withoutReplicas", resultsWithoutReplicas)
      .add("withReplicas", resultsWithReplicas)
      .add("withoutReplicasStdevMean", withoutReplicasStdevMean)
      .add("withoutReplicas99.99Mean", withoutReplicas9999Mean)
      .add("withReplicasStdevMean", withReplicasStdevMean)
      .add("withReplicas99.99Mean", withReplicas9999Mean)
      .toString());

    assertTrue(
      "Running with region replicas under chaos should have less request variance than without. "
      + "withReplicas.stdev.mean: " + withReplicasStdevMean + "ms "
      + "withoutReplicas.stdev.mean: " + withoutReplicasStdevMean + "ms.",
      withReplicasStdevMean <= withoutReplicasStdevMean);
    assertTrue(
      "Running with region replicas under chaos should improve 99.99pct latency. "
      + "withReplicas.99.99.mean: " + withReplicas9999Mean + "ms "
      + "withoutReplicas.99.99.mean: " + withoutReplicas9999Mean + "ms.",
      withReplicas9999Mean <= withoutReplicas9999Mean);
  }
337
338 public static void main(String[] args) throws Exception {
339 Configuration conf = HBaseConfiguration.create();
340 IntegrationTestingUtility.setUseDistributedCluster(conf);
341 int status = ToolRunner.run(conf, new IntegrationTestRegionReplicaPerf(), args);
342 System.exit(status);
343 }
344 }