1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.chaos.actions;
20
21 import java.util.ArrayList;
22 import java.util.HashSet;
23 import java.util.LinkedList;
24 import java.util.List;
25 import java.util.Set;
26
27 import org.apache.commons.lang.math.RandomUtils;
28 import org.apache.hadoop.hbase.ClusterStatus;
29 import org.apache.hadoop.hbase.ServerName;
30 import org.junit.Assert;
31 import org.slf4j.Logger;
32 import org.slf4j.LoggerFactory;
33
34
35 public class UnbalanceKillAndRebalanceAction extends Action {
36 private static final Logger LOG =
37 LoggerFactory.getLogger(UnbalanceKillAndRebalanceAction.class);
38
39
40 private static final double FRC_SERVERS_THAT_HOARD_AND_LIVE = 0.1;
41 private static final double FRC_SERVERS_THAT_HOARD_AND_DIE = 0.1;
42 private static final double HOARD_FRC_OF_REGIONS = 0.8;
43
44
45 private long waitForUnbalanceMilliSec;
46 private long waitForKillsMilliSec;
47 private long waitAfterBalanceMilliSec;
48 private boolean killMetaRs;
49
50 public UnbalanceKillAndRebalanceAction(long waitUnbalance, long waitKill, long waitAfterBalance,
51 boolean killMetaRs) {
52 super();
53 waitForUnbalanceMilliSec = waitUnbalance;
54 waitForKillsMilliSec = waitKill;
55 waitAfterBalanceMilliSec = waitAfterBalance;
56 this.killMetaRs = killMetaRs;
57 }
58
59 @Override protected Logger getLogger() {
60 return LOG;
61 }
62
63 @Override
64 public void perform() throws Exception {
65 ClusterStatus status = this.cluster.getClusterStatus();
66 List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
67 Set<ServerName> killedServers = new HashSet<ServerName>();
68
69 int liveCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_LIVE * victimServers.size());
70 int deadCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_DIE * victimServers.size());
71 Assert.assertTrue(
72 "There are not enough victim servers: " + victimServers.size(),
73 liveCount + deadCount < victimServers.size());
74 List<ServerName> targetServers = new ArrayList<ServerName>(liveCount);
75 for (int i = 0; i < liveCount + deadCount; ++i) {
76 int victimIx = RandomUtils.nextInt(victimServers.size());
77 targetServers.add(victimServers.remove(victimIx));
78 }
79 unbalanceRegions(status, victimServers, targetServers, HOARD_FRC_OF_REGIONS);
80 Thread.sleep(waitForUnbalanceMilliSec);
81 ServerName metaServer = cluster.getServerHoldingMeta();
82 for (ServerName targetServer: targetServers) {
83
84
85 if (context.isStopping()) {
86 break;
87 }
88 if (killedServers.size() >= liveCount) {
89 break;
90 }
91
92 if (!killMetaRs && targetServer.equals(metaServer)) {
93 getLogger().info("Not killing server because it holds hbase:meta.");
94 } else {
95 killRs(targetServer);
96 killedServers.add(targetServer);
97 }
98 }
99
100 Thread.sleep(waitForKillsMilliSec);
101 forceBalancer();
102 Thread.sleep(waitAfterBalanceMilliSec);
103 for (ServerName server:killedServers) {
104 startRs(server);
105 }
106 }
107 }