1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.chaos.actions;
20
21 import java.util.HashSet;
22 import java.util.List;
23 import java.util.Set;
24 import org.apache.hadoop.hbase.ServerName;
25 import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
26 import org.slf4j.Logger;
27 import org.slf4j.LoggerFactory;
28
29
30
31
32 public class BatchRestartRsAction extends RestartActionBaseAction {
33 float ratio;
34 private static final Logger LOG = LoggerFactory.getLogger(BatchRestartRsAction.class);
35
36 public BatchRestartRsAction(long sleepTime, float ratio) {
37 super(sleepTime);
38 this.ratio = ratio;
39 }
40
41 @Override protected Logger getLogger() {
42 return LOG;
43 }
44
45 @Override
46 public void perform() throws Exception {
47 getLogger().info(String.format("Performing action: Batch restarting %d%% of region servers",
48 (int)(ratio * 100)));
49 List<ServerName> selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(),
50 ratio);
51
52 Set<ServerName> killedServers = new HashSet<ServerName>();
53
54 for (ServerName server : selectedServers) {
55
56
57 if (context.isStopping()) {
58 break;
59 }
60 getLogger().info("Killing region server:" + server);
61 cluster.killRegionServer(server);
62 killedServers.add(server);
63 }
64
65 for (ServerName server : killedServers) {
66 cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT);
67 }
68
69 getLogger().info("Killed " + killedServers.size() + " region servers. Reported num of rs:"
70 + cluster.getClusterStatus().getServersSize());
71
72 sleep(sleepTime);
73
74 for (ServerName server : killedServers) {
75 getLogger().info("Starting region server:" + server.getHostname());
76 cluster.startRegionServer(server.getHostname(), server.getPort());
77
78 }
79 for (ServerName server : killedServers) {
80 cluster.waitForRegionServerToStart(server.getHostname(),
81 server.getPort(),
82 PolicyBasedChaosMonkey.TIMEOUT);
83 }
84 getLogger().info("Started " + killedServers.size() +" region servers. Reported num of rs:"
85 + cluster.getClusterStatus().getServersSize());
86 }
87 }