View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.chaos.actions;
20  
21  import java.util.HashSet;
22  import java.util.List;
23  import java.util.Set;
24  import org.apache.hadoop.hbase.ServerName;
25  import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
26  import org.slf4j.Logger;
27  import org.slf4j.LoggerFactory;
28  
29  /**
30   * Restarts a ratio of the running regionservers at the same time
31   */
32  public class BatchRestartRsAction extends RestartActionBaseAction {
33    float ratio; //ratio of regionservers to restart
34    private static final Logger LOG = LoggerFactory.getLogger(BatchRestartRsAction.class);
35  
36    public BatchRestartRsAction(long sleepTime, float ratio) {
37      super(sleepTime);
38      this.ratio = ratio;
39    }
40  
41    @Override protected Logger getLogger() {
42      return LOG;
43    }
44  
45    @Override
46    public void perform() throws Exception {
47      getLogger().info(String.format("Performing action: Batch restarting %d%% of region servers",
48          (int)(ratio * 100)));
49      List<ServerName> selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(),
50          ratio);
51  
52      Set<ServerName> killedServers = new HashSet<ServerName>();
53  
54      for (ServerName server : selectedServers) {
55        // Don't keep killing servers if we're
56        // trying to stop the monkey.
57        if (context.isStopping()) {
58          break;
59        }
60        getLogger().info("Killing region server:" + server);
61        cluster.killRegionServer(server);
62        killedServers.add(server);
63      }
64  
65      for (ServerName server : killedServers) {
66        cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT);
67      }
68  
69      getLogger().info("Killed " + killedServers.size() + " region servers. Reported num of rs:"
70          + cluster.getClusterStatus().getServersSize());
71  
72      sleep(sleepTime);
73  
74      for (ServerName server : killedServers) {
75        getLogger().info("Starting region server:" + server.getHostname());
76        cluster.startRegionServer(server.getHostname(), server.getPort());
77  
78      }
79      for (ServerName server : killedServers) {
80        cluster.waitForRegionServerToStart(server.getHostname(),
81            server.getPort(),
82            PolicyBasedChaosMonkey.TIMEOUT);
83      }
84      getLogger().info("Started " + killedServers.size() +" region servers. Reported num of rs:"
85          + cluster.getClusterStatus().getServersSize());
86    }
87  }