View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.fail;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.client.Admin;
27  import org.apache.hadoop.hbase.client.Connection;
28  import org.apache.hadoop.hbase.client.ConnectionFactory;
29  import org.apache.hadoop.hbase.client.RegionLocator;
30  import org.apache.hadoop.hbase.master.RegionStates;
31  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
32  import org.apache.hadoop.hbase.regionserver.HRegionServer;
33  import org.apache.hadoop.hbase.testclassification.LargeTests;
34  import org.apache.hadoop.hbase.util.Bytes;
35  import org.apache.hadoop.hbase.util.JVMClusterUtil;
36  import org.apache.hadoop.hbase.util.Threads;
37  import org.junit.After;
38  import org.junit.Before;
39  import org.junit.Test;
40  import org.junit.experimental.categories.Category;
41  import org.junit.runner.RunWith;
42  import org.junit.runners.Parameterized;
43  import org.junit.runners.Parameterized.Parameters;
44  
45  import java.io.IOException;
46  import java.util.ArrayList;
47  import java.util.Arrays;
48  import java.util.Collection;
49  import java.util.List;
50  
51  /**
52   * Test whether region re-balancing works. (HBASE-71)
53   */
54  @Category(LargeTests.class)
55  @RunWith(value = Parameterized.class)
56  public class TestRegionRebalancing {
57  
58    @Parameters
59    public static Collection<Object[]> data() {
60      Object[][] balancers =
61          new String[][] { { "org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer" },
62              { "org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer" } };
63      return Arrays.asList(balancers);
64    }
65  
66    private static final byte[] FAMILY_NAME = Bytes.toBytes("col");
67    private static final Log LOG = LogFactory.getLog(TestRegionRebalancing.class);
68    private final HBaseTestingUtility UTIL = new HBaseTestingUtility();
69    private RegionLocator regionLocator;
70    private HTableDescriptor desc;
71    private String balancerName;
72  
73    public TestRegionRebalancing(String balancerName) {
74      this.balancerName = balancerName;
75  
76    }
77  
78    @After
79    public void after() throws Exception {
80      UTIL.shutdownMiniCluster();
81    }
82  
83    @Before
84    public void before() throws Exception {
85      UTIL.getConfiguration().set("hbase.master.loadbalancer.class", this.balancerName);
86      // set minCostNeedBalance to 0, make sure balancer run
87      UTIL.startMiniCluster(1);
88      this.desc = new HTableDescriptor(TableName.valueOf("test"));
89      this.desc.addFamily(new HColumnDescriptor(FAMILY_NAME));
90    }
91  
92    /**
93     * For HBASE-71. Try a few different configurations of starting and stopping
94     * region servers to see if the assignment or regions is pretty balanced.
95     * @throws IOException
96     * @throws InterruptedException
97     */
98    @Test (timeout=300000)
99    public void testRebalanceOnRegionServerNumberChange()
100   throws IOException, InterruptedException {
101     try(Connection connection = ConnectionFactory.createConnection(UTIL.getConfiguration());
102         Admin admin = connection.getAdmin()) {
103       admin.createTable(this.desc, Arrays.copyOfRange(HBaseTestingUtility.KEYS,
104           1, HBaseTestingUtility.KEYS.length));
105       this.regionLocator = connection.getRegionLocator(this.desc.getTableName());
106 
107       MetaTableAccessor.fullScanMetaAndPrint(admin.getConnection());
108 
109       assertEquals("Test table should have right number of regions",
110         HBaseTestingUtility.KEYS.length,
111         this.regionLocator.getStartKeys().length);
112 
113       // verify that the region assignments are balanced to start out
114       assertRegionsAreBalanced();
115 
116       // add a region server - total of 2
117       LOG.info("Started second server=" +
118         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
119       UTIL.getHBaseCluster().getMaster().balance();
120       assertRegionsAreBalanced();
121 
122       // On a balanced cluster, calling balance() should return true
123       assert(UTIL.getHBaseCluster().getMaster().balance() == true);
124 
125       // if we add a server, then the balance() call should return true
126       // add a region server - total of 3
127       LOG.info("Started third server=" +
128           UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
129       assert(UTIL.getHBaseCluster().getMaster().balance() == true);
130       assertRegionsAreBalanced();
131 
132       // kill a region server - total of 2
133       LOG.info("Stopped third server=" + UTIL.getHBaseCluster().stopRegionServer(2, false));
134       UTIL.getHBaseCluster().waitOnRegionServer(2);
135       waitOnCrashProcessing();
136       UTIL.getHBaseCluster().getMaster().balance();
137       assertRegionsAreBalanced();
138 
139       // start two more region servers - total of 4
140       LOG.info("Readding third server=" +
141           UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
142       LOG.info("Added fourth server=" +
143           UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
144       waitOnCrashProcessing();
145       assert(UTIL.getHBaseCluster().getMaster().balance() == true);
146       assertRegionsAreBalanced();
147       for (int i = 0; i < 6; i++){
148         LOG.info("Adding " + (i + 5) + "th region server");
149         UTIL.getHBaseCluster().startRegionServer();
150       }
151       assert(UTIL.getHBaseCluster().getMaster().balance() == true);
152       assertRegionsAreBalanced();
153       regionLocator.close();
154     }
155   }
156 
157   /**
158    * Wait on crash processing. Balancer won't run if processing a crashed server.
159    */
160   private void waitOnCrashProcessing() {
161     while (UTIL.getHBaseCluster().getMaster().getServerManager().areDeadServersInProgress()) {
162       LOG.info("Waiting on processing of crashed server before proceeding...");
163       Threads.sleep(1000);
164     }
165   }
166 
167   /**
168    * Determine if regions are balanced. Figure out the total, divide by the
169    * number of online servers, then test if each server is +/- 1 of average
170    * rounded up.
171    */
172   private void assertRegionsAreBalanced() throws IOException {
173     // TODO: Fix this test.  Old balancer used to run with 'slop'.  New
174     // balancer does not.
175     boolean success = false;
176     float slop = (float)UTIL.getConfiguration().getFloat("hbase.regions.slop", 0.1f);
177     if (slop <= 0) slop = 1;
178 
179     for (int i = 0; i < 5; i++) {
180       success = true;
181       // make sure all the regions are reassigned before we test balance
182       waitForAllRegionsAssigned();
183 
184       long regionCount = UTIL.getMiniHBaseCluster().countServedRegions();
185       List<HRegionServer> servers = getOnlineRegionServers();
186       double avg = UTIL.getHBaseCluster().getMaster().getAverageLoad();
187       int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
188       int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1;
189       LOG.debug("There are " + servers.size() + " servers and " + regionCount
190         + " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop
191         + ", up border: " + avgLoadPlusSlop + "; attempt: " + i);
192 
193       for (HRegionServer server : servers) {
194         int serverLoad =
195           ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
196         LOG.debug(server.getServerName() + " Avg: " + avg + " actual: " + serverLoad);
197         if (!(avg > 2.0 && serverLoad <= avgLoadPlusSlop
198             && serverLoad >= avgLoadMinusSlop)) {
199           for (HRegionInfo hri :
200               ProtobufUtil.getOnlineRegions(server.getRSRpcServices())) {
201             if (hri.isMetaRegion()) serverLoad--;
202             // LOG.debug(hri.getRegionNameAsString());
203           }
204           if (!(serverLoad <= avgLoadPlusSlop && serverLoad >= avgLoadMinusSlop)) {
205             LOG.debug(server.getServerName() + " Isn't balanced!!! Avg: " + avg +
206                 " actual: " + serverLoad + " slop: " + slop);
207             success = false;
208             break;
209           }
210         }
211       }
212 
213       if (!success) {
214         // one or more servers are not balanced. sleep a little to give it a
215         // chance to catch up. then, go back to the retry loop.
216         try {
217           Thread.sleep(10000);
218         } catch (InterruptedException e) {}
219 
220         UTIL.getHBaseCluster().getMaster().balance();
221         continue;
222       }
223 
224       // if we get here, all servers were balanced, so we should just return.
225       return;
226     }
227     // if we get here, we tried 5 times and never got to short circuit out of
228     // the retry loop, so this is a failure.
229     fail("After 5 attempts, region assignments were not balanced.");
230   }
231 
232   private List<HRegionServer> getOnlineRegionServers() {
233     List<HRegionServer> list = new ArrayList<HRegionServer>();
234     for (JVMClusterUtil.RegionServerThread rst :
235         UTIL.getHBaseCluster().getRegionServerThreads()) {
236       if (rst.getRegionServer().isOnline()) {
237         list.add(rst.getRegionServer());
238       }
239     }
240     return list;
241   }
242 
243   /**
244    * Wait until all the regions are assigned.
245    */
246   private void waitForAllRegionsAssigned() throws IOException {
247     int totalRegions = HBaseTestingUtility.KEYS.length;
248     while (UTIL.getMiniHBaseCluster().countServedRegions() < totalRegions) {
249     // while (!cluster.getMaster().allRegionsAssigned()) {
250       LOG.debug("Waiting for there to be "+ totalRegions +" regions, but there are "
251         + UTIL.getMiniHBaseCluster().countServedRegions() + " right now.");
252       try {
253         Thread.sleep(200);
254       } catch (InterruptedException e) {}
255     }
256     UTIL.waitUntilNoRegionsInTransition();
257   }
258 
259 }
260