View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertNotNull;
22  import static org.junit.Assert.assertNull;
23  import static org.junit.Assert.assertTrue;
24  import static org.mockito.Mockito.mock;
25  import static org.mockito.Mockito.when;
26  
27  import java.util.ArrayList;
28  import java.util.Arrays;
29  import java.util.HashMap;
30  import java.util.Iterator;
31  import java.util.List;
32  import java.util.Map;
33  import java.util.Map.Entry;
34  import java.util.Queue;
35  import java.util.TimeZone;
36  import java.util.TreeMap;
37  
38  import org.apache.commons.collections.CollectionUtils;
39  import org.apache.commons.logging.Log;
40  import org.apache.commons.logging.LogFactory;
41  import org.apache.hadoop.conf.Configuration;
42  import org.apache.hadoop.hbase.ClusterStatus;
43  import org.apache.hadoop.hbase.HBaseConfiguration;
44  import org.apache.hadoop.hbase.HConstants;
45  import org.apache.hadoop.hbase.HRegionInfo;
46  import org.apache.hadoop.hbase.RegionLoad;
47  import org.apache.hadoop.hbase.ServerLoad;
48  import org.apache.hadoop.hbase.ServerName;
49  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
50  import org.apache.hadoop.hbase.master.MockNoopMasterServices;
51  import org.apache.hadoop.hbase.master.RackManager;
52  import org.apache.hadoop.hbase.master.RegionPlan;
53  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster;
54  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.ServerLocalityCostFunction;
55  import org.apache.hadoop.hbase.testclassification.FlakeyTests;
56  import org.apache.hadoop.hbase.testclassification.MediumTests;
57  import org.apache.hadoop.hbase.util.Bytes;
58  import org.apache.hadoop.hbase.util.EnvironmentEdge;
59  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
60  import org.junit.Ignore;
61  import org.junit.Test;
62  import org.junit.experimental.categories.Category;
63  
64  @Category({FlakeyTests.class, MediumTests.class})
65  public class TestStochasticLoadBalancer extends BalancerTestBase {
66    public static final String REGION_KEY = "testRegion";
67    private static final Log LOG = LogFactory.getLog(TestStochasticLoadBalancer.class);
68  
69    // Mapping of locality test -> expected locality
70    private float[] expectedLocalities = {1.0f, 0.0f, 0.50f, 0.25f, 1.0f};
71  
72    /**
73     * Data set for testLocalityCost:
74     * [test][0][0] = mapping of server to number of regions it hosts
75     * [test][region + 1][0] = server that region is hosted on
76     * [test][region + 1][server + 1] = locality for region on server
77     */
78  
79    private int[][][] clusterRegionLocationMocks = new int[][][]{
80  
81        // Test 1: each region is entirely on server that hosts it
82        new int[][]{
83            new int[]{2, 1, 1},
84            new int[]{2, 0, 0, 100},   // region 0 is hosted and entirely local on server 2
85            new int[]{0, 100, 0, 0},   // region 1 is hosted and entirely on server 0
86            new int[]{0, 100, 0, 0},   // region 2 is hosted and entirely on server 0
87            new int[]{1, 0, 100, 0},   // region 1 is hosted and entirely on server 1
88        },
89  
90        // Test 2: each region is 0% local on the server that hosts it
91        new int[][]{
92            new int[]{1, 2, 1},
93            new int[]{0, 0, 0, 100},   // region 0 is hosted and entirely local on server 2
94            new int[]{1, 100, 0, 0},   // region 1 is hosted and entirely on server 0
95            new int[]{1, 100, 0, 0},   // region 2 is hosted and entirely on server 0
96            new int[]{2, 0, 100, 0},   // region 1 is hosted and entirely on server 1
97        },
98  
99        // Test 3: each region is 25% local on the server that hosts it (and 50% locality is possible)
100       new int[][]{
101           new int[]{1, 2, 1},
102           new int[]{0, 25, 0, 50},   // region 0 is hosted and entirely local on server 2
103           new int[]{1, 50, 25, 0},   // region 1 is hosted and entirely on server 0
104           new int[]{1, 50, 25, 0},   // region 2 is hosted and entirely on server 0
105           new int[]{2, 0, 50, 25},   // region 1 is hosted and entirely on server 1
106       },
107 
108       // Test 4: each region is 25% local on the server that hosts it (and 100% locality is possible)
109       new int[][]{
110           new int[]{1, 2, 1},
111           new int[]{0, 25, 0, 100},   // region 0 is hosted and entirely local on server 2
112           new int[]{1, 100, 25, 0},   // region 1 is hosted and entirely on server 0
113           new int[]{1, 100, 25, 0},   // region 2 is hosted and entirely on server 0
114           new int[]{2, 0, 100, 25},   // region 1 is hosted and entirely on server 1
115       },
116 
117       // Test 5: each region is 75% local on the server that hosts it (and 75% locality is possible everywhere)
118       new int[][]{
119           new int[]{1, 2, 1},
120           new int[]{0, 75, 75, 75},   // region 0 is hosted and entirely local on server 2
121           new int[]{1, 75, 75, 75},   // region 1 is hosted and entirely on server 0
122           new int[]{1, 75, 75, 75},   // region 2 is hosted and entirely on server 0
123           new int[]{2, 75, 75, 75},   // region 1 is hosted and entirely on server 1
124       },
125   };
126 
127 
128   @Test
129   public void testKeepRegionLoad() throws Exception {
130 
131     ServerName sn = ServerName.valueOf("test:8080", 100);
132     int numClusterStatusToAdd = 20000;
133     for (int i = 0; i < numClusterStatusToAdd; i++) {
134       ServerLoad sl = mock(ServerLoad.class);
135 
136       RegionLoad rl = mock(RegionLoad.class);
137       when(rl.getStores()).thenReturn(i);
138 
139       Map<byte[], RegionLoad> regionLoadMap =
140           new TreeMap<byte[], RegionLoad>(Bytes.BYTES_COMPARATOR);
141       regionLoadMap.put(Bytes.toBytes(REGION_KEY), rl);
142       when(sl.getRegionsLoad()).thenReturn(regionLoadMap);
143 
144       ClusterStatus clusterStatus = mock(ClusterStatus.class);
145       when(clusterStatus.getServers()).thenReturn(Arrays.asList(sn));
146       when(clusterStatus.getLoad(sn)).thenReturn(sl);
147 
148       loadBalancer.setClusterStatus(clusterStatus);
149     }
150 
151     String regionNameAsString = HRegionInfo.getRegionNameAsString(Bytes.toBytes(REGION_KEY));
152     assertTrue(loadBalancer.loads.get(regionNameAsString) != null);
153     assertTrue(loadBalancer.loads.get(regionNameAsString).size() == 15);
154 
155     Queue<RegionLoad> loads = loadBalancer.loads.get(regionNameAsString);
156     int i = 0;
157     while(loads.size() > 0) {
158       RegionLoad rl = loads.remove();
159       assertEquals(i + (numClusterStatusToAdd - 15), rl.getStores());
160       i ++;
161     }
162   }
163 
164   @Test
165   public void testNeedBalance() {
166     float minCost = conf.getFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 0.05f);
167     conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", 1.0f);
168     try {
169       // Test with/without per table balancer.
170       boolean[] perTableBalancerConfigs = {true, false};
171       for (boolean isByTable : perTableBalancerConfigs) {
172         conf.setBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable);
173         loadBalancer.setConf(conf);
174         for (int[] mockCluster : clusterStateMocks) {
175           Map<ServerName, List<HRegionInfo>> servers = mockClusterServers(mockCluster);
176           List<RegionPlan> plans = loadBalancer.balanceCluster(servers);
177           boolean emptyPlans = plans == null || plans.isEmpty();
178           assertTrue(emptyPlans || needsBalanceIdleRegion(mockCluster));
179         }
180       }
181     } finally {
182       // reset config
183       conf.setFloat("hbase.master.balancer.stochastic.minCostNeedBalance", minCost);
184       loadBalancer.setConf(conf);
185     }
186   }
187 
188   /**
189    * Test the load balancing algorithm.
190    *
191    * Invariant is that all servers should be hosting either floor(average) or
192    * ceiling(average)
193    *
194    * @throws Exception
195    */
196   @Test
197   public void testBalanceCluster() throws Exception {
198 
199     for (int[] mockCluster : clusterStateMocks) {
200       Map<ServerName, List<HRegionInfo>> servers = mockClusterServers(mockCluster);
201       List<ServerAndLoad> list = convertToList(servers);
202       LOG.info("Mock Cluster : " + printMock(list) + " " + printStats(list));
203       List<RegionPlan> plans = loadBalancer.balanceCluster(servers);
204       List<ServerAndLoad> balancedCluster = reconcile(list, plans, servers);
205       LOG.info("Mock Balance : " + printMock(balancedCluster));
206       assertClusterAsBalanced(balancedCluster);
207       List<RegionPlan> secondPlans =  loadBalancer.balanceCluster(servers);
208       assertNull(secondPlans);
209       for (Map.Entry<ServerName, List<HRegionInfo>> entry : servers.entrySet()) {
210         returnRegions(entry.getValue());
211         returnServer(entry.getKey());
212       }
213     }
214   }
215 
216   @Test
217   public void testLocalityCost() throws Exception {
218     Configuration conf = HBaseConfiguration.create();
219     MockNoopMasterServices master = new MockNoopMasterServices();
220     StochasticLoadBalancer.CostFunction
221         costFunction = new ServerLocalityCostFunction(conf, master);
222 
223     for (int test = 0; test < clusterRegionLocationMocks.length; test++) {
224       int[][] clusterRegionLocations = clusterRegionLocationMocks[test];
225       MockCluster cluster = new MockCluster(clusterRegionLocations);
226       costFunction.init(cluster);
227       double cost = costFunction.cost();
228       double expected = 1 - expectedLocalities[test];
229       assertEquals(expected, cost, 0.001);
230     }
231   }
232 
233   @Test
234   public void testMoveCostMultiplier() throws Exception {
235     Configuration conf = HBaseConfiguration.create();
236     StochasticLoadBalancer.CostFunction
237       costFunction = new StochasticLoadBalancer.MoveCostFunction(conf);
238     BaseLoadBalancer.Cluster cluster = mockCluster(clusterStateMocks[0]);
239     costFunction.init(cluster);
240     costFunction.cost();
241     assertEquals(StochasticLoadBalancer.MoveCostFunction.DEFAULT_MOVE_COST,
242       costFunction.getMultiplier(), 0.01);
243 
244     // In offpeak hours, the multiplier of move cost should be lower
245     conf.setInt("hbase.offpeak.start.hour",0);
246     conf.setInt("hbase.offpeak.end.hour",23);
247     // Set a fixed time which hour is 15, so it will always in offpeak
248     // See HBASE-24898 for more info of the calculation here
249     long deltaFor15 = TimeZone.getDefault().getRawOffset() - 28800000;
250     final long timeFor15 = 1597907081000L - deltaFor15;
251     EnvironmentEdgeManager.injectEdge(new EnvironmentEdge() {
252       @Override
253       public long currentTime() {
254         return timeFor15;
255       }
256     });
257     costFunction = new StochasticLoadBalancer.MoveCostFunction(conf);
258     costFunction.init(cluster);
259     costFunction.cost();
260     assertEquals(StochasticLoadBalancer.MoveCostFunction.DEFAULT_MOVE_COST_OFFPEAK
261       , costFunction.getMultiplier(), 0.01);
262   }
263 
264   @Test
265   public void testMoveCost() throws Exception {
266     Configuration conf = HBaseConfiguration.create();
267     StochasticLoadBalancer.CostFunction
268         costFunction = new StochasticLoadBalancer.MoveCostFunction(conf);
269     for (int[] mockCluster : clusterStateMocks) {
270       BaseLoadBalancer.Cluster cluster = mockCluster(mockCluster);
271       costFunction.init(cluster);
272       double cost = costFunction.cost();
273       assertEquals(0.0f, cost, 0.001);
274 
275       // cluster region number is smaller than maxMoves=600
276       cluster.setNumRegions(200);
277       cluster.setNumMovedRegions(10);
278       cost = costFunction.cost();
279       assertEquals(0.05f, cost, 0.001);
280       cluster.setNumMovedRegions(100);
281       cost = costFunction.cost();
282       assertEquals(0.5f, cost, 0.001);
283       cluster.setNumMovedRegions(200);
284       cost = costFunction.cost();
285       assertEquals(1.0f, cost, 0.001);
286 
287 
288       // cluster region number is bigger than maxMoves=2500
289       cluster.setNumRegions(10000);
290       cluster.setNumMovedRegions(250);
291       cost = costFunction.cost();
292       assertEquals(0.1f, cost, 0.001);
293       cluster.setNumMovedRegions(1250);
294       cost = costFunction.cost();
295       assertEquals(0.5f, cost, 0.001);
296       cluster.setNumMovedRegions(2500);
297       cost = costFunction.cost();
298       assertEquals(1.0f, cost, 0.01);
299     }
300   }
301 
302   @Test
303   public void testSkewCost() {
304     Configuration conf = HBaseConfiguration.create();
305     StochasticLoadBalancer.CostFunction
306         costFunction = new StochasticLoadBalancer.RegionCountSkewCostFunction(conf);
307     for (int[] mockCluster : clusterStateMocks) {
308       costFunction.init(mockCluster(mockCluster));
309       double cost = costFunction.cost();
310       assertTrue(cost >= 0);
311       assertTrue(cost <= 1.01);
312     }
313 
314     costFunction.init(mockCluster(new int[]{0, 0, 0, 0, 1}));
315     assertEquals(0,costFunction.cost(), 0.01);
316     costFunction.init(mockCluster(new int[]{0, 0, 0, 1, 1}));
317     assertEquals(0, costFunction.cost(), 0.01);
318     costFunction.init(mockCluster(new int[]{0, 0, 1, 1, 1}));
319     assertEquals(0, costFunction.cost(), 0.01);
320     costFunction.init(mockCluster(new int[]{0, 1, 1, 1, 1}));
321     assertEquals(0, costFunction.cost(), 0.01);
322     costFunction.init(mockCluster(new int[]{1, 1, 1, 1, 1}));
323     assertEquals(0, costFunction.cost(), 0.01);
324     costFunction.init(mockCluster(new int[]{10000, 0, 0, 0, 0}));
325     assertEquals(1, costFunction.cost(), 0.01);
326   }
327 
328   @Test
329   public void testCostAfterUndoAction() {
330     final int runs = 10;
331     loadBalancer.setConf(conf);
332     for (int[] mockCluster : clusterStateMocks) {
333       BaseLoadBalancer.Cluster cluster = mockCluster(mockCluster);
334       loadBalancer.initCosts(cluster);
335       for (int i = 0; i != runs; ++i) {
336         final double expectedCost = loadBalancer.computeCost(cluster, Double.MAX_VALUE);
337         Cluster.Action action = loadBalancer.nextAction(cluster);
338         cluster.doAction(action);
339         loadBalancer.updateCostsWithAction(cluster, action);
340         Cluster.Action undoAction = action.undoAction();
341         cluster.doAction(undoAction);
342         loadBalancer.updateCostsWithAction(cluster, undoAction);
343         final double actualCost = loadBalancer.computeCost(cluster, Double.MAX_VALUE);
344         assertEquals(expectedCost, actualCost, 0);
345       }
346     }
347   }
348 
349   @Test
350   public void testTableSkewCost() {
351     Configuration conf = HBaseConfiguration.create();
352     StochasticLoadBalancer.CostFunction
353         costFunction = new StochasticLoadBalancer.TableSkewCostFunction(conf);
354     for (int[] mockCluster : clusterStateMocks) {
355       BaseLoadBalancer.Cluster cluster = mockCluster(mockCluster);
356       costFunction.init(cluster);
357       double cost = costFunction.cost();
358       assertTrue(cost >= 0);
359       assertTrue(cost <= 1.01);
360     }
361   }
362 
363   @Test
364   public void testRegionLoadCost() {
365     List<RegionLoad> regionLoads = new ArrayList<>();
366     for (int i = 1; i < 5; i++) {
367       RegionLoad regionLoad = mock(RegionLoad.class);
368       when(regionLoad.getReadRequestsCount()).thenReturn(new Long(i));
369       when(regionLoad.getStorefileSizeMB()).thenReturn(i);
370       regionLoads.add(regionLoad);
371     }
372 
373     Configuration conf = HBaseConfiguration.create();
374     StochasticLoadBalancer.ReadRequestCostFunction readCostFunction =
375         new StochasticLoadBalancer.ReadRequestCostFunction(conf);
376     double rateResult = readCostFunction.getRegionLoadCost(regionLoads);
377     // read requests are treated as a rate so the average rate here is simply 1
378     assertEquals(1, rateResult, 0.01);
379 
380     StochasticLoadBalancer.StoreFileCostFunction storeFileCostFunction =
381         new StochasticLoadBalancer.StoreFileCostFunction(conf);
382     double result = storeFileCostFunction.getRegionLoadCost(regionLoads);
383     // storefile size cost is simply an average of it's value over time
384     assertEquals(2.5, result, 0.01);
385   }
386 
387   @Test
388   public void testCostFromArray() {
389     Configuration conf = HBaseConfiguration.create();
390     StochasticLoadBalancer.CostFromRegionLoadFunction
391         costFunction = new StochasticLoadBalancer.MemstoreSizeCostFunction(conf);
392     costFunction.init(mockCluster(new int[]{0, 0, 0, 0, 1}));
393 
394     double[] statOne = new double[100];
395     for (int i =0; i < 100; i++) {
396       statOne[i] = 10;
397     }
398     assertEquals(0, costFunction.costFromArray(statOne), 0.01);
399 
400     double[] statTwo= new double[101];
401     for (int i =0; i < 100; i++) {
402       statTwo[i] = 0;
403     }
404     statTwo[100] = 100;
405     assertEquals(1, costFunction.costFromArray(statTwo), 0.01);
406 
407     double[] statThree = new double[200];
408     for (int i =0; i < 100; i++) {
409       statThree[i] = (0);
410       statThree[i+100] = 100;
411     }
412     assertEquals(0.5, costFunction.costFromArray(statThree), 0.01);
413   }
414 
415   @Test(timeout =  60000)
416   public void testLosingRs() throws Exception {
417     int numNodes = 3;
418     int numRegions = 20;
419     int numRegionsPerServer = 3; //all servers except one
420     int replication = 1;
421     int numTables = 2;
422 
423     Map<ServerName, List<HRegionInfo>> serverMap =
424         createServerMap(numNodes, numRegions, numRegionsPerServer, replication, numTables);
425     List<ServerAndLoad> list = convertToList(serverMap);
426 
427 
428     List<RegionPlan> plans = loadBalancer.balanceCluster(serverMap);
429     assertNotNull(plans);
430 
431     // Apply the plan to the mock cluster.
432     List<ServerAndLoad> balancedCluster = reconcile(list, plans, serverMap);
433 
434     assertClusterAsBalanced(balancedCluster);
435 
436     ServerName sn = serverMap.keySet().toArray(new ServerName[serverMap.size()])[0];
437 
438     ServerName deadSn = ServerName.valueOf(sn.getHostname(), sn.getPort(), sn.getStartcode() - 100);
439 
440     serverMap.put(deadSn, new ArrayList<HRegionInfo>(0));
441 
442     plans = loadBalancer.balanceCluster(serverMap);
443     assertNull(plans);
444   }
445 
446   @Test
447   public void testReplicaCost() {
448     Configuration conf = HBaseConfiguration.create();
449     StochasticLoadBalancer.CostFunction
450         costFunction = new StochasticLoadBalancer.RegionReplicaHostCostFunction(conf);
451     for (int[] mockCluster : clusterStateMocks) {
452       BaseLoadBalancer.Cluster cluster = mockCluster(mockCluster);
453       costFunction.init(cluster);
454       double cost = costFunction.cost();
455       assertTrue(cost >= 0);
456       assertTrue(cost <= 1.01);
457     }
458   }
459 
460   @Test
461   public void testReplicaCostForReplicas() {
462     Configuration conf = HBaseConfiguration.create();
463     StochasticLoadBalancer.CostFunction
464         costFunction = new StochasticLoadBalancer.RegionReplicaHostCostFunction(conf);
465 
466     int [] servers = new int[] {3,3,3,3,3};
467     TreeMap<ServerName, List<HRegionInfo>> clusterState = mockClusterServers(servers);
468 
469     BaseLoadBalancer.Cluster cluster;
470 
471     cluster = new BaseLoadBalancer.Cluster(clusterState, null, null, null);
472     costFunction.init(cluster);
473     double costWithoutReplicas = costFunction.cost();
474     assertEquals(0, costWithoutReplicas, 0);
475 
476     // replicate the region from first server to the last server
477     HRegionInfo replica1 = RegionReplicaUtil.getRegionInfoForReplica(
478       clusterState.firstEntry().getValue().get(0),1);
479     clusterState.lastEntry().getValue().add(replica1);
480 
481     cluster = new BaseLoadBalancer.Cluster(clusterState, null, null, null);
482     costFunction.init(cluster);
483     double costWith1ReplicaDifferentServer = costFunction.cost();
484 
485     assertEquals(0, costWith1ReplicaDifferentServer, 0);
486 
487     // add a third replica to the last server
488     HRegionInfo replica2 = RegionReplicaUtil.getRegionInfoForReplica(replica1, 2);
489     clusterState.lastEntry().getValue().add(replica2);
490 
491     cluster = new BaseLoadBalancer.Cluster(clusterState, null, null, null);
492     costFunction.init(cluster);
493     double costWith1ReplicaSameServer = costFunction.cost();
494 
495     assertTrue(costWith1ReplicaDifferentServer < costWith1ReplicaSameServer);
496 
497     // test with replication = 4 for following:
498 
499     HRegionInfo replica3;
500     Iterator<Entry<ServerName, List<HRegionInfo>>> it;
501     Entry<ServerName, List<HRegionInfo>> entry;
502 
503     clusterState = mockClusterServers(servers);
504     it = clusterState.entrySet().iterator();
505     entry = it.next(); //first server
506     HRegionInfo hri = entry.getValue().get(0);
507     replica1 = RegionReplicaUtil.getRegionInfoForReplica(hri, 1);
508     replica2 = RegionReplicaUtil.getRegionInfoForReplica(hri, 2);
509     replica3 = RegionReplicaUtil.getRegionInfoForReplica(hri, 3);
510     entry.getValue().add(replica1);
511     entry.getValue().add(replica2);
512     it.next().getValue().add(replica3); //2nd server
513 
514     cluster = new BaseLoadBalancer.Cluster(clusterState, null, null, null);
515     costFunction.init(cluster);
516     double costWith3ReplicasSameServer = costFunction.cost();
517 
518     clusterState = mockClusterServers(servers);
519     hri = clusterState.firstEntry().getValue().get(0);
520     replica1 = RegionReplicaUtil.getRegionInfoForReplica(hri, 1);
521     replica2 = RegionReplicaUtil.getRegionInfoForReplica(hri, 2);
522     replica3 = RegionReplicaUtil.getRegionInfoForReplica(hri, 3);
523 
524     clusterState.firstEntry().getValue().add(replica1);
525     clusterState.lastEntry().getValue().add(replica2);
526     clusterState.lastEntry().getValue().add(replica3);
527 
528     cluster = new BaseLoadBalancer.Cluster(clusterState, null, null, null);
529     costFunction.init(cluster);
530     double costWith2ReplicasOnTwoServers = costFunction.cost();
531 
532     assertTrue(costWith2ReplicasOnTwoServers < costWith3ReplicasSameServer);
533   }
534 
535   @Test
536   public void testNeedsBalanceForColocatedReplicas() {
537     // check for the case where there are two hosts and with one rack, and where
538     // both the replicas are hosted on the same server
539     List<HRegionInfo> regions = randomRegions(1);
540     ServerName s1 = ServerName.valueOf("host1", 1000, 11111);
541     ServerName s2 = ServerName.valueOf("host11", 1000, 11111);
542     Map<ServerName, List<HRegionInfo>> map = new HashMap<ServerName, List<HRegionInfo>>();
543     map.put(s1, regions);
544     regions.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 1));
545     // until the step above s1 holds two replicas of a region
546     regions = randomRegions(1);
547     map.put(s2, regions);
548     assertTrue(loadBalancer.needsBalance(new Cluster(map, null, null, null)));
549     // check for the case where there are two hosts on the same rack and there are two racks
550     // and both the replicas are on the same rack
551     map.clear();
552     regions = randomRegions(1);
553     List<HRegionInfo> regionsOnS2 = new ArrayList<HRegionInfo>(1);
554     regionsOnS2.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 1));
555     map.put(s1, regions);
556     map.put(s2, regionsOnS2);
557     // add another server so that the cluster has some host on another rack
558     map.put(ServerName.valueOf("host2", 1000, 11111), randomRegions(1));
559     assertTrue(loadBalancer.needsBalance(new Cluster(map, null, null,
560         new ForTestRackManagerOne())));
561   }
562 
563   @Test (timeout = 60000)
564   public void testSmallCluster() {
565     int numNodes = 10;
566     int numRegions = 1000;
567     int numRegionsPerServer = 40; //all servers except one
568     int replication = 1;
569     int numTables = 10;
570     testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
571   }
572 
573   @Test (timeout = 60000)
574   public void testSmallCluster2() {
575     int numNodes = 20;
576     int numRegions = 2000;
577     int numRegionsPerServer = 40; //all servers except one
578     int replication = 1;
579     int numTables = 10;
580     testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
581   }
582 
583   @Test (timeout = 60000)
584   public void testSmallCluster3() {
585     int numNodes = 20;
586     int numRegions = 2000;
587     int numRegionsPerServer = 1; // all servers except one
588     int replication = 1;
589     int numTables = 10;
590     /* fails because of max moves */
591     testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, false, false);
592   }
593 
594   @Test (timeout = 800000)
595   public void testMidCluster() {
596     int numNodes = 100;
597     int numRegions = 10000;
598     int numRegionsPerServer = 60; // all servers except one
599     int replication = 1;
600     int numTables = 40;
601     testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
602   }
603 
604   @Test (timeout = 800000)
605   public void testMidCluster2() {
606     int numNodes = 200;
607     int numRegions = 100000;
608     int numRegionsPerServer = 40; // all servers except one
609     int replication = 1;
610     int numTables = 400;
611     testWithCluster(numNodes,
612         numRegions,
613         numRegionsPerServer,
614         replication,
615         numTables,
616         false, /* num large num regions means may not always get to best balance with one run */
617         false);
618   }
619 
620 
621   @Test (timeout = 800000)
622   public void testMidCluster3() {
623     int numNodes = 100;
624     int numRegions = 2000;
625     int numRegionsPerServer = 9; // all servers except one
626     int replication = 1;
627     int numTables = 110;
628     testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
629     // TODO(eclark): Make sure that the tables are well distributed.
630   }
631 
632   @Test
633   public void testLargeCluster() {
634     int numNodes = 1000;
635     int numRegions = 100000; //100 regions per RS
636     int numRegionsPerServer = 80; //all servers except one
637     int numTables = 100;
638     int replication = 1;
639     testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
640   }
641 
642   @Test (timeout = 800000)
643   public void testRegionReplicasOnSmallCluster() {
644     int numNodes = 10;
645     int numRegions = 1000;
646     int replication = 3; // 3 replicas per region
647     int numRegionsPerServer = 80; //all regions are mostly balanced
648     int numTables = 10;
649     testWithCluster(numNodes, numRegions, numRegionsPerServer, replication, numTables, true, true);
650   }
651 
652   @Ignore @Test (timeout = 800000) // Test is flakey. TODO: Fix!
653   public void testRegionReplicationOnMidClusterSameHosts() {
654     conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L);
655     conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec
656     conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
657     loadBalancer.setConf(conf);
658     int numHosts = 100;
659     int numRegions = 100 * 100;
660     int replication = 3; // 3 replicas per region
661     int numRegionsPerServer = 5;
662     int numTables = 10;
663     Map<ServerName, List<HRegionInfo>> serverMap =
664         createServerMap(numHosts, numRegions, numRegionsPerServer, replication, numTables);
665     int numNodesPerHost = 4;
666 
667     // create a new map with 4 RS per host.
668     Map<ServerName, List<HRegionInfo>> newServerMap = new TreeMap<ServerName, List<HRegionInfo>>(serverMap);
669     for (Map.Entry<ServerName, List<HRegionInfo>> entry : serverMap.entrySet()) {
670       for (int i=1; i < numNodesPerHost; i++) {
671         ServerName s1 = entry.getKey();
672         ServerName s2 = ServerName.valueOf(s1.getHostname(), s1.getPort() + i, 1); // create an RS for the same host
673         newServerMap.put(s2, new ArrayList<HRegionInfo>());
674       }
675     }
676 
677     testWithCluster(newServerMap, null, true, true);
678   }
679 
680   private static class ForTestRackManager extends RackManager {
681     int numRacks;
682     public ForTestRackManager(int numRacks) {
683       this.numRacks = numRacks;
684     }
685     @Override
686     public String getRack(ServerName server) {
687       return "rack_" + (server.hashCode() % numRacks);
688     }
689   }
690 
691   private static class ForTestRackManagerOne extends RackManager {
692   @Override
693     public String getRack(ServerName server) {
694       return server.getHostname().endsWith("1") ? "rack1" : "rack2";
695     }
696   }
697 
698   @Test (timeout = 800000)
699   public void testRegionReplicationOnMidClusterWithRacks() {
700     conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 10000000L);
701     conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
702     conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 120 * 1000); // 120 sec
703     loadBalancer.setConf(conf);
704     int numNodes = 30;
705     int numRegions = numNodes * 30;
706     int replication = 3; // 3 replicas per region
707     int numRegionsPerServer = 28;
708     int numTables = 10;
709     int numRacks = 4; // all replicas should be on a different rack
710     Map<ServerName, List<HRegionInfo>> serverMap =
711         createServerMap(numNodes, numRegions, numRegionsPerServer, replication, numTables);
712     RackManager rm = new ForTestRackManager(numRacks);
713 
714     testWithCluster(serverMap, rm, false, true);
715   }
716 
717   @Test
718   public void testAdditionalCostFunction() {
719     conf.set(StochasticLoadBalancer.COST_FUNCTIONS_COST_FUNCTIONS_KEY,
720       DummyCostFunction.class.getName());
721 
722     loadBalancer.setConf(conf);
723     System.out.println(Arrays.toString(loadBalancer.getCostFunctionNames()));
724     assertTrue(Arrays.
725             asList(loadBalancer.getCostFunctionNames()).
726             contains(DummyCostFunction.class.getSimpleName()));
727   }
728 
729   @Test
730   public void testDefaultCostFunctionList() {
731     List<String> expected = Arrays.asList(
732       StochasticLoadBalancer.RegionCountSkewCostFunction.class.getSimpleName(),
733       StochasticLoadBalancer.PrimaryRegionCountSkewCostFunction.class.getSimpleName(),
734       StochasticLoadBalancer.MoveCostFunction.class.getSimpleName(),
735       StochasticLoadBalancer.RackLocalityCostFunction.class.getSimpleName(),
736       StochasticLoadBalancer.TableSkewCostFunction.class.getSimpleName(),
737       StochasticLoadBalancer.RegionReplicaHostCostFunction.class.getSimpleName(),
738       StochasticLoadBalancer.RegionReplicaRackCostFunction.class.getSimpleName(),
739       StochasticLoadBalancer.ReadRequestCostFunction.class.getSimpleName(),
740       StochasticLoadBalancer.WriteRequestCostFunction.class.getSimpleName(),
741       StochasticLoadBalancer.MemstoreSizeCostFunction.class.getSimpleName(),
742       StochasticLoadBalancer.StoreFileCostFunction.class.getSimpleName()
743     );
744 
745     List<String> actual = Arrays.asList(loadBalancer.getCostFunctionNames());
746     assertTrue("ExpectedCostFunctions: " + expected + " ActualCostFunctions: " + actual,
747       CollectionUtils.isEqualCollection(expected, actual));
748   }
749 
750   private boolean needsBalanceIdleRegion(int[] clusters) {
751     boolean b1 = false;
752     boolean b2 = false;
753     for (int cluster : clusters) {
754       if (cluster > 1) {
755         b1 = true;
756       } else {
757         b2 = true;
758       }
759     }
760     return b1 && b2;
761   }
762 
763   // This mock allows us to test the LocalityCostFunction
764   private class MockCluster extends BaseLoadBalancer.Cluster {
765 
766     private int[][] localities = null;   // [region][server] = percent of blocks
767 
768     public MockCluster(int[][] regions) {
769 
770       // regions[0] is an array where index = serverIndex an value = number of regions
771       super(mockClusterServers(regions[0], 1), null, null, null);
772 
773       localities = new int[regions.length - 1][];
774       for (int i = 1; i < regions.length; i++) {
775         int regionIndex = i - 1;
776         localities[regionIndex] = new int[regions[i].length - 1];
777         regionIndexToServerIndex[regionIndex] = regions[i][0];
778         for (int j = 1; j < regions[i].length; j++) {
779           int serverIndex = j - 1;
780           localities[regionIndex][serverIndex] = regions[i][j] > 100 ? regions[i][j] % 100 : regions[i][j];
781         }
782       }
783     }
784 
785     @Override
786     float getLocalityOfRegion(int region, int server) {
787       // convert the locality percentage to a fraction
788       return localities[region][server] / 100.0f;
789     }
790 
791     @Override
792     public int getRegionSizeMB(int region) {
793       return 1;
794     }
795 
796   }
797 
798 }