View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.balancer;
19  
20  import com.google.common.base.Joiner;
21  import com.google.common.collect.ArrayListMultimap;
22  import com.google.common.collect.Lists;
23  import com.google.common.collect.Sets;
24  import java.util.ArrayList;
25  import java.util.Arrays;
26  import java.util.Collection;
27  import java.util.Collections;
28  import java.util.Comparator;
29  import java.util.Deque;
30  import java.util.HashMap;
31  import java.util.HashSet;
32  import java.util.Iterator;
33  import java.util.List;
34  import java.util.Map;
35  import java.util.Map.Entry;
36  import java.util.NavigableMap;
37  import java.util.Random;
38  import java.util.Set;
39  import java.util.TreeMap;
40  import org.apache.commons.lang.NotImplementedException;
41  import org.apache.commons.logging.Log;
42  import org.apache.commons.logging.LogFactory;
43  import org.apache.hadoop.conf.Configuration;
44  import org.apache.hadoop.hbase.ClusterStatus;
45  import org.apache.hadoop.hbase.HBaseConfiguration;
46  import org.apache.hadoop.hbase.HBaseIOException;
47  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
48  import org.apache.hadoop.hbase.HRegionInfo;
49  import org.apache.hadoop.hbase.RegionLoad;
50  import org.apache.hadoop.hbase.ServerName;
51  import org.apache.hadoop.hbase.classification.InterfaceAudience;
52  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
53  import org.apache.hadoop.hbase.master.LoadBalancer;
54  import org.apache.hadoop.hbase.master.MasterServices;
55  import org.apache.hadoop.hbase.master.RackManager;
56  import org.apache.hadoop.hbase.master.RegionPlan;
57  import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
58  import org.apache.hadoop.hbase.namequeues.NamedQueueRecorder;
59  import org.apache.hadoop.util.StringUtils;
60  
61  /**
 * The base class for load balancers. It provides the functions used by
63   * {@link org.apache.hadoop.hbase.master.AssignmentManager} to assign regions
64   * in the edge cases. It doesn't provide an implementation of the actual
65   * balancing algorithm.
66   *
67   */
68  public abstract class BaseLoadBalancer implements LoadBalancer {
69  
70    public static final String BALANCER_DECISION_BUFFER_ENABLED =
71      "hbase.master.balancer.decision.buffer.enabled";
72    public static final boolean DEFAULT_BALANCER_DECISION_BUFFER_ENABLED = false;
73  
74    protected static final int MIN_SERVER_BALANCE = 2;
75    private volatile boolean stopped = false;
76  
77    private static final List<HRegionInfo> EMPTY_REGION_LIST = new ArrayList<HRegionInfo>(0);
78  
79    protected RegionLocationFinder regionFinder;
80    protected boolean useRegionFinder;
81  
82    /**
83     * Use to add balancer decision history to ring-buffer
84     */
85    protected NamedQueueRecorder namedQueueRecorder;
86  
  /**
   * Fallback {@link RackManager} used when the caller supplies none: it places
   * every server into the single {@code UNKNOWN_RACK}, effectively disabling
   * rack-aware placement.
   */
  private static class DefaultRackManager extends RackManager {
    @Override
    public String getRack(ServerName server) {
      return UNKNOWN_RACK;
    }
  }
93  
  /**
   * The constructor that uses the basic MetricsBalancer for metrics reporting,
   * and initializes the locality-based region finder when it is enabled.
   */
  protected BaseLoadBalancer() {
    metricsBalancer = new MetricsBalancer();
    createRegionFinder();
  }
101 
102   /**
103    * This Constructor accepts an instance of MetricsBalancer,
104    * which will be used instead of creating a new one
105    */
106   protected BaseLoadBalancer(MetricsBalancer metricsBalancer) {
107     this.metricsBalancer = (metricsBalancer != null) ? metricsBalancer : new MetricsBalancer();
108     createRegionFinder();
109   }
110 
111   private void createRegionFinder() {
112     useRegionFinder = config.getBoolean("hbase.master.balancer.uselocality", true);
113     if (useRegionFinder) {
114       regionFinder = new RegionLocationFinder();
115     }
116   }
117 
118   /**
119    * An efficient array based implementation similar to ClusterState for keeping
120    * the status of the cluster in terms of region assignment and distribution.
 * LoadBalancers, such as StochasticLoadBalancer, use this Cluster object because
 * hundreds of thousands of hashmap manipulations are very costly, which is why this
123    * class uses mostly indexes and arrays.
124    *
125    * Cluster tracks a list of unassigned regions, region assignments, and the server
126    * topology in terms of server names, hostnames and racks.
127    */
128   protected static class Cluster {
129     ServerName[] servers;
130     String[] hosts; // ServerName uniquely identifies a region server. multiple RS can run on the same host
131     String[] racks;
132     boolean multiServersPerHost = false; // whether or not any host has more than one server
133 
134     ArrayList<String> tables;
135     HRegionInfo[] regions;
136     Deque<RegionLoad>[] regionLoads;
137     private RegionLocationFinder regionFinder;
138 
139     int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality
140 
141     int[]   serverIndexToHostIndex;      //serverIndex -> host index
142     int[]   serverIndexToRackIndex;      //serverIndex -> rack index
143 
144     int[][] regionsPerServer;            //serverIndex -> region list
145     int[][] regionsPerHost;              //hostIndex -> list of regions
146     int[][] regionsPerRack;              //rackIndex -> region list
147     int[][] primariesOfRegionsPerServer; //serverIndex -> sorted list of regions by primary region index
148     int[][] primariesOfRegionsPerHost;   //hostIndex -> sorted list of regions by primary region index
149     int[][] primariesOfRegionsPerRack;   //rackIndex -> sorted list of regions by primary region index
150 
151     int[][] serversPerHost;              //hostIndex -> list of server indexes
152     int[][] serversPerRack;              //rackIndex -> list of server indexes
153     int[]   regionIndexToServerIndex;    //regionIndex -> serverIndex
154     int[]   initialRegionIndexToServerIndex;    //regionIndex -> serverIndex (initial cluster state)
155     int[]   regionIndexToTableIndex;     //regionIndex -> tableIndex
156     int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
157     int[]   numMaxRegionsPerTable;       //tableIndex -> max number of regions in a single RS
158     int[]   regionIndexToPrimaryIndex;   //regionIndex -> regionIndex of the primary
159     boolean hasRegionReplicas = false;   //whether there is regions with replicas
160 
161     Integer[] serverIndicesSortedByRegionCount;
162     Integer[] serverIndicesSortedByLocality;
163 
164     Map<String, Integer> serversToIndex;
165     Map<String, Integer> hostsToIndex;
166     Map<String, Integer> racksToIndex;
167     Map<String, Integer> tablesToIndex;
168     Map<HRegionInfo, Integer> regionsToIndex;
169     float[] localityPerServer;
170 
171     int numServers;
172     int numHosts;
173     int numRacks;
174     int numTables;
175     int numRegions;
176 
177     int numMovedRegions = 0; //num moved regions from the initial configuration
178     Map<ServerName, List<HRegionInfo>> clusterState;
179 
180     protected final RackManager rackManager;
181     // Maps region -> rackIndex -> locality of region on rack
182     private float[][] rackLocalities;
183     // Maps localityType -> region -> [server|rack]Index with highest locality
184     private int[][] regionsToMostLocalEntities;
185 
    /**
     * Builds a Cluster snapshot with no unassigned regions.
     *
     * @param clusterState current assignment of regions to servers
     * @param loads per-region load history keyed by region name, may be null
     * @param regionFinder locality lookup, or null when locality is disabled
     * @param rackManager rack resolver; a single-rack default is used when null
     */
    protected Cluster(
        Map<ServerName, List<HRegionInfo>> clusterState,
        Map<String, Deque<RegionLoad>> loads,
        RegionLocationFinder regionFinder,
        RackManager rackManager) {
      this(null, clusterState, loads, regionFinder, rackManager);
    }
193 
    /**
     * Builds the array-based cluster snapshot: assigns a dense integer index to
     * every server, host, rack, table and region seen in {@code clusterState},
     * then populates the cross-reference arrays (regionsPerServer, per-host and
     * per-rack region lists, primary-replica indexes, per-table counts) that the
     * balancer cost functions operate on.
     *
     * @param unassignedRegions regions not hosted anywhere; null is treated as empty
     * @param clusterState current assignment of regions to servers
     * @param loads per-region load history keyed by region name, may be null
     * @param regionFinder locality lookup, or null when locality is disabled
     * @param rackManager rack resolver; a single-rack default is used when null
     */
    @SuppressWarnings("unchecked")
    protected Cluster(
        Collection<HRegionInfo> unassignedRegions,
        Map<ServerName, List<HRegionInfo>> clusterState,
        Map<String, Deque<RegionLoad>> loads,
        RegionLocationFinder regionFinder,
        RackManager rackManager) {

      if (unassignedRegions == null) {
        unassignedRegions = EMPTY_REGION_LIST;
      }

      serversToIndex = new HashMap<String, Integer>();
      hostsToIndex = new HashMap<String, Integer>();
      racksToIndex = new HashMap<String, Integer>();
      tablesToIndex = new HashMap<String, Integer>();

      //TODO: We should get the list of tables from master
      tables = new ArrayList<String>();
      this.rackManager = rackManager != null ? rackManager : new DefaultRackManager();

      numRegions = 0;

      List<List<Integer>> serversPerHostList = new ArrayList<List<Integer>>();
      List<List<Integer>> serversPerRackList = new ArrayList<List<Integer>>();
      this.clusterState = clusterState;
      this.regionFinder = regionFinder;

      // Pass 1: index servers, hosts and racks, and collect server lists per
      // host and per rack.
      // Use servername and port as there can be dead servers in this list. We want everything with
      // a matching hostname and port to have the same index.
      for (ServerName sn : clusterState.keySet()) {
        if (serversToIndex.get(sn.getHostAndPort()) == null) {
          serversToIndex.put(sn.getHostAndPort(), numServers++);
        }
        if (!hostsToIndex.containsKey(sn.getHostname())) {
          hostsToIndex.put(sn.getHostname(), numHosts++);
          serversPerHostList.add(new ArrayList<Integer>(1));
        }

        int serverIndex = serversToIndex.get(sn.getHostAndPort());
        int hostIndex = hostsToIndex.get(sn.getHostname());
        serversPerHostList.get(hostIndex).add(serverIndex);

        String rack = this.rackManager.getRack(sn);
        if (!racksToIndex.containsKey(rack)) {
          racksToIndex.put(rack, numRacks++);
          serversPerRackList.add(new ArrayList<Integer>());
        }
        int rackIndex = racksToIndex.get(rack);
        serversPerRackList.get(rackIndex).add(serverIndex);
      }

      // Count how many regions there are.
      for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
        numRegions += entry.getValue().size();
      }
      numRegions += unassignedRegions.size();

      // Allocate all per-region and per-server arrays now that counts are known.
      regionsToIndex = new HashMap<HRegionInfo, Integer>(numRegions);
      servers = new ServerName[numServers];
      serversPerHost = new int[numHosts][];
      serversPerRack = new int[numRacks][];
      regions = new HRegionInfo[numRegions];
      regionIndexToServerIndex = new int[numRegions];
      initialRegionIndexToServerIndex = new int[numRegions];
      regionIndexToTableIndex = new int[numRegions];
      regionIndexToPrimaryIndex = new int[numRegions];
      regionLoads = new Deque[numRegions];

      regionLocations = new int[numRegions][];
      serverIndicesSortedByRegionCount = new Integer[numServers];
      serverIndicesSortedByLocality = new Integer[numServers];
      localityPerServer = new float[numServers];

      serverIndexToHostIndex = new int[numServers];
      serverIndexToRackIndex = new int[numServers];
      regionsPerServer = new int[numServers][];
      regionsPerHost = new int[numHosts][];
      regionsPerRack = new int[numRacks][];
      primariesOfRegionsPerServer = new int[numServers][];
      primariesOfRegionsPerHost = new int[numHosts][];
      primariesOfRegionsPerRack = new int[numRacks][];

      int tableIndex = 0, regionIndex = 0, regionPerServerIndex = 0;

      // Pass 2: pick the representative ServerName per index and size the
      // per-server region arrays. Multiple ServerNames (dead + live) can map to
      // the same index, so array sizes may accumulate across entries.
      for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
        int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());

        // keep the servername if this is the first server name for this hostname
        // or this servername has the newest startcode.
        if (servers[serverIndex] == null ||
            servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
          servers[serverIndex] = entry.getKey();
        }

        if (regionsPerServer[serverIndex] != null) {
          // there is another server with the same hostAndPort in ClusterState.
          // allocate the array for the total size
          regionsPerServer[serverIndex] = new int[entry.getValue().size() + regionsPerServer[serverIndex].length];
        } else {
          regionsPerServer[serverIndex] = new int[entry.getValue().size()];
        }
        primariesOfRegionsPerServer[serverIndex] = new int[regionsPerServer[serverIndex].length];
        serverIndicesSortedByRegionCount[serverIndex] = serverIndex;
        serverIndicesSortedByLocality[serverIndex] = serverIndex;
      }

      // Invert the host and rack index maps into plain arrays.
      hosts = new String[numHosts];
      for (Entry<String, Integer> entry : hostsToIndex.entrySet()) {
        hosts[entry.getValue()] = entry.getKey();
      }
      racks = new String[numRacks];
      for (Entry<String, Integer> entry : racksToIndex.entrySet()) {
        racks[entry.getValue()] = entry.getKey();
      }

      // Pass 3: register each region and record the server->host / server->rack
      // mappings.
      for (Entry<ServerName, List<HRegionInfo>> entry : clusterState.entrySet()) {
        int serverIndex = serversToIndex.get(entry.getKey().getHostAndPort());
        regionPerServerIndex = 0;

        int hostIndex = hostsToIndex.get(entry.getKey().getHostname());
        serverIndexToHostIndex[serverIndex] = hostIndex;

        int rackIndex = racksToIndex.get(this.rackManager.getRack(entry.getKey()));
        serverIndexToRackIndex[serverIndex] = rackIndex;

        for (HRegionInfo region : entry.getValue()) {
          registerRegion(region, regionIndex, serverIndex, loads, regionFinder);
          regionsPerServer[serverIndex][regionPerServerIndex++] = regionIndex;
          regionIndex++;
        }
      }

      // Unassigned regions get server index -1.
      for (HRegionInfo region : unassignedRegions) {
        registerRegion(region, regionIndex, -1, loads, regionFinder);
        regionIndex++;
      }

      // Flatten the per-host server lists; detect multi-server hosts.
      for (int i = 0; i < serversPerHostList.size(); i++) {
        serversPerHost[i] = new int[serversPerHostList.get(i).size()];
        for (int j = 0; j < serversPerHost[i].length; j++) {
          serversPerHost[i][j] = serversPerHostList.get(i).get(j);
        }
        if (serversPerHost[i].length > 1) {
          multiServersPerHost = true;
        }
      }

      // Flatten the per-rack server lists.
      for (int i = 0; i < serversPerRackList.size(); i++) {
        serversPerRack[i] = new int[serversPerRackList.get(i).size()];
        for (int j = 0; j < serversPerRack[i].length; j++) {
          serversPerRack[i][j] = serversPerRackList.get(i).get(j);
        }
      }

      // Per-table region counts per server, and the max over servers per table.
      numTables = tables.size();
      numRegionsPerServerPerTable = new int[numServers][numTables];

      for (int i = 0; i < numServers; i++) {
        for (int j = 0; j < numTables; j++) {
          numRegionsPerServerPerTable[i][j] = 0;
        }
      }

      for (int i=0; i < regionIndexToServerIndex.length; i++) {
        if (regionIndexToServerIndex[i] >= 0) {
          numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
        }
      }

      numMaxRegionsPerTable = new int[numTables];
      for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
        for (tableIndex = 0 ; tableIndex < numRegionsPerServerPerTable[serverIndex].length; tableIndex++) {
          if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
            numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
          }
        }
      }

      // Map every region to the index of its primary replica (-1 when the
      // primary is not present in this snapshot).
      for (int i = 0; i < regions.length; i ++) {
        HRegionInfo info = regions[i];
        if (RegionReplicaUtil.isDefaultReplica(info)) {
          regionIndexToPrimaryIndex[i] = i;
        } else {
          hasRegionReplicas = true;
          HRegionInfo primaryInfo = RegionReplicaUtil.getRegionInfoForDefaultReplica(info);
          regionIndexToPrimaryIndex[i] =
              regionsToIndex.containsKey(primaryInfo) ?
              regionsToIndex.get(primaryInfo):
              -1;
        }
      }

      for (int i = 0; i < regionsPerServer.length; i++) {
        primariesOfRegionsPerServer[i] = new int[regionsPerServer[i].length];
        for (int j = 0; j < regionsPerServer[i].length; j++) {
          int primaryIndex = regionIndexToPrimaryIndex[regionsPerServer[i][j]];
          primariesOfRegionsPerServer[i][j] = primaryIndex;
        }
        // sort the regions by primaries.
        Arrays.sort(primariesOfRegionsPerServer[i]);
      }

      // compute regionsPerHost (only needed when a host runs multiple servers)
      if (multiServersPerHost) {
        for (int i = 0 ; i < serversPerHost.length; i++) {
          int numRegionsPerHost = 0;
          for (int j = 0; j < serversPerHost[i].length; j++) {
            numRegionsPerHost += regionsPerServer[serversPerHost[i][j]].length;
          }
          regionsPerHost[i] = new int[numRegionsPerHost];
          primariesOfRegionsPerHost[i] = new int[numRegionsPerHost];
        }
        for (int i = 0 ; i < serversPerHost.length; i++) {
          int numRegionPerHostIndex = 0;
          for (int j = 0; j < serversPerHost[i].length; j++) {
            for (int k = 0; k < regionsPerServer[serversPerHost[i][j]].length; k++) {
              int region = regionsPerServer[serversPerHost[i][j]][k];
              regionsPerHost[i][numRegionPerHostIndex] = region;
              int primaryIndex = regionIndexToPrimaryIndex[region];
              primariesOfRegionsPerHost[i][numRegionPerHostIndex] = primaryIndex;
              numRegionPerHostIndex++;
            }
          }
          // sort the regions by primaries.
          Arrays.sort(primariesOfRegionsPerHost[i]);
        }
      }

      // compute regionsPerRack (only needed when there is more than one rack)
      if (numRacks > 1) {
        for (int i = 0 ; i < serversPerRack.length; i++) {
          int numRegionsPerRack = 0;
          for (int j = 0; j < serversPerRack[i].length; j++) {
            numRegionsPerRack += regionsPerServer[serversPerRack[i][j]].length;
          }
          regionsPerRack[i] = new int[numRegionsPerRack];
          primariesOfRegionsPerRack[i] = new int[numRegionsPerRack];
        }

        for (int i = 0 ; i < serversPerRack.length; i++) {
          int numRegionPerRackIndex = 0;
          for (int j = 0; j < serversPerRack[i].length; j++) {
            for (int k = 0; k < regionsPerServer[serversPerRack[i][j]].length; k++) {
              int region = regionsPerServer[serversPerRack[i][j]][k];
              regionsPerRack[i][numRegionPerRackIndex] = region;
              int primaryIndex = regionIndexToPrimaryIndex[region];
              primariesOfRegionsPerRack[i][numRegionPerRackIndex] = primaryIndex;
              numRegionPerRackIndex++;
            }
          }
          // sort the regions by primaries.
          Arrays.sort(primariesOfRegionsPerRack[i]);
        }
      }
    }
450 
451     /** Helper for Cluster constructor to handle a region */
452     private void registerRegion(HRegionInfo region, int regionIndex,
453         int serverIndex, Map<String, Deque<RegionLoad>> loads,
454         RegionLocationFinder regionFinder) {
455       String tableName = region.getTable().getNameAsString();
456       if (!tablesToIndex.containsKey(tableName)) {
457         tables.add(tableName);
458         tablesToIndex.put(tableName, tablesToIndex.size());
459       }
460       int tableIndex = tablesToIndex.get(tableName);
461 
462       regionsToIndex.put(region, regionIndex);
463       regions[regionIndex] = region;
464       regionIndexToServerIndex[regionIndex] = serverIndex;
465       initialRegionIndexToServerIndex[regionIndex] = serverIndex;
466       regionIndexToTableIndex[regionIndex] = tableIndex;
467 
468       // region load
469       if (loads != null) {
470         Deque<RegionLoad> rl = loads.get(region.getRegionNameAsString());
471         // That could have failed if the RegionLoad is using the other regionName
472         if (rl == null) {
473           // Try getting the region load using encoded name.
474           rl = loads.get(region.getEncodedName());
475         }
476         regionLoads[regionIndex] = rl;
477       }
478 
479       if (regionFinder != null) {
480         // region location
481         List<ServerName> loc = regionFinder.getTopBlockLocations(region);
482         regionLocations[regionIndex] = new int[loc.size()];
483         for (int i = 0; i < loc.size(); i++) {
484           regionLocations[regionIndex][i] = loc.get(i) == null ? -1
485               : (serversToIndex.get(loc.get(i).getHostAndPort()) == null ? -1
486                   : serversToIndex.get(loc.get(i).getHostAndPort()));
487         }
488       }
489     }
490 
491     /**
492      * Returns true iff a given server has less regions than the balanced amount
493      */
494     public boolean serverHasTooFewRegions(int server) {
495       int minLoad = this.numRegions / numServers;
496       int numRegions = getNumRegions(server);
497       return numRegions < minLoad;
498     }
499 
500     /**
501      * Retrieves and lazily initializes a field storing the locality of
502      * every region/server combination
503      */
504     public float[][] getOrComputeRackLocalities() {
505       if (rackLocalities == null || regionsToMostLocalEntities == null) {
506         computeCachedLocalities();
507       }
508       return rackLocalities;
509     }
510 
511     /**
512      * Lazily initializes and retrieves a mapping of region -> server for which region has
513      * the highest the locality
514      */
515     public int[] getOrComputeRegionsToMostLocalEntities(LocalityType type) {
516       if (rackLocalities == null || regionsToMostLocalEntities == null) {
517         computeCachedLocalities();
518       }
519       return regionsToMostLocalEntities[type.ordinal()];
520     }
521 
522     /**
523      * Looks up locality from cache of localities. Will create cache if it does
524      * not already exist.
525      */
526     public float getOrComputeLocality(int region, int entity, LocalityType type) {
527       switch (type) {
528         case SERVER:
529           return getLocalityOfRegion(region, entity);
530         case RACK:
531           return getOrComputeRackLocalities()[region][entity];
532         default:
533           throw new IllegalArgumentException("Unsupported LocalityType: " + type);
534       }
535     }
536 
537     /**
538      * Returns locality weighted by region size in MB. Will create locality cache
539      * if it does not already exist.
540      */
541     public double getOrComputeWeightedLocality(int region, int server, LocalityType type) {
542       return getRegionSizeMB(region) * getOrComputeLocality(region, server, type);
543     }
544 
545     /**
546      * Returns the size in MB from the most recent RegionLoad for region
547      */
548     public int getRegionSizeMB(int region) {
549       Deque<RegionLoad> load = regionLoads[region];
550       // This means regions have no actual data on disk
551       if (load == null) {
552         return 0;
553       }
554       return regionLoads[region].getLast().getStorefileSizeMB();
555     }
556 
557     /**
558      * Computes and caches the locality for each region/rack combinations,
559      * as well as storing a mapping of region -> server and region -> rack such that server
560      * and rack have the highest locality for region
561      */
562     private void computeCachedLocalities() {
563       rackLocalities = new float[numRegions][numServers];
564       regionsToMostLocalEntities = new int[LocalityType.values().length][numRegions];
565 
566       // Compute localities and find most local server per region
567       for (int region = 0; region < numRegions; region++) {
568         int serverWithBestLocality = 0;
569         float bestLocalityForRegion = 0;
570         for (int server = 0; server < numServers; server++) {
571           // Aggregate per-rack locality
572           float locality = getLocalityOfRegion(region, server);
573           int rack = serverIndexToRackIndex[server];
574           int numServersInRack = serversPerRack[rack].length;
575           rackLocalities[region][rack] += locality / numServersInRack;
576 
577           if (locality > bestLocalityForRegion) {
578             serverWithBestLocality = server;
579             bestLocalityForRegion = locality;
580           }
581         }
582         regionsToMostLocalEntities[LocalityType.SERVER.ordinal()][region] = serverWithBestLocality;
583 
584         // Find most local rack per region
585         int rackWithBestLocality = 0;
586         float bestRackLocalityForRegion = 0.0f;
587         for (int rack = 0; rack < numRacks; rack++) {
588           float rackLocality = rackLocalities[region][rack];
589           if (rackLocality > bestRackLocalityForRegion) {
590             bestRackLocalityForRegion = rackLocality;
591             rackWithBestLocality = rack;
592           }
593         }
594         regionsToMostLocalEntities[LocalityType.RACK.ordinal()][region] = rackWithBestLocality;
595       }
596 
597     }
598 
599     /**
600      * Maps region index to rack index
601      */
602     public int getRackForRegion(int region) {
603       return serverIndexToRackIndex[regionIndexToServerIndex[region]];
604     }
605 
    /** Granularity at which locality is tracked and cached: per server or per rack. */
    enum LocalityType {
      SERVER,
      RACK
    }
610 
611     /** An action to move or swap a region */
612     public static class Action {
613       public static enum Type {
614         ASSIGN_REGION,
615         MOVE_REGION,
616         SWAP_REGIONS,
617         NULL,
618       }
619 
620       public Type type;
621       public Action (Type type) {this.type = type;}
622       /** Returns an Action which would undo this action */
623       public Action undoAction() { return this; }
624       @Override
625       public String toString() { return type + ":";}
626     }
627 
628     public static class AssignRegionAction extends Action {
629       public int region;
630       public int server;
631       public AssignRegionAction(int region, int server) {
632         super(Type.ASSIGN_REGION);
633         this.region = region;
634         this.server = server;
635       }
636       @Override
637       public Action undoAction() {
638         // TODO implement this. This action is not being used by the StochasticLB for now
639         // in case it uses it, we should implement this function.
640         throw new NotImplementedException();
641       }
642       @Override
643       public String toString() {
644         return type + ": " + region + ":" + server;
645       }
646     }
647 
648     public static class MoveRegionAction extends Action {
649       public int region;
650       public int fromServer;
651       public int toServer;
652 
653       public MoveRegionAction(int region, int fromServer, int toServer) {
654         super(Type.MOVE_REGION);
655         this.fromServer = fromServer;
656         this.region = region;
657         this.toServer = toServer;
658       }
659       @Override
660       public Action undoAction() {
661         return new MoveRegionAction (region, toServer, fromServer);
662       }
663       @Override
664       public String toString() {
665         return type + ": " + region + ":" + fromServer + " -> " + toServer;
666       }
667     }
668 
669     public static class SwapRegionsAction extends Action {
670       public int fromServer;
671       public int fromRegion;
672       public int toServer;
673       public int toRegion;
674       public SwapRegionsAction(int fromServer, int fromRegion, int toServer, int toRegion) {
675         super(Type.SWAP_REGIONS);
676         this.fromServer = fromServer;
677         this.fromRegion = fromRegion;
678         this.toServer = toServer;
679         this.toRegion = toRegion;
680       }
681       @Override
682       public Action undoAction() {
683         return new SwapRegionsAction (fromServer, toRegion, toServer, fromRegion);
684       }
685       @Override
686       public String toString() {
687         return type + ": " + fromRegion + ":" + fromServer + " <-> " + toRegion + ":" + toServer;
688       }
689     }
690 
691     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NM_FIELD_NAMING_CONVENTION",
692         justification="Mistake. Too disruptive to change now")
693     public static final Action NullAction = new Action(Type.NULL);
694 
695     public void doAction(Action action) {
696       switch (action.type) {
697       case NULL: break;
698       case ASSIGN_REGION:
699         // FindBugs: Having the assert quietens FB BC_UNCONFIRMED_CAST warnings
700         assert action instanceof AssignRegionAction: action.getClass();
701         AssignRegionAction ar = (AssignRegionAction) action;
702         regionsPerServer[ar.server] = addRegion(regionsPerServer[ar.server], ar.region);
703         regionMoved(ar.region, -1, ar.server);
704         break;
705       case MOVE_REGION:
706         assert action instanceof MoveRegionAction: action.getClass();
707         MoveRegionAction mra = (MoveRegionAction) action;
708         regionsPerServer[mra.fromServer] = removeRegion(regionsPerServer[mra.fromServer], mra.region);
709         regionsPerServer[mra.toServer] = addRegion(regionsPerServer[mra.toServer], mra.region);
710         regionMoved(mra.region, mra.fromServer, mra.toServer);
711         break;
712       case SWAP_REGIONS:
713         assert action instanceof SwapRegionsAction: action.getClass();
714         SwapRegionsAction a = (SwapRegionsAction) action;
715         regionsPerServer[a.fromServer] = replaceRegion(regionsPerServer[a.fromServer], a.fromRegion, a.toRegion);
716         regionsPerServer[a.toServer] = replaceRegion(regionsPerServer[a.toServer], a.toRegion, a.fromRegion);
717         regionMoved(a.fromRegion, a.fromServer, a.toServer);
718         regionMoved(a.toRegion, a.toServer, a.fromServer);
719         break;
720       default:
721         throw new RuntimeException("Uknown action:" + action.type);
722       }
723     }
724 
725     /**
726      * Return true if the placement of region on server would lower the availability
727      * of the region in question
728      * @param server
729      * @param region
730      * @return true or false
731      */
732     boolean wouldLowerAvailability(HRegionInfo regionInfo, ServerName serverName) {
733       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
734         return false; // safeguard against race between cluster.servers and servers from LB method args
735       }
736       int server = serversToIndex.get(serverName.getHostAndPort());
737       int region = regionsToIndex.get(regionInfo);
738 
739       int primary = regionIndexToPrimaryIndex[region];
740 
741       // there is a subset relation for server < host < rack
742       // check server first
743 
744       if (contains(primariesOfRegionsPerServer[server], primary)) {
745         // check for whether there are other servers that we can place this region
746         for (int i = 0; i < primariesOfRegionsPerServer.length; i++) {
747           if (i != server && !contains(primariesOfRegionsPerServer[i], primary)) {
748             return true; // meaning there is a better server
749           }
750         }
751         return false; // there is not a better server to place this
752       }
753 
754       // check host
755       if (multiServersPerHost) { // these arrays would only be allocated if we have more than one server per host
756         int host = serverIndexToHostIndex[server];
757         if (contains(primariesOfRegionsPerHost[host], primary)) {
758           // check for whether there are other hosts that we can place this region
759           for (int i = 0; i < primariesOfRegionsPerHost.length; i++) {
760             if (i != host && !contains(primariesOfRegionsPerHost[i], primary)) {
761               return true; // meaning there is a better host
762             }
763           }
764           return false; // there is not a better host to place this
765         }
766       }
767 
768       // check rack
769       if (numRacks > 1) {
770         int rack = serverIndexToRackIndex[server];
771         if (contains(primariesOfRegionsPerRack[rack], primary)) {
772           // check for whether there are other racks that we can place this region
773           for (int i = 0; i < primariesOfRegionsPerRack.length; i++) {
774             if (i != rack && !contains(primariesOfRegionsPerRack[i], primary)) {
775               return true; // meaning there is a better rack
776             }
777           }
778           return false; // there is not a better rack to place this
779         }
780       }
781       return false;
782     }
783 
784     void doAssignRegion(HRegionInfo regionInfo, ServerName serverName) {
785       if (!serversToIndex.containsKey(serverName.getHostAndPort())) {
786         return;
787       }
788       int server = serversToIndex.get(serverName.getHostAndPort());
789       int region = regionsToIndex.get(regionInfo);
790       doAction(new AssignRegionAction(region, server));
791     }
792 
    /**
     * Updates all derived bookkeeping after moving {@code region} from
     * {@code oldServer} to {@code newServer}: the region-to-server index,
     * the moved-region counter, the per-server-per-table counts (and the
     * cached per-table maximum), and the primary-replica arrays tracked
     * per server, per host, and per rack.
     * @param oldServer previous hosting server index, or -1 for a fresh assignment
     */
    void regionMoved(int region, int oldServer, int newServer) {
      regionIndexToServerIndex[region] = newServer;
      if (initialRegionIndexToServerIndex[region] == newServer) {
        numMovedRegions--; //region moved back to original location
      } else if (oldServer >= 0 && initialRegionIndexToServerIndex[region] == oldServer) {
        numMovedRegions++; //region moved from original location
      }
      int tableIndex = regionIndexToTableIndex[region];
      if (oldServer >= 0) {
        numRegionsPerServerPerTable[oldServer][tableIndex]--;
      }
      numRegionsPerServerPerTable[newServer][tableIndex]++;

      //check whether this caused maxRegionsPerTable in the new Server to be updated
      if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
        numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[newServer][tableIndex];
      } else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] + 1)
          == numMaxRegionsPerTable[tableIndex]) {
        //recompute maxRegionsPerTable since the previous value was coming from the old server
        numMaxRegionsPerTable[tableIndex] = 0;
        for (int serverIndex = 0 ; serverIndex < numRegionsPerServerPerTable.length; serverIndex++) {
          if (numRegionsPerServerPerTable[serverIndex][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
            numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[serverIndex][tableIndex];
          }
        }
      }

      // update for servers
      int primary = regionIndexToPrimaryIndex[region];
      if (oldServer >= 0) {
        primariesOfRegionsPerServer[oldServer] = removeRegion(
          primariesOfRegionsPerServer[oldServer], primary);
      }
      primariesOfRegionsPerServer[newServer] = addRegionSorted(
        primariesOfRegionsPerServer[newServer], primary);

      // update for hosts
      if (multiServersPerHost) { // these arrays would only be allocated if we have more than one server per host
        int oldHost = oldServer >= 0 ? serverIndexToHostIndex[oldServer] : -1;
        int newHost = serverIndexToHostIndex[newServer];
        if (newHost != oldHost) { // a move within the same host leaves host bookkeeping unchanged
          regionsPerHost[newHost] = addRegion(regionsPerHost[newHost], region);
          primariesOfRegionsPerHost[newHost] = addRegionSorted(primariesOfRegionsPerHost[newHost], primary);
          if (oldHost >= 0) {
            regionsPerHost[oldHost] = removeRegion(regionsPerHost[oldHost], region);
            primariesOfRegionsPerHost[oldHost] = removeRegion(
              primariesOfRegionsPerHost[oldHost], primary); // will still be sorted
          }
        }
      }

      // update for racks
      if (numRacks > 1) {
        int oldRack = oldServer >= 0 ? serverIndexToRackIndex[oldServer] : -1;
        int newRack = serverIndexToRackIndex[newServer];
        if (newRack != oldRack) { // a move within the same rack leaves rack bookkeeping unchanged
          regionsPerRack[newRack] = addRegion(regionsPerRack[newRack], region);
          primariesOfRegionsPerRack[newRack] = addRegionSorted(primariesOfRegionsPerRack[newRack], primary);
          if (oldRack >= 0) {
            regionsPerRack[oldRack] = removeRegion(regionsPerRack[oldRack], region);
            primariesOfRegionsPerRack[oldRack] = removeRegion(
              primariesOfRegionsPerRack[oldRack], primary); // will still be sorted
          }
        }
      }
    }
859 
860     int[] removeRegion(int[] regions, int regionIndex) {
861       //TODO: this maybe costly. Consider using linked lists
862       int[] newRegions = new int[regions.length - 1];
863       int i = 0;
864       for (i = 0; i < regions.length; i++) {
865         if (regions[i] == regionIndex) {
866           break;
867         }
868         newRegions[i] = regions[i];
869       }
870       System.arraycopy(regions, i+1, newRegions, i, newRegions.length - i);
871       return newRegions;
872     }
873 
874     int[] addRegion(int[] regions, int regionIndex) {
875       int[] newRegions = new int[regions.length + 1];
876       System.arraycopy(regions, 0, newRegions, 0, regions.length);
877       newRegions[newRegions.length - 1] = regionIndex;
878       return newRegions;
879     }
880 
881     int[] addRegionSorted(int[] regions, int regionIndex) {
882       int[] newRegions = new int[regions.length + 1];
883       int i = 0;
884       for (i = 0; i < regions.length; i++) { // find the index to insert
885         if (regions[i] > regionIndex) {
886           break;
887         }
888       }
889       System.arraycopy(regions, 0, newRegions, 0, i); // copy first half
890       System.arraycopy(regions, i, newRegions, i+1, regions.length - i); // copy second half
891       newRegions[i] = regionIndex;
892 
893       return newRegions;
894     }
895 
896     int[] replaceRegion(int[] regions, int regionIndex, int newRegionIndex) {
897       int i = 0;
898       for (i = 0; i < regions.length; i++) {
899         if (regions[i] == regionIndex) {
900           regions[i] = newRegionIndex;
901           break;
902         }
903       }
904       return regions;
905     }
906 
    /** Re-sorts serverIndicesSortedByRegionCount ascending by region count. */
    void sortServersByRegionCount() {
      Arrays.sort(serverIndicesSortedByRegionCount, numRegionsComparator);
    }

    /** @return the number of regions currently hosted by the given server index */
    int getNumRegions(int server) {
      return regionsPerServer[server].length;
    }

    /** @return true if {@code val} is present in the sorted array {@code arr} */
    boolean contains(int[] arr, int val) {
      return Arrays.binarySearch(arr, val) >= 0;
    }

    // Orders server indices by ascending region count; used by sortServersByRegionCount().
    private Comparator<Integer> numRegionsComparator = new Comparator<Integer>() {
      @Override
      public int compare(Integer integer, Integer integer2) {
        return Integer.compare(getNumRegions(integer), getNumRegions(integer2));
      }
    };
925 
    /** Re-sorts serverIndicesSortedByLocality ascending by cached locality. */
    void sortServersByLocality() {
      Arrays.sort(serverIndicesSortedByLocality, localityComparator);
    }

    /** @return the cached locality value for the given server index */
    float getLocality(int server) {
      return localityPerServer[server];
    }

    // Orders server indices by ascending cached locality; used by sortServersByLocality().
    private Comparator<Integer> localityComparator = new Comparator<Integer>() {
      @Override
      public int compare(Integer integer, Integer integer2) {
        return Float.compare(getLocality(integer), getLocality(integer2));
      }
    };
940 
941     int getLowestLocalityRegionServer() {
942       if (regionFinder == null) {
943         return -1;
944       } else {
945         sortServersByLocality();
946         // We want to find server with non zero regions having lowest locality.
947         int i = 0;
948         int lowestLocalityServerIndex = serverIndicesSortedByLocality[i];
949         while (localityPerServer[lowestLocalityServerIndex] == 0
950             && (regionsPerServer[lowestLocalityServerIndex].length == 0)) {
951           i++;
952           lowestLocalityServerIndex = serverIndicesSortedByLocality[i];
953         }
954         if (LOG.isTraceEnabled()) {
955           LOG.trace("Lowest locality region server with non zero regions is "
956             + servers[lowestLocalityServerIndex].getHostname() + " with locality "
957             + localityPerServer[lowestLocalityServerIndex]);
958         }
959         return lowestLocalityServerIndex;
960       }
961     }
962 
    /**
     * Finds the region with the lowest HDFS locality on the given server,
     * skipping regions whose block distribution has no data.
     * @return the region index, or -1 when there is no region finder, the
     *         server hosts no regions, or every hosted region is empty
     */
    int getLowestLocalityRegionOnServer(int serverIndex) {
      if (regionFinder != null) {
        float lowestLocality = 1.0f;
        // Position (within regionsPerServer[serverIndex]) of the current minimum.
        int lowestLocalityRegionIndex = -1;
        if (regionsPerServer[serverIndex].length == 0) {
          // No regions on that region server
          return -1;
        }
        for (int j = 0; j < regionsPerServer[serverIndex].length; j++) {
          int regionIndex = regionsPerServer[serverIndex][j];
          HDFSBlocksDistribution distribution = regionFinder
              .getBlockDistribution(regions[regionIndex]);
          float locality = distribution.getBlockLocalityIndex(servers[serverIndex].getHostname());
          // skip empty region
          if (distribution.getUniqueBlocksTotalWeight() == 0) {
            continue;
          }
          if (locality < lowestLocality) {
            lowestLocality = locality;
            lowestLocalityRegionIndex = j;
          }
        }
        if (lowestLocalityRegionIndex == -1) {
          return -1;
        }
        if (LOG.isTraceEnabled()) {
          LOG.trace("Lowest locality region is "
              + regions[regionsPerServer[serverIndex][lowestLocalityRegionIndex]]
                  .getRegionNameAsString() + " with locality " + lowestLocality
              + " and its region server contains " + regionsPerServer[serverIndex].length
              + " regions");
        }
        // Translate the position back into a global region index.
        return regionsPerServer[serverIndex][lowestLocalityRegionIndex];
      } else {
        return -1;
      }
    }
1000 
1001     float getLocalityOfRegion(int region, int server) {
1002       if (regionFinder != null) {
1003         HDFSBlocksDistribution distribution = regionFinder.getBlockDistribution(regions[region]);
1004         return distribution.getBlockLocalityIndex(servers[server].getHostname());
1005       } else {
1006         return 0f;
1007       }
1008     }
1009 
1010     /**
1011      * Returns a least loaded server which has better locality for this region
1012      * than the current server.
1013      */
1014     int getLeastLoadedTopServerForRegion(int region, int currentServer) {
1015       if (regionFinder != null) {
1016         List<ServerName> topLocalServers = regionFinder.getTopBlockLocations(regions[region],
1017           servers[currentServer].getHostname());
1018         int leastLoadedServerIndex = -1;
1019         int load = Integer.MAX_VALUE;
1020         for (ServerName sn : topLocalServers) {
1021           if (!serversToIndex.containsKey(sn.getHostAndPort())) {
1022             continue;
1023           }
1024           int index = serversToIndex.get(sn.getHostAndPort());
1025           if (regionsPerServer[index] == null) {
1026             continue;
1027           }
1028           int tempLoad = regionsPerServer[index].length;
1029           if (tempLoad <= load) {
1030             leastLoadedServerIndex = index;
1031             load = tempLoad;
1032           }
1033         }
1034         if (leastLoadedServerIndex != -1) {
1035           if (LOG.isTraceEnabled()) {
1036             LOG.trace(
1037               "Pick the least loaded server " + servers[leastLoadedServerIndex].getHostname()
1038                   + " with better locality for region " + regions[region]);
1039           }
1040         }
1041         return leastLoadedServerIndex;
1042       } else {
1043         return -1;
1044       }
1045     }
1046 
    /**
     * Recomputes localityPerServer for every server by aggregating the HDFS
     * block distributions of all regions it hosts. Does nothing (beyond a
     * warning) when no region finder is available.
     */
    void calculateRegionServerLocalities() {
      if (regionFinder == null) {
        LOG.warn("Region location finder found null, skipping locality calculations.");
        return;
      }
      for (int i = 0; i < regionsPerServer.length; i++) {
        HDFSBlocksDistribution distribution = new HDFSBlocksDistribution();
        if (regionsPerServer[i].length > 0) {
          for (int j = 0; j < regionsPerServer[i].length; j++) {
            int regionIndex = regionsPerServer[i][j];
            distribution.add(regionFinder.getBlockDistribution(regions[regionIndex]));
          }
        } else {
          LOG.debug("Server " + servers[i].getHostname() + " had 0 regions.");
        }
        // An empty distribution yields a locality index of 0 for this server.
        localityPerServer[i] = distribution.getBlockLocalityIndex(servers[i].getHostname());
      }
    }
1065 
    // Internal-use setter overriding the cached region count.
    @InterfaceAudience.Private
    protected void setNumRegions(int numRegions) {
      this.numRegions = numRegions;
    }

    // Internal-use setter overriding the moved-region counter.
    @InterfaceAudience.Private
    protected void setNumMovedRegions(int numMovedRegions) {
      this.numMovedRegions = numMovedRegions;
    }
1075 
1076     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="SBSC_USE_STRINGBUFFER_CONCATENATION",
1077         justification="Not important but should be fixed")
1078     @Override
1079     public String toString() {
1080       String desc = "Cluster{" +
1081           "servers=[";
1082           for(ServerName sn:servers) {
1083              desc += sn.getHostAndPort() + ", ";
1084           }
1085           desc +=
1086           ", serverIndicesSortedByRegionCount="+
1087           Arrays.toString(serverIndicesSortedByRegionCount) +
1088           ", regionsPerServer=[";
1089 
1090           for (int[]r:regionsPerServer) {
1091             desc += Arrays.toString(r);
1092           }
1093           desc += "]" +
1094           ", numMaxRegionsPerTable=" +
1095           Arrays.toString(numMaxRegionsPerTable) +
1096           ", numRegions=" +
1097           numRegions +
1098           ", numServers=" +
1099           numServers +
1100           ", numTables=" +
1101           numTables +
1102           ", numMovedRegions=" +
1103           numMovedRegions +
1104           '}';
1105       return desc;
1106     }
1107   }
1108 
  // slop for regions: allowed fractional deviation from the mean load before
  // the balancer considers a server over/under-loaded (clamped to [0,1] in setConf)
  protected float slop;
  // Active configuration; replaced by setConf()
  protected Configuration config = HBaseConfiguration.create();
  // Maps servers to racks; created in setConf(), replaceable via setRackManager()
  protected RackManager rackManager;
  // Shared RNG used by random/round-robin assignment
  private static final Random RANDOM = new Random(System.currentTimeMillis());
  private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);

  // Configuration key listing tables whose regions may be kept on the master
  public static final String TABLES_ON_MASTER =
    "hbase.balancer.tablesOnMaster";

  // Parsed TABLES_ON_MASTER value; populated in setConf()
  protected final Set<String> tablesOnMaster = new HashSet<String>();
  protected MetricsBalancer metricsBalancer = null;
  protected ClusterStatus clusterStatus = null;
  // Server name of the active master; set in setMasterServices()
  protected ServerName masterServerName;
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="IS2_INCONSISTENT_SYNC",
  justification="The services is just assigned once when master start")
  protected MasterServices services;
1126 
1127   protected static String[] getTablesOnMaster(Configuration conf) {
1128     String valueString = conf.get(TABLES_ON_MASTER);
1129     if (valueString != null) {
1130       valueString = valueString.trim();
1131     }
1132     if (valueString == null || valueString.equalsIgnoreCase("none")) {
1133       return null;
1134     }
1135     return StringUtils.getStrings(valueString);
1136   }
1137 
1138   /**
1139    * Check if configured to put any tables on the active master
1140    */
1141   public static boolean tablesOnMaster(Configuration conf) {
1142     String[] tables = getTablesOnMaster(conf);
1143     return tables != null && tables.length > 0;
1144   }
1145 
1146   public static boolean userTablesOnMaster(Configuration conf) {
1147     String[] tables = getTablesOnMaster(conf);
1148     if (tables == null || tables.length == 0) {
1149       return false;
1150     }
1151     for (String tn:tables) {
1152       if (!tn.startsWith("hbase:")) {
1153         return true;
1154       }
1155     }
1156     return false;
1157   }
1158 
  /**
   * Applies configuration: reads and clamps slop into [0, 1], records the
   * on-master table list, recreates the rack manager, and forwards the
   * configuration to the region finder when locality lookups are enabled.
   */
  @Override
  public void setConf(Configuration conf) {
    setSlop(conf);
    if (slop < 0) slop = 0;
    else if (slop > 1) slop = 1;

    this.config = conf;
    String[] tables = getTablesOnMaster(conf);
    if (tables != null && tables.length > 0) {
      Collections.addAll(tablesOnMaster, tables);
    }
    // getConf() returns this.config, which was just assigned above.
    this.rackManager = new RackManager(getConf());
    if (useRegionFinder) {
      regionFinder.setConf(conf);
    }
  }
1175 
1176   protected void setSlop(Configuration conf) {
1177     this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
1178   }
1179 
1180   /**
1181    * Check if a region belongs to some small system table.
1182    * If so, the primary replica may be expected to be put on the master regionserver.
1183    */
1184   public boolean shouldBeOnMaster(HRegionInfo region) {
1185     return tablesOnMaster.contains(region.getTable().getNameAsString())
1186         && region.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID;
1187   }
1188 
1189   /**
1190    * Balance the regions that should be on master regionserver.
1191    */
1192   protected List<RegionPlan> balanceMasterRegions(
1193       Map<ServerName, List<HRegionInfo>> clusterMap) {
1194     if (masterServerName == null
1195         || clusterMap == null || clusterMap.size() <= 1) return null;
1196     List<RegionPlan> plans = null;
1197     List<HRegionInfo> regions = clusterMap.get(masterServerName);
1198     if (regions != null) {
1199       Iterator<ServerName> keyIt = null;
1200       for (HRegionInfo region: regions) {
1201         if (shouldBeOnMaster(region)) continue;
1202 
1203         // Find a non-master regionserver to host the region
1204         if (keyIt == null || !keyIt.hasNext()) {
1205           keyIt = clusterMap.keySet().iterator();
1206         }
1207         ServerName dest = keyIt.next();
1208         if (masterServerName.equals(dest)) {
1209           if (!keyIt.hasNext()) {
1210             keyIt = clusterMap.keySet().iterator();
1211           }
1212           dest = keyIt.next();
1213         }
1214 
1215         // Move this region away from the master regionserver
1216         RegionPlan plan = new RegionPlan(region, masterServerName, dest);
1217         if (plans == null) {
1218           plans = new ArrayList<RegionPlan>();
1219         }
1220         plans.add(plan);
1221       }
1222     }
1223     for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterMap.entrySet()) {
1224       if (masterServerName.equals(server.getKey())) continue;
1225       for (HRegionInfo region: server.getValue()) {
1226         if (!shouldBeOnMaster(region)) continue;
1227 
1228         // Move this region to the master regionserver
1229         RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName);
1230         if (plans == null) {
1231           plans = new ArrayList<RegionPlan>();
1232         }
1233         plans.add(plan);
1234       }
1235     }
1236     return plans;
1237   }
1238 
1239   /**
1240    * Assign the regions that should be on master regionserver.
1241    */
1242   protected Map<ServerName, List<HRegionInfo>> assignMasterRegions(
1243       Collection<HRegionInfo> regions, List<ServerName> servers) {
1244     if (servers == null || regions == null || regions.isEmpty()) {
1245       return null;
1246     }
1247     Map<ServerName, List<HRegionInfo>> assignments
1248       = new TreeMap<ServerName, List<HRegionInfo>>();
1249     if (masterServerName != null && servers.contains(masterServerName)) {
1250       assignments.put(masterServerName, new ArrayList<HRegionInfo>());
1251       for (HRegionInfo region: regions) {
1252         if (shouldBeOnMaster(region)) {
1253           assignments.get(masterServerName).add(region);
1254         }
1255       }
1256     }
1257     return assignments;
1258   }
1259 
  /** @return the configuration this balancer is currently using */
  @Override
  public Configuration getConf() {
    return this.config;
  }
1264 
  /**
   * Caches the latest cluster status and forwards it to the region finder
   * (when locality lookups are enabled).
   */
  @Override
  public synchronized void setClusterStatus(ClusterStatus st) {
    this.clusterStatus = st;
    if (useRegionFinder) {
      regionFinder.setClusterStatus(st);
    }
  }
1272 
  /**
   * Records the master services handle and the master's server name, and
   * forwards the services to the region finder when locality is enabled.
   */
  @Override
  public void setMasterServices(MasterServices masterServices) {
    masterServerName = masterServices.getServerName();
    this.services = masterServices;
    if (useRegionFinder) {
      this.regionFinder.setServices(masterServices);
    }
  }
1281 
  /**
   * Warms the region finder's HDFS block-distribution cache for all currently
   * assigned regions after master startup. Best effort: any failure is logged
   * and ignored so it can never fail startup.
   */
  @Override
  public void postMasterStartupInitialize() {
    if (services != null && regionFinder != null) {
      try {
        Set<HRegionInfo> regions =
            services.getAssignmentManager().getRegionStates().getRegionAssignments().keySet();
        regionFinder.refreshAndWait(regions);
      } catch (Exception e) {
        // Deliberately broad catch: this is an optimization, not a requirement.
        LOG.warn("Refreshing region HDFS Block dist failed with exception, ignoring", e);
      }
    }
  }
1294 
  /** Replaces the rack manager created in setConf(). */
  public void setRackManager(RackManager rackManager) {
    this.rackManager = rackManager;
  }
1298 
  /**
   * Decides whether a balancer run is warranted: requires a minimum number of
   * servers, then triggers on co-located region replicas, on an idle server,
   * or when some server's load falls outside the slop band around the mean.
   */
  protected boolean needsBalance(Cluster c) {
    ClusterLoadState cs = new ClusterLoadState(c.clusterState);
    if (cs.getNumServers() < MIN_SERVER_BALANCE) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Not running balancer because only " + cs.getNumServers()
            + " active regionserver(s)");
      }
      return false;
    }
    if(areSomeRegionReplicasColocated(c)) return true;
    if(idleRegionServerExist(c)) {
      return true;
    }

    // Check if we even need to do any load balancing
    // HBASE-3681 check sloppiness first
    float average = cs.getLoadAverage(); // for logging
    int floor = (int) Math.floor(average * (1 - slop));
    int ceiling = (int) Math.ceil(average * (1 + slop));
    if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
      NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
      if (LOG.isTraceEnabled()) {
        // If nothing to balance, then don't say anything unless trace-level logging.
        LOG.trace("Skipping load balancing because balanced cluster; " +
          "servers=" + cs.getNumServers() +
          " regions=" + cs.getNumRegions() + " average=" + average +
          " mostloaded=" + serversByLoad.lastKey().getLoad() +
          " leastloaded=" + serversByLoad.firstKey().getLoad());
      }
      return false;
    }
    return true;
  }
1332 
1333   /**
1334    * Subclasses should implement this to return true if the cluster has nodes that hosts
1335    * multiple replicas for the same region, or, if there are multiple racks and the same
1336    * rack hosts replicas of the same region
1337    * @param c Cluster information
1338    * @return whether region replicas are currently co-located
1339    */
1340   protected boolean areSomeRegionReplicasColocated(Cluster c) {
1341     return false;
1342   }
1343 
1344   protected final boolean idleRegionServerExist(Cluster c){
1345     boolean isServerExistsWithMoreRegions = false;
1346     boolean isServerExistsWithZeroRegions = false;
1347     for (int[] serverList: c.regionsPerServer){
1348       if (serverList.length > 1) {
1349         isServerExistsWithMoreRegions = true;
1350       }
1351       if (serverList.length == 0) {
1352         isServerExistsWithZeroRegions = true;
1353       }
1354     }
1355     return isServerExistsWithMoreRegions && isServerExistsWithZeroRegions;
1356   }
1357 
1358   /**
1359    * Generates a bulk assignment plan to be used on cluster startup using a
1360    * simple round-robin assignment.
1361    * <p>
1362    * Takes a list of all the regions and all the servers in the cluster and
1363    * returns a map of each server to the regions that it should be assigned.
1364    * <p>
1365    * Currently implemented as a round-robin assignment. Same invariant as load
1366    * balancing, all servers holding floor(avg) or ceiling(avg).
1367    *
1368    * TODO: Use block locations from HDFS to place regions with their blocks
1369    *
1370    * @param regions all regions
1371    * @param servers all servers
1372    * @return map of server to the regions it should take, or null if no
1373    *         assignment is possible (ie. no regions or no servers)
1374    */
1375   @Override
1376   public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
1377       List<ServerName> servers) {
1378     metricsBalancer.incrMiscInvocations();
1379     Map<ServerName, List<HRegionInfo>> assignments = assignMasterRegions(regions, servers);
1380     if (assignments != null && !assignments.isEmpty()) {
1381       servers = new ArrayList<ServerName>(servers);
1382       // Guarantee not to put other regions on master
1383       servers.remove(masterServerName);
1384       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1385       if (!masterRegions.isEmpty()) {
1386         regions = new ArrayList<HRegionInfo>(regions);
1387         for (HRegionInfo region: masterRegions) {
1388           regions.remove(region);
1389         }
1390       }
1391     }
1392     if (regions == null || regions.isEmpty()) {
1393       return assignments;
1394     }
1395 
1396     int numServers = servers == null ? 0 : servers.size();
1397     if (numServers == 0) {
1398       LOG.warn("Wanted to do round robin assignment but no servers to assign to");
1399       return null;
1400     }
1401 
1402     // TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the
1403     // normal LB.balancerCluster() with unassignedRegions. We only need to have a candidate
1404     // generator for AssignRegionAction. The LB will ensure the regions are mostly local
1405     // and balanced. This should also run fast with fewer number of iterations.
1406 
1407     if (numServers == 1) { // Only one server, nothing fancy we can do here
1408       ServerName server = servers.get(0);
1409       assignments.put(server, new ArrayList<HRegionInfo>(regions));
1410       return assignments;
1411     }
1412 
1413     Cluster cluster = createCluster(servers, regions);
1414     List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
1415 
1416     roundRobinAssignment(cluster, regions, unassignedRegions,
1417       servers, assignments);
1418 
1419     List<HRegionInfo> lastFewRegions = new ArrayList<HRegionInfo>();
1420     // assign the remaining by going through the list and try to assign to servers one-by-one
1421     int serverIdx = RANDOM.nextInt(numServers);
1422     for (HRegionInfo region : unassignedRegions) {
1423       boolean assigned = false;
1424       for (int j = 0; j < numServers; j++) { // try all servers one by one
1425         ServerName serverName = servers.get((j + serverIdx) % numServers);
1426         if (!cluster.wouldLowerAvailability(region, serverName)) {
1427           List<HRegionInfo> serverRegions = assignments.get(serverName);
1428           if (serverRegions == null) {
1429             serverRegions = new ArrayList<HRegionInfo>();
1430             assignments.put(serverName, serverRegions);
1431           }
1432           serverRegions.add(region);
1433           cluster.doAssignRegion(region, serverName);
1434           serverIdx = (j + serverIdx + 1) % numServers; //remain from next server
1435           assigned = true;
1436           break;
1437         }
1438       }
1439       if (!assigned) {
1440         lastFewRegions.add(region);
1441       }
1442     }
1443     // just sprinkle the rest of the regions on random regionservers. The balanceCluster will
1444     // make it optimal later. we can end up with this if numReplicas > numServers.
1445     for (HRegionInfo region : lastFewRegions) {
1446       int i = RANDOM.nextInt(numServers);
1447       ServerName server = servers.get(i);
1448       List<HRegionInfo> serverRegions = assignments.get(server);
1449       if (serverRegions == null) {
1450         serverRegions = new ArrayList<HRegionInfo>();
1451         assignments.put(server, serverRegions);
1452       }
1453       serverRegions.add(region);
1454       cluster.doAssignRegion(region, server);
1455     }
1456     return assignments;
1457   }
1458 
  /**
   * Builds a {@link Cluster} snapshot for the given servers and regions based
   * on the current assignments of (replicas of) those regions. Servers that
   * host none of the regions are still included, with an empty region list.
   */
  protected Cluster createCluster(List<ServerName> servers, Collection<HRegionInfo> regions) {
    // Get the snapshot of the current assignments for the regions in question, and then create
    // a cluster out of it. Note that we might have replicas already assigned to some servers
    // earlier. So we want to get the snapshot to see those assignments, but this will only contain
    // replicas of the regions that are passed (for performance).
    Map<ServerName, List<HRegionInfo>> clusterState = getRegionAssignmentsByServer(regions);

    for (ServerName server : servers) {
      if (!clusterState.containsKey(server)) {
        clusterState.put(server, EMPTY_REGION_LIST);
      }
    }
    return new Cluster(regions, clusterState, null, this.regionFinder,
        rackManager);
  }
1474 
1475   /**
1476    * Generates an immediate assignment plan to be used by a new master for
1477    * regions in transition that do not have an already known destination.
1478    *
1479    * Takes a list of regions that need immediate assignment and a list of all
1480    * available servers. Returns a map of regions to the server they should be
1481    * assigned to.
1482    *
1483    * This method will return quickly and does not do any intelligent balancing.
1484    * The goal is to make a fast decision not the best decision possible.
1485    *
1486    * Currently this is random.
1487    *
1488    * @param regions
1489    * @param servers
1490    * @return map of regions to the server it should be assigned to
1491    */
1492   @Override
1493   public Map<HRegionInfo, ServerName> immediateAssignment(List<HRegionInfo> regions,
1494       List<ServerName> servers) {
1495     metricsBalancer.incrMiscInvocations();
1496     if (servers == null || servers.isEmpty()) {
1497       LOG.warn("Wanted to do random assignment but no servers to assign to");
1498       return null;
1499     }
1500 
1501     Map<HRegionInfo, ServerName> assignments = new TreeMap<HRegionInfo, ServerName>();
1502     for (HRegionInfo region : regions) {
1503       assignments.put(region, randomAssignment(region, servers));
1504     }
1505     return assignments;
1506   }
1507 
1508   /**
1509    * Used to assign a single region to a random server.
1510    */
1511   @Override
1512   public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
1513     metricsBalancer.incrMiscInvocations();
1514     if (servers != null && servers.contains(masterServerName)) {
1515       if (shouldBeOnMaster(regionInfo)) {
1516         return masterServerName;
1517       }
1518       servers = new ArrayList<ServerName>(servers);
1519       // Guarantee not to put other regions on master
1520       servers.remove(masterServerName);
1521     }
1522 
1523     int numServers = servers == null ? 0 : servers.size();
1524     if (numServers == 0) {
1525       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1526       return null;
1527     }
1528     if (numServers == 1) { // Only one server, nothing fancy we can do here
1529       return servers.get(0);
1530     }
1531 
1532     List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
1533     Cluster cluster = createCluster(servers, regions);
1534     return randomAssignment(cluster, regionInfo, servers);
1535   }
1536 
1537   /**
1538    * Generates a bulk assignment startup plan, attempting to reuse the existing
1539    * assignment information from META, but adjusting for the specified list of
1540    * available/online servers available for assignment.
1541    * <p>
1542    * Takes a map of all regions to their existing assignment from META. Also
1543    * takes a list of online servers for regions to be assigned to. Attempts to
1544    * retain all assignment, so in some instances initial assignment will not be
1545    * completely balanced.
1546    * <p>
1547    * Any leftover regions without an existing server to be assigned to will be
1548    * assigned randomly to available servers.
1549    *
1550    * @param regions regions and existing assignment from meta
1551    * @param servers available servers
1552    * @return map of servers and regions to be assigned to them
1553    */
1554   @Override
1555   public Map<ServerName, List<HRegionInfo>> retainAssignment(Map<HRegionInfo, ServerName> regions,
1556       List<ServerName> servers) {
1557     // Update metrics
1558     metricsBalancer.incrMiscInvocations();
1559     Map<ServerName, List<HRegionInfo>> assignments
1560       = assignMasterRegions(regions.keySet(), servers);
1561     if (assignments != null && !assignments.isEmpty()) {
1562       servers = new ArrayList<ServerName>(servers);
1563       // Guarantee not to put other regions on master
1564       servers.remove(masterServerName);
1565       List<HRegionInfo> masterRegions = assignments.get(masterServerName);
1566       if (!masterRegions.isEmpty()) {
1567         regions = new HashMap<HRegionInfo, ServerName>(regions);
1568         for (HRegionInfo region: masterRegions) {
1569           regions.remove(region);
1570         }
1571       }
1572     }
1573     if (regions == null || regions.isEmpty()) {
1574       return assignments;
1575     }
1576 
1577     int numServers = servers == null ? 0 : servers.size();
1578     if (numServers == 0) {
1579       LOG.warn("Wanted to do retain assignment but no servers to assign to");
1580       return null;
1581     }
1582     if (numServers == 1) { // Only one server, nothing fancy we can do here
1583       ServerName server = servers.get(0);
1584       assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet()));
1585       return assignments;
1586     }
1587 
1588     // Group all of the old assignments by their hostname.
1589     // We can't group directly by ServerName since the servers all have
1590     // new start-codes.
1591 
1592     // Group the servers by their hostname. It's possible we have multiple
1593     // servers on the same host on different ports.
1594     ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
1595     for (ServerName server : servers) {
1596       assignments.put(server, new ArrayList<HRegionInfo>());
1597       serversByHostname.put(server.getHostnameLowerCase(), server);
1598     }
1599 
1600     // Collection of the hostnames that used to have regions
1601     // assigned, but for which we no longer have any RS running
1602     // after the cluster restart.
1603     Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
1604 
1605     // If the old servers aren't present, lets assign those regions later.
1606     List<HRegionInfo> randomAssignRegions = Lists.newArrayList();
1607 
1608     int numRandomAssignments = 0;
1609     int numRetainedAssigments = 0;
1610 
1611     for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
1612       HRegionInfo region = entry.getKey();
1613       ServerName oldServerName = entry.getValue();
1614       List<ServerName> localServers = new ArrayList<ServerName>();
1615       if (oldServerName != null) {
1616         localServers = serversByHostname.get(oldServerName.getHostnameLowerCase());
1617       }
1618       if (localServers.isEmpty()) {
1619         // No servers on the new cluster match up with this hostname, assign randomly, later.
1620         randomAssignRegions.add(region);
1621         if (oldServerName != null) {
1622           oldHostsNoLongerPresent.add(oldServerName.getHostnameLowerCase());
1623         }
1624       } else if (localServers.size() == 1) {
1625         // the usual case - one new server on same host
1626         ServerName target = localServers.get(0);
1627         assignments.get(target).add(region);
1628         numRetainedAssigments++;
1629       } else {
1630         // multiple new servers in the cluster on this same host
1631         if (localServers.contains(oldServerName)) {
1632           assignments.get(oldServerName).add(region);
1633           numRetainedAssigments++;
1634         } else {
1635           ServerName target = null;
1636           for (ServerName tmp : localServers) {
1637             if (tmp.getPort() == oldServerName.getPort()) {
1638               target = tmp;
1639               assignments.get(tmp).add(region);
1640               numRetainedAssigments++;
1641               break;
1642             }
1643           }
1644           if (target == null) {
1645             randomAssignRegions.add(region);
1646           }
1647         }
1648       }
1649     }
1650 
1651     // If servers from prior assignment aren't present, then lets do randomAssignment on regions.
1652     if (randomAssignRegions.size() > 0) {
1653       Cluster cluster = createCluster(servers, regions.keySet());
1654       for (Map.Entry<ServerName, List<HRegionInfo>> entry : assignments.entrySet()) {
1655         ServerName sn = entry.getKey();
1656         for (HRegionInfo region : entry.getValue()) {
1657           cluster.doAssignRegion(region, sn);
1658         }
1659       }
1660       for (HRegionInfo region : randomAssignRegions) {
1661         ServerName target = randomAssignment(cluster, region, servers);
1662         assignments.get(target).add(region);
1663         cluster.doAssignRegion(region, target);
1664         numRandomAssignments++;
1665       }
1666     }
1667 
1668     String randomAssignMsg = "";
1669     if (numRandomAssignments > 0) {
1670       randomAssignMsg =
1671           numRandomAssignments + " regions were assigned "
1672               + "to random hosts, since the old hosts for these regions are no "
1673               + "longer present in the cluster. These hosts were:\n  "
1674               + Joiner.on("\n  ").join(oldHostsNoLongerPresent);
1675     }
1676 
1677     LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
1678         + " retained the pre-restart assignment. " + randomAssignMsg);
1679     return assignments;
1680   }
1681 
  /** No-op: this balancer performs no extra initialization. */
  @Override
  public void initialize() throws HBaseIOException{
  }
1685 
  /** No-op: this balancer does not track individual region-online events. */
  @Override
  public void regionOnline(HRegionInfo regionInfo, ServerName sn) {
  }
1689 
  /** No-op: this balancer does not track individual region-offline events. */
  @Override
  public void regionOffline(HRegionInfo regionInfo) {
  }
1693 
  /** @return true once {@link #stop(String)} has been called. */
  @Override
  public boolean isStopped() {
    return stopped;
  }
1698 
  /**
   * Marks this balancer as stopped and logs the reason.
   * @param why human-readable reason for the stop request
   */
  @Override
  public void stop(String why) {
    LOG.info("Load Balancer stop requested: "+why);
    stopped = true;
  }
1704 
1705   /**
1706   * Updates the balancer status tag reported to JMX
1707   */
1708   public void updateBalancerStatus(boolean status) {
1709     metricsBalancer.balancerStatus(status);
1710   }
1711 
1712   /**
1713    * Used to assign a single region to a random server.
1714    */
1715   private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo,
1716       List<ServerName> servers) {
1717     int numServers = servers.size(); // servers is not null, numServers > 1
1718     ServerName sn = null;
1719     final int maxIterations = numServers * 4;
1720     int iterations = 0;
1721 
1722     do {
1723       int i = RANDOM.nextInt(numServers);
1724       sn = servers.get(i);
1725     } while (cluster.wouldLowerAvailability(regionInfo, sn)
1726         && iterations++ < maxIterations);
1727     cluster.doAssignRegion(regionInfo, sn);
1728     return sn;
1729   }
1730 
1731   /**
1732    * Round robin a list of regions to a list of servers
1733    */
1734   private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions,
1735       List<HRegionInfo> unassignedRegions, List<ServerName> servers,
1736       Map<ServerName, List<HRegionInfo>> assignments) {
1737 
1738     int numServers = servers.size();
1739     int numRegions = regions.size();
1740     int max = (int) Math.ceil((float) numRegions / numServers);
1741     int serverIdx = 0;
1742     if (numServers > 1) {
1743       serverIdx = RANDOM.nextInt(numServers);
1744     }
1745     int regionIdx = 0;
1746 
1747     for (int j = 0; j < numServers; j++) {
1748       ServerName server = servers.get((j + serverIdx) % numServers);
1749       List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
1750       for (int i = regionIdx; i < numRegions; i += numServers) {
1751         HRegionInfo region = regions.get(i % numRegions);
1752         if (cluster.wouldLowerAvailability(region, server)) {
1753           unassignedRegions.add(region);
1754         } else {
1755           serverRegions.add(region);
1756           cluster.doAssignRegion(region, server);
1757         }
1758       }
1759       assignments.put(server, serverRegions);
1760       regionIdx++;
1761     }
1762   }
1763 
1764   protected Map<ServerName, List<HRegionInfo>> getRegionAssignmentsByServer(
1765     Collection<HRegionInfo> regions) {
1766     if (this.services != null && this.services.getAssignmentManager() != null) {
1767       return this.services.getAssignmentManager().getSnapShotOfAssignment(regions);
1768     } else {
1769       return new HashMap<ServerName, List<HRegionInfo>>();
1770     }
1771   }
1772 
  /** No-op: this balancer does not react to configuration changes. */
  @Override
  public void onConfigurationChange(Configuration conf) {
  }
1776 }