1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.balancer;
19
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Collections;
23 import java.util.HashMap;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.NavigableMap;
27 import java.util.Random;
28 import java.util.TreeMap;
29
30 import org.apache.commons.logging.Log;
31 import org.apache.commons.logging.LogFactory;
32 import org.apache.hadoop.hbase.classification.InterfaceAudience;
33 import org.apache.hadoop.hbase.HBaseIOException;
34 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
35 import org.apache.hadoop.hbase.HRegionInfo;
36 import org.apache.hadoop.hbase.ServerName;
37 import org.apache.hadoop.hbase.TableName;
38 import org.apache.hadoop.hbase.master.RegionPlan;
39
40 import com.google.common.collect.MinMaxPriorityQueue;
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
60 public class SimpleLoadBalancer extends BaseLoadBalancer {
61 private static final Log LOG = LogFactory.getLog(SimpleLoadBalancer.class);
62 private static final Random RANDOM = new Random(System.currentTimeMillis());
63
64 private RegionInfoComparator riComparator = new RegionInfoComparator();
65 private RegionPlan.RegionPlanComparator rpComparator = new RegionPlan.RegionPlanComparator();
66
67
68
69
70
71
72
73
74
75
76 static class BalanceInfo {
77
78 private final int nextRegionForUnload;
79 private int numRegionsAdded;
80
81 public BalanceInfo(int nextRegionForUnload, int numRegionsAdded) {
82 this.nextRegionForUnload = nextRegionForUnload;
83 this.numRegionsAdded = numRegionsAdded;
84 }
85
86 int getNextRegionForUnload() {
87 return nextRegionForUnload;
88 }
89
90 int getNumRegionsAdded() {
91 return numRegionsAdded;
92 }
93
94 void setNumRegionsAdded(int numAdded) {
95 this.numRegionsAdded = numAdded;
96 }
97 }
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184 @Override
185 public List<RegionPlan> balanceCluster(
186 Map<ServerName, List<HRegionInfo>> clusterMap) {
187 List<RegionPlan> regionsToReturn = balanceMasterRegions(clusterMap);
188 if (regionsToReturn != null || clusterMap == null || clusterMap.size() <= 1) {
189 return regionsToReturn;
190 }
191 if (masterServerName != null && clusterMap.containsKey(masterServerName)) {
192 if (clusterMap.size() <= 2) {
193 return null;
194 }
195 clusterMap = new HashMap<ServerName, List<HRegionInfo>>(clusterMap);
196 clusterMap.remove(masterServerName);
197 }
198
199 long startTime = System.currentTimeMillis();
200
201
202
203 Cluster c = new Cluster(clusterMap, null, this.regionFinder, this.rackManager);
204 if (!this.needsBalance(c)) return null;
205
206 ClusterLoadState cs = new ClusterLoadState(clusterMap);
207 int numServers = cs.getNumServers();
208 NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad = cs.getServersByLoad();
209 int numRegions = cs.getNumRegions();
210 float average = cs.getLoadAverage();
211 int max = (int)Math.ceil(average);
212 int min = (int)average;
213
214
215 StringBuilder strBalanceParam = new StringBuilder();
216 strBalanceParam.append("Balance parameter: numRegions=").append(numRegions)
217 .append(", numServers=").append(numServers).append(", max=").append(max)
218 .append(", min=").append(min);
219 LOG.debug(strBalanceParam.toString());
220
221
222
223 MinMaxPriorityQueue<RegionPlan> regionsToMove =
224 MinMaxPriorityQueue.orderedBy(rpComparator).create();
225 regionsToReturn = new ArrayList<RegionPlan>();
226
227
228 int serversOverloaded = 0;
229
230 boolean fetchFromTail = false;
231 Map<ServerName, BalanceInfo> serverBalanceInfo =
232 new TreeMap<ServerName, BalanceInfo>();
233 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server:
234 serversByLoad.descendingMap().entrySet()) {
235 ServerAndLoad sal = server.getKey();
236 int load = sal.getLoad();
237 if (load <= max) {
238 serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
239 break;
240 }
241 serversOverloaded++;
242 List<HRegionInfo> regions = server.getValue();
243 int numToOffload = Math.min(load - max, regions.size());
244
245
246 Collections.sort(regions, riComparator);
247 int numTaken = 0;
248 for (int i = 0; i <= numToOffload; ) {
249 HRegionInfo hri = regions.get(i);
250 if (fetchFromTail) {
251 hri = regions.get(regions.size() - 1 - i);
252 }
253 i++;
254
255 if (shouldBeOnMaster(hri)
256 && masterServerName.equals(sal.getServerName())) continue;
257 regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
258 numTaken++;
259 if (numTaken >= numToOffload) break;
260 }
261 serverBalanceInfo.put(sal.getServerName(),
262 new BalanceInfo(numToOffload, (-1)*numTaken));
263 }
264 int totalNumMoved = regionsToMove.size();
265
266
267 int neededRegions = 0;
268 fetchFromTail = false;
269
270 Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
271 int maxToTake = numRegions - min;
272 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server:
273 serversByLoad.entrySet()) {
274 if (maxToTake == 0) break;
275 int load = server.getKey().getLoad();
276 if (load >= min) {
277 continue;
278 }
279 int regionsToPut = min - load;
280 maxToTake -= regionsToPut;
281 underloadedServers.put(server.getKey().getServerName(), regionsToPut);
282 }
283
284 int serversUnderloaded = underloadedServers.size();
285 int incr = 1;
286 List<ServerName> sns =
287 Arrays.asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
288 Collections.shuffle(sns, RANDOM);
289 while (regionsToMove.size() > 0) {
290 int cnt = 0;
291 int i = incr > 0 ? 0 : underloadedServers.size()-1;
292 for (; i >= 0 && i < underloadedServers.size(); i += incr) {
293 if (regionsToMove.isEmpty()) break;
294 ServerName si = sns.get(i);
295 int numToTake = underloadedServers.get(si);
296 if (numToTake == 0) continue;
297
298 addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
299
300 underloadedServers.put(si, numToTake-1);
301 cnt++;
302 BalanceInfo bi = serverBalanceInfo.get(si);
303 if (bi == null) {
304 bi = new BalanceInfo(0, 0);
305 serverBalanceInfo.put(si, bi);
306 }
307 bi.setNumRegionsAdded(bi.getNumRegionsAdded()+1);
308 }
309 if (cnt == 0) break;
310
311 incr = -incr;
312 }
313 for (Integer i : underloadedServers.values()) {
314
315 neededRegions += i;
316 }
317
318
319
320 if (neededRegions == 0 && regionsToMove.isEmpty()) {
321 long endTime = System.currentTimeMillis();
322 LOG.info("Calculated a load balance in " + (endTime-startTime) + "ms. " +
323 "Moving " + totalNumMoved + " regions off of " +
324 serversOverloaded + " overloaded servers onto " +
325 serversUnderloaded + " less loaded servers");
326 return regionsToReturn;
327 }
328
329
330
331
332
333 if (neededRegions != 0) {
334
335 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
336 serversByLoad.descendingMap().entrySet()) {
337 BalanceInfo balanceInfo =
338 serverBalanceInfo.get(server.getKey().getServerName());
339 int idx =
340 balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
341 if (idx >= server.getValue().size()) break;
342 HRegionInfo region = server.getValue().get(idx);
343 if (region.isMetaRegion()) continue;
344 regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
345 totalNumMoved++;
346 if (--neededRegions == 0) {
347
348 break;
349 }
350 }
351 }
352
353
354
355
356
357 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
358 serversByLoad.entrySet()) {
359 int regionCount = server.getKey().getLoad();
360 if (regionCount >= min) break;
361 BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
362 if(balanceInfo != null) {
363 regionCount += balanceInfo.getNumRegionsAdded();
364 }
365 if(regionCount >= min) {
366 continue;
367 }
368 int numToTake = min - regionCount;
369 int numTaken = 0;
370 while(numTaken < numToTake && 0 < regionsToMove.size()) {
371 addRegionPlan(regionsToMove, fetchFromTail,
372 server.getKey().getServerName(), regionsToReturn);
373 numTaken++;
374 }
375 }
376
377
378 if (0 < regionsToMove.size()) {
379 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
380 serversByLoad.entrySet()) {
381 int regionCount = server.getKey().getLoad();
382 BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
383 if(balanceInfo != null) {
384 regionCount += balanceInfo.getNumRegionsAdded();
385 }
386 if(regionCount >= max) {
387 break;
388 }
389 addRegionPlan(regionsToMove, fetchFromTail,
390 server.getKey().getServerName(), regionsToReturn);
391 if (regionsToMove.isEmpty()) {
392 break;
393 }
394 }
395 }
396
397 long endTime = System.currentTimeMillis();
398
399 if (!regionsToMove.isEmpty() || neededRegions != 0) {
400
401 LOG.warn("regionsToMove=" + totalNumMoved +
402 ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded +
403 ", serversUnderloaded=" + serversUnderloaded);
404 StringBuilder sb = new StringBuilder();
405 for (Map.Entry<ServerName, List<HRegionInfo>> e: clusterMap.entrySet()) {
406 if (sb.length() > 0) sb.append(", ");
407 sb.append(e.getKey().toString());
408 sb.append(" ");
409 sb.append(e.getValue().size());
410 }
411 LOG.warn("Input " + sb.toString());
412 }
413
414
415 LOG.info("Done. Calculated a load balance in " + (endTime-startTime) + "ms. " +
416 "Moving " + totalNumMoved + " regions off of " +
417 serversOverloaded + " overloaded servers onto " +
418 serversUnderloaded + " less loaded servers");
419
420 return regionsToReturn;
421 }
422
423
424
425
426 private void addRegionPlan(final MinMaxPriorityQueue<RegionPlan> regionsToMove,
427 final boolean fetchFromTail, final ServerName sn, List<RegionPlan> regionsToReturn) {
428 RegionPlan rp = null;
429 if (!fetchFromTail) rp = regionsToMove.remove();
430 else rp = regionsToMove.removeLast();
431 rp.setDestination(sn);
432 regionsToReturn.add(rp);
433 }
434
435 @Override
436 public List<RegionPlan> balanceCluster(TableName tableName,
437 Map<ServerName, List<HRegionInfo>> clusterState) throws HBaseIOException {
438 return balanceCluster(clusterState);
439 }
440 }