1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.master.balancer;
19
20 import java.util.ArrayDeque;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Collection;
24 import java.util.Collections;
25 import java.util.Deque;
26 import java.util.HashMap;
27 import java.util.LinkedList;
28 import java.util.List;
29 import java.util.Map;
30 import java.util.Map.Entry;
31 import java.util.Random;
32
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.hbase.classification.InterfaceAudience;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.hbase.ClusterStatus;
38 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
39 import org.apache.hadoop.hbase.HConstants;
40 import org.apache.hadoop.hbase.HRegionInfo;
41 import org.apache.hadoop.hbase.RegionLoad;
42 import org.apache.hadoop.hbase.ServerLoad;
43 import org.apache.hadoop.hbase.ServerName;
44 import org.apache.hadoop.hbase.TableName;
45 import org.apache.hadoop.hbase.client.BalancerDecision;
46 import org.apache.hadoop.hbase.master.MasterServices;
47 import org.apache.hadoop.hbase.master.RegionPlan;
48 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action;
49 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
50 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.AssignRegionAction;
51 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.LocalityType;
52 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.MoveRegionAction;
53 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.SwapRegionsAction;
54 import org.apache.hadoop.hbase.namequeues.BalancerDecisionDetails;
55 import org.apache.hadoop.hbase.namequeues.NamedQueueRecorder;
56 import org.apache.hadoop.hbase.regionserver.compactions.OffPeakHours;
57 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
58 import org.apache.hadoop.hbase.util.ReflectionUtils;
59
60 import com.google.common.base.Optional;
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
113 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="IS2_INCONSISTENT_SYNC",
114 justification="Complaint is about costFunctions not being synchronized; not end of the world")
115 public class StochasticLoadBalancer extends BaseLoadBalancer {
116
117 protected static final String STEPS_PER_REGION_KEY =
118 "hbase.master.balancer.stochastic.stepsPerRegion";
119 protected static final String MAX_STEPS_KEY =
120 "hbase.master.balancer.stochastic.maxSteps";
121 protected static final String RUN_MAX_STEPS_KEY =
122 "hbase.master.balancer.stochastic.runMaxSteps";
123 protected static final String MAX_RUNNING_TIME_KEY =
124 "hbase.master.balancer.stochastic.maxRunningTime";
125 protected static final String KEEP_REGION_LOADS =
126 "hbase.master.balancer.stochastic.numRegionLoadsToRemember";
127 private static final String TABLE_FUNCTION_SEP = "_";
128 protected static final String MIN_COST_NEED_BALANCE_KEY =
129 "hbase.master.balancer.stochastic.minCostNeedBalance";
130 protected static final String COST_FUNCTIONS_COST_FUNCTIONS_KEY =
131 "hbase.master.balancer.stochastic.additionalCostFunctions";
132
133 private static final Random RANDOM = new Random(System.currentTimeMillis());
134 private static final Log LOG = LogFactory.getLog(StochasticLoadBalancer.class);
135
136 Map<String, Deque<RegionLoad>> loads = new HashMap<String, Deque<RegionLoad>>();
137
138
139 private int maxSteps = 1000000;
140 private boolean runMaxSteps = false;
141 private int stepsPerRegion = 800;
142 private long maxRunningTime = 30 * 1000 * 1;
143 private int numRegionLoadsToRemember = 15;
144 private float minCostNeedBalance = 0.05f;
145
146 private CandidateGenerator[] candidateGenerators;
147 private CostFromRegionLoadFunction[] regionLoadFunctions;
148 private List<CostFunction> costFunctions;
149
150
151 private Double curOverallCost = 0d;
152 private Double[] tempFunctionCosts;
153 private Double[] curFunctionCosts;
154
155
156
157 private LocalityBasedCandidateGenerator localityCandidateGenerator;
158 private ServerLocalityCostFunction localityCost;
159 private RackLocalityCostFunction rackLocalityCost;
160 private RegionReplicaHostCostFunction regionReplicaHostCostFunction;
161 private RegionReplicaRackCostFunction regionReplicaRackCostFunction;
162 private boolean isByTable = false;
163 private TableName tableName = null;
164
165
166
167
168
/**
 * Creates the balancer and hands a fresh {@link MetricsStochasticBalancer} to the base class
 * as its metrics sink.
 */
public StochasticLoadBalancer() {
  super(new MetricsStochasticBalancer());
}
172
173 @Override
174 public void onConfigurationChange(Configuration conf) {
175 setConf(conf);
176 }
177
178 @Override
179 public synchronized void setConf(Configuration conf) {
180 super.setConf(conf);
181 LOG.info("loading config");
182
183 maxSteps = conf.getInt(MAX_STEPS_KEY, maxSteps);
184
185 stepsPerRegion = conf.getInt(STEPS_PER_REGION_KEY, stepsPerRegion);
186 maxRunningTime = conf.getLong(MAX_RUNNING_TIME_KEY, maxRunningTime);
187 runMaxSteps = conf.getBoolean(RUN_MAX_STEPS_KEY, runMaxSteps);
188
189 numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember);
190 isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable);
191
192 minCostNeedBalance = conf.getFloat(MIN_COST_NEED_BALANCE_KEY, minCostNeedBalance);
193
194 if (localityCandidateGenerator == null) {
195 localityCandidateGenerator = new LocalityBasedCandidateGenerator(services);
196 }
197 localityCost = new ServerLocalityCostFunction(conf, services);
198 rackLocalityCost = new RackLocalityCostFunction(conf, services);
199
200 if (candidateGenerators == null) {
201 candidateGenerators = new CandidateGenerator[] {
202 new RandomCandidateGenerator(),
203 new LoadCandidateGenerator(),
204 localityCandidateGenerator,
205 new RegionReplicaRackCandidateGenerator(),
206 };
207 }
208
209 regionLoadFunctions = new CostFromRegionLoadFunction[] {
210 new ReadRequestCostFunction(conf),
211 new WriteRequestCostFunction(conf),
212 new MemstoreSizeCostFunction(conf),
213 new StoreFileCostFunction(conf)
214 };
215
216 regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
217 regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
218
219 costFunctions = new ArrayList<>();
220 addCostFunction(new RegionCountSkewCostFunction(conf));
221 addCostFunction(new PrimaryRegionCountSkewCostFunction(conf));
222 addCostFunction(new MoveCostFunction(conf));
223 addCostFunction(localityCost);
224 addCostFunction(rackLocalityCost);
225 addCostFunction(new TableSkewCostFunction(conf));
226 addCostFunction(regionReplicaHostCostFunction);
227 addCostFunction(regionReplicaRackCostFunction);
228 addCostFunction(regionLoadFunctions[0]);
229 addCostFunction(regionLoadFunctions[1]);
230 addCostFunction(regionLoadFunctions[2]);
231 addCostFunction(regionLoadFunctions[3]);
232 loadCustomCostFunctions(conf);
233
234 curFunctionCosts = new Double[costFunctions.size()];
235 tempFunctionCosts = new Double[costFunctions.size()];
236
237 boolean isBalancerDecisionRecording = getConf()
238 .getBoolean(BaseLoadBalancer.BALANCER_DECISION_BUFFER_ENABLED,
239 BaseLoadBalancer.DEFAULT_BALANCER_DECISION_BUFFER_ENABLED);
240 if (this.namedQueueRecorder == null && isBalancerDecisionRecording) {
241 this.namedQueueRecorder = NamedQueueRecorder.getInstance(getConf());
242 }
243
244 LOG.info("Loaded config; maxSteps=" + maxSteps + ", stepsPerRegion=" + stepsPerRegion +
245 ", maxRunningTime=" + maxRunningTime + ", isByTable=" + isByTable + ", etc." +
246 ", maxRunningTime=" + maxRunningTime + ", isByTable=" + isByTable + ", CostFunctions=" +
247 Arrays.toString(getCostFunctionNames()) + " etc.");
248 }
249
250 private void loadCustomCostFunctions(Configuration conf) {
251 String[] functionsNames = conf.getStrings(COST_FUNCTIONS_COST_FUNCTIONS_KEY);
252
253 if (null == functionsNames) {
254 return;
255 }
256
257 for(String functionName: functionsNames) {
258
259 Class<? extends CostFunction> klass = null;
260 try {
261 klass = (Class<? extends CostFunction>) Class.forName(functionName);
262 if (klass == null) {
263 continue;
264 }
265 CostFunction reflected = ReflectionUtils.newInstance(klass, conf);
266
267 LOG.info("Successfully loaded custom CostFunction '" +
268 reflected.getClass().getSimpleName() + "'");
269
270 this.costFunctions.add(reflected);
271 } catch (ClassNotFoundException e) {
272 LOG.warn("Cannot load class " + functionName + "': " + e.getMessage());
273 }
274 }
275 }
276
277 @Override
278 protected void setSlop(Configuration conf) {
279 this.slop = conf.getFloat("hbase.regions.slop", 0.001F);
280 }
281
282 @Override
283 public synchronized void setClusterStatus(ClusterStatus st) {
284 super.setClusterStatus(st);
285 updateRegionLoad();
286 for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
287 cost.setClusterStatus(st);
288 }
289
290
291 try {
292
293 int tablesCount = isByTable ? services.getTableDescriptors().getAll().size() : 1;
294 int functionsCount = getCostFunctionNames().length;
295
296 updateMetricsSize(tablesCount * (functionsCount + 1));
297 } catch (Exception e) {
298 LOG.error("failed to get the size of all tables", e);
299 }
300 }
301
302
303
304
305 public void updateMetricsSize(int size) {
306 if (metricsBalancer instanceof MetricsStochasticBalancer) {
307 ((MetricsStochasticBalancer) metricsBalancer).updateMetricsSize(size);
308 }
309 }
310
311 @Override
312 public synchronized void setMasterServices(MasterServices masterServices) {
313 super.setMasterServices(masterServices);
314 this.localityCost.setServices(masterServices);
315 this.rackLocalityCost.setServices(masterServices);
316 this.localityCandidateGenerator.setServices(masterServices);
317 }
318
319 @Override
320 protected synchronized boolean areSomeRegionReplicasColocated(Cluster c) {
321 regionReplicaHostCostFunction.init(c);
322 if (regionReplicaHostCostFunction.cost() > 0) return true;
323 regionReplicaRackCostFunction.init(c);
324 if (regionReplicaRackCostFunction.cost() > 0) return true;
325 return false;
326 }
327
328 @Override
329 protected boolean needsBalance(Cluster cluster) {
330 ClusterLoadState cs = new ClusterLoadState(cluster.clusterState);
331 if (cs.getNumServers() < MIN_SERVER_BALANCE) {
332 if (LOG.isDebugEnabled()) {
333 LOG.debug("Not running balancer because only " + cs.getNumServers()
334 + " active regionserver(s)");
335 }
336 return false;
337 }
338 if (areSomeRegionReplicasColocated(cluster)) {
339 return true;
340 }
341
342 if (idleRegionServerExist(cluster)){
343 return true;
344 }
345
346 double total = 0.0;
347 float sumMultiplier = 0.0f;
348 for (CostFunction c : costFunctions) {
349 float multiplier = c.getMultiplier();
350 if (multiplier <= 0) {
351 if (LOG.isTraceEnabled()) {
352 LOG.trace(c.getClass().getSimpleName() + " not needed because multiplier is <= 0");
353 }
354 continue;
355 }
356 if (!c.isNeeded()) {
357 if (LOG.isTraceEnabled()) {
358 LOG.trace(c.getClass().getSimpleName() + " not needed");
359 }
360 continue;
361 }
362 sumMultiplier += multiplier;
363 total += c.cost() * multiplier;
364 }
365
366 boolean balanced = total <= 0 || sumMultiplier <= 0 ||
367 (sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance);
368 if (LOG.isDebugEnabled()) {
369 LOG.debug(
370 (balanced ? "Skipping load balancing because balanced" : "We need to load balance") +
371 " " + (isByTable ? String.format("table (%s)", tableName) : "cluster") +
372 "; total cost=" + total + ", sum multiplier=" + sumMultiplier + "; cost/multiplier to " +
373 "need a balance is " + minCostNeedBalance);
374 if (LOG.isTraceEnabled()) {
375 LOG.trace("Balance decision detailed function costs=" + functionCost());
376 }
377 }
378 return !balanced;
379 }
380
381 @Override
382 public synchronized List<RegionPlan> balanceCluster(TableName tableName, Map<ServerName,
383 List<HRegionInfo>> clusterState) {
384 this.tableName = tableName;
385 return balanceCluster(clusterState);
386 }
387
388 @InterfaceAudience.Private
389 Cluster.Action nextAction(Cluster cluster) {
390 return candidateGenerators[(RANDOM.nextInt(candidateGenerators.length))]
391 .generate(cluster);
392 }
393
394
395
396
397
398 @Override
399 public synchronized List<RegionPlan> balanceCluster(Map<ServerName,
400 List<HRegionInfo>> clusterState) {
401 List<RegionPlan> plans = balanceMasterRegions(clusterState);
402 if (plans != null || clusterState == null || clusterState.size() <= 1) {
403 return plans;
404 }
405
406 if (masterServerName != null && clusterState.containsKey(masterServerName)) {
407 if (clusterState.size() <= 2) {
408 return null;
409 }
410 clusterState = new HashMap<ServerName, List<HRegionInfo>>(clusterState);
411 clusterState.remove(masterServerName);
412 }
413
414
415
416
417
418 RegionLocationFinder finder = null;
419 if ((this.localityCost != null && this.localityCost.getMultiplier() > 0)
420 || (this.rackLocalityCost != null && this.rackLocalityCost.getMultiplier() > 0)) {
421 finder = this.regionFinder;
422 }
423
424
425
426
427 Cluster cluster = new Cluster(clusterState, loads, finder, rackManager);
428
429 long startTime = EnvironmentEdgeManager.currentTime();
430
431 initCosts(cluster);
432
433 if (!needsBalance(cluster)) {
434 return null;
435 }
436
437 double currentCost = computeCost(cluster, Double.MAX_VALUE);
438 curOverallCost = currentCost;
439 System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, this.curFunctionCosts.length);
440
441 double initCost = currentCost;
442 double newCost;
443
444 long computedMaxSteps = 0;
445 if (runMaxSteps) {
446 computedMaxSteps = Math.max(this.maxSteps,
447 ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers));
448 } else {
449 long calculatedMaxSteps =
450 (long) cluster.numRegions * (long) this.stepsPerRegion * (long) cluster.numServers;
451 computedMaxSteps = Math.min(this.maxSteps, calculatedMaxSteps);
452 if (calculatedMaxSteps > maxSteps) {
453 LOG.warn(String.format("calculatedMaxSteps:%d for loadbalancer's stochastic walk is larger "
454 + "than maxSteps:%d. Hence load balancing may not work well. Setting parameter "
455 + "\"hbase.master.balancer.stochastic.runMaxSteps\" to true to overcome this issue."
456 + "(This config change does not require service restart)", calculatedMaxSteps,
457 maxSteps));
458
459 }
460 }
461 LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost + ", functionCost="
462 + functionCost() + " computedMaxSteps: " + computedMaxSteps);
463
464 final String initFunctionTotalCosts = totalCostsPerFunc();
465
466 long step;
467
468 for (step = 0; step < computedMaxSteps; step++) {
469 Cluster.Action action = nextAction(cluster);
470
471 if (action.type == Type.NULL) {
472 continue;
473 }
474
475 cluster.doAction(action);
476 updateCostsWithAction(cluster, action);
477
478 newCost = computeCost(cluster, currentCost);
479
480
481 if (newCost < currentCost) {
482 currentCost = newCost;
483
484
485 curOverallCost = currentCost;
486 System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, this.curFunctionCosts.length);
487 } else {
488
489
490 Action undoAction = action.undoAction();
491 cluster.doAction(undoAction);
492 updateCostsWithAction(cluster, undoAction);
493 }
494
495 if (EnvironmentEdgeManager.currentTime() - startTime >
496 maxRunningTime) {
497 break;
498 }
499 }
500 long endTime = EnvironmentEdgeManager.currentTime();
501
502 metricsBalancer.balanceCluster(endTime - startTime);
503
504
505 updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
506 if (initCost > currentCost) {
507 plans = createRegionPlans(cluster);
508 if (LOG.isDebugEnabled()) {
509 LOG.debug("Finished computing new load balance plan. Computation took "
510 + (endTime - startTime) + "ms to try " + step
511 + " different iterations. Found a solution that moves "
512 + plans.size() + " regions; Going from a computed cost of "
513 + initCost + " to a new cost of " + currentCost);
514 }
515 sendRegionPlansToRingBuffer(plans, currentCost, initCost, initFunctionTotalCosts, step);
516 return plans;
517 }
518 if (LOG.isDebugEnabled()) {
519 LOG.debug("Could not find a better load balance plan. Tried "
520 + step + " different configurations in " + (endTime - startTime)
521 + "ms, and did not find anything with a computed cost less than " + initCost);
522 }
523 return null;
524 }
525
526 private void sendRegionPlansToRingBuffer(List<RegionPlan> plans, double currentCost,
527 double initCost, String initFunctionTotalCosts, long step) {
528 if (this.namedQueueRecorder != null) {
529 List<String> regionPlans = new ArrayList<>();
530 for (RegionPlan plan : plans) {
531 regionPlans.add(
532 "table: " + plan.getRegionInfo().getTable() + " , region: " + plan.getRegionName()
533 + " , source: " + plan.getSource() + " , destination: " + plan.getDestination());
534 }
535 BalancerDecision balancerDecision =
536 new BalancerDecision.Builder()
537 .setInitTotalCost(initCost)
538 .setInitialFunctionCosts(initFunctionTotalCosts)
539 .setComputedTotalCost(currentCost)
540 .setFinalFunctionCosts(totalCostsPerFunc())
541 .setComputedSteps(step)
542 .setRegionPlans(regionPlans).build();
543 namedQueueRecorder.addRecord(new BalancerDecisionDetails(balancerDecision));
544 }
545 }
546
547
548
549
550 private void updateStochasticCosts(TableName tableName, Double overall, Double[] subCosts) {
551 if (tableName == null) return;
552
553
554 if (metricsBalancer instanceof MetricsStochasticBalancer) {
555 MetricsStochasticBalancer balancer = (MetricsStochasticBalancer) metricsBalancer;
556
557 balancer.updateStochasticCost(tableName.getNameAsString(),
558 "Overall", "Overall cost", overall);
559
560
561 for (int i = 0; i < costFunctions.size(); i++) {
562 CostFunction costFunction = costFunctions.get(i);
563 String costFunctionName = costFunction.getClass().getSimpleName();
564 Double costPercent = (overall == 0) ? 0 : (subCosts[i] / overall);
565
566 balancer.updateStochasticCost(tableName.getNameAsString(), costFunctionName,
567 "The percent of " + costFunctionName, costPercent);
568 }
569 }
570 }
571
572 private void addCostFunction(CostFunction costFunction) {
573 if (costFunction.getMultiplier() > 0) {
574 costFunctions.add(costFunction);
575 }
576 }
577
578 private String functionCost() {
579 StringBuilder builder = new StringBuilder();
580 for (CostFunction c:costFunctions) {
581 builder.append(c.getClass().getSimpleName());
582 builder.append(" : (");
583 builder.append(c.getMultiplier());
584 builder.append(", ");
585 builder.append(c.cost());
586 builder.append("); ");
587 }
588 return builder.toString();
589 }
590
591 private String totalCostsPerFunc() {
592 StringBuilder builder = new StringBuilder();
593 for (CostFunction c : costFunctions) {
594 if (c.getMultiplier() * c.cost() > 0.0) {
595 builder.append(" ");
596 builder.append(c.getClass().getSimpleName());
597 builder.append(" : ");
598 builder.append(c.getMultiplier() * c.cost());
599 builder.append(";");
600 }
601 }
602 if (builder.length() > 0) {
603 builder.deleteCharAt(builder.length() - 1);
604 }
605 return builder.toString();
606 }
607
608
609
610
611
612
613
614
615 private List<RegionPlan> createRegionPlans(Cluster cluster) {
616 List<RegionPlan> plans = new LinkedList<RegionPlan>();
617 for (int regionIndex = 0;
618 regionIndex < cluster.regionIndexToServerIndex.length; regionIndex++) {
619 int initialServerIndex = cluster.initialRegionIndexToServerIndex[regionIndex];
620 int newServerIndex = cluster.regionIndexToServerIndex[regionIndex];
621
622 if (initialServerIndex != newServerIndex) {
623 HRegionInfo region = cluster.regions[regionIndex];
624 ServerName initialServer = cluster.servers[initialServerIndex];
625 ServerName newServer = cluster.servers[newServerIndex];
626
627 if (LOG.isTraceEnabled()) {
628 LOG.trace("Moving Region " + region.getEncodedName() + " from server "
629 + initialServer.getHostname() + " to " + newServer.getHostname());
630 }
631 RegionPlan rp = new RegionPlan(region, initialServer, newServer);
632 plans.add(rp);
633 }
634 }
635 return plans;
636 }
637
638
639
640
641 private synchronized void updateRegionLoad() {
642
643
644 Map<String, Deque<RegionLoad>> oldLoads = loads;
645 loads = new HashMap<String, Deque<RegionLoad>>();
646
647 for (ServerName sn : clusterStatus.getServers()) {
648 ServerLoad sl = clusterStatus.getLoad(sn);
649 if (sl == null) {
650 continue;
651 }
652 for (Entry<byte[], RegionLoad> entry : sl.getRegionsLoad().entrySet()) {
653 String regionNameAsString = HRegionInfo.getRegionNameAsString(entry.getKey());
654 Deque<RegionLoad> rLoads = oldLoads.get(regionNameAsString);
655 if (rLoads == null) {
656
657 rLoads = new ArrayDeque<RegionLoad>();
658 } else if (rLoads.size() >= numRegionLoadsToRemember) {
659 rLoads.remove();
660 }
661 rLoads.add(entry.getValue());
662 loads.put(regionNameAsString, rLoads);
663 }
664 }
665
666 for(CostFromRegionLoadFunction cost : regionLoadFunctions) {
667 cost.setLoads(loads);
668 }
669 }
670
671 protected void initCosts(Cluster cluster) {
672 for (CostFunction c:costFunctions) {
673 c.init(cluster);
674 }
675 }
676
677 protected void updateCostsWithAction(Cluster cluster, Action action) {
678 for (CostFunction c : costFunctions) {
679 c.postAction(action);
680 }
681 }
682
683
684
685
686 public String[] getCostFunctionNames() {
687 if (costFunctions == null) return null;
688 String[] ret = new String[costFunctions.size()];
689 for (int i = 0; i < costFunctions.size(); i++) {
690 CostFunction c = costFunctions.get(i);
691 ret[i] = c.getClass().getSimpleName();
692 }
693
694 return ret;
695 }
696
697
698
699
700
701
702
703
704
705
706 protected double computeCost(Cluster cluster, double previousCost) {
707 double total = 0;
708
709 for (int i = 0; i < costFunctions.size(); i++) {
710 CostFunction c = costFunctions.get(i);
711 this.tempFunctionCosts[i] = 0.0;
712
713 if (c.getMultiplier() <= 0) {
714 continue;
715 }
716
717 Float multiplier = c.getMultiplier();
718 Double cost = c.cost();
719
720 this.tempFunctionCosts[i] = multiplier*cost;
721 total += this.tempFunctionCosts[i];
722
723 if (total > previousCost) {
724 break;
725 }
726 }
727
728 return total;
729 }
730
731
732 abstract static class CandidateGenerator {
733 abstract Cluster.Action generate(Cluster cluster);
734
735
736
737
738
739
740
741
742
743
744
745
746
747 protected int pickRandomRegion(Cluster cluster, int server, double chanceOfNoSwap) {
748
749 if (cluster.regionsPerServer[server].length == 0 || RANDOM.nextFloat() < chanceOfNoSwap) {
750
751 return -1;
752 }
753 int rand = RANDOM.nextInt(cluster.regionsPerServer[server].length);
754 return cluster.regionsPerServer[server][rand];
755
756 }
757 protected int pickRandomServer(Cluster cluster) {
758 if (cluster.numServers < 1) {
759 return -1;
760 }
761
762 return RANDOM.nextInt(cluster.numServers);
763 }
764
765 protected int pickRandomRack(Cluster cluster) {
766 if (cluster.numRacks < 1) {
767 return -1;
768 }
769
770 return RANDOM.nextInt(cluster.numRacks);
771 }
772
773 protected int pickOtherRandomServer(Cluster cluster, int serverIndex) {
774 if (cluster.numServers < 2) {
775 return -1;
776 }
777 while (true) {
778 int otherServerIndex = pickRandomServer(cluster);
779 if (otherServerIndex != serverIndex) {
780 return otherServerIndex;
781 }
782 }
783 }
784
785 protected int pickOtherRandomRack(Cluster cluster, int rackIndex) {
786 if (cluster.numRacks < 2) {
787 return -1;
788 }
789 while (true) {
790 int otherRackIndex = pickRandomRack(cluster);
791 if (otherRackIndex != rackIndex) {
792 return otherRackIndex;
793 }
794 }
795 }
796
797 protected Cluster.Action pickRandomRegions(Cluster cluster,
798 int thisServer,
799 int otherServer) {
800 if (thisServer < 0 || otherServer < 0) {
801 return Cluster.NullAction;
802 }
803
804
805 int thisRegionCount = cluster.getNumRegions(thisServer);
806 int otherRegionCount = cluster.getNumRegions(otherServer);
807
808
809 double thisChance = (thisRegionCount > otherRegionCount) ? 0 : 0.5;
810 double otherChance = (thisRegionCount <= otherRegionCount) ? 0 : 0.5;
811
812 int thisRegion = pickRandomRegion(cluster, thisServer, thisChance);
813 int otherRegion = pickRandomRegion(cluster, otherServer, otherChance);
814
815 return getAction(thisServer, thisRegion, otherServer, otherRegion);
816 }
817
818 protected Cluster.Action getAction(int fromServer, int fromRegion,
819 int toServer, int toRegion) {
820 if (fromServer < 0 || toServer < 0) {
821 return Cluster.NullAction;
822 }
823 if (fromRegion > 0 && toRegion > 0) {
824 return new Cluster.SwapRegionsAction(fromServer, fromRegion,
825 toServer, toRegion);
826 } else if (fromRegion > 0) {
827 return new Cluster.MoveRegionAction(fromRegion, fromServer, toServer);
828 } else if (toRegion > 0) {
829 return new Cluster.MoveRegionAction(toRegion, toServer, fromServer);
830 } else {
831 return Cluster.NullAction;
832 }
833 }
834
835
836
837
838 protected List<Integer> getRandomIterationOrder(int length) {
839 ArrayList<Integer> order = new ArrayList<>(length);
840 for (int i = 0; i < length; i++) {
841 order.add(i);
842 }
843 Collections.shuffle(order);
844 return order;
845 }
846 }
847
848 static class RandomCandidateGenerator extends CandidateGenerator {
849
850 @Override
851 Cluster.Action generate(Cluster cluster) {
852
853 int thisServer = pickRandomServer(cluster);
854
855
856 int otherServer = pickOtherRandomServer(cluster, thisServer);
857
858 return pickRandomRegions(cluster, thisServer, otherServer);
859 }
860 }
861
862 static class LoadCandidateGenerator extends CandidateGenerator {
863
864 @Override
865 Cluster.Action generate(Cluster cluster) {
866 cluster.sortServersByRegionCount();
867 int thisServer = pickMostLoadedServer(cluster, -1);
868 int otherServer = pickLeastLoadedServer(cluster, thisServer);
869
870 return pickRandomRegions(cluster, thisServer, otherServer);
871 }
872
873 private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
874 Integer[] servers = cluster.serverIndicesSortedByRegionCount;
875
876 int index = 0;
877 while (servers[index] == null || servers[index] == thisServer) {
878 index++;
879 if (index == servers.length) {
880 return -1;
881 }
882 }
883 return servers[index];
884 }
885
886 private int pickMostLoadedServer(final Cluster cluster, int thisServer) {
887 Integer[] servers = cluster.serverIndicesSortedByRegionCount;
888
889 int index = servers.length - 1;
890 while (servers[index] == null || servers[index] == thisServer) {
891 index--;
892 if (index < 0) {
893 return -1;
894 }
895 }
896 return servers[index];
897 }
898 }
899
900 static class LocalityBasedCandidateGenerator extends CandidateGenerator {
901
902 private MasterServices masterServices;
903
904 LocalityBasedCandidateGenerator(MasterServices masterServices) {
905 this.masterServices = masterServices;
906 }
907
908 @Override
909 Cluster.Action generate(Cluster cluster) {
910 if (this.masterServices == null) {
911 int thisServer = pickRandomServer(cluster);
912
913 int otherServer = pickOtherRandomServer(cluster, thisServer);
914 return pickRandomRegions(cluster, thisServer, otherServer);
915 }
916
917
918 for (int region : getRandomIterationOrder(cluster.numRegions)) {
919 int currentServer = cluster.regionIndexToServerIndex[region];
920 if (currentServer != cluster.getOrComputeRegionsToMostLocalEntities(LocalityType.SERVER)[region]) {
921 Optional<Action> potential = tryMoveOrSwap(
922 cluster,
923 currentServer,
924 region,
925 cluster.getOrComputeRegionsToMostLocalEntities(LocalityType.SERVER)[region]
926 );
927 if (potential.isPresent()) {
928 return potential.get();
929 }
930 }
931 }
932 return Cluster.NullAction;
933 }
934
935
936
937
938
939 private Optional<Action> tryMoveOrSwap(Cluster cluster,
940 int fromServer,
941 int fromRegion,
942 int toServer) {
943
944 if (cluster.serverHasTooFewRegions(toServer)) {
945 return Optional.of(getAction(fromServer, fromRegion, toServer, -1));
946 }
947
948
949 double fromRegionLocalityDelta =
950 getWeightedLocality(cluster, fromRegion, toServer) - getWeightedLocality(cluster, fromRegion, fromServer);
951 for (int toRegionIndex : getRandomIterationOrder(cluster.regionsPerServer[toServer].length)) {
952 int toRegion = cluster.regionsPerServer[toServer][toRegionIndex];
953 double toRegionLocalityDelta =
954 getWeightedLocality(cluster, toRegion, fromServer) - getWeightedLocality(cluster, toRegion, toServer);
955
956 if (fromRegionLocalityDelta + toRegionLocalityDelta >= 0) {
957 return Optional.of(getAction(fromServer, fromRegion, toServer, toRegion));
958 }
959 }
960
961 return Optional.absent();
962 }
963
964 private double getWeightedLocality(Cluster cluster, int region, int server) {
965 return cluster.getOrComputeWeightedLocality(region, server, LocalityType.SERVER);
966 }
967
968 void setServices(MasterServices services) {
969 this.masterServices = services;
970 }
971 }
972
973
974
975
976
977 static class RegionReplicaCandidateGenerator extends CandidateGenerator {
978
979 RandomCandidateGenerator randomGenerator = new RandomCandidateGenerator();
980
981
982
983
984
985
986
987
988
989
990 int selectCoHostedRegionPerGroup(int[] primariesOfRegionsPerGroup, int[] regionsPerGroup
991 , int[] regionIndexToPrimaryIndex) {
992 int currentPrimary = -1;
993 int currentPrimaryIndex = -1;
994 int selectedPrimaryIndex = -1;
995 double currentLargestRandom = -1;
996
997
998
999 for (int j = 0; j <= primariesOfRegionsPerGroup.length; j++) {
1000 int primary = j < primariesOfRegionsPerGroup.length
1001 ? primariesOfRegionsPerGroup[j] : -1;
1002 if (primary != currentPrimary) {
1003 int numReplicas = j - currentPrimaryIndex;
1004 if (numReplicas > 1) {
1005
1006 double currentRandom = RANDOM.nextDouble();
1007
1008
1009 if (currentRandom > currentLargestRandom) {
1010 selectedPrimaryIndex = currentPrimary;
1011 currentLargestRandom = currentRandom;
1012 }
1013 }
1014 currentPrimary = primary;
1015 currentPrimaryIndex = j;
1016 }
1017 }
1018
1019
1020
1021 for (int j = 0; j < regionsPerGroup.length; j++) {
1022 int regionIndex = regionsPerGroup[j];
1023 if (selectedPrimaryIndex == regionIndexToPrimaryIndex[regionIndex]) {
1024
1025 if (selectedPrimaryIndex != regionIndex) {
1026 return regionIndex;
1027 }
1028 }
1029 }
1030 return -1;
1031 }
1032
1033 @Override
1034 Cluster.Action generate(Cluster cluster) {
1035 int serverIndex = pickRandomServer(cluster);
1036 if (cluster.numServers <= 1 || serverIndex == -1) {
1037 return Cluster.NullAction;
1038 }
1039
1040 int regionIndex = selectCoHostedRegionPerGroup(
1041 cluster.primariesOfRegionsPerServer[serverIndex],
1042 cluster.regionsPerServer[serverIndex],
1043 cluster.regionIndexToPrimaryIndex);
1044
1045
1046 if (regionIndex == -1) {
1047
1048 return randomGenerator.generate(cluster);
1049 }
1050
1051 int toServerIndex = pickOtherRandomServer(cluster, serverIndex);
1052 int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
1053 return getAction(serverIndex, regionIndex, toServerIndex, toRegionIndex);
1054 }
1055 }
1056
1057
1058
1059
1060
1061 static class RegionReplicaRackCandidateGenerator extends RegionReplicaCandidateGenerator {
1062 @Override
1063 Cluster.Action generate(Cluster cluster) {
1064 int rackIndex = pickRandomRack(cluster);
1065 if (cluster.numRacks <= 1 || rackIndex == -1) {
1066 return super.generate(cluster);
1067 }
1068
1069 int regionIndex = selectCoHostedRegionPerGroup(
1070 cluster.primariesOfRegionsPerRack[rackIndex],
1071 cluster.regionsPerRack[rackIndex],
1072 cluster.regionIndexToPrimaryIndex);
1073
1074
1075 if (regionIndex == -1) {
1076
1077 return randomGenerator.generate(cluster);
1078 }
1079
1080 int serverIndex = cluster.regionIndexToServerIndex[regionIndex];
1081 int toRackIndex = pickOtherRandomRack(cluster, rackIndex);
1082
1083 int rand = RANDOM.nextInt(cluster.serversPerRack[toRackIndex].length);
1084 int toServerIndex = cluster.serversPerRack[toRackIndex][rand];
1085 int toRegionIndex = pickRandomRegion(cluster, toServerIndex, 0.9f);
1086 return getAction(serverIndex, regionIndex, toServerIndex, toRegionIndex);
1087 }
1088 }
1089
1090
1091
1092
1093 public abstract static class CostFunction {
1094
1095 private float multiplier = 0;
1096
1097 protected Cluster cluster;
1098
1099 public CostFunction(Configuration c) {
1100 }
1101
1102 boolean isNeeded() {
1103 return true;
1104 }
1105 float getMultiplier() {
1106 return multiplier;
1107 }
1108
1109 void setMultiplier(float m) {
1110 this.multiplier = m;
1111 }
1112
1113
1114
1115
1116 void init(Cluster cluster) {
1117 this.cluster = cluster;
1118 }
1119
1120
1121
1122
1123
1124 void postAction(Action action) {
1125 switch (action.type) {
1126 case NULL: break;
1127 case ASSIGN_REGION:
1128 AssignRegionAction ar = (AssignRegionAction) action;
1129 regionMoved(ar.region, -1, ar.server);
1130 break;
1131 case MOVE_REGION:
1132 MoveRegionAction mra = (MoveRegionAction) action;
1133 regionMoved(mra.region, mra.fromServer, mra.toServer);
1134 break;
1135 case SWAP_REGIONS:
1136 SwapRegionsAction a = (SwapRegionsAction) action;
1137 regionMoved(a.fromRegion, a.fromServer, a.toServer);
1138 regionMoved(a.toRegion, a.toServer, a.fromServer);
1139 break;
1140 default:
1141 throw new RuntimeException("Uknown action:" + action.type);
1142 }
1143 }
1144
1145 protected void regionMoved(int region, int oldServer, int newServer) {
1146 }
1147
1148 protected abstract double cost();
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158 protected double costFromArray(double[] stats) {
1159 double totalCost = 0;
1160 double total = getSum(stats);
1161
1162 double count = stats.length;
1163 double mean = total/count;
1164
1165
1166
1167 double max = ((count - 1) * mean) + (total - mean);
1168
1169
1170 double min;
1171 if (count > total) {
1172 min = ((count - total) * mean) + ((1 - mean) * total);
1173 } else {
1174
1175 int numHigh = (int) (total - (Math.floor(mean) * count));
1176 int numLow = (int) (count - numHigh);
1177
1178 min = (numHigh * (Math.ceil(mean) - mean)) + (numLow * (mean - Math.floor(mean)));
1179
1180 }
1181 min = Math.max(0, min);
1182 for (int i=0; i<stats.length; i++) {
1183 double n = stats[i];
1184 double diff = Math.abs(mean - n);
1185 totalCost += diff;
1186 }
1187
1188 double scaled = scale(min, max, totalCost);
1189 return scaled;
1190 }
1191
1192 private double getSum(double[] stats) {
1193 double total = 0;
1194 for(double s:stats) {
1195 total += s;
1196 }
1197 return total;
1198 }
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208 protected double scale(double min, double max, double value) {
1209 if (max <= min || value <= min) {
1210 return 0;
1211 }
1212 if ((max - min) == 0) return 0;
1213
1214 return Math.max(0d, Math.min(1d, (value - min) / (max - min)));
1215 }
1216 }
1217
1218
1219
1220
1221
1222 static class MoveCostFunction extends CostFunction {
1223 private static final String MOVE_COST_KEY = "hbase.master.balancer.stochastic.moveCost";
1224 private static final String MOVE_COST_OFFPEAK_KEY =
1225 "hbase.master.balancer.stochastic.moveCost.offpeak";
1226 private static final String MAX_MOVES_PERCENT_KEY =
1227 "hbase.master.balancer.stochastic.maxMovePercent";
1228 static final float DEFAULT_MOVE_COST = 7;
1229 static final float DEFAULT_MOVE_COST_OFFPEAK = 3;
1230 private static final int DEFAULT_MAX_MOVES = 600;
1231 private static final float DEFAULT_MAX_MOVE_PERCENT = 0.25f;
1232
1233 private final float maxMovesPercent;
1234 private final Configuration conf;
1235
1236 MoveCostFunction(Configuration conf) {
1237 super(conf);
1238 this.conf = conf;
1239
1240 maxMovesPercent = conf.getFloat(MAX_MOVES_PERCENT_KEY, DEFAULT_MAX_MOVE_PERCENT);
1241
1242
1243
1244 this.setMultiplier(conf.getFloat(MOVE_COST_KEY, DEFAULT_MOVE_COST));
1245 }
1246
1247 @Override
1248 protected double cost() {
1249
1250
1251 if (OffPeakHours.getInstance(conf).isOffPeakHour()) {
1252 this.setMultiplier(conf.getFloat(MOVE_COST_OFFPEAK_KEY, DEFAULT_MOVE_COST_OFFPEAK));
1253 } else {
1254 this.setMultiplier(conf.getFloat(MOVE_COST_KEY, DEFAULT_MOVE_COST));
1255 }
1256
1257 int maxMoves = Math.max((int) (cluster.numRegions * maxMovesPercent),
1258 DEFAULT_MAX_MOVES);
1259
1260 double moveCost = cluster.numMovedRegions;
1261
1262
1263
1264 if (moveCost > maxMoves) {
1265 return 1000000;
1266 }
1267
1268 return scale(0, Math.min(cluster.numRegions, maxMoves), moveCost);
1269 }
1270 }
1271
1272
1273
1274
1275
1276 static class RegionCountSkewCostFunction extends CostFunction {
1277 static final String REGION_COUNT_SKEW_COST_KEY =
1278 "hbase.master.balancer.stochastic.regionCountCost";
1279 static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
1280
1281 private double[] stats = null;
1282
1283 RegionCountSkewCostFunction(Configuration conf) {
1284 super(conf);
1285
1286 this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
1287 }
1288
1289 @Override
1290 void init(Cluster cluster) {
1291 super.init(cluster);
1292 LOG.debug(getClass().getSimpleName() + " sees a total of " + cluster.numServers +
1293 " servers and " + cluster.numRegions + " regions.");
1294 if (LOG.isTraceEnabled()) {
1295 for (int i =0; i < cluster.numServers; i++) {
1296 LOG.trace(getClass().getSimpleName() + " sees server '" + cluster.servers[i] +
1297 "' has " + cluster.regionsPerServer[i].length + " regions");
1298 }
1299 }
1300 }
1301
1302 @Override
1303 protected double cost() {
1304 if (stats == null || stats.length != cluster.numServers) {
1305 stats = new double[cluster.numServers];
1306 }
1307 for (int i =0; i < cluster.numServers; i++) {
1308 stats[i] = cluster.regionsPerServer[i].length;
1309 }
1310 return costFromArray(stats);
1311 }
1312 }
1313
1314
1315
1316
1317
1318 static class PrimaryRegionCountSkewCostFunction extends CostFunction {
1319 private static final String PRIMARY_REGION_COUNT_SKEW_COST_KEY =
1320 "hbase.master.balancer.stochastic.primaryRegionCountCost";
1321 private static final float DEFAULT_PRIMARY_REGION_COUNT_SKEW_COST = 500;
1322
1323 private double[] stats = null;
1324
1325 PrimaryRegionCountSkewCostFunction(Configuration conf) {
1326 super(conf);
1327
1328 this.setMultiplier(conf.getFloat(PRIMARY_REGION_COUNT_SKEW_COST_KEY,
1329 DEFAULT_PRIMARY_REGION_COUNT_SKEW_COST));
1330 }
1331
1332 @Override
1333 boolean isNeeded() {
1334 return cluster.hasRegionReplicas;
1335 }
1336
1337 @Override
1338 protected double cost() {
1339 if (!cluster.hasRegionReplicas) {
1340 return 0;
1341 }
1342 if (stats == null || stats.length != cluster.numServers) {
1343 stats = new double[cluster.numServers];
1344 }
1345
1346 for (int i =0; i < cluster.numServers; i++) {
1347 stats[i] = 0;
1348 for (int regionIdx : cluster.regionsPerServer[i]) {
1349 if (regionIdx == cluster.regionIndexToPrimaryIndex[regionIdx]) {
1350 stats[i] ++;
1351 }
1352 }
1353 }
1354
1355 return costFromArray(stats);
1356 }
1357 }
1358
1359
1360
1361
1362
1363 static class TableSkewCostFunction extends CostFunction {
1364
1365 private static final String TABLE_SKEW_COST_KEY =
1366 "hbase.master.balancer.stochastic.tableSkewCost";
1367 private static final float DEFAULT_TABLE_SKEW_COST = 35;
1368
1369 TableSkewCostFunction(Configuration conf) {
1370 super(conf);
1371 this.setMultiplier(conf.getFloat(TABLE_SKEW_COST_KEY, DEFAULT_TABLE_SKEW_COST));
1372 }
1373
1374 @Override
1375 protected double cost() {
1376 double max = cluster.numRegions;
1377 double min = ((double) cluster.numRegions) / cluster.numServers;
1378 double value = 0;
1379
1380 for (int i = 0; i < cluster.numMaxRegionsPerTable.length; i++) {
1381 value += cluster.numMaxRegionsPerTable[i];
1382 }
1383
1384 return scale(min, max, value);
1385 }
1386 }
1387
1388
1389
1390
1391
1392 static abstract class LocalityBasedCostFunction extends CostFunction {
1393
1394 private final LocalityType type;
1395
1396 private double bestLocality;
1397 private double locality;
1398
1399 private MasterServices services;
1400
1401 LocalityBasedCostFunction(Configuration conf,
1402 MasterServices srv,
1403 LocalityType type,
1404 String localityCostKey,
1405 float defaultLocalityCost) {
1406 super(conf);
1407 this.type = type;
1408 this.setMultiplier(conf.getFloat(localityCostKey, defaultLocalityCost));
1409 this.services = srv;
1410 this.locality = 0.0;
1411 this.bestLocality = 0.0;
1412 }
1413
1414
1415
1416
1417 abstract int regionIndexToEntityIndex(int region);
1418
1419 public void setServices(MasterServices srvc) {
1420 this.services = srvc;
1421 }
1422
1423 @Override
1424 void init(Cluster cluster) {
1425 super.init(cluster);
1426 locality = 0.0;
1427 bestLocality = 0.0;
1428
1429
1430 if (this.services == null) {
1431 return;
1432 }
1433
1434 for (int region = 0; region < cluster.numRegions; region++) {
1435 locality += getWeightedLocality(region, regionIndexToEntityIndex(region));
1436 bestLocality += getWeightedLocality(region, getMostLocalEntityForRegion(region));
1437 }
1438
1439
1440
1441
1442 locality = bestLocality == 0 ? 1.0 : locality / bestLocality;
1443 }
1444
1445 @Override
1446 protected void regionMoved(int region, int oldServer, int newServer) {
1447 int oldEntity = type == LocalityType.SERVER ? oldServer : cluster.serverIndexToRackIndex[oldServer];
1448 int newEntity = type == LocalityType.SERVER ? newServer : cluster.serverIndexToRackIndex[newServer];
1449 if (this.services == null) {
1450 return;
1451 }
1452 double localityDelta = getWeightedLocality(region, newEntity) - getWeightedLocality(region, oldEntity);
1453 double normalizedDelta = bestLocality == 0 ? 0.0 : localityDelta / bestLocality;
1454 locality += normalizedDelta;
1455 }
1456
1457 @Override
1458 protected double cost() {
1459 return 1 - locality;
1460 }
1461
1462 private int getMostLocalEntityForRegion(int region) {
1463 return cluster.getOrComputeRegionsToMostLocalEntities(type)[region];
1464 }
1465
1466 private double getWeightedLocality(int region, int entity) {
1467 return cluster.getOrComputeWeightedLocality(region, entity, type);
1468 }
1469
1470 }
1471
1472 static class ServerLocalityCostFunction extends LocalityBasedCostFunction {
1473
1474 private static final String LOCALITY_COST_KEY = "hbase.master.balancer.stochastic.localityCost";
1475 private static final float DEFAULT_LOCALITY_COST = 25;
1476
1477 ServerLocalityCostFunction(Configuration conf, MasterServices srv) {
1478 super(
1479 conf,
1480 srv,
1481 LocalityType.SERVER,
1482 LOCALITY_COST_KEY,
1483 DEFAULT_LOCALITY_COST
1484 );
1485 }
1486
1487 @Override
1488 int regionIndexToEntityIndex(int region) {
1489 return cluster.regionIndexToServerIndex[region];
1490 }
1491 }
1492
1493 static class RackLocalityCostFunction extends LocalityBasedCostFunction {
1494
1495 private static final String RACK_LOCALITY_COST_KEY = "hbase.master.balancer.stochastic.rackLocalityCost";
1496 private static final float DEFAULT_RACK_LOCALITY_COST = 15;
1497
1498 public RackLocalityCostFunction(Configuration conf, MasterServices services) {
1499 super(
1500 conf,
1501 services,
1502 LocalityType.RACK,
1503 RACK_LOCALITY_COST_KEY,
1504 DEFAULT_RACK_LOCALITY_COST
1505 );
1506 }
1507
1508 @Override
1509 int regionIndexToEntityIndex(int region) {
1510 return cluster.getRackForRegion(region);
1511 }
1512 }
1513
1514
1515
1516
1517
1518 abstract static class CostFromRegionLoadFunction extends CostFunction {
1519
1520 private ClusterStatus clusterStatus = null;
1521 private Map<String, Deque<RegionLoad>> loads = null;
1522 private double[] stats = null;
1523 CostFromRegionLoadFunction(Configuration conf) {
1524 super(conf);
1525 }
1526
1527 void setClusterStatus(ClusterStatus status) {
1528 this.clusterStatus = status;
1529 }
1530
1531 void setLoads(Map<String, Deque<RegionLoad>> l) {
1532 this.loads = l;
1533 }
1534
1535 @Override
1536 protected double cost() {
1537 if (clusterStatus == null || loads == null) {
1538 return 0;
1539 }
1540
1541 if (stats == null || stats.length != cluster.numServers) {
1542 stats = new double[cluster.numServers];
1543 }
1544
1545 for (int i =0; i < stats.length; i++) {
1546
1547 long cost = 0;
1548
1549
1550 for(int regionIndex:cluster.regionsPerServer[i]) {
1551 Collection<RegionLoad> regionLoadList = cluster.regionLoads[regionIndex];
1552
1553
1554 if (regionLoadList != null) {
1555 cost = (long) (cost + getRegionLoadCost(regionLoadList));
1556 }
1557 }
1558
1559
1560 stats[i] = cost;
1561 }
1562
1563
1564 return costFromArray(stats);
1565 }
1566
1567 protected double getRegionLoadCost(Collection<RegionLoad> regionLoadList) {
1568 double cost = 0;
1569
1570 for (RegionLoad rl : regionLoadList) {
1571 cost += getCostFromRl(rl);
1572 }
1573 return cost / regionLoadList.size();
1574 }
1575
1576
1577 protected abstract double getCostFromRl(RegionLoad rl);
1578 }
1579
1580
1581
1582
1583
1584
1585 abstract static class CostFromRegionLoadAsRateFunction extends CostFromRegionLoadFunction {
1586
1587 CostFromRegionLoadAsRateFunction(Configuration conf) {
1588 super(conf);
1589 }
1590
1591 @Override
1592 protected double getRegionLoadCost(Collection<RegionLoad> regionLoadList) {
1593 double cost = 0;
1594 double previous = 0;
1595 boolean isFirst = true;
1596 for (RegionLoad rl : regionLoadList) {
1597 double current = getCostFromRl(rl);
1598 if (isFirst) {
1599 isFirst = false;
1600 } else {
1601 cost += current - previous;
1602 }
1603 previous = current;
1604 }
1605 return Math.max(0, cost / (regionLoadList.size() - 1));
1606 }
1607 }
1608
1609
1610
1611
1612
1613
1614 static class ReadRequestCostFunction extends CostFromRegionLoadAsRateFunction {
1615
1616 private static final String READ_REQUEST_COST_KEY =
1617 "hbase.master.balancer.stochastic.readRequestCost";
1618 private static final float DEFAULT_READ_REQUEST_COST = 5;
1619
1620 ReadRequestCostFunction(Configuration conf) {
1621 super(conf);
1622 this.setMultiplier(conf.getFloat(READ_REQUEST_COST_KEY, DEFAULT_READ_REQUEST_COST));
1623 }
1624
1625
1626 @Override
1627 protected double getCostFromRl(RegionLoad rl) {
1628 return rl.getReadRequestsCount();
1629 }
1630 }
1631
1632
1633
1634
1635
1636 static class WriteRequestCostFunction extends CostFromRegionLoadAsRateFunction {
1637
1638 private static final String WRITE_REQUEST_COST_KEY =
1639 "hbase.master.balancer.stochastic.writeRequestCost";
1640 private static final float DEFAULT_WRITE_REQUEST_COST = 5;
1641
1642 WriteRequestCostFunction(Configuration conf) {
1643 super(conf);
1644 this.setMultiplier(conf.getFloat(WRITE_REQUEST_COST_KEY, DEFAULT_WRITE_REQUEST_COST));
1645 }
1646
1647 @Override
1648 protected double getCostFromRl(RegionLoad rl) {
1649 return rl.getWriteRequestsCount();
1650 }
1651 }
1652
1653
1654
1655
1656
1657
1658
1659 static class RegionReplicaHostCostFunction extends CostFunction {
1660 private static final String REGION_REPLICA_HOST_COST_KEY =
1661 "hbase.master.balancer.stochastic.regionReplicaHostCostKey";
1662 private static final float DEFAULT_REGION_REPLICA_HOST_COST_KEY = 100000;
1663
1664 long maxCost = 0;
1665 long[] costsPerGroup;
1666 int[][] primariesOfRegionsPerGroup;
1667
1668 public RegionReplicaHostCostFunction(Configuration conf) {
1669 super(conf);
1670 this.setMultiplier(conf.getFloat(REGION_REPLICA_HOST_COST_KEY,
1671 DEFAULT_REGION_REPLICA_HOST_COST_KEY));
1672 }
1673
1674 @Override
1675 void init(Cluster cluster) {
1676 super.init(cluster);
1677
1678 maxCost = cluster.numHosts > 1 ? getMaxCost(cluster) : 0;
1679 costsPerGroup = new long[cluster.numHosts];
1680 primariesOfRegionsPerGroup = cluster.multiServersPerHost
1681 ? cluster.primariesOfRegionsPerHost
1682 : cluster.primariesOfRegionsPerServer;
1683 for (int i = 0 ; i < primariesOfRegionsPerGroup.length; i++) {
1684 costsPerGroup[i] = costPerGroup(primariesOfRegionsPerGroup[i]);
1685 }
1686 }
1687
1688 long getMaxCost(Cluster cluster) {
1689 if (!cluster.hasRegionReplicas) {
1690 return 0;
1691 }
1692
1693 int[] primariesOfRegions = new int[cluster.numRegions];
1694 System.arraycopy(cluster.regionIndexToPrimaryIndex, 0, primariesOfRegions, 0,
1695 cluster.regions.length);
1696
1697 Arrays.sort(primariesOfRegions);
1698
1699
1700 return costPerGroup(primariesOfRegions);
1701 }
1702
1703 @Override
1704 boolean isNeeded() {
1705 return cluster.hasRegionReplicas;
1706 }
1707
1708 @Override
1709 protected double cost() {
1710 if (maxCost <= 0) {
1711 return 0;
1712 }
1713
1714 long totalCost = 0;
1715 for (int i = 0 ; i < costsPerGroup.length; i++) {
1716 totalCost += costsPerGroup[i];
1717 }
1718 return scale(0, maxCost, totalCost);
1719 }
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729 protected long costPerGroup(int[] primariesOfRegions) {
1730 long cost = 0;
1731 int currentPrimary = -1;
1732 int currentPrimaryIndex = -1;
1733
1734
1735 for (int j = 0 ; j <= primariesOfRegions.length; j++) {
1736 int primary = j < primariesOfRegions.length ? primariesOfRegions[j] : -1;
1737 if (primary != currentPrimary) {
1738 int numReplicas = j - currentPrimaryIndex;
1739
1740 if (numReplicas > 1) {
1741 cost += (numReplicas - 1) * (numReplicas - 1);
1742 }
1743 currentPrimary = primary;
1744 currentPrimaryIndex = j;
1745 }
1746 }
1747
1748 return cost;
1749 }
1750
1751 @Override
1752 protected void regionMoved(int region, int oldServer, int newServer) {
1753 if (maxCost <= 0) {
1754 return;
1755 }
1756 if (cluster.multiServersPerHost) {
1757 int oldHost = cluster.serverIndexToHostIndex[oldServer];
1758 int newHost = cluster.serverIndexToHostIndex[newServer];
1759 if (newHost != oldHost) {
1760 costsPerGroup[oldHost] = costPerGroup(cluster.primariesOfRegionsPerHost[oldHost]);
1761 costsPerGroup[newHost] = costPerGroup(cluster.primariesOfRegionsPerHost[newHost]);
1762 }
1763 } else {
1764 costsPerGroup[oldServer] = costPerGroup(cluster.primariesOfRegionsPerServer[oldServer]);
1765 costsPerGroup[newServer] = costPerGroup(cluster.primariesOfRegionsPerServer[newServer]);
1766 }
1767 }
1768 }
1769
1770
1771
1772
1773
1774
1775 static class RegionReplicaRackCostFunction extends RegionReplicaHostCostFunction {
1776 private static final String REGION_REPLICA_RACK_COST_KEY =
1777 "hbase.master.balancer.stochastic.regionReplicaRackCostKey";
1778 private static final float DEFAULT_REGION_REPLICA_RACK_COST_KEY = 10000;
1779
1780 public RegionReplicaRackCostFunction(Configuration conf) {
1781 super(conf);
1782 this.setMultiplier(conf.getFloat(REGION_REPLICA_RACK_COST_KEY,
1783 DEFAULT_REGION_REPLICA_RACK_COST_KEY));
1784 }
1785
1786 @Override
1787 void init(Cluster cluster) {
1788 this.cluster = cluster;
1789 if (cluster.numRacks <= 1) {
1790 maxCost = 0;
1791 return;
1792 }
1793
1794 maxCost = getMaxCost(cluster);
1795 costsPerGroup = new long[cluster.numRacks];
1796 for (int i = 0 ; i < cluster.primariesOfRegionsPerRack.length; i++) {
1797 costsPerGroup[i] = costPerGroup(cluster.primariesOfRegionsPerRack[i]);
1798 }
1799 }
1800
1801 @Override
1802 protected void regionMoved(int region, int oldServer, int newServer) {
1803 if (maxCost <= 0) {
1804 return;
1805 }
1806 int oldRack = cluster.serverIndexToRackIndex[oldServer];
1807 int newRack = cluster.serverIndexToRackIndex[newServer];
1808 if (newRack != oldRack) {
1809 costsPerGroup[oldRack] = costPerGroup(cluster.primariesOfRegionsPerRack[oldRack]);
1810 costsPerGroup[newRack] = costPerGroup(cluster.primariesOfRegionsPerRack[newRack]);
1811 }
1812 }
1813 }
1814
1815
1816
1817
1818
1819 static class MemstoreSizeCostFunction extends CostFromRegionLoadAsRateFunction {
1820
1821 private static final String MEMSTORE_SIZE_COST_KEY =
1822 "hbase.master.balancer.stochastic.memstoreSizeCost";
1823 private static final float DEFAULT_MEMSTORE_SIZE_COST = 5;
1824
1825 MemstoreSizeCostFunction(Configuration conf) {
1826 super(conf);
1827 this.setMultiplier(conf.getFloat(MEMSTORE_SIZE_COST_KEY, DEFAULT_MEMSTORE_SIZE_COST));
1828 }
1829
1830 @Override
1831 protected double getCostFromRl(RegionLoad rl) {
1832 return rl.getMemStoreSizeMB();
1833 }
1834 }
1835
1836
1837
1838
1839 static class StoreFileCostFunction extends CostFromRegionLoadFunction {
1840
1841 private static final String STOREFILE_SIZE_COST_KEY =
1842 "hbase.master.balancer.stochastic.storefileSizeCost";
1843 private static final float DEFAULT_STOREFILE_SIZE_COST = 5;
1844
1845 StoreFileCostFunction(Configuration conf) {
1846 super(conf);
1847 this.setMultiplier(conf.getFloat(STOREFILE_SIZE_COST_KEY, DEFAULT_STOREFILE_SIZE_COST));
1848 }
1849
1850 @Override
1851 protected double getCostFromRl(RegionLoad rl) {
1852 return rl.getStorefileSizeMB();
1853 }
1854 }
1855
1856
1857
1858
1859 public static String composeAttributeName(String tableName, String costFunctionName) {
1860 return tableName + TABLE_FUNCTION_SEP + costFunctionName;
1861 }
1862 }