1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.apache.hadoop.hbase.rsgroup;
22
23 import com.google.common.collect.ArrayListMultimap;
24 import com.google.common.collect.ListMultimap;
25 import com.google.common.collect.Lists;
26 import com.google.common.collect.Maps;
27
28 import java.io.IOException;
29 import java.util.ArrayList;
30 import java.util.Collections;
31 import java.util.HashMap;
32 import java.util.HashSet;
33 import java.util.LinkedList;
34 import java.util.List;
35 import java.util.Map;
36 import java.util.Set;
37 import java.util.TreeMap;
38
39 import org.apache.commons.logging.Log;
40 import org.apache.commons.logging.LogFactory;
41 import org.apache.hadoop.conf.Configuration;
42 import org.apache.hadoop.hbase.ClusterStatus;
43 import org.apache.hadoop.hbase.HBaseIOException;
44 import org.apache.hadoop.hbase.HRegionInfo;
45 import org.apache.hadoop.hbase.ServerName;
46 import org.apache.hadoop.hbase.TableName;
47 import org.apache.hadoop.hbase.classification.InterfaceAudience;
48 import org.apache.hadoop.hbase.constraint.ConstraintException;
49 import org.apache.hadoop.hbase.master.LoadBalancer;
50 import org.apache.hadoop.hbase.master.MasterServices;
51 import org.apache.hadoop.hbase.master.RegionPlan;
52 import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
53 import org.apache.hadoop.hbase.net.Address;
54 import org.apache.hadoop.hbase.util.Pair;
55 import org.apache.hadoop.util.ReflectionUtils;
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71 @InterfaceAudience.Private
72 public class RSGroupBasedLoadBalancer implements RSGroupableBalancer, LoadBalancer {
73
74 public static final String HBASE_GROUP_LOADBALANCER_CLASS = "hbase.group.grouploadbalancer.class";
75
76 private static final Log LOG = LogFactory.getLog(RSGroupBasedLoadBalancer.class);
77
78 private Configuration config;
79 private ClusterStatus clusterStatus;
80 private MasterServices masterServices;
81 private RSGroupInfoManager infoManager;
82 private LoadBalancer internalBalancer;
83
84
85
86
87
88
89
90
91 public static final String FALLBACK_GROUP_ENABLE_KEY = "hbase.rsgroup.fallback.enable";
92
93 private boolean fallbackEnabled = false;
94
95
96 @InterfaceAudience.Private
97 public RSGroupBasedLoadBalancer() {
98 }
99
100
/**
 * Creates a balancer that uses the supplied group info manager. {@link #initialize()} only
 * looks one up itself when this value is {@code null}.
 *
 * @param RSGroupInfoManager the group info manager to consult for group membership
 */
@InterfaceAudience.Private
public RSGroupBasedLoadBalancer(RSGroupInfoManager RSGroupInfoManager) {
  // The parameter shadows the type name; in expression position it denotes the argument.
  infoManager = RSGroupInfoManager;
}
105
106 @Override
107 public Configuration getConf() {
108 return config;
109 }
110
111 @Override
112 public void setConf(Configuration conf) {
113 this.config = conf;
114 if (internalBalancer != null) {
115 internalBalancer.setConf(conf);
116 }
117 }
118
119 @Override
120 public void setClusterStatus(ClusterStatus st) {
121 this.clusterStatus = st;
122 if (internalBalancer != null) {
123 internalBalancer.setClusterStatus(st);
124 }
125 }
126
127 @Override
128 public void setMasterServices(MasterServices masterServices) {
129 this.masterServices = masterServices;
130 }
131
132 @Override
133 public List<RegionPlan> balanceCluster(TableName tableName, Map<ServerName, List<HRegionInfo>>
134 clusterState) throws HBaseIOException {
135 return balanceCluster(clusterState);
136 }
137
138 @Override
139 public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState)
140 throws HBaseIOException {
141 if (!isOnline()) {
142 throw new ConstraintException(RSGroupInfoManager.RSGROUP_TABLE_NAME +
143 " is not online, unable to perform balance");
144 }
145
146 Map<ServerName,List<HRegionInfo>> correctedState = correctAssignments(clusterState);
147 List<RegionPlan> regionPlans = new ArrayList<>();
148
149 List<HRegionInfo> misplacedRegions = correctedState.get(LoadBalancer.BOGUS_SERVER_NAME);
150 for (HRegionInfo regionInfo : misplacedRegions) {
151 if (fallbackEnabled) {
152 regionPlans.add(new RegionPlan(regionInfo, findServerForRegion(clusterState, regionInfo),
153 null));
154 } else {
155 regionPlans.add(new RegionPlan(regionInfo, null, null));
156 }
157 }
158 try {
159
160 HashSet<ServerName> processedServers = new HashSet<>();
161
162
163 for (RSGroupInfo rsgroup : infoManager.listRSGroups()) {
164 Map<ServerName, List<HRegionInfo>> groupClusterState = new HashMap<>();
165 for (ServerName server : clusterState.keySet()) {
166 if (!processedServers.contains(server)
167 && rsgroup.containsServer(server.getAddress())) {
168 List<HRegionInfo> regionsOnServer = correctedState.get(server);
169 groupClusterState.put(server, regionsOnServer);
170 processedServers.add(server);
171 }
172 }
173
174 List<RegionPlan> groupPlans = this.internalBalancer
175 .balanceCluster(groupClusterState);
176 if (groupPlans != null) {
177 regionPlans.addAll(groupPlans);
178 }
179 }
180 } catch (IOException exp) {
181 LOG.warn("Exception while balancing cluster.", exp);
182 regionPlans.clear();
183 }
184 return regionPlans;
185 }
186
187 @Override
188 public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(
189 List<HRegionInfo> regions, List<ServerName> servers) throws HBaseIOException {
190 Map<ServerName, List<HRegionInfo>> assignments = Maps.newHashMap();
191 List<Pair<List<HRegionInfo>, List<ServerName>>> pairs =
192 generateGroupAssignments(regions, servers);
193 for (Pair<List<HRegionInfo>, List<ServerName>> pair : pairs) {
194 Map<ServerName, List<HRegionInfo>> result = this.internalBalancer
195 .roundRobinAssignment(pair.getFirst(), pair.getSecond());
196 if (result != null) {
197 for (Map.Entry<ServerName, List<HRegionInfo>> entry : result.entrySet()) {
198 ServerName serverName = entry.getKey();
199 List<HRegionInfo> regionInfos = entry.getValue();
200 if (!assignments.containsKey(serverName)) {
201 assignments.put(serverName, Lists.<HRegionInfo>newArrayList());
202 }
203 assignments.get(serverName).addAll(regionInfos);
204 }
205 }
206 }
207 return assignments;
208 }
209
210 @Override
211 public Map<ServerName, List<HRegionInfo>> retainAssignment(
212 Map<HRegionInfo, ServerName> regions, List<ServerName> servers) throws HBaseIOException {
213 try {
214 Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<>();
215 List<Pair<List<HRegionInfo>, List<ServerName>>> pairs =
216 generateGroupAssignments(Lists.newArrayList(regions.keySet()), servers);
217 for (Pair<List<HRegionInfo>, List<ServerName>> pair : pairs) {
218 List<HRegionInfo> regionList = pair.getFirst();
219 Map<HRegionInfo, ServerName> currentAssignmentMap = Maps.newTreeMap();
220 for (HRegionInfo regionInfo: regionList) {
221 currentAssignmentMap.put(regionInfo, regions.get(regionInfo));
222 }
223 Map<ServerName, List<HRegionInfo>> pairResult =
224 this.internalBalancer.retainAssignment(currentAssignmentMap, pair.getSecond());
225 for (Map.Entry<ServerName, List<HRegionInfo>> entry : pairResult.entrySet()) {
226 ServerName serverName = entry.getKey();
227 List<HRegionInfo> regionInfos = entry.getValue();
228 if (!assignments.containsKey(serverName)) {
229 assignments.put(serverName, Lists.<HRegionInfo>newArrayList());
230 }
231 assignments.get(serverName).addAll(regionInfos);
232 }
233 }
234 return assignments;
235 } catch (IOException e) {
236 throw new HBaseIOException("Failed to do online retain assignment", e);
237 }
238 }
239
240 @Override
241 public Map<HRegionInfo, ServerName> immediateAssignment(List<HRegionInfo> regions,
242 List<ServerName> servers) throws HBaseIOException {
243 throw new UnsupportedOperationException("immediateAssignment is not supported");
244 }
245
246 @Override
247 public ServerName randomAssignment(HRegionInfo region,
248 List<ServerName> servers) throws HBaseIOException {
249 List<Pair<List<HRegionInfo>, List<ServerName>>> pairs =
250 generateGroupAssignments(Lists.newArrayList(region), servers);
251 List<ServerName> filteredServers = pairs.iterator().next().getSecond();
252 return this.internalBalancer.randomAssignment(region, filteredServers);
253 }
254
255 private List<Pair<List<HRegionInfo>, List<ServerName>>> generateGroupAssignments(
256 List<HRegionInfo> regions, List<ServerName> servers) throws HBaseIOException {
257 try {
258 ListMultimap<String, HRegionInfo> regionMap = ArrayListMultimap.create();
259 ListMultimap<String, ServerName> serverMap = ArrayListMultimap.create();
260 for (HRegionInfo region : regions) {
261 String groupName = infoManager.getRSGroupOfTable(region.getTable());
262 if (groupName == null) {
263 LOG.debug("Group not found for table " + region.getTable() + ", using default");
264 groupName = RSGroupInfo.DEFAULT_GROUP;
265 }
266 regionMap.put(groupName, region);
267 }
268 for (String groupKey : regionMap.keySet()) {
269 RSGroupInfo info = infoManager.getRSGroup(groupKey);
270 serverMap.putAll(groupKey, filterOfflineServers(info, servers));
271 }
272
273 List<Pair<List<HRegionInfo>, List<ServerName>>> result = Lists.newArrayList();
274 List<HRegionInfo> fallbackRegions = Lists.newArrayList();
275 for (String groupKey : regionMap.keySet()) {
276 if (serverMap.get(groupKey).isEmpty()) {
277 fallbackRegions.addAll(regionMap.get(groupKey));
278 } else {
279 result.add(Pair.newPair(regionMap.get(groupKey), serverMap.get(groupKey)));
280 }
281 }
282 if (!fallbackRegions.isEmpty()) {
283 List<ServerName> candidates = null;
284 if (isFallbackEnabled()) {
285 candidates = getFallBackCandidates(servers);
286 }
287 candidates = (candidates == null || candidates.isEmpty()) ?
288 Lists.newArrayList(BOGUS_SERVER_NAME) : candidates;
289 result.add(Pair.newPair(fallbackRegions, candidates));
290 }
291 return result;
292 } catch(IOException e) {
293 throw new HBaseIOException("Failed to generate group assignments", e);
294 }
295 }
296
297 private List<ServerName> filterOfflineServers(RSGroupInfo RSGroupInfo,
298 List<ServerName> onlineServers) {
299 if (RSGroupInfo != null) {
300 return filterServers(RSGroupInfo.getServers(), onlineServers);
301 } else {
302 LOG.debug("Group Information found to be null. Some regions might be unassigned.");
303 return Collections.emptyList();
304 }
305 }
306
307
308
309
310
311
312
313
314
315
316 private List<ServerName> filterServers(Set<Address> servers,
317 List<ServerName> onlineServers) {
318
319
320
321
322
323
324 ArrayList<ServerName> finalList = new ArrayList<>();
325 for (ServerName onlineServer : onlineServers) {
326 if (servers.contains(onlineServer.getAddress())) {
327 finalList.add(onlineServer);
328 }
329 }
330
331 return finalList;
332 }
333
334 public Set<HRegionInfo> getMisplacedRegions(
335 Map<HRegionInfo, ServerName> regions) throws IOException {
336 Set<HRegionInfo> misplacedRegions = new HashSet<HRegionInfo>();
337 for(Map.Entry<HRegionInfo, ServerName> region : regions.entrySet()) {
338 HRegionInfo regionInfo = region.getKey();
339 ServerName assignedServer = region.getValue();
340 String groupName = infoManager.getRSGroupOfTable(regionInfo.getTable());
341 if (groupName == null) {
342 LOG.debug("Group not found for table " + regionInfo.getTable() + ", using default");
343 groupName = RSGroupInfo.DEFAULT_GROUP;
344 }
345 RSGroupInfo info = infoManager.getRSGroup(groupName);
346 if (assignedServer == null) {
347 LOG.debug("There is no assigned server for " + region);
348 continue;
349 }
350 RSGroupInfo otherInfo = infoManager.getRSGroupOfServer(assignedServer.getAddress());
351 if (info == null && otherInfo == null) {
352 LOG.warn("Couldn't obtain rs group information for " + region + " on " + assignedServer);
353 continue;
354 }
355 if ((info == null || !info.containsServer(assignedServer.getAddress()))) {
356 LOG.debug("Found misplaced region: " + regionInfo.getRegionNameAsString() +
357 " on server: " + assignedServer +
358 " found in group: " + otherInfo +
359 " outside of group: " + (info == null ? "UNKNOWN" : info.getName()));
360 misplacedRegions.add(regionInfo);
361 }
362 }
363 return misplacedRegions;
364 }
365
366 private ServerName findServerForRegion(
367 Map<ServerName, List<HRegionInfo>> existingAssignments, HRegionInfo region) {
368 for (Map.Entry<ServerName, List<HRegionInfo>> entry : existingAssignments.entrySet()) {
369 if (entry.getValue().contains(region)) {
370 return entry.getKey();
371 }
372 }
373
374 throw new IllegalStateException("Could not find server for region "
375 + region.getShortNameToLog());
376 }
377
378 private Map<ServerName, List<HRegionInfo>> correctAssignments(
379 Map<ServerName, List<HRegionInfo>> existingAssignments) {
380 Map<ServerName, List<HRegionInfo>> correctAssignments =
381 new TreeMap<ServerName, List<HRegionInfo>>();
382 correctAssignments.put(LoadBalancer.BOGUS_SERVER_NAME, new LinkedList<HRegionInfo>());
383 for (Map.Entry<ServerName, List<HRegionInfo>> assignments : existingAssignments.entrySet()){
384 ServerName sName = assignments.getKey();
385 correctAssignments.put(sName, new LinkedList<HRegionInfo>());
386 List<HRegionInfo> regions = assignments.getValue();
387 for (HRegionInfo region : regions) {
388 RSGroupInfo info = null;
389 try {
390 String groupName = infoManager.getRSGroupOfTable(region.getTable());
391 if (groupName == null) {
392 LOG.debug("Group not found for table " + region.getTable() + ", using default");
393 groupName = RSGroupInfo.DEFAULT_GROUP;
394 }
395 info = infoManager.getRSGroup(groupName);
396 } catch (IOException exp) {
397 LOG.debug("Group information null for region of table " + region.getTable(),
398 exp);
399 }
400 if ((info == null) || (!info.containsServer(sName.getAddress()))) {
401 correctAssignments.get(LoadBalancer.BOGUS_SERVER_NAME).add(region);
402 } else {
403 correctAssignments.get(sName).add(region);
404 }
405 }
406 }
407 return correctAssignments;
408 }
409
410 @Override
411 public void initialize() throws HBaseIOException {
412 try {
413 if (infoManager == null) {
414 List<RSGroupAdminEndpoint> cps =
415 masterServices.getMasterCoprocessorHost().findCoprocessors(RSGroupAdminEndpoint.class);
416 if (cps.size() != 1) {
417 String msg = "Expected one implementation of GroupAdminEndpoint but found " + cps.size();
418 LOG.error(msg);
419 throw new HBaseIOException(msg);
420 }
421 infoManager = cps.get(0).getGroupInfoManager();
422 if(infoManager == null){
423 String msg = "RSGroupInfoManager hasn't been initialized";
424 LOG.error(msg);
425 throw new HBaseIOException(msg);
426 }
427 infoManager.start();
428 }
429 } catch (IOException e) {
430 throw new HBaseIOException("Failed to initialize GroupInfoManagerImpl", e);
431 }
432
433
434 Class<? extends LoadBalancer> balancerKlass = config.getClass(
435 HBASE_GROUP_LOADBALANCER_CLASS,
436 StochasticLoadBalancer.class, LoadBalancer.class);
437 internalBalancer = ReflectionUtils.newInstance(balancerKlass, config);
438 if (clusterStatus != null) {
439 internalBalancer.setClusterStatus(clusterStatus);
440 }
441 internalBalancer.setMasterServices(masterServices);
442 internalBalancer.setConf(config);
443 internalBalancer.initialize();
444
445 this.fallbackEnabled = config.getBoolean(FALLBACK_GROUP_ENABLE_KEY, false);
446 }
447
448 public boolean isOnline() {
449 return infoManager != null && infoManager.isOnline();
450 }
451
452 public boolean isFallbackEnabled() {
453 return fallbackEnabled;
454 }
455
456 @Override
457 public void regionOnline(HRegionInfo regionInfo, ServerName sn) {
458 }
459
460 @Override
461 public void regionOffline(HRegionInfo regionInfo) {
462 }
463
464 @Override
465 public void onConfigurationChange(Configuration conf) {
466 boolean newFallbackEnabled = conf.getBoolean(FALLBACK_GROUP_ENABLE_KEY, false);
467 if (fallbackEnabled != newFallbackEnabled) {
468 LOG.info("Changing the value of " + FALLBACK_GROUP_ENABLE_KEY + " from " + fallbackEnabled
469 + " to " + newFallbackEnabled);
470 fallbackEnabled = newFallbackEnabled;
471 }
472 internalBalancer.onConfigurationChange(conf);
473 }
474
475 @Override
476 public void stop(String why) {
477 }
478
479 @Override
480 public boolean isStopped() {
481 return false;
482 }
483
484 @Override
485 public void postMasterStartupInitialize() {
486 this.internalBalancer.postMasterStartupInitialize();
487 }
488
489 public void updateBalancerStatus(boolean status) {
490 internalBalancer.updateBalancerStatus(status);
491 }
492
493 private List<ServerName> getFallBackCandidates(List<ServerName> servers) {
494 List<ServerName> serverNames = null;
495 try {
496 RSGroupInfo info = infoManager.getRSGroup(RSGroupInfo.DEFAULT_GROUP);
497 serverNames = filterOfflineServers(info, servers);
498 } catch (IOException e) {
499 LOG.error("Failed to get default rsgroup info to fallback", e);
500 }
501 return serverNames == null || serverNames.isEmpty() ? servers : serverNames;
502 }
503 }