1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.rsgroup;
21
22 import com.google.common.collect.Lists;
23 import com.google.common.collect.Maps;
24 import com.google.common.collect.Sets;
25
26 import java.io.IOException;
27 import java.util.ArrayList;
28 import java.util.Collections;
29 import java.util.HashMap;
30 import java.util.HashSet;
31 import java.util.Iterator;
32 import java.util.LinkedList;
33 import java.util.List;
34 import java.util.Map;
35 import java.util.Set;
36 import java.util.concurrent.ConcurrentHashMap;
37 import java.util.concurrent.ConcurrentMap;
38
39 import org.apache.commons.lang.StringUtils;
40 import org.apache.commons.logging.Log;
41 import org.apache.commons.logging.LogFactory;
42 import org.apache.hadoop.hbase.HRegionInfo;
43 import org.apache.hadoop.hbase.HTableDescriptor;
44 import org.apache.hadoop.hbase.NamespaceDescriptor;
45 import org.apache.hadoop.hbase.ServerName;
46 import org.apache.hadoop.hbase.TableName;
47 import org.apache.hadoop.hbase.classification.InterfaceAudience;
48 import org.apache.hadoop.hbase.constraint.ConstraintException;
49 import org.apache.hadoop.hbase.master.AssignmentManager;
50 import org.apache.hadoop.hbase.master.LoadBalancer;
51 import org.apache.hadoop.hbase.master.MasterServices;
52 import org.apache.hadoop.hbase.master.RegionPlan;
53 import org.apache.hadoop.hbase.master.RegionState;
54 import org.apache.hadoop.hbase.master.ServerManager;
55 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
56 import org.apache.hadoop.hbase.net.Address;
57 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
58
59
60
61
62 @InterfaceAudience.Private
63 public class RSGroupAdminServer implements RSGroupAdmin {
64 private static final Log LOG = LogFactory.getLog(RSGroupAdminServer.class);
65
66 private MasterServices master;
67
68
69 private ConcurrentMap<Address,String> serversInTransition =
70 new ConcurrentHashMap<Address, String>();
71 private RSGroupInfoManager rsGroupInfoManager;
72
/**
 * Creates an admin endpoint that works against the given master and group manager.
 *
 * @param master master services this instance queries for servers, regions and namespaces
 * @param RSGroupInfoManager manager holding the rsgroup membership information
 * @throws IOException declared for callers; the constructor itself only stores its arguments
 */
public RSGroupAdminServer(MasterServices master,
    RSGroupInfoManager RSGroupInfoManager) throws IOException {
  this.rsGroupInfoManager = RSGroupInfoManager;
  this.master = master;
}
78
79 @Override
80 public RSGroupInfo getRSGroupInfo(String groupName) throws IOException{
81 return getRSGroupInfoManager().getRSGroup(groupName);
82 }
83
84
85 @Override
86 public RSGroupInfo getRSGroupInfoOfTable(TableName tableName) throws IOException {
87 String groupName = getRSGroupInfoManager().getRSGroupOfTable(tableName);
88 if (groupName == null) {
89 return null;
90 }
91 return getRSGroupInfoManager().getRSGroup(groupName);
92 }
93
94 @Override
95 public void moveServers(Set<Address> servers, String targetGroupName)
96 throws IOException {
97 if (servers == null) {
98 throw new ConstraintException(
99 "The list of servers cannot be null.");
100 }
101 if (StringUtils.isEmpty(targetGroupName)) {
102 throw new ConstraintException("The target group cannot be null.");
103 }
104 if (servers.size() < 1) {
105 return;
106 }
107
108 RSGroupInfo targetGrp = getRSGroupInfo(targetGroupName);
109 if (targetGrp == null) {
110 throw new ConstraintException("Group does not exist: "+targetGroupName);
111 }
112
113 RSGroupInfoManager manager = getRSGroupInfoManager();
114 synchronized (manager) {
115 Address firstServer = servers.iterator().next();
116
117
118 RSGroupInfo srcGrp = manager.getRSGroupOfServer(firstServer);
119
120
121
122 if (srcGrp == null) {
123 throw new ConstraintException(
124 "Server "+firstServer+" does not have a group.");
125 }
126 if (RSGroupInfo.DEFAULT_GROUP.equals(srcGrp.getName())) {
127 Set<Address> onlineServers = new HashSet<Address>();
128 for(ServerName server: master.getServerManager().getOnlineServers().keySet()) {
129 onlineServers.add(server.getAddress());
130 }
131 for(Address el: servers) {
132 if(!onlineServers.contains(el)) {
133 throw new ConstraintException(
134 "Server "+el+" is not an online server in default group.");
135 }
136 }
137 }
138
139 if (RSGroupInfo.DEFAULT_GROUP.equals(srcGrp.getName()) && srcGrp.getServers().size() <=
140 servers.size()) {
141 throw new ConstraintException("Should keep at least one server in 'default' RSGroup ");
142 }
143
144 if(srcGrp.getServers().size() <= servers.size() && srcGrp.getTables().size() > 0) {
145 throw new ConstraintException("Cannot leave a group "+srcGrp.getName()+
146 " that contains tables " +"without servers.");
147 }
148
149 String sourceGroupName = getRSGroupInfoManager()
150 .getRSGroupOfServer(srcGrp.getServers().iterator().next()).getName();
151 if(getRSGroupInfo(targetGroupName) == null) {
152 throw new ConstraintException("Target group does not exist: "+targetGroupName);
153 }
154
155 for(Address server: servers) {
156 if (serversInTransition.containsKey(server)) {
157 throw new ConstraintException(
158 "Server list contains a server that is already being moved: "+server);
159 }
160 String tmpGroup = getRSGroupInfoManager().getRSGroupOfServer(server).getName();
161 if (sourceGroupName != null && !tmpGroup.equals(sourceGroupName)) {
162 throw new ConstraintException(
163 "Move server request should only come from one source group. "+
164 "Expecting only "+sourceGroupName+" but contains "+tmpGroup);
165 }
166 }
167
168 if(sourceGroupName.equals(targetGroupName)) {
169 throw new ConstraintException(
170 "Target group is the same as source group: "+targetGroupName);
171 }
172
173 try {
174
175 for (Address server : servers) {
176 serversInTransition.put(server, targetGroupName);
177 }
178
179 getRSGroupInfoManager().moveServers(servers, sourceGroupName, targetGroupName);
180 boolean found;
181 List<Address> tmpServers = Lists.newArrayList(servers);
182 do {
183 found = false;
184 for (Iterator<Address> iter = tmpServers.iterator();
185 iter.hasNext(); ) {
186 Address rs = iter.next();
187
188 List<HRegionInfo> regions = new LinkedList<HRegionInfo>();
189 for (Map.Entry<HRegionInfo, ServerName> el :
190 master.getAssignmentManager().getRegionStates().getRegionAssignments().entrySet()) {
191 if (el.getValue().getAddress().equals(rs)) {
192 regions.add(el.getKey());
193 }
194 }
195 Iterator<RegionState> i =
196 master.getAssignmentManager().getRegionStates().getRegionsInTransition().iterator();
197 while (i.hasNext()) {
198 RegionState state = i.next();
199 if (state.getServerName().getAddress().equals(rs)) {
200 regions.add(state.getRegion());
201 }
202 }
203
204
205 LOG.info("Unassigning " + regions.size() +
206 " regions from server " + rs + " for move to " + targetGroupName);
207 if (regions.size() > 0) {
208
209 for (HRegionInfo region : regions) {
210
211
212 if (!targetGrp.containsTable(region.getTable())) {
213 master.getAssignmentManager().unassign(region);
214 found = true;
215 }
216 }
217 }
218 if (!found) {
219 iter.remove();
220 }
221 }
222 try {
223 manager.wait(1000);
224 } catch (InterruptedException e) {
225 LOG.warn("Sleep interrupted", e);
226 Thread.currentThread().interrupt();
227 }
228 } while (found);
229 } finally {
230
231 for (Address server : servers) {
232 serversInTransition.remove(server);
233 }
234 }
235 LOG.info("Move server done: "+sourceGroupName+"->"+targetGroupName);
236 }
237 }
238
239 @Override
240 public void moveTables(Set<TableName> tables, String targetGroup) throws IOException {
241 if (tables == null) {
242 throw new ConstraintException(
243 "The list of servers cannot be null.");
244 }
245 if(tables.size() < 1) {
246 LOG.debug("moveTables() passed an empty set. Ignoring.");
247 return;
248 }
249 RSGroupInfoManager manager = getRSGroupInfoManager();
250 synchronized (manager) {
251 if(targetGroup != null) {
252 RSGroupInfo destGroup = manager.getRSGroup(targetGroup);
253 if(destGroup == null) {
254 throw new ConstraintException("Target group does not exist: "+targetGroup);
255 }
256 if(destGroup.getServers().size() < 1) {
257 throw new ConstraintException("Target group must have at least one server.");
258 }
259 }
260
261 for(TableName table : tables) {
262 String srcGroup = manager.getRSGroupOfTable(table);
263 if(srcGroup != null && srcGroup.equals(targetGroup)) {
264 throw new ConstraintException(
265 "Source group is the same as target group for table "+table+" :"+srcGroup);
266 }
267 }
268 manager.moveTables(tables, targetGroup);
269 }
270 for(TableName table: tables) {
271 if (master.getAssignmentManager().getTableStateManager().isTableState(table,
272 ZooKeeperProtos.Table.State.DISABLED,
273 ZooKeeperProtos.Table.State.DISABLING)) {
274 LOG.debug("Skipping move regions because the table" + table + " is disabled.");
275 continue;
276 }
277 TableLock lock = master.getTableLockManager().writeLock(table, "Group: table move");
278 for (HRegionInfo region :
279 master.getAssignmentManager().getRegionStates().getRegionsOfTable(table)) {
280 master.getAssignmentManager().unassign(region);
281 }
282
283 }
284 }
285
286 @Override
287 public void addRSGroup(String name) throws IOException {
288 getRSGroupInfoManager().addRSGroup(new RSGroupInfo(name));
289 }
290
291 @Override
292 public void removeRSGroup(String name) throws IOException {
293 RSGroupInfoManager manager = getRSGroupInfoManager();
294 synchronized (manager) {
295 RSGroupInfo groupInfo = getRSGroupInfoManager().getRSGroup(name);
296 if(groupInfo == null) {
297 throw new ConstraintException("Group "+name+" does not exist");
298 }
299 int tableCount = groupInfo.getTables().size();
300 if (tableCount > 0) {
301 throw new ConstraintException("Group "+name+" must have no associated tables: "+tableCount);
302 }
303 int serverCount = groupInfo.getServers().size();
304 if(serverCount > 0) {
305 throw new ConstraintException(
306 "Group "+name+" must have no associated servers: "+serverCount);
307 }
308 for(NamespaceDescriptor ns: master.getTableNamespaceManager().list()) {
309 String nsGroup = ns.getConfigurationValue(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP);
310 if(nsGroup != null && nsGroup.equals(name)) {
311 throw new ConstraintException("Group "+name+" is referenced by namespace: "+ns.getName());
312 }
313 }
314 manager.removeRSGroup(name);
315 }
316 }
317
318 @Override
319 public boolean balanceRSGroup(String groupName) throws IOException {
320 ServerManager serverManager = master.getServerManager();
321 AssignmentManager assignmentManager = master.getAssignmentManager();
322 LoadBalancer balancer = master.getLoadBalancer();
323
324 boolean balancerRan;
325 synchronized (balancer) {
326 if (getRSGroupInfo(groupName) == null) {
327 throw new ConstraintException("Group does not exist: "+groupName);
328 }
329
330 Map<String, RegionState> groupRIT = rsGroupGetRegionsInTransition(groupName);
331 if (groupRIT.size() > 0) {
332 LOG.debug("Not running balancer because " +
333 groupRIT.size() +
334 " region(s) in transition: " +
335 StringUtils.abbreviate(
336 master.getAssignmentManager().getRegionStates().getRegionsInTransition().toString(),
337 256));
338 return false;
339 }
340 if (serverManager.areDeadServersInProgress()) {
341 LOG.debug("Not running balancer because processing dead regionserver(s): " +
342 serverManager.getDeadServers());
343 return false;
344 }
345
346
347 List<RegionPlan> plans = new ArrayList<RegionPlan>();
348 for(Map.Entry<TableName, Map<ServerName, List<HRegionInfo>>> tableMap:
349 getRSGroupAssignmentsByTable(groupName).entrySet()) {
350 LOG.info("Creating partial plan for table "+tableMap.getKey()+": "+tableMap.getValue());
351 List<RegionPlan> partialPlans = balancer.balanceCluster(tableMap.getValue());
352 LOG.info("Partial plan for table "+tableMap.getKey()+": "+partialPlans);
353 if (partialPlans != null) {
354 plans.addAll(partialPlans);
355 }
356 }
357 long startTime = System.currentTimeMillis();
358 balancerRan = plans != null;
359 if (plans != null && !plans.isEmpty()) {
360 LOG.info("Group balance "+groupName+" starting with plan count: "+plans.size());
361 for (RegionPlan plan: plans) {
362 LOG.info("balance " + plan);
363 assignmentManager.balance(plan);
364 }
365 LOG.info("Group balance "+groupName+" completed after "+
366 (System.currentTimeMillis()-startTime)+" seconds");
367 }
368 }
369 return balancerRan;
370 }
371
372 @Override
373 public List<RSGroupInfo> listRSGroups() throws IOException {
374 return getRSGroupInfoManager().listRSGroups();
375 }
376
377 @Override
378 public RSGroupInfo getRSGroupOfServer(Address server) throws IOException {
379 return getRSGroupInfoManager().getRSGroupOfServer(server);
380 }
381
382 @InterfaceAudience.Private
383 public RSGroupInfoManager getRSGroupInfoManager() throws IOException {
384 return rsGroupInfoManager;
385 }
386
387 @Override
388 public void removeServers(Set<Address> servers) throws IOException {
389 {
390 if (servers == null || servers.isEmpty()) {
391 throw new ConstraintException("The set of servers to remove cannot be null or empty.");
392 }
393
394
395 synchronized (rsGroupInfoManager) {
396
397 checkForDeadOrOnlineServers(servers);
398 rsGroupInfoManager.removeServers(servers);
399 LOG.info("Remove decommissioned servers " + servers + " from rsgroup done.");
400 }
401 }
402 }
403
404 private Map<String, RegionState> rsGroupGetRegionsInTransition(String groupName)
405 throws IOException {
406 Map<String, RegionState> rit = Maps.newTreeMap();
407 AssignmentManager am = master.getAssignmentManager();
408 RSGroupInfo RSGroupInfo = getRSGroupInfo(groupName);
409 for(TableName tableName : RSGroupInfo.getTables()) {
410 for(HRegionInfo regionInfo: am.getRegionStates().getRegionsOfTable(tableName)) {
411 RegionState state =
412 master.getAssignmentManager().getRegionStates().getRegionTransitionState(regionInfo);
413 if(state != null) {
414 rit.put(regionInfo.getEncodedName(), state);
415 }
416 }
417 }
418 return rit;
419 }
420
421 private Map<TableName, Map<ServerName, List<HRegionInfo>>>
422 getRSGroupAssignmentsByTable(String groupName) throws IOException {
423 Map<TableName, Map<ServerName, List<HRegionInfo>>> result = Maps.newHashMap();
424 RSGroupInfo RSGroupInfo = getRSGroupInfo(groupName);
425 Map<TableName, Map<ServerName, List<HRegionInfo>>> assignments = Maps.newHashMap();
426 for(Map.Entry<HRegionInfo, ServerName> entry:
427 master.getAssignmentManager().getRegionStates().getRegionAssignments().entrySet()) {
428 TableName currTable = entry.getKey().getTable();
429 ServerName currServer = entry.getValue();
430 HRegionInfo currRegion = entry.getKey();
431 if(RSGroupInfo.getTables().contains(currTable)) {
432 if(!assignments.containsKey(entry.getKey().getTable())) {
433 assignments.put(currTable, new HashMap<ServerName, List<HRegionInfo>>());
434 }
435 if(!assignments.get(currTable).containsKey(currServer)) {
436 assignments.get(currTable).put(currServer, new ArrayList<HRegionInfo>());
437 }
438 assignments.get(currTable).get(currServer).add(currRegion);
439 }
440 }
441
442 Map<ServerName, List<HRegionInfo>> serverMap = Maps.newHashMap();
443 for(ServerName serverName: master.getServerManager().getOnlineServers().keySet()) {
444 if(RSGroupInfo.getServers().contains(serverName.getAddress())) {
445 serverMap.put(serverName, Collections.<HRegionInfo> emptyList());
446 }
447 }
448
449
450 for(TableName tableName : RSGroupInfo.getTables()) {
451 if(assignments.containsKey(tableName)) {
452 result.put(tableName, new HashMap<ServerName, List<HRegionInfo>>());
453 result.get(tableName).putAll(serverMap);
454 result.get(tableName).putAll(assignments.get(tableName));
455 LOG.debug("Adding assignments for "+tableName+": "+assignments.get(tableName));
456 }
457 }
458
459 return result;
460 }
461
462 public void prepareRSGroupForTable(HTableDescriptor desc) throws IOException {
463 String groupName =
464 master.getTableNamespaceManager().get(desc.getTableName().getNamespaceAsString())
465 .getConfigurationValue(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP);
466 if (groupName == null) {
467 groupName = RSGroupInfo.DEFAULT_GROUP;
468 }
469 RSGroupInfo RSGroupInfo = getRSGroupInfo(groupName);
470 if (RSGroupInfo == null) {
471 throw new ConstraintException("RSGroup " + groupName + " does not exist.");
472 }
473 if (!RSGroupInfo.containsTable(desc.getTableName())) {
474 LOG.debug("Pre-moving table " + desc.getTableName() + " to rsgroup " + groupName);
475 moveTables(Sets.newHashSet(desc.getTableName()), groupName);
476 }
477 }
478
479 public void cleanupRSGroupForTable(TableName tableName) throws IOException {
480 try {
481 RSGroupInfo group = getRSGroupInfoOfTable(tableName);
482 if (group != null) {
483 LOG.debug("Removing deleted table from table rsgroup " + group.getName());
484 moveTables(Sets.newHashSet(tableName), null);
485 }
486 } catch (ConstraintException ex) {
487 LOG.debug("Failed to perform rsgroup information cleanup for table: " + tableName, ex);
488 } catch (IOException ex) {
489 LOG.debug("Failed to perform rsgroup information cleanup for table: " + tableName, ex);
490 }
491 }
492
493 @Override
494 public void moveServersAndTables(Set<Address> servers, Set<TableName> tables,
495 String targetGroup) throws IOException {
496 if (servers == null || servers.isEmpty() ) {
497 throw new ConstraintException("The list of servers to move cannot be null or empty.");
498 }
499 if (tables == null || tables.isEmpty()) {
500 throw new ConstraintException("The list of tables to move cannot be null or empty.");
501 }
502 moveServers(servers, targetGroup);
503 moveTables(tables, targetGroup);
504 }
505
506 @Override
507 public void renameRSGroup(String oldName, String newName) throws IOException {
508 synchronized (rsGroupInfoManager) {
509 rsGroupInfoManager.renameRSGroup(oldName, newName);
510 }
511 }
512
513 @Override
514 public void close() throws IOException {
515 }
516
517
518
519
520
521 private void checkForDeadOrOnlineServers(Set<Address> servers) throws ConstraintException {
522
523 Set<Address> onlineServers = new HashSet<>();
524 for(ServerName server: master.getServerManager().getOnlineServers().keySet()) {
525 onlineServers.add(server.getAddress());
526 }
527
528 Set<Address> deadServers = new HashSet<>();
529 for(ServerName server: master.getServerManager().getDeadServers().copyServerNames()) {
530 deadServers.add(server.getAddress());
531 }
532
533 for (Address address: servers) {
534 if (onlineServers.contains(address)) {
535 throw new ConstraintException(
536 "Server " + address + " is an online server, not allowed to remove.");
537 }
538 if (deadServers.contains(address)) {
539 throw new ConstraintException(
540 "Server " + address + " is on the dead servers list,"
541 + " Maybe it will come back again, not allowed to remove.");
542 }
543 }
544 }
545 }