001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019 020package org.apache.hadoop.hbase.util; 021 022import java.io.BufferedInputStream; 023import java.io.BufferedOutputStream; 024import java.io.Closeable; 025import java.io.DataInputStream; 026import java.io.DataOutputStream; 027import java.io.File; 028import java.io.FileInputStream; 029import java.io.FileOutputStream; 030import java.io.IOException; 031import java.nio.file.Files; 032import java.nio.file.Paths; 033import java.util.ArrayList; 034import java.util.Collections; 035import java.util.EnumSet; 036import java.util.HashSet; 037import java.util.Iterator; 038import java.util.List; 039import java.util.Locale; 040import java.util.Optional; 041import java.util.Set; 042import java.util.concurrent.Callable; 043import java.util.concurrent.CancellationException; 044import java.util.concurrent.ExecutionException; 045import java.util.concurrent.ExecutorService; 046import java.util.concurrent.Executors; 047import java.util.concurrent.Future; 048import java.util.concurrent.TimeUnit; 049import java.util.concurrent.TimeoutException; 050import java.util.function.Predicate; 051import org.apache.commons.io.IOUtils; 052import org.apache.hadoop.conf.Configuration; 053import org.apache.hadoop.hbase.ClusterMetrics.Option; 054import org.apache.hadoop.hbase.HBaseConfiguration; 055import org.apache.hadoop.hbase.HConstants; 056import org.apache.hadoop.hbase.ServerName; 057import org.apache.hadoop.hbase.UnknownRegionException; 058import org.apache.hadoop.hbase.client.Admin; 059import org.apache.hadoop.hbase.client.Connection; 060import org.apache.hadoop.hbase.client.ConnectionFactory; 061import org.apache.hadoop.hbase.client.DoNotRetryRegionException; 062import org.apache.hadoop.hbase.client.RegionInfo; 063import org.apache.hadoop.hbase.master.RackManager; 064import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 065import org.apache.yetus.audience.InterfaceAudience; 066import org.slf4j.Logger; 067import org.slf4j.LoggerFactory; 068 069import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; 070import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils; 071 072/** 073 * Tool for loading/unloading regions to/from given regionserver This tool can be run from Command 074 * line directly as a utility. Supports Ack/No Ack mode for loading/unloading operations.Ack mode 075 * acknowledges if regions are online after movement while noAck mode is best effort mode that 076 * improves performance but will still move on if region is stuck/not moved. Motivation behind noAck 077 * mode being RS shutdown where even if a Region is stuck, upon shutdown master will move it 078 * anyways. This can also be used by constructiong an Object using the builder and then calling 079 * {@link #load()} or {@link #unload()} methods for the desired operations. 080 */ 081@InterfaceAudience.Public 082public class RegionMover extends AbstractHBaseTool implements Closeable { 083 public static final String MOVE_RETRIES_MAX_KEY = "hbase.move.retries.max"; 084 public static final String MOVE_WAIT_MAX_KEY = "hbase.move.wait.max"; 085 public static final String SERVERSTART_WAIT_MAX_KEY = "hbase.serverstart.wait.max"; 086 public static final int DEFAULT_MOVE_RETRIES_MAX = 5; 087 public static final int DEFAULT_MOVE_WAIT_MAX = 60; 088 public static final int DEFAULT_SERVERSTART_WAIT_MAX = 180; 089 090 private static final Logger LOG = LoggerFactory.getLogger(RegionMover.class); 091 092 private RegionMoverBuilder rmbuilder; 093 private boolean ack = true; 094 private int maxthreads = 1; 095 private int timeout; 096 private String loadUnload; 097 private String hostname; 098 private String filename; 099 private String excludeFile; 100 private String designatedFile; 101 private int port; 102 private Connection conn; 103 private Admin admin; 104 private RackManager rackManager; 105 106 private RegionMover(RegionMoverBuilder builder) throws IOException { 107 this.hostname = builder.hostname; 108 this.filename = builder.filename; 109 this.excludeFile = builder.excludeFile; 110 this.designatedFile = builder.designatedFile; 111 this.maxthreads = builder.maxthreads; 112 this.ack = builder.ack; 113 this.port = builder.port; 114 this.timeout = builder.timeout; 115 setConf(builder.conf); 116 this.conn = ConnectionFactory.createConnection(conf); 117 this.admin = conn.getAdmin(); 118 // Only while running unit tests, builder.rackManager will not be null for the convenience of 119 // providing custom rackManager. Otherwise for regular workflow/user triggered action, 120 // builder.rackManager is supposed to be null. Hence, setter of builder.rackManager is 121 // provided as @InterfaceAudience.Private and it is commented that this is just 122 // to be used by unit test. 123 rackManager = builder.rackManager == null ? new RackManager(conf) : builder.rackManager; 124 } 125 126 private RegionMover() { 127 } 128 129 @Override 130 public void close() { 131 IOUtils.closeQuietly(this.admin, e -> LOG.warn("failed to close admin", e)); 132 IOUtils.closeQuietly(this.conn, e -> LOG.warn("failed to close conn", e)); 133 } 134 135 /** 136 * Builder for Region mover. Use the {@link #build()} method to create RegionMover object. Has 137 * {@link #filename(String)}, {@link #excludeFile(String)}, {@link #maxthreads(int)}, 138 * {@link #ack(boolean)}, {@link #timeout(int)}, {@link #designatedFile(String)} methods to set 139 * the corresponding options. 140 */ 141 public static class RegionMoverBuilder { 142 private boolean ack = true; 143 private int maxthreads = 1; 144 private int timeout = Integer.MAX_VALUE; 145 private String hostname; 146 private String filename; 147 private String excludeFile = null; 148 private String designatedFile = null; 149 private String defaultDir = System.getProperty("java.io.tmpdir"); 150 @InterfaceAudience.Private 151 final int port; 152 private final Configuration conf; 153 private RackManager rackManager; 154 155 public RegionMoverBuilder(String hostname) { 156 this(hostname, createConf()); 157 } 158 159 /** 160 * Creates a new configuration and sets region mover specific overrides 161 */ 162 private static Configuration createConf() { 163 Configuration conf = HBaseConfiguration.create(); 164 conf.setInt("hbase.client.prefetch.limit", 1); 165 conf.setInt("hbase.client.pause", 500); 166 conf.setInt("hbase.client.retries.number", 100); 167 return conf; 168 } 169 170 /** 171 * @param hostname Hostname to unload regions from or load regions to. Can be either hostname 172 * or hostname:port. 173 * @param conf Configuration object 174 */ 175 public RegionMoverBuilder(String hostname, Configuration conf) { 176 String[] splitHostname = hostname.toLowerCase().split(":"); 177 this.hostname = splitHostname[0]; 178 if (splitHostname.length == 2) { 179 this.port = Integer.parseInt(splitHostname[1]); 180 } else { 181 this.port = conf.getInt(HConstants.REGIONSERVER_PORT, HConstants.DEFAULT_REGIONSERVER_PORT); 182 } 183 this.filename = defaultDir + File.separator + System.getProperty("user.name") + this.hostname 184 + ":" + Integer.toString(this.port); 185 this.conf = conf; 186 } 187 188 /** 189 * Path of file where regions will be written to during unloading/read from during loading 190 * @param filename 191 * @return RegionMoverBuilder object 192 */ 193 public RegionMoverBuilder filename(String filename) { 194 this.filename = filename; 195 return this; 196 } 197 198 /** 199 * Set the max number of threads that will be used to move regions 200 */ 201 public RegionMoverBuilder maxthreads(int threads) { 202 this.maxthreads = threads; 203 return this; 204 } 205 206 /** 207 * Path of file containing hostnames to be excluded during region movement. Exclude file should 208 * have 'host:port' per line. Port is mandatory here as we can have many RS running on a single 209 * host. 210 */ 211 public RegionMoverBuilder excludeFile(String excludefile) { 212 this.excludeFile = excludefile; 213 return this; 214 } 215 216 /** 217 * Set the designated file. Designated file contains hostnames where region moves. Designated 218 * file should have 'host:port' per line. Port is mandatory here as we can have many RS running 219 * on a single host. 220 * @param designatedFile The designated file 221 * @return RegionMoverBuilder object 222 */ 223 public RegionMoverBuilder designatedFile(String designatedFile) { 224 this.designatedFile = designatedFile; 225 return this; 226 } 227 228 /** 229 * Set ack/noAck mode. 230 * <p> 231 * In ack mode regions are acknowledged before and after moving and the move is retried 232 * hbase.move.retries.max times, if unsuccessful we quit with exit code 1.No Ack mode is a best 233 * effort mode,each region movement is tried once.This can be used during graceful shutdown as 234 * even if we have a stuck region,upon shutdown it'll be reassigned anyway. 235 * <p> 236 * @param ack 237 * @return RegionMoverBuilder object 238 */ 239 public RegionMoverBuilder ack(boolean ack) { 240 this.ack = ack; 241 return this; 242 } 243 244 /** 245 * Set the timeout for Load/Unload operation in seconds.This is a global timeout,threadpool for 246 * movers also have a separate time which is hbase.move.wait.max * number of regions to 247 * load/unload 248 * @param timeout in seconds 249 * @return RegionMoverBuilder object 250 */ 251 public RegionMoverBuilder timeout(int timeout) { 252 this.timeout = timeout; 253 return this; 254 } 255 256 /** 257 * Set specific rackManager implementation. 258 * This setter method is for testing purpose only. 259 * 260 * @param rackManager rackManager impl 261 * @return RegionMoverBuilder object 262 */ 263 @InterfaceAudience.Private 264 public RegionMoverBuilder rackManager(RackManager rackManager) { 265 this.rackManager = rackManager; 266 return this; 267 } 268 269 /** 270 * This method builds the appropriate RegionMover object which can then be used to load/unload 271 * using load and unload methods 272 * @return RegionMover object 273 */ 274 public RegionMover build() throws IOException { 275 return new RegionMover(this); 276 } 277 } 278 279 /** 280 * Loads the specified {@link #hostname} with regions listed in the {@link #filename} RegionMover 281 * Object has to be created using {@link #RegionMover(RegionMoverBuilder)} 282 * @return true if loading succeeded, false otherwise 283 */ 284 public boolean load() throws ExecutionException, InterruptedException, TimeoutException { 285 ExecutorService loadPool = Executors.newFixedThreadPool(1); 286 Future<Boolean> loadTask = loadPool.submit(getMetaRegionMovePlan()); 287 boolean isMetaMoved = waitTaskToFinish(loadPool, loadTask, "loading"); 288 if (!isMetaMoved) { 289 return false; 290 } 291 loadPool = Executors.newFixedThreadPool(1); 292 loadTask = loadPool.submit(getNonMetaRegionsMovePlan()); 293 return waitTaskToFinish(loadPool, loadTask, "loading"); 294 } 295 296 private Callable<Boolean> getMetaRegionMovePlan() { 297 return getRegionsMovePlan(true); 298 } 299 300 private Callable<Boolean> getNonMetaRegionsMovePlan() { 301 return getRegionsMovePlan(false); 302 } 303 304 private Callable<Boolean> getRegionsMovePlan(boolean moveMetaRegion) { 305 return () -> { 306 try { 307 List<RegionInfo> regionsToMove = readRegionsFromFile(filename); 308 if (regionsToMove.isEmpty()) { 309 LOG.info("No regions to load.Exiting"); 310 return true; 311 } 312 Optional<RegionInfo> metaRegion = getMetaRegionInfoIfToBeMoved(regionsToMove); 313 if (moveMetaRegion) { 314 if (metaRegion.isPresent()) { 315 loadRegions(Collections.singletonList(metaRegion.get())); 316 } 317 } else { 318 metaRegion.ifPresent(regionsToMove::remove); 319 loadRegions(regionsToMove); 320 } 321 } catch (Exception e) { 322 LOG.error("Error while loading regions to " + hostname, e); 323 return false; 324 } 325 return true; 326 }; 327 } 328 329 private Optional<RegionInfo> getMetaRegionInfoIfToBeMoved(List<RegionInfo> regionsToMove) { 330 return regionsToMove.stream().filter(RegionInfo::isMetaRegion).findFirst(); 331 } 332 333 private void loadRegions(List<RegionInfo> regionsToMove) 334 throws Exception { 335 ServerName server = getTargetServer(); 336 List<RegionInfo> movedRegions = Collections.synchronizedList(new ArrayList<>()); 337 LOG.info( 338 "Moving " + regionsToMove.size() + " regions to " + server + " using " + this.maxthreads 339 + " threads.Ack mode:" + this.ack); 340 341 final ExecutorService moveRegionsPool = Executors.newFixedThreadPool(this.maxthreads); 342 List<Future<Boolean>> taskList = new ArrayList<>(); 343 int counter = 0; 344 while (counter < regionsToMove.size()) { 345 RegionInfo region = regionsToMove.get(counter); 346 ServerName currentServer = MoveWithAck.getServerNameForRegion(region, admin, conn); 347 if (currentServer == null) { 348 LOG.warn( 349 "Could not get server for Region:" + region.getRegionNameAsString() + " moving on"); 350 counter++; 351 continue; 352 } else if (server.equals(currentServer)) { 353 LOG.info( 354 "Region " + region.getRegionNameAsString() + " is already on target server=" + server); 355 counter++; 356 continue; 357 } 358 if (ack) { 359 Future<Boolean> task = moveRegionsPool 360 .submit(new MoveWithAck(conn, region, currentServer, server, movedRegions)); 361 taskList.add(task); 362 } else { 363 Future<Boolean> task = moveRegionsPool 364 .submit(new MoveWithoutAck(admin, region, currentServer, server, movedRegions)); 365 taskList.add(task); 366 } 367 counter++; 368 } 369 370 moveRegionsPool.shutdown(); 371 long timeoutInSeconds = regionsToMove.size() * admin.getConfiguration() 372 .getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX); 373 waitMoveTasksToFinish(moveRegionsPool, taskList, timeoutInSeconds); 374 } 375 376 /** 377 * Unload regions from given {@link #hostname} using ack/noAck mode and {@link #maxthreads}.In 378 * noAck mode we do not make sure that region is successfully online on the target region 379 * server,hence it is best effort.We do not unload regions to hostnames given in 380 * {@link #excludeFile}. If designatedFile is present with some contents, we will unload regions 381 * to hostnames provided in {@link #designatedFile} 382 * 383 * @return true if unloading succeeded, false otherwise 384 */ 385 public boolean unload() throws InterruptedException, ExecutionException, TimeoutException { 386 return unloadRegions(false); 387 } 388 389 /** 390 * Unload regions from given {@link #hostname} using ack/noAck mode and {@link #maxthreads}.In 391 * noAck mode we do not make sure that region is successfully online on the target region 392 * server,hence it is best effort.We do not unload regions to hostnames given in 393 * {@link #excludeFile}. If designatedFile is present with some contents, we will unload regions 394 * to hostnames provided in {@link #designatedFile}. 395 * While unloading regions, destination RegionServers are selected from different rack i.e 396 * regions should not move to any RegionServers that belong to same rack as source RegionServer. 397 * 398 * @return true if unloading succeeded, false otherwise 399 */ 400 public boolean unloadFromRack() 401 throws InterruptedException, ExecutionException, TimeoutException { 402 return unloadRegions(true); 403 } 404 405 private boolean unloadRegions(boolean unloadFromRack) throws InterruptedException, 406 ExecutionException, TimeoutException { 407 deleteFile(this.filename); 408 ExecutorService unloadPool = Executors.newFixedThreadPool(1); 409 Future<Boolean> unloadTask = unloadPool.submit(() -> { 410 List<RegionInfo> movedRegions = Collections.synchronizedList(new ArrayList<>()); 411 try { 412 // Get Online RegionServers 413 List<ServerName> regionServers = new ArrayList<>(); 414 regionServers.addAll(admin.getRegionServers()); 415 // Remove the host Region server from target Region Servers list 416 ServerName server = stripServer(regionServers, hostname, port); 417 if (server == null) { 418 LOG.info("Could not find server '{}:{}' in the set of region servers. giving up.", 419 hostname, port); 420 LOG.debug("List of region servers: {}", regionServers); 421 return false; 422 } 423 // Remove RS not present in the designated file 424 includeExcludeRegionServers(designatedFile, regionServers, true); 425 426 // Remove RS present in the exclude file 427 includeExcludeRegionServers(excludeFile, regionServers, false); 428 429 if (unloadFromRack) { 430 // remove regionServers that belong to same rack (as source host) since the goal is to 431 // unload regions from source regionServer to destination regionServers 432 // that belong to different rack only. 433 String sourceRack = rackManager.getRack(server); 434 List<String> racks = rackManager.getRack(regionServers); 435 Iterator<ServerName> iterator = regionServers.iterator(); 436 int i = 0; 437 while (iterator.hasNext()) { 438 iterator.next(); 439 if (racks.size() > i && racks.get(i) != null && racks.get(i).equals(sourceRack)) { 440 iterator.remove(); 441 } 442 i++; 443 } 444 } 445 446 // Remove decommissioned RS 447 Set<ServerName> decommissionedRS = new HashSet<>(admin.listDecommissionedRegionServers()); 448 if (CollectionUtils.isNotEmpty(decommissionedRS)) { 449 regionServers.removeIf(decommissionedRS::contains); 450 LOG.debug("Excluded RegionServers from unloading regions to because they " + 451 "are marked as decommissioned. Servers: {}", decommissionedRS); 452 } 453 454 stripMaster(regionServers); 455 if (regionServers.isEmpty()) { 456 LOG.warn("No Regions were moved - no servers available"); 457 return false; 458 } 459 unloadRegions(server, regionServers, movedRegions); 460 } catch (Exception e) { 461 LOG.error("Error while unloading regions ", e); 462 return false; 463 } finally { 464 if (movedRegions != null) { 465 writeFile(filename, movedRegions); 466 } 467 } 468 return true; 469 }); 470 return waitTaskToFinish(unloadPool, unloadTask, "unloading"); 471 } 472 473 private void unloadRegions(ServerName server, List<ServerName> regionServers, 474 List<RegionInfo> movedRegions) throws Exception { 475 while (true) { 476 List<RegionInfo> regionsToMove = admin.getRegions(server); 477 regionsToMove.removeAll(movedRegions); 478 if (regionsToMove.isEmpty()) { 479 LOG.info("No Regions to move....Quitting now"); 480 break; 481 } 482 LOG.info("Moving {} regions from {} to {} servers using {} threads .Ack Mode: {}", 483 regionsToMove.size(), this.hostname, regionServers.size(), this.maxthreads, ack); 484 485 Optional<RegionInfo> metaRegion = getMetaRegionInfoIfToBeMoved(regionsToMove); 486 if (metaRegion.isPresent()) { 487 RegionInfo meta = metaRegion.get(); 488 submitRegionMovesWhileUnloading(server, regionServers, movedRegions, 489 Collections.singletonList(meta)); 490 regionsToMove.remove(meta); 491 } 492 submitRegionMovesWhileUnloading(server, regionServers, movedRegions, regionsToMove); 493 } 494 } 495 496 private void submitRegionMovesWhileUnloading(ServerName server, List<ServerName> regionServers, 497 List<RegionInfo> movedRegions, List<RegionInfo> regionsToMove) throws Exception { 498 final ExecutorService moveRegionsPool = Executors.newFixedThreadPool(this.maxthreads); 499 List<Future<Boolean>> taskList = new ArrayList<>(); 500 int serverIndex = 0; 501 for (RegionInfo regionToMove : regionsToMove) { 502 if (ack) { 503 Future<Boolean> task = moveRegionsPool.submit( 504 new MoveWithAck(conn, regionToMove, server, regionServers.get(serverIndex), 505 movedRegions)); 506 taskList.add(task); 507 } else { 508 Future<Boolean> task = moveRegionsPool.submit( 509 new MoveWithoutAck(admin, regionToMove, server, regionServers.get(serverIndex), 510 movedRegions)); 511 taskList.add(task); 512 } 513 serverIndex = (serverIndex + 1) % regionServers.size(); 514 } 515 moveRegionsPool.shutdown(); 516 long timeoutInSeconds = regionsToMove.size() * admin.getConfiguration() 517 .getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX); 518 waitMoveTasksToFinish(moveRegionsPool, taskList, timeoutInSeconds); 519 } 520 521 private boolean waitTaskToFinish(ExecutorService pool, Future<Boolean> task, String operation) 522 throws TimeoutException, InterruptedException, ExecutionException { 523 pool.shutdown(); 524 try { 525 if (!pool.awaitTermination((long) this.timeout, TimeUnit.SECONDS)) { 526 LOG.warn( 527 "Timed out before finishing the " + operation + " operation. Timeout: " + this.timeout 528 + "sec"); 529 pool.shutdownNow(); 530 } 531 } catch (InterruptedException e) { 532 pool.shutdownNow(); 533 Thread.currentThread().interrupt(); 534 } 535 try { 536 return task.get(5, TimeUnit.SECONDS); 537 } catch (InterruptedException e) { 538 LOG.warn("Interrupted while " + operation + " Regions on " + this.hostname, e); 539 throw e; 540 } catch (ExecutionException e) { 541 LOG.error("Error while " + operation + " regions on RegionServer " + this.hostname, e); 542 throw e; 543 } 544 } 545 546 private void waitMoveTasksToFinish(ExecutorService moveRegionsPool, 547 List<Future<Boolean>> taskList, long timeoutInSeconds) throws Exception { 548 try { 549 if (!moveRegionsPool.awaitTermination(timeoutInSeconds, TimeUnit.SECONDS)) { 550 moveRegionsPool.shutdownNow(); 551 } 552 } catch (InterruptedException e) { 553 moveRegionsPool.shutdownNow(); 554 Thread.currentThread().interrupt(); 555 } 556 for (Future<Boolean> future : taskList) { 557 try { 558 // if even after shutdownNow threads are stuck we wait for 5 secs max 559 if (!future.get(5, TimeUnit.SECONDS)) { 560 LOG.error("Was Not able to move region....Exiting Now"); 561 throw new Exception("Could not move region Exception"); 562 } 563 } catch (InterruptedException e) { 564 LOG.error("Interrupted while waiting for Thread to Complete " + e.getMessage(), e); 565 throw e; 566 } catch (ExecutionException e) { 567 boolean ignoreFailure = ignoreRegionMoveFailure(e); 568 if (ignoreFailure) { 569 LOG.debug("Ignore region move failure, it might have been split/merged.", e); 570 } else { 571 LOG.error("Got Exception From Thread While moving region {}", e.getMessage(), e); 572 throw e; 573 } 574 } catch (CancellationException e) { 575 LOG.error("Thread for moving region cancelled. Timeout for cancellation:" + timeoutInSeconds 576 + "secs", e); 577 throw e; 578 } 579 } 580 } 581 582 private boolean ignoreRegionMoveFailure(ExecutionException e) { 583 boolean ignoreFailure = false; 584 if (e.getCause() instanceof UnknownRegionException) { 585 // region does not exist anymore 586 ignoreFailure = true; 587 } else if (e.getCause() instanceof DoNotRetryRegionException 588 && e.getCause().getMessage() != null && e.getCause().getMessage() 589 .contains(AssignmentManager.UNEXPECTED_STATE_REGION + "state=SPLIT,")) { 590 // region is recently split 591 ignoreFailure = true; 592 } 593 return ignoreFailure; 594 } 595 596 private ServerName getTargetServer() throws Exception { 597 ServerName server = null; 598 int maxWaitInSeconds = 599 admin.getConfiguration().getInt(SERVERSTART_WAIT_MAX_KEY, DEFAULT_SERVERSTART_WAIT_MAX); 600 long maxWait = EnvironmentEdgeManager.currentTime() + maxWaitInSeconds * 1000; 601 while (EnvironmentEdgeManager.currentTime() < maxWait) { 602 try { 603 List<ServerName> regionServers = new ArrayList<>(); 604 regionServers.addAll(admin.getRegionServers()); 605 // Remove the host Region server from target Region Servers list 606 server = stripServer(regionServers, hostname, port); 607 if (server != null) { 608 break; 609 } else { 610 LOG.warn("Server " + hostname + ":" + port + " is not up yet, waiting"); 611 } 612 } catch (IOException e) { 613 LOG.warn("Could not get list of region servers", e); 614 } 615 Thread.sleep(500); 616 } 617 if (server == null) { 618 LOG.error("Server " + hostname + ":" + port + " is not up. Giving up."); 619 throw new Exception("Server " + hostname + ":" + port + " to load regions not online"); 620 } 621 return server; 622 } 623 624 private List<RegionInfo> readRegionsFromFile(String filename) throws IOException { 625 List<RegionInfo> regions = new ArrayList<>(); 626 File f = new File(filename); 627 if (!f.exists()) { 628 return regions; 629 } 630 try (DataInputStream dis = new DataInputStream( 631 new BufferedInputStream(new FileInputStream(f)))) { 632 int numRegions = dis.readInt(); 633 int index = 0; 634 while (index < numRegions) { 635 regions.add(RegionInfo.parseFromOrNull(Bytes.readByteArray(dis))); 636 index++; 637 } 638 } catch (IOException e) { 639 LOG.error("Error while reading regions from file:" + filename, e); 640 throw e; 641 } 642 return regions; 643 } 644 645 /** 646 * Write the number of regions moved in the first line followed by regions moved in subsequent 647 * lines 648 */ 649 private void writeFile(String filename, List<RegionInfo> movedRegions) throws IOException { 650 try (DataOutputStream dos = new DataOutputStream( 651 new BufferedOutputStream(new FileOutputStream(filename)))) { 652 dos.writeInt(movedRegions.size()); 653 for (RegionInfo region : movedRegions) { 654 Bytes.writeByteArray(dos, RegionInfo.toByteArray(region)); 655 } 656 } catch (IOException e) { 657 LOG.error( 658 "ERROR: Was Not able to write regions moved to output file but moved " + movedRegions 659 .size() + " regions", e); 660 throw e; 661 } 662 } 663 664 private void deleteFile(String filename) { 665 File f = new File(filename); 666 if (f.exists()) { 667 f.delete(); 668 } 669 } 670 671 /** 672 * @param filename The file should have 'host:port' per line 673 * @return List of servers from the file in format 'hostname:port'. 674 */ 675 private List<String> readServersFromFile(String filename) throws IOException { 676 List<String> servers = new ArrayList<>(); 677 if (filename != null) { 678 try { 679 Files.readAllLines(Paths.get(filename)).stream().map(String::trim) 680 .filter(((Predicate<String>) String::isEmpty).negate()).map(String::toLowerCase) 681 .forEach(servers::add); 682 } catch (IOException e) { 683 LOG.error("Exception while reading servers from file,", e); 684 throw e; 685 } 686 } 687 return servers; 688 } 689 690 /** 691 * Designates or excludes the servername whose hostname and port portion matches the list given 692 * in the file. 693 * Example:<br> 694 * If you want to designated RSs, suppose designatedFile has RS1, regionServers has RS1, RS2 and 695 * RS3. When we call includeExcludeRegionServers(designatedFile, regionServers, true), RS2 and 696 * RS3 are removed from regionServers list so that regions can move to only RS1. 697 * If you want to exclude RSs, suppose excludeFile has RS1, regionServers has RS1, RS2 and RS3. 698 * When we call includeExcludeRegionServers(excludeFile, servers, false), RS1 is removed from 699 * regionServers list so that regions can move to only RS2 and RS3. 700 */ 701 private void includeExcludeRegionServers(String fileName, List<ServerName> regionServers, 702 boolean isInclude) throws IOException { 703 if (fileName != null) { 704 List<String> servers = readServersFromFile(fileName); 705 if (servers.isEmpty()) { 706 LOG.warn("No servers provided in the file: {}." + fileName); 707 return; 708 } 709 Iterator<ServerName> i = regionServers.iterator(); 710 while (i.hasNext()) { 711 String rs = i.next().getServerName(); 712 String rsPort = rs.split(ServerName.SERVERNAME_SEPARATOR)[0].toLowerCase() + ":" + rs 713 .split(ServerName.SERVERNAME_SEPARATOR)[1]; 714 if (isInclude != servers.contains(rsPort)) { 715 i.remove(); 716 } 717 } 718 } 719 } 720 721 /** 722 * Exclude master from list of RSs to move regions to 723 */ 724 private void stripMaster(List<ServerName> regionServers) throws IOException { 725 ServerName master = admin.getClusterMetrics(EnumSet.of(Option.MASTER)).getMasterName(); 726 stripServer(regionServers, master.getHostname(), master.getPort()); 727 } 728 729 /** 730 * Remove the servername whose hostname and port portion matches from the passed array of servers. 731 * Returns as side-effect the servername removed. 732 * @return server removed from list of Region Servers 733 */ 734 private ServerName stripServer(List<ServerName> regionServers, String hostname, int port) { 735 for (Iterator<ServerName> iter = regionServers.iterator(); iter.hasNext();) { 736 ServerName server = iter.next(); 737 if (server.getAddress().getHostname().equalsIgnoreCase(hostname) && 738 server.getAddress().getPort() == port) { 739 iter.remove(); 740 return server; 741 } 742 } 743 return null; 744 } 745 746 @Override 747 protected void addOptions() { 748 this.addRequiredOptWithArg("r", "regionserverhost", "region server <hostname>|<hostname:port>"); 749 this.addRequiredOptWithArg("o", "operation", "Expected: load/unload/unload_from_rack"); 750 this.addOptWithArg("m", "maxthreads", 751 "Define the maximum number of threads to use to unload and reload the regions"); 752 this.addOptWithArg("x", "excludefile", 753 "File with <hostname:port> per line to exclude as unload targets; default excludes only " 754 + "target host; useful for rack decommisioning."); 755 this.addOptWithArg("d","designatedfile","File with <hostname:port> per line as unload targets;" 756 + "default is all online hosts"); 757 this.addOptWithArg("f", "filename", 758 "File to save regions list into unloading, or read from loading; " 759 + "default /tmp/<usernamehostname:port>"); 760 this.addOptNoArg("n", "noack", 761 "Turn on No-Ack mode(default: false) which won't check if region is online on target " 762 + "RegionServer, hence best effort. This is more performant in unloading and loading " 763 + "but might lead to region being unavailable for some time till master reassigns it " 764 + "in case the move failed"); 765 this.addOptWithArg("t", "timeout", "timeout in seconds after which the tool will exit " 766 + "irrespective of whether it finished or not;default Integer.MAX_VALUE"); 767 } 768 769 @Override 770 protected void processOptions(CommandLine cmd) { 771 String hostname = cmd.getOptionValue("r"); 772 rmbuilder = new RegionMoverBuilder(hostname); 773 if (cmd.hasOption('m')) { 774 rmbuilder.maxthreads(Integer.parseInt(cmd.getOptionValue('m'))); 775 } 776 if (cmd.hasOption('n')) { 777 rmbuilder.ack(false); 778 } 779 if (cmd.hasOption('f')) { 780 rmbuilder.filename(cmd.getOptionValue('f')); 781 } 782 if (cmd.hasOption('x')) { 783 rmbuilder.excludeFile(cmd.getOptionValue('x')); 784 } 785 if (cmd.hasOption('d')) { 786 rmbuilder.designatedFile(cmd.getOptionValue('d')); 787 } 788 if (cmd.hasOption('t')) { 789 rmbuilder.timeout(Integer.parseInt(cmd.getOptionValue('t'))); 790 } 791 this.loadUnload = cmd.getOptionValue("o").toLowerCase(Locale.ROOT); 792 } 793 794 @Override 795 protected int doWork() throws Exception { 796 boolean success; 797 try (RegionMover rm = rmbuilder.build()) { 798 if (loadUnload.equalsIgnoreCase("load")) { 799 success = rm.load(); 800 } else if (loadUnload.equalsIgnoreCase("unload")) { 801 success = rm.unload(); 802 } else if (loadUnload.equalsIgnoreCase("unload_from_rack")) { 803 success = rm.unloadFromRack(); 804 } else { 805 printUsage(); 806 success = false; 807 } 808 } 809 return (success ? 0 : 1); 810 } 811 812 public static void main(String[] args) { 813 try (RegionMover mover = new RegionMover()) { 814 mover.doStaticMain(args); 815 } 816 } 817}