001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.util;
021
022import java.io.BufferedInputStream;
023import java.io.BufferedOutputStream;
024import java.io.Closeable;
025import java.io.DataInputStream;
026import java.io.DataOutputStream;
027import java.io.File;
028import java.io.FileInputStream;
029import java.io.FileOutputStream;
030import java.io.IOException;
031import java.nio.file.Files;
032import java.nio.file.Paths;
033import java.util.ArrayList;
034import java.util.Collections;
035import java.util.EnumSet;
036import java.util.HashSet;
037import java.util.Iterator;
038import java.util.List;
039import java.util.Locale;
040import java.util.Optional;
041import java.util.Set;
042import java.util.concurrent.Callable;
043import java.util.concurrent.CancellationException;
044import java.util.concurrent.ExecutionException;
045import java.util.concurrent.ExecutorService;
046import java.util.concurrent.Executors;
047import java.util.concurrent.Future;
048import java.util.concurrent.TimeUnit;
049import java.util.concurrent.TimeoutException;
050import java.util.function.Predicate;
051import org.apache.commons.io.IOUtils;
052import org.apache.hadoop.conf.Configuration;
053import org.apache.hadoop.hbase.ClusterMetrics.Option;
054import org.apache.hadoop.hbase.HBaseConfiguration;
055import org.apache.hadoop.hbase.HConstants;
056import org.apache.hadoop.hbase.ServerName;
057import org.apache.hadoop.hbase.UnknownRegionException;
058import org.apache.hadoop.hbase.client.Admin;
059import org.apache.hadoop.hbase.client.Connection;
060import org.apache.hadoop.hbase.client.ConnectionFactory;
061import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
062import org.apache.hadoop.hbase.client.RegionInfo;
063import org.apache.hadoop.hbase.master.RackManager;
064import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
065import org.apache.yetus.audience.InterfaceAudience;
066import org.slf4j.Logger;
067import org.slf4j.LoggerFactory;
068
069import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
070import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils;
071
072/**
073 * Tool for loading/unloading regions to/from given regionserver This tool can be run from Command
074 * line directly as a utility. Supports Ack/No Ack mode for loading/unloading operations.Ack mode
075 * acknowledges if regions are online after movement while noAck mode is best effort mode that
076 * improves performance but will still move on if region is stuck/not moved. Motivation behind noAck
077 * mode being RS shutdown where even if a Region is stuck, upon shutdown master will move it
078 * anyways. This can also be used by constructiong an Object using the builder and then calling
079 * {@link #load()} or {@link #unload()} methods for the desired operations.
080 */
081@InterfaceAudience.Public
082public class RegionMover extends AbstractHBaseTool implements Closeable {
083  public static final String MOVE_RETRIES_MAX_KEY = "hbase.move.retries.max";
084  public static final String MOVE_WAIT_MAX_KEY = "hbase.move.wait.max";
085  public static final String SERVERSTART_WAIT_MAX_KEY = "hbase.serverstart.wait.max";
086  public static final int DEFAULT_MOVE_RETRIES_MAX = 5;
087  public static final int DEFAULT_MOVE_WAIT_MAX = 60;
088  public static final int DEFAULT_SERVERSTART_WAIT_MAX = 180;
089
090  private static final Logger LOG = LoggerFactory.getLogger(RegionMover.class);
091
092  private RegionMoverBuilder rmbuilder;
093  private boolean ack = true;
094  private int maxthreads = 1;
095  private int timeout;
096  private String loadUnload;
097  private String hostname;
098  private String filename;
099  private String excludeFile;
100  private String designatedFile;
101  private int port;
102  private Connection conn;
103  private Admin admin;
104  private RackManager rackManager;
105
106  private RegionMover(RegionMoverBuilder builder) throws IOException {
107    this.hostname = builder.hostname;
108    this.filename = builder.filename;
109    this.excludeFile = builder.excludeFile;
110    this.designatedFile = builder.designatedFile;
111    this.maxthreads = builder.maxthreads;
112    this.ack = builder.ack;
113    this.port = builder.port;
114    this.timeout = builder.timeout;
115    setConf(builder.conf);
116    this.conn = ConnectionFactory.createConnection(conf);
117    this.admin = conn.getAdmin();
118    // Only while running unit tests, builder.rackManager will not be null for the convenience of
119    // providing custom rackManager. Otherwise for regular workflow/user triggered action,
120    // builder.rackManager is supposed to be null. Hence, setter of builder.rackManager is
121    // provided as @InterfaceAudience.Private and it is commented that this is just
122    // to be used by unit test.
123    rackManager = builder.rackManager == null ? new RackManager(conf) : builder.rackManager;
124  }
125
126  private RegionMover() {
127  }
128
129  @Override
130  public void close() {
131    IOUtils.closeQuietly(this.admin, e -> LOG.warn("failed to close admin", e));
132    IOUtils.closeQuietly(this.conn, e -> LOG.warn("failed to close conn", e));
133  }
134
135  /**
136   * Builder for Region mover. Use the {@link #build()} method to create RegionMover object. Has
137   * {@link #filename(String)}, {@link #excludeFile(String)}, {@link #maxthreads(int)},
138   * {@link #ack(boolean)}, {@link #timeout(int)}, {@link #designatedFile(String)} methods to set
139   * the corresponding options.
140   */
141  public static class RegionMoverBuilder {
142    private boolean ack = true;
143    private int maxthreads = 1;
144    private int timeout = Integer.MAX_VALUE;
145    private String hostname;
146    private String filename;
147    private String excludeFile = null;
148    private String designatedFile = null;
149    private String defaultDir = System.getProperty("java.io.tmpdir");
150    @InterfaceAudience.Private
151    final int port;
152    private final Configuration conf;
153    private RackManager rackManager;
154
155    public RegionMoverBuilder(String hostname) {
156      this(hostname, createConf());
157    }
158
159    /**
160     * Creates a new configuration and sets region mover specific overrides
161     */
162    private static Configuration createConf() {
163      Configuration conf = HBaseConfiguration.create();
164      conf.setInt("hbase.client.prefetch.limit", 1);
165      conf.setInt("hbase.client.pause", 500);
166      conf.setInt("hbase.client.retries.number", 100);
167      return conf;
168    }
169
170    /**
171     * @param hostname Hostname to unload regions from or load regions to. Can be either hostname
172     *     or hostname:port.
173     * @param conf Configuration object
174     */
175    public RegionMoverBuilder(String hostname, Configuration conf) {
176      String[] splitHostname = hostname.toLowerCase().split(":");
177      this.hostname = splitHostname[0];
178      if (splitHostname.length == 2) {
179        this.port = Integer.parseInt(splitHostname[1]);
180      } else {
181        this.port = conf.getInt(HConstants.REGIONSERVER_PORT, HConstants.DEFAULT_REGIONSERVER_PORT);
182      }
183      this.filename = defaultDir + File.separator + System.getProperty("user.name") + this.hostname
184        + ":" + Integer.toString(this.port);
185      this.conf = conf;
186    }
187
188    /**
189     * Path of file where regions will be written to during unloading/read from during loading
190     * @param filename
191     * @return RegionMoverBuilder object
192     */
193    public RegionMoverBuilder filename(String filename) {
194      this.filename = filename;
195      return this;
196    }
197
198    /**
199     * Set the max number of threads that will be used to move regions
200     */
201    public RegionMoverBuilder maxthreads(int threads) {
202      this.maxthreads = threads;
203      return this;
204    }
205
206    /**
207     * Path of file containing hostnames to be excluded during region movement. Exclude file should
208     * have 'host:port' per line. Port is mandatory here as we can have many RS running on a single
209     * host.
210     */
211    public RegionMoverBuilder excludeFile(String excludefile) {
212      this.excludeFile = excludefile;
213      return this;
214    }
215
216    /**
217     * Set the designated file. Designated file contains hostnames where region moves. Designated
218     * file should have 'host:port' per line. Port is mandatory here as we can have many RS running
219     * on a single host.
220     * @param designatedFile The designated file
221     * @return RegionMoverBuilder object
222     */
223    public RegionMoverBuilder designatedFile(String designatedFile) {
224      this.designatedFile = designatedFile;
225      return this;
226    }
227
228    /**
229     * Set ack/noAck mode.
230     * <p>
231     * In ack mode regions are acknowledged before and after moving and the move is retried
232     * hbase.move.retries.max times, if unsuccessful we quit with exit code 1.No Ack mode is a best
233     * effort mode,each region movement is tried once.This can be used during graceful shutdown as
234     * even if we have a stuck region,upon shutdown it'll be reassigned anyway.
235     * <p>
236     * @param ack
237     * @return RegionMoverBuilder object
238     */
239    public RegionMoverBuilder ack(boolean ack) {
240      this.ack = ack;
241      return this;
242    }
243
244    /**
245     * Set the timeout for Load/Unload operation in seconds.This is a global timeout,threadpool for
246     * movers also have a separate time which is hbase.move.wait.max * number of regions to
247     * load/unload
248     * @param timeout in seconds
249     * @return RegionMoverBuilder object
250     */
251    public RegionMoverBuilder timeout(int timeout) {
252      this.timeout = timeout;
253      return this;
254    }
255
256    /**
257     * Set specific rackManager implementation.
258     * This setter method is for testing purpose only.
259     *
260     * @param rackManager rackManager impl
261     * @return RegionMoverBuilder object
262     */
263    @InterfaceAudience.Private
264    public RegionMoverBuilder rackManager(RackManager rackManager) {
265      this.rackManager = rackManager;
266      return this;
267    }
268
269    /**
270     * This method builds the appropriate RegionMover object which can then be used to load/unload
271     * using load and unload methods
272     * @return RegionMover object
273     */
274    public RegionMover build() throws IOException {
275      return new RegionMover(this);
276    }
277  }
278
279  /**
280   * Loads the specified {@link #hostname} with regions listed in the {@link #filename} RegionMover
281   * Object has to be created using {@link #RegionMover(RegionMoverBuilder)}
282   * @return true if loading succeeded, false otherwise
283   */
284  public boolean load() throws ExecutionException, InterruptedException, TimeoutException {
285    ExecutorService loadPool = Executors.newFixedThreadPool(1);
286    Future<Boolean> loadTask = loadPool.submit(getMetaRegionMovePlan());
287    boolean isMetaMoved = waitTaskToFinish(loadPool, loadTask, "loading");
288    if (!isMetaMoved) {
289      return false;
290    }
291    loadPool = Executors.newFixedThreadPool(1);
292    loadTask = loadPool.submit(getNonMetaRegionsMovePlan());
293    return waitTaskToFinish(loadPool, loadTask, "loading");
294  }
295
296  private Callable<Boolean> getMetaRegionMovePlan() {
297    return getRegionsMovePlan(true);
298  }
299
300  private Callable<Boolean> getNonMetaRegionsMovePlan() {
301    return getRegionsMovePlan(false);
302  }
303
304  private Callable<Boolean> getRegionsMovePlan(boolean moveMetaRegion) {
305    return () -> {
306      try {
307        List<RegionInfo> regionsToMove = readRegionsFromFile(filename);
308        if (regionsToMove.isEmpty()) {
309          LOG.info("No regions to load.Exiting");
310          return true;
311        }
312        Optional<RegionInfo> metaRegion = getMetaRegionInfoIfToBeMoved(regionsToMove);
313        if (moveMetaRegion) {
314          if (metaRegion.isPresent()) {
315            loadRegions(Collections.singletonList(metaRegion.get()));
316          }
317        } else {
318          metaRegion.ifPresent(regionsToMove::remove);
319          loadRegions(regionsToMove);
320        }
321      } catch (Exception e) {
322        LOG.error("Error while loading regions to " + hostname, e);
323        return false;
324      }
325      return true;
326    };
327  }
328
329  private Optional<RegionInfo> getMetaRegionInfoIfToBeMoved(List<RegionInfo> regionsToMove) {
330    return regionsToMove.stream().filter(RegionInfo::isMetaRegion).findFirst();
331  }
332
333  private void loadRegions(List<RegionInfo> regionsToMove)
334      throws Exception {
335    ServerName server = getTargetServer();
336    List<RegionInfo> movedRegions = Collections.synchronizedList(new ArrayList<>());
337    LOG.info(
338        "Moving " + regionsToMove.size() + " regions to " + server + " using " + this.maxthreads
339            + " threads.Ack mode:" + this.ack);
340
341    final ExecutorService moveRegionsPool = Executors.newFixedThreadPool(this.maxthreads);
342    List<Future<Boolean>> taskList = new ArrayList<>();
343    int counter = 0;
344    while (counter < regionsToMove.size()) {
345      RegionInfo region = regionsToMove.get(counter);
346      ServerName currentServer = MoveWithAck.getServerNameForRegion(region, admin, conn);
347      if (currentServer == null) {
348        LOG.warn(
349            "Could not get server for Region:" + region.getRegionNameAsString() + " moving on");
350        counter++;
351        continue;
352      } else if (server.equals(currentServer)) {
353        LOG.info(
354            "Region " + region.getRegionNameAsString() + " is already on target server=" + server);
355        counter++;
356        continue;
357      }
358      if (ack) {
359        Future<Boolean> task = moveRegionsPool
360          .submit(new MoveWithAck(conn, region, currentServer, server, movedRegions));
361        taskList.add(task);
362      } else {
363        Future<Boolean> task = moveRegionsPool
364          .submit(new MoveWithoutAck(admin, region, currentServer, server, movedRegions));
365        taskList.add(task);
366      }
367      counter++;
368    }
369
370    moveRegionsPool.shutdown();
371    long timeoutInSeconds = regionsToMove.size() * admin.getConfiguration()
372        .getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX);
373    waitMoveTasksToFinish(moveRegionsPool, taskList, timeoutInSeconds);
374  }
375
376  /**
377   * Unload regions from given {@link #hostname} using ack/noAck mode and {@link #maxthreads}.In
378   * noAck mode we do not make sure that region is successfully online on the target region
379   * server,hence it is best effort.We do not unload regions to hostnames given in
380   * {@link #excludeFile}. If designatedFile is present with some contents, we will unload regions
381   * to hostnames provided in {@link #designatedFile}
382   *
383   * @return true if unloading succeeded, false otherwise
384   */
385  public boolean unload() throws InterruptedException, ExecutionException, TimeoutException {
386    return unloadRegions(false);
387  }
388
389  /**
390   * Unload regions from given {@link #hostname} using ack/noAck mode and {@link #maxthreads}.In
391   * noAck mode we do not make sure that region is successfully online on the target region
392   * server,hence it is best effort.We do not unload regions to hostnames given in
393   * {@link #excludeFile}. If designatedFile is present with some contents, we will unload regions
394   * to hostnames provided in {@link #designatedFile}.
395   * While unloading regions, destination RegionServers are selected from different rack i.e
396   * regions should not move to any RegionServers that belong to same rack as source RegionServer.
397   *
398   * @return true if unloading succeeded, false otherwise
399   */
400  public boolean unloadFromRack()
401      throws InterruptedException, ExecutionException, TimeoutException {
402    return unloadRegions(true);
403  }
404
405  private boolean unloadRegions(boolean unloadFromRack) throws InterruptedException,
406      ExecutionException, TimeoutException {
407    deleteFile(this.filename);
408    ExecutorService unloadPool = Executors.newFixedThreadPool(1);
409    Future<Boolean> unloadTask = unloadPool.submit(() -> {
410      List<RegionInfo> movedRegions = Collections.synchronizedList(new ArrayList<>());
411      try {
412        // Get Online RegionServers
413        List<ServerName> regionServers = new ArrayList<>();
414        regionServers.addAll(admin.getRegionServers());
415        // Remove the host Region server from target Region Servers list
416        ServerName server = stripServer(regionServers, hostname, port);
417        if (server == null) {
418          LOG.info("Could not find server '{}:{}' in the set of region servers. giving up.",
419              hostname, port);
420          LOG.debug("List of region servers: {}", regionServers);
421          return false;
422        }
423        // Remove RS not present in the designated file
424        includeExcludeRegionServers(designatedFile, regionServers, true);
425
426        // Remove RS present in the exclude file
427        includeExcludeRegionServers(excludeFile, regionServers, false);
428
429        if (unloadFromRack) {
430          // remove regionServers that belong to same rack (as source host) since the goal is to
431          // unload regions from source regionServer to destination regionServers
432          // that belong to different rack only.
433          String sourceRack = rackManager.getRack(server);
434          List<String> racks = rackManager.getRack(regionServers);
435          Iterator<ServerName> iterator = regionServers.iterator();
436          int i = 0;
437          while (iterator.hasNext()) {
438            iterator.next();
439            if (racks.size() > i && racks.get(i) != null && racks.get(i).equals(sourceRack)) {
440              iterator.remove();
441            }
442            i++;
443          }
444        }
445
446        // Remove decommissioned RS
447        Set<ServerName> decommissionedRS = new HashSet<>(admin.listDecommissionedRegionServers());
448        if (CollectionUtils.isNotEmpty(decommissionedRS)) {
449          regionServers.removeIf(decommissionedRS::contains);
450          LOG.debug("Excluded RegionServers from unloading regions to because they " +
451            "are marked as decommissioned. Servers: {}", decommissionedRS);
452        }
453
454        stripMaster(regionServers);
455        if (regionServers.isEmpty()) {
456          LOG.warn("No Regions were moved - no servers available");
457          return false;
458        }
459        unloadRegions(server, regionServers, movedRegions);
460      } catch (Exception e) {
461        LOG.error("Error while unloading regions ", e);
462        return false;
463      } finally {
464        if (movedRegions != null) {
465          writeFile(filename, movedRegions);
466        }
467      }
468      return true;
469    });
470    return waitTaskToFinish(unloadPool, unloadTask, "unloading");
471  }
472
473  private void unloadRegions(ServerName server, List<ServerName> regionServers,
474      List<RegionInfo> movedRegions) throws Exception {
475    while (true) {
476      List<RegionInfo> regionsToMove = admin.getRegions(server);
477      regionsToMove.removeAll(movedRegions);
478      if (regionsToMove.isEmpty()) {
479        LOG.info("No Regions to move....Quitting now");
480        break;
481      }
482      LOG.info("Moving {} regions from {} to {} servers using {} threads .Ack Mode: {}",
483        regionsToMove.size(), this.hostname, regionServers.size(), this.maxthreads, ack);
484
485      Optional<RegionInfo> metaRegion = getMetaRegionInfoIfToBeMoved(regionsToMove);
486      if (metaRegion.isPresent()) {
487        RegionInfo meta = metaRegion.get();
488        submitRegionMovesWhileUnloading(server, regionServers, movedRegions,
489          Collections.singletonList(meta));
490        regionsToMove.remove(meta);
491      }
492      submitRegionMovesWhileUnloading(server, regionServers, movedRegions, regionsToMove);
493    }
494  }
495
496  private void submitRegionMovesWhileUnloading(ServerName server, List<ServerName> regionServers,
497    List<RegionInfo> movedRegions, List<RegionInfo> regionsToMove) throws Exception {
498    final ExecutorService moveRegionsPool = Executors.newFixedThreadPool(this.maxthreads);
499    List<Future<Boolean>> taskList = new ArrayList<>();
500    int serverIndex = 0;
501    for (RegionInfo regionToMove : regionsToMove) {
502      if (ack) {
503        Future<Boolean> task = moveRegionsPool.submit(
504          new MoveWithAck(conn, regionToMove, server, regionServers.get(serverIndex),
505            movedRegions));
506        taskList.add(task);
507      } else {
508        Future<Boolean> task = moveRegionsPool.submit(
509          new MoveWithoutAck(admin, regionToMove, server, regionServers.get(serverIndex),
510            movedRegions));
511        taskList.add(task);
512      }
513      serverIndex = (serverIndex + 1) % regionServers.size();
514    }
515    moveRegionsPool.shutdown();
516    long timeoutInSeconds = regionsToMove.size() * admin.getConfiguration()
517      .getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX);
518    waitMoveTasksToFinish(moveRegionsPool, taskList, timeoutInSeconds);
519  }
520
521  private boolean waitTaskToFinish(ExecutorService pool, Future<Boolean> task, String operation)
522      throws TimeoutException, InterruptedException, ExecutionException {
523    pool.shutdown();
524    try {
525      if (!pool.awaitTermination((long) this.timeout, TimeUnit.SECONDS)) {
526        LOG.warn(
527            "Timed out before finishing the " + operation + " operation. Timeout: " + this.timeout
528                + "sec");
529        pool.shutdownNow();
530      }
531    } catch (InterruptedException e) {
532      pool.shutdownNow();
533      Thread.currentThread().interrupt();
534    }
535    try {
536      return task.get(5, TimeUnit.SECONDS);
537    } catch (InterruptedException e) {
538      LOG.warn("Interrupted while " + operation + " Regions on " + this.hostname, e);
539      throw e;
540    } catch (ExecutionException e) {
541      LOG.error("Error while " + operation + " regions on RegionServer " + this.hostname, e);
542      throw e;
543    }
544  }
545
546  private void waitMoveTasksToFinish(ExecutorService moveRegionsPool,
547      List<Future<Boolean>> taskList, long timeoutInSeconds) throws Exception {
548    try {
549      if (!moveRegionsPool.awaitTermination(timeoutInSeconds, TimeUnit.SECONDS)) {
550        moveRegionsPool.shutdownNow();
551      }
552    } catch (InterruptedException e) {
553      moveRegionsPool.shutdownNow();
554      Thread.currentThread().interrupt();
555    }
556    for (Future<Boolean> future : taskList) {
557      try {
558        // if even after shutdownNow threads are stuck we wait for 5 secs max
559        if (!future.get(5, TimeUnit.SECONDS)) {
560          LOG.error("Was Not able to move region....Exiting Now");
561          throw new Exception("Could not move region Exception");
562        }
563      } catch (InterruptedException e) {
564        LOG.error("Interrupted while waiting for Thread to Complete " + e.getMessage(), e);
565        throw e;
566      } catch (ExecutionException e) {
567        boolean ignoreFailure = ignoreRegionMoveFailure(e);
568        if (ignoreFailure) {
569          LOG.debug("Ignore region move failure, it might have been split/merged.", e);
570        } else {
571          LOG.error("Got Exception From Thread While moving region {}", e.getMessage(), e);
572          throw e;
573        }
574      } catch (CancellationException e) {
575        LOG.error("Thread for moving region cancelled. Timeout for cancellation:" + timeoutInSeconds
576            + "secs", e);
577        throw e;
578      }
579    }
580  }
581
582  private boolean ignoreRegionMoveFailure(ExecutionException e) {
583    boolean ignoreFailure = false;
584    if (e.getCause() instanceof UnknownRegionException) {
585      // region does not exist anymore
586      ignoreFailure = true;
587    } else if (e.getCause() instanceof DoNotRetryRegionException
588        && e.getCause().getMessage() != null && e.getCause().getMessage()
589        .contains(AssignmentManager.UNEXPECTED_STATE_REGION + "state=SPLIT,")) {
590      // region is recently split
591      ignoreFailure = true;
592    }
593    return ignoreFailure;
594  }
595
596  private ServerName getTargetServer() throws Exception {
597    ServerName server = null;
598    int maxWaitInSeconds =
599        admin.getConfiguration().getInt(SERVERSTART_WAIT_MAX_KEY, DEFAULT_SERVERSTART_WAIT_MAX);
600    long maxWait = EnvironmentEdgeManager.currentTime() + maxWaitInSeconds * 1000;
601    while (EnvironmentEdgeManager.currentTime() < maxWait) {
602      try {
603        List<ServerName> regionServers = new ArrayList<>();
604        regionServers.addAll(admin.getRegionServers());
605        // Remove the host Region server from target Region Servers list
606        server = stripServer(regionServers, hostname, port);
607        if (server != null) {
608          break;
609        } else {
610          LOG.warn("Server " + hostname + ":" + port + " is not up yet, waiting");
611        }
612      } catch (IOException e) {
613        LOG.warn("Could not get list of region servers", e);
614      }
615      Thread.sleep(500);
616    }
617    if (server == null) {
618      LOG.error("Server " + hostname + ":" + port + " is not up. Giving up.");
619      throw new Exception("Server " + hostname + ":" + port + " to load regions not online");
620    }
621    return server;
622  }
623
624  private List<RegionInfo> readRegionsFromFile(String filename) throws IOException {
625    List<RegionInfo> regions = new ArrayList<>();
626    File f = new File(filename);
627    if (!f.exists()) {
628      return regions;
629    }
630    try (DataInputStream dis = new DataInputStream(
631        new BufferedInputStream(new FileInputStream(f)))) {
632      int numRegions = dis.readInt();
633      int index = 0;
634      while (index < numRegions) {
635        regions.add(RegionInfo.parseFromOrNull(Bytes.readByteArray(dis)));
636        index++;
637      }
638    } catch (IOException e) {
639      LOG.error("Error while reading regions from file:" + filename, e);
640      throw e;
641    }
642    return regions;
643  }
644
645  /**
646   * Write the number of regions moved in the first line followed by regions moved in subsequent
647   * lines
648   */
649  private void writeFile(String filename, List<RegionInfo> movedRegions) throws IOException {
650    try (DataOutputStream dos = new DataOutputStream(
651        new BufferedOutputStream(new FileOutputStream(filename)))) {
652      dos.writeInt(movedRegions.size());
653      for (RegionInfo region : movedRegions) {
654        Bytes.writeByteArray(dos, RegionInfo.toByteArray(region));
655      }
656    } catch (IOException e) {
657      LOG.error(
658          "ERROR: Was Not able to write regions moved to output file but moved " + movedRegions
659              .size() + " regions", e);
660      throw e;
661    }
662  }
663
664  private void deleteFile(String filename) {
665    File f = new File(filename);
666    if (f.exists()) {
667      f.delete();
668    }
669  }
670
671  /**
672   * @param filename The file should have 'host:port' per line
673   * @return List of servers from the file in format 'hostname:port'.
674   */
675  private List<String> readServersFromFile(String filename) throws IOException {
676    List<String> servers = new ArrayList<>();
677    if (filename != null) {
678      try {
679        Files.readAllLines(Paths.get(filename)).stream().map(String::trim)
680          .filter(((Predicate<String>) String::isEmpty).negate()).map(String::toLowerCase)
681          .forEach(servers::add);
682      } catch (IOException e) {
683        LOG.error("Exception while reading servers from file,", e);
684        throw e;
685      }
686    }
687    return servers;
688  }
689
690  /**
691   * Designates or excludes the servername whose hostname and port portion matches the list given
692   * in the file.
693   * Example:<br>
694   * If you want to designated RSs, suppose designatedFile has RS1, regionServers has RS1, RS2 and
695   * RS3. When we call includeExcludeRegionServers(designatedFile, regionServers, true), RS2 and
696   * RS3 are removed from regionServers list so that regions can move to only RS1.
697   * If you want to exclude RSs, suppose excludeFile has RS1, regionServers has RS1, RS2 and RS3.
698   * When we call includeExcludeRegionServers(excludeFile, servers, false), RS1 is removed from
699   * regionServers list so that regions can move to only RS2 and RS3.
700   */
701  private void includeExcludeRegionServers(String fileName, List<ServerName> regionServers,
702      boolean isInclude) throws IOException {
703    if (fileName != null) {
704      List<String> servers = readServersFromFile(fileName);
705      if (servers.isEmpty()) {
706        LOG.warn("No servers provided in the file: {}." + fileName);
707        return;
708      }
709      Iterator<ServerName> i = regionServers.iterator();
710      while (i.hasNext()) {
711        String rs = i.next().getServerName();
712        String rsPort = rs.split(ServerName.SERVERNAME_SEPARATOR)[0].toLowerCase() + ":" + rs
713          .split(ServerName.SERVERNAME_SEPARATOR)[1];
714        if (isInclude != servers.contains(rsPort)) {
715          i.remove();
716        }
717      }
718    }
719  }
720
721  /**
722   * Exclude master from list of RSs to move regions to
723   */
724  private void stripMaster(List<ServerName> regionServers) throws IOException {
725    ServerName master = admin.getClusterMetrics(EnumSet.of(Option.MASTER)).getMasterName();
726    stripServer(regionServers, master.getHostname(), master.getPort());
727  }
728
729  /**
730   * Remove the servername whose hostname and port portion matches from the passed array of servers.
731   * Returns as side-effect the servername removed.
732   * @return server removed from list of Region Servers
733   */
734  private ServerName stripServer(List<ServerName> regionServers, String hostname, int port) {
735    for (Iterator<ServerName> iter = regionServers.iterator(); iter.hasNext();) {
736      ServerName server = iter.next();
737      if (server.getAddress().getHostname().equalsIgnoreCase(hostname) &&
738        server.getAddress().getPort() == port) {
739        iter.remove();
740        return server;
741      }
742    }
743    return null;
744  }
745
746  @Override
747  protected void addOptions() {
748    this.addRequiredOptWithArg("r", "regionserverhost", "region server <hostname>|<hostname:port>");
749    this.addRequiredOptWithArg("o", "operation", "Expected: load/unload/unload_from_rack");
750    this.addOptWithArg("m", "maxthreads",
751        "Define the maximum number of threads to use to unload and reload the regions");
752    this.addOptWithArg("x", "excludefile",
753        "File with <hostname:port> per line to exclude as unload targets; default excludes only "
754            + "target host; useful for rack decommisioning.");
755    this.addOptWithArg("d","designatedfile","File with <hostname:port> per line as unload targets;"
756            + "default is all online hosts");
757    this.addOptWithArg("f", "filename",
758        "File to save regions list into unloading, or read from loading; "
759            + "default /tmp/<usernamehostname:port>");
760    this.addOptNoArg("n", "noack",
761        "Turn on No-Ack mode(default: false) which won't check if region is online on target "
762            + "RegionServer, hence best effort. This is more performant in unloading and loading "
763            + "but might lead to region being unavailable for some time till master reassigns it "
764            + "in case the move failed");
765    this.addOptWithArg("t", "timeout", "timeout in seconds after which the tool will exit "
766        + "irrespective of whether it finished or not;default Integer.MAX_VALUE");
767  }
768
769  @Override
770  protected void processOptions(CommandLine cmd) {
771    String hostname = cmd.getOptionValue("r");
772    rmbuilder = new RegionMoverBuilder(hostname);
773    if (cmd.hasOption('m')) {
774      rmbuilder.maxthreads(Integer.parseInt(cmd.getOptionValue('m')));
775    }
776    if (cmd.hasOption('n')) {
777      rmbuilder.ack(false);
778    }
779    if (cmd.hasOption('f')) {
780      rmbuilder.filename(cmd.getOptionValue('f'));
781    }
782    if (cmd.hasOption('x')) {
783      rmbuilder.excludeFile(cmd.getOptionValue('x'));
784    }
785    if (cmd.hasOption('d')) {
786      rmbuilder.designatedFile(cmd.getOptionValue('d'));
787    }
788    if (cmd.hasOption('t')) {
789      rmbuilder.timeout(Integer.parseInt(cmd.getOptionValue('t')));
790    }
791    this.loadUnload = cmd.getOptionValue("o").toLowerCase(Locale.ROOT);
792  }
793
794  @Override
795  protected int doWork() throws Exception {
796    boolean success;
797    try (RegionMover rm = rmbuilder.build()) {
798      if (loadUnload.equalsIgnoreCase("load")) {
799        success = rm.load();
800      } else if (loadUnload.equalsIgnoreCase("unload")) {
801        success = rm.unload();
802      } else if (loadUnload.equalsIgnoreCase("unload_from_rack")) {
803        success = rm.unloadFromRack();
804      } else {
805        printUsage();
806        success = false;
807      }
808    }
809    return (success ? 0 : 1);
810  }
811
812  public static void main(String[] args) {
813    try (RegionMover mover = new RegionMover()) {
814      mover.doStaticMain(args);
815    }
816  }
817}