View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import java.io.Closeable;
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.io.InterruptedIOException;
24  import java.io.PrintWriter;
25  import java.io.StringWriter;
26  import java.net.InetAddress;
27  import java.net.URI;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.Collection;
31  import java.util.Collections;
32  import java.util.Comparator;
33  import java.util.HashMap;
34  import java.util.HashSet;
35  import java.util.Iterator;
36  import java.util.List;
37  import java.util.Locale;
38  import java.util.Map;
39  import java.util.Map.Entry;
40  import java.util.Set;
41  import java.util.SortedMap;
42  import java.util.SortedSet;
43  import java.util.TreeMap;
44  import java.util.TreeSet;
45  import java.util.Vector;
46  import java.util.concurrent.Callable;
47  import java.util.concurrent.ConcurrentSkipListMap;
48  import java.util.concurrent.ExecutionException;
49  import java.util.concurrent.ExecutorService;
50  import java.util.concurrent.Executors;
51  import java.util.concurrent.Future;
52  import java.util.concurrent.FutureTask;
53  import java.util.concurrent.ScheduledThreadPoolExecutor;
54  import java.util.concurrent.TimeUnit;
55  import java.util.concurrent.TimeoutException;
56  import java.util.concurrent.atomic.AtomicBoolean;
57  import java.util.concurrent.atomic.AtomicInteger;
58  
59  import org.apache.commons.io.IOUtils;
60  import org.apache.commons.lang.StringUtils;
61  import org.apache.commons.logging.Log;
62  import org.apache.commons.logging.LogFactory;
63  import org.apache.hadoop.conf.Configuration;
64  import org.apache.hadoop.conf.Configured;
65  import org.apache.hadoop.fs.FSDataOutputStream;
66  import org.apache.hadoop.fs.FileStatus;
67  import org.apache.hadoop.fs.FileSystem;
68  import org.apache.hadoop.fs.Path;
69  import org.apache.hadoop.fs.permission.FsAction;
70  import org.apache.hadoop.fs.permission.FsPermission;
71  import org.apache.hadoop.hbase.Abortable;
72  import org.apache.hadoop.hbase.Cell;
73  import org.apache.hadoop.hbase.ClusterStatus;
74  import org.apache.hadoop.hbase.CoordinatedStateException;
75  import org.apache.hadoop.hbase.HBaseConfiguration;
76  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
77  import org.apache.hadoop.hbase.HColumnDescriptor;
78  import org.apache.hadoop.hbase.HConstants;
79  import org.apache.hadoop.hbase.HRegionInfo;
80  import org.apache.hadoop.hbase.HRegionLocation;
81  import org.apache.hadoop.hbase.HTableDescriptor;
82  import org.apache.hadoop.hbase.KeyValue;
83  import org.apache.hadoop.hbase.MasterNotRunningException;
84  import org.apache.hadoop.hbase.MetaTableAccessor;
85  import org.apache.hadoop.hbase.RegionLocations;
86  import org.apache.hadoop.hbase.ServerName;
87  import org.apache.hadoop.hbase.TableName;
88  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
89  import org.apache.hadoop.hbase.classification.InterfaceAudience;
90  import org.apache.hadoop.hbase.classification.InterfaceStability;
91  import org.apache.hadoop.hbase.client.Admin;
92  import org.apache.hadoop.hbase.client.ClusterConnection;
93  import org.apache.hadoop.hbase.client.ConnectionFactory;
94  import org.apache.hadoop.hbase.client.Delete;
95  import org.apache.hadoop.hbase.client.Get;
96  import org.apache.hadoop.hbase.client.HBaseAdmin;
97  import org.apache.hadoop.hbase.client.HConnectable;
98  import org.apache.hadoop.hbase.client.HConnection;
99  import org.apache.hadoop.hbase.client.HConnectionManager;
100 import org.apache.hadoop.hbase.client.MetaScanner;
101 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
102 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
103 import org.apache.hadoop.hbase.client.Put;
104 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
105 import org.apache.hadoop.hbase.client.Result;
106 import org.apache.hadoop.hbase.client.RowMutations;
107 import org.apache.hadoop.hbase.client.Table;
108 import org.apache.hadoop.hbase.io.FileLink;
109 import org.apache.hadoop.hbase.io.HFileLink;
110 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
111 import org.apache.hadoop.hbase.io.hfile.HFile;
112 import org.apache.hadoop.hbase.master.MasterFileSystem;
113 import org.apache.hadoop.hbase.master.RegionState;
114 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
115 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
116 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
117 import org.apache.hadoop.hbase.regionserver.HRegion;
118 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
119 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
120 import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
121 import org.apache.hadoop.hbase.security.AccessDeniedException;
122 import org.apache.hadoop.hbase.security.UserProvider;
123 import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
124 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
125 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
126 import org.apache.hadoop.hbase.util.hbck.ReplicationChecker;
127 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandler;
128 import org.apache.hadoop.hbase.util.hbck.TableIntegrityErrorHandlerImpl;
129 import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
130 import org.apache.hadoop.hbase.wal.WALSplitter;
131 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
132 import org.apache.hadoop.hbase.zookeeper.ZKTableStateClientSideReader;
133 import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
134 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
135 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
136 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
137 import org.apache.hadoop.ipc.RemoteException;
138 import org.apache.hadoop.security.UserGroupInformation;
139 import org.apache.hadoop.util.ReflectionUtils;
140 import org.apache.hadoop.util.Tool;
141 import org.apache.hadoop.util.ToolRunner;
142 import org.apache.zookeeper.KeeperException;
143 
144 import com.google.common.base.Joiner;
145 import com.google.common.base.Preconditions;
146 import com.google.common.collect.ImmutableList;
147 import com.google.common.collect.Lists;
148 import com.google.common.collect.Multimap;
149 import com.google.common.collect.Ordering;
150 import com.google.common.collect.TreeMultimap;
151 import com.google.protobuf.ServiceException;
152 
153 /**
154  * HBaseFsck (hbck) is a tool for checking and repairing region consistency and
155  * table integrity problems in a corrupted HBase.
156  * <p>
157  * Region consistency checks verify that hbase:meta, region deployment on region
158  * servers and the state of data in HDFS (.regioninfo files) all are in
159  * accordance.
160  * <p>
161  * Table integrity checks verify that all possible row keys resolve to exactly
162  * one region of a table.  This means there are no individual degenerate
163  * or backwards regions; no holes between regions; and that there are no
164  * overlapping regions.
165  * <p>
166  * The general repair strategy works in two phases:
167  * <ol>
168  * <li> Repair Table Integrity on HDFS. (merge or fabricate regions)
169  * <li> Repair Region Consistency with hbase:meta and assignments
170  * </ol>
171  * <p>
172  * For table integrity repairs, the tables' region directories are scanned
173  * for .regioninfo files.  Each table's integrity is then verified.  If there
174  * are any orphan regions (regions with no .regioninfo files) or holes, new
175  * regions are fabricated.  Backwards regions are sidelined as well as empty
176  * degenerate (endkey==startkey) regions.  If there are any overlapping regions,
177  * a new region is created and all data is merged into the new region.
178  * <p>
179  * Table integrity repairs deal solely with HDFS and could potentially be done
180  * offline -- the hbase region servers or master do not need to be running.
181  * This phase can eventually be used to completely reconstruct the hbase:meta table in
182  * an offline fashion.
183  * <p>
184  * Region consistency requires three conditions -- 1) valid .regioninfo file
185  * present in an HDFS region dir,  2) valid row with .regioninfo data in META,
186  * and 3) a region is deployed only at the regionserver that was assigned to
187  * with proper state in the master.
188  * <p>
189  * Region consistency repairs require hbase to be online so that hbck can
190  * contact the HBase master and region servers.  The hbck#connect() method must
191  * first be called successfully.  Much of the region consistency information
192  * is transient and less risky to repair.
193  * <p>
194  * If hbck is run from the command line, there are a handful of arguments that
195  * can be used to limit the kinds of repairs hbck will do.  See the code in
196  * {@link #printUsageAndExit()} for more details.
197  */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
@InterfaceStability.Evolving
public class HBaseFsck extends Configured implements Closeable {
  public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
  public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000; // default sleep (ms) before a rerun
  private static final int MAX_NUM_THREADS = 50; // #threads to contact regions
  // Whether region servers support the offline RPC; presumably flipped off on
  // fallback somewhere outside this chunk -- confirm at the usage site.
  private static boolean rsSupportsOffline = true;
  private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
  private static final int DEFAULT_MAX_MERGE = 5;
  private static final String TO_BE_LOADED = "to_be_loaded";
  // Name of the exclusive lock file created under the HBase temp directory.
  private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
  private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
  private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
  private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
  // We have to set the timeout value > HdfsConstants.LEASE_SOFTLIMIT_PERIOD.
  // In HADOOP-2.6 and later, the Namenode proxy is created with a custom RetryPolicy for
  // AlreadyBeingCreatedException, which implies a timeout on these operations of up to
  // HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
  private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
  private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
  private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds

  /**********************
   * Internal resources
   **********************/
  private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
  private ClusterStatus status;
  private ClusterConnection connection;
  private Admin admin;
  private Table meta;
  // threads to do ||izable tasks: retrieve data from regionservers, handle overlapping regions
  protected ExecutorService executor;
  private long startMillis = EnvironmentEdgeManager.currentTime();
  private HFileCorruptionChecker hfcc;
  private int retcode = 0;
  // Location of the exclusive lock file; assigned when the lock is created.
  private Path HBCK_LOCK_PATH;
  // Open stream of the lock file; closed/deleted by unlockHbck().
  private FSDataOutputStream hbckOutFd;
  // This lock is to prevent cleanup of balancer resources twice between
  // ShutdownHook and the main code. We cleanup only if the connect() is
  // successful
  private final AtomicBoolean hbckLockCleanup = new AtomicBoolean(false);

  /***********
   * Options
   ***********/
  private static boolean details = false; // do we display the full report
  private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
  private static boolean forceExclusive = false; // only this hbck can modify HBase
  private boolean fixAssignments = false; // fix assignment errors?
  private boolean fixMeta = false; // fix meta errors?
  private boolean checkHdfs = true; // load and check fs consistency?
  private boolean fixHdfsHoles = false; // fix fs holes?
  private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
  private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
  private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
  private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
  private boolean fixSplitParents = false; // fix lingering split parents
  private boolean removeParents = false; // remove split parents
  private boolean fixReferenceFiles = false; // fix lingering reference store file
  private boolean fixHFileLinks = false; // fix lingering HFileLinks
  private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
  private boolean fixTableLocks = false; // fix table locks which are expired
  private boolean fixTableZNodes = false; // fix table Znodes which are orphaned
  private boolean fixReplication = false; // fix undeleted replication queues for removed peer
  private boolean fixAny = false; // Set to true if any of the fix is required.

  // limit checking/fixes to listed tables, if empty attempt to check/fix all
  // hbase:meta are always checked
  private Set<TableName> tablesIncluded = new HashSet<TableName>();
  private int maxMerge = DEFAULT_MAX_MERGE; // maximum number of overlapping regions to merge
  private int maxOverlapsToSideline = DEFAULT_OVERLAPS_TO_SIDELINE; // maximum number of overlapping regions to sideline
  private boolean sidelineBigOverlaps = false; // sideline overlaps with >maxMerge regions
  private Path sidelineDir = null;

  private boolean rerun = false; // if we tried to fix something, rerun hbck
  private static boolean SUMMARY = false; // if we want to print less output
  private boolean checkMetaOnly = false;
  private boolean checkRegionBoundaries = false;
  private boolean ignorePreCheckPermission = false; // if pre-check permission

  /*********
   * State
   *********/
  final private ErrorReporter errors;
  int fixes = 0; // number of fixes applied in the current pass; reset by clearState()

  /**
   * This map contains the state of all hbck items.  It maps from encoded region
   * name to HbckInfo structure.  The information contained in HbckInfo is used
   * to detect and correct consistency (hdfs/meta/deployment) problems.
   */
  private TreeMap<String, HbckInfo> regionInfoMap = new TreeMap<String, HbckInfo>();
  private TreeSet<TableName> disabledTables =
    new TreeSet<TableName>();
  // Empty regioninfo qualifiers in hbase:meta
  private Set<Result> emptyRegionInfoQualifiers = new HashSet<Result>();

  /**
   * This map from Tablename -> TableInfo contains the structures necessary to
   * detect table consistency problems (holes, dupes, overlaps).  It is sorted
   * to prevent dupes.
   *
   * If tablesIncluded is empty, this map contains all tables.
   * Otherwise, it contains only meta tables and tables in tablesIncluded,
   * unless checkMetaOnly is specified, in which case, it contains only
   * the meta table
   */
  private SortedMap<TableName, TableInfo> tablesInfo =
      new ConcurrentSkipListMap<TableName, TableInfo>();

  /**
   * When initially looking at HDFS, we attempt to find any orphaned data.
   */
  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());

  private Map<TableName, Set<String>> orphanTableDirs =
      new HashMap<TableName, Set<String>>();

  private Map<TableName, Set<String>> skippedRegions = new HashMap<TableName, Set<String>>();

  /**
   * List of orphaned table ZNodes
   */
  private Set<TableName> orphanedTableZNodes = new HashSet<TableName>();
  // Retry policies for lock-file creation/deletion and hbck znode creation.
  private final RetryCounterFactory lockFileRetryCounterFactory;
  private final RetryCounterFactory createZNodeRetryCounterFactory;

  private ZooKeeperWatcher zkw = null;
  private String hbckEphemeralNodePath = null;
  // Whether the hbck ephemeral znode was created (NOTE: historical typo in the name).
  private boolean hbckZodeCreated = false;
329 
  /**
   * Constructor.  Uses a default daemon thread pool sized from the
   * "hbasefsck.numthreads" configuration key.
   *
   * @param conf Configuration object
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   * @throws IOException if an underlying resource (filesystem, ZooKeeper) fails
   * @throws ClassNotFoundException if a configured class cannot be loaded
   */
  public HBaseFsck(Configuration conf) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    this(conf, createThreadPool(conf));
  }
341 
342   private static ExecutorService createThreadPool(Configuration conf) {
343     int numThreads = conf.getInt("hbasefsck.numthreads", MAX_NUM_THREADS);
344     return new ScheduledThreadPoolExecutor(numThreads, Threads.newDaemonThreadFactory("hbasefsck"));
345   }
346 
  /**
   * Constructor.
   *
   * @param conf Configuration object
   * @param exec executor used for parallelizable tasks (retrieving data from
   *          region servers, handling overlapping regions)
   * @throws MasterNotRunningException if the master is not running
   * @throws ZooKeeperConnectionException if unable to connect to ZooKeeper
   * @throws IOException if an underlying resource fails
   * @throws ClassNotFoundException if a configured class cannot be loaded
   */
  public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException,
      ZooKeeperConnectionException, IOException, ClassNotFoundException {
    super(conf);
    errors = getErrorReporter(getConf());
    this.executor = exec;
    // Retry policy for creating/deleting the exclusive hbck lock file.
    lockFileRetryCounterFactory = new RetryCounterFactory(
      getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
      getConf().getInt(
        "hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
      getConf().getInt(
        "hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
    // Retry policy for creating the hbck ephemeral znode.
    createZNodeRetryCounterFactory = new RetryCounterFactory(
      getConf().getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
      getConf().getInt(
        "hbase.hbck.createznode.attempt.sleep.interval",
        DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
      getConf().getInt(
        "hbase.hbck.createznode.attempt.maxsleeptime",
        DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
    zkw = createZooKeeperWatcher();
  }
378 
  /**
   * Callable that creates the exclusive hbck lock file under the HBase temp
   * directory and writes the local hostname into it.  Returns the open output
   * stream on success, or {@code null} if the NameNode reports
   * AlreadyBeingCreatedException -- i.e. another process holds the lock.
   */
  private class FileLockCallable implements Callable<FSDataOutputStream> {
    // Governs how many create attempts are made and the sleep between them.
    RetryCounter retryCounter;

    public FileLockCallable(RetryCounter retryCounter) {
      this.retryCounter = retryCounter;
    }
    @Override
    public FSDataOutputStream call() throws IOException {
      try {
        FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
        FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
            HConstants.DATA_FILE_UMASK_KEY);
        Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
        fs.mkdirs(tmpDir);
        // Side effect: records the lock location for later cleanup by unlockHbck().
        HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
        final FSDataOutputStream out = createFileWithRetries(fs, HBCK_LOCK_PATH, defaultPerms);
        out.writeBytes(InetAddress.getLocalHost().toString());
        out.flush();
        return out;
      } catch(RemoteException e) {
        if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
          // Another hbck instance is creating/holding the lock file.
          return null;
        } else {
          throw e;
        }
      }
    }

    /**
     * Attempts to create the lock file, sleeping and retrying per the retry
     * counter; rethrows the last IOException once the retry budget is spent.
     * The final boolean is false -- presumably "do not overwrite an existing
     * file"; confirm against FSUtils.create.
     */
    private FSDataOutputStream createFileWithRetries(final FileSystem fs,
        final Path hbckLockFilePath, final FsPermission defaultPerms)
        throws IOException {

      IOException exception = null;
      do {
        try {
          return FSUtils.create(fs, hbckLockFilePath, defaultPerms, false);
        } catch (IOException ioe) {
          LOG.info("Failed to create lock file " + hbckLockFilePath.getName()
              + ", try=" + (retryCounter.getAttemptTimes() + 1) + " of "
              + retryCounter.getMaxAttempts());
          LOG.debug("Failed to create lock file " + hbckLockFilePath.getName(),
              ioe);
          try {
            exception = ioe;
            retryCounter.sleepUntilNextRetry();
          } catch (InterruptedException ie) {
            // Preserve the interruption cause while converting to an IO-flavored exception.
            throw (InterruptedIOException) new InterruptedIOException(
                "Can't create lock file " + hbckLockFilePath.getName())
            .initCause(ie);
          }
        }
      } while (retryCounter.shouldRetry());

      throw exception;
    }
  }
435 
436   /**
437    * This method maintains a lock using a file. If the creation fails we return null
438    *
439    * @return FSDataOutputStream object corresponding to the newly opened lock file
440    * @throws IOException
441    */
442   private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
443     RetryCounter retryCounter = lockFileRetryCounterFactory.create();
444     FileLockCallable callable = new FileLockCallable(retryCounter);
445     ExecutorService executor = Executors.newFixedThreadPool(1);
446     FutureTask<FSDataOutputStream> futureTask = new FutureTask<FSDataOutputStream>(callable);
447     executor.execute(futureTask);
448     final int timeoutInSeconds = getConf().getInt(
449       "hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
450     FSDataOutputStream stream = null;
451     try {
452       stream = futureTask.get(timeoutInSeconds, TimeUnit.SECONDS);
453     } catch (ExecutionException ee) {
454       LOG.warn("Encountered exception when opening lock file", ee);
455     } catch (InterruptedException ie) {
456       LOG.warn("Interrupted when opening lock file", ie);
457       Thread.currentThread().interrupt();
458     } catch (TimeoutException exception) {
459       // took too long to obtain lock
460       LOG.warn("Took more than " + timeoutInSeconds + " seconds in obtaining lock");
461       futureTask.cancel(true);
462     } finally {
463       executor.shutdownNow();
464     }
465     return stream;
466   }
467 
468   private void unlockHbck() {
469     if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) {
470       RetryCounter retryCounter = lockFileRetryCounterFactory.create();
471       do {
472         try {
473           IOUtils.closeQuietly(hbckOutFd);
474           FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()),
475               HBCK_LOCK_PATH, true);
476           LOG.info("Finishing hbck");
477           return;
478         } catch (IOException ioe) {
479           LOG.info("Failed to delete " + HBCK_LOCK_PATH + ", try="
480               + (retryCounter.getAttemptTimes() + 1) + " of "
481               + retryCounter.getMaxAttempts());
482           LOG.debug("Failed to delete " + HBCK_LOCK_PATH, ioe);
483           try {
484             retryCounter.sleepUntilNextRetry();
485           } catch (InterruptedException ie) {
486             Thread.currentThread().interrupt();
487             LOG.warn("Interrupted while deleting lock file" +
488                 HBCK_LOCK_PATH);
489             return;
490           }
491         }
492       } while (retryCounter.shouldRetry());
493     }
494   }
495 
  /**
   * To repair region consistency, one must call connect() in order to repair
   * online state.
   * <p>
   * Acquires the exclusive hbck lock file (when running in exclusive mode),
   * installs a shutdown hook that releases the lock and hbck znode, then opens
   * the cluster connection, the admin handle, the hbase:meta table handle, and
   * snapshots the cluster status.
   *
   * @throws IOException if another hbck instance already holds the lock, or if
   *           the cluster connection cannot be established
   */
  public void connect() throws IOException {

    if (isExclusive()) {
      // Grab the lock
      hbckOutFd = checkAndMarkRunningHbck();
      if (hbckOutFd == null) {
        setRetCode(-1);
        LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
            "[If you are sure no other instance is running, delete the lock file " +
            HBCK_LOCK_PATH + " and rerun the tool]");
        throw new IOException("Duplicate hbck - Abort");
      }

      // Make sure to cleanup the lock
      hbckLockCleanup.set(true);
    }


    // Add a shutdown hook to this thread, in case user tries to
    // kill the hbck with a ctrl-c, we want to cleanup the lock so that
    // it is available for further calls
    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        IOUtils.closeQuietly(HBaseFsck.this);
        cleanupHbckZnode();
        unlockHbck();
      }
    });

    LOG.info("Launching hbck");

    connection = (ClusterConnection)ConnectionFactory.createConnection(getConf());
    admin = connection.getAdmin();
    meta = connection.getTable(TableName.META_TABLE_NAME);
    status = admin.getClusterStatus();
  }
537 
538   /**
539    * Get deployed regions according to the region servers.
540    */
541   private void loadDeployedRegions() throws IOException, InterruptedException {
542     // From the master, get a list of all known live region servers
543     Collection<ServerName> regionServers = status.getServers();
544     errors.print("Number of live region servers: " + regionServers.size());
545     if (details) {
546       for (ServerName rsinfo: regionServers) {
547         errors.print("  " + rsinfo.getServerName());
548       }
549     }
550 
551     // From the master, get a list of all dead region servers
552     Collection<ServerName> deadRegionServers = status.getDeadServerNames();
553     errors.print("Number of dead region servers: " + deadRegionServers.size());
554     if (details) {
555       for (ServerName name: deadRegionServers) {
556         errors.print("  " + name);
557       }
558     }
559 
560     // Print the current master name and state
561     errors.print("Master: " + status.getMaster());
562 
563     // Print the list of all backup masters
564     Collection<ServerName> backupMasters = status.getBackupMasters();
565     errors.print("Number of backup masters: " + backupMasters.size());
566     if (details) {
567       for (ServerName name: backupMasters) {
568         errors.print("  " + name);
569       }
570     }
571 
572     errors.print("Average load: " + status.getAverageLoad());
573     errors.print("Number of requests: " + status.getRequestsCount());
574     errors.print("Number of regions: " + status.getRegionsCount());
575 
576     Set<RegionState> rits = status.getRegionsInTransition();
577     if (rits != null) {
578       errors.print("Number of regions in transition: " + rits.size());
579       if (details) {
580         for (RegionState state: rits) {
581           errors.print("  " + state.toDescriptiveString());
582         }
583       }
584     }
585 
586     // Determine what's deployed
587     processRegionServers(regionServers);
588   }
589 
590   /**
591    * Clear the current state of hbck.
592    */
593   private void clearState() {
594     // Make sure regionInfo is empty before starting
595     fixes = 0;
596     regionInfoMap.clear();
597     emptyRegionInfoQualifiers.clear();
598     disabledTables.clear();
599     errors.clear();
600     tablesInfo.clear();
601     orphanHdfsDirs.clear();
602     skippedRegions.clear();
603   }
604 
605   /**
606    * This repair method analyzes hbase data in hdfs and repairs it to satisfy
607    * the table integrity rules.  HBase doesn't need to be online for this
608    * operation to work.
609    */
610   public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
611     // Initial pass to fix orphans.
612     if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
613         || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
614       LOG.info("Loading regioninfos HDFS");
615       // if nothing is happening this should always complete in two iterations.
616       int maxIterations = getConf().getInt("hbase.hbck.integrityrepair.iterations.max", 3);
617       int curIter = 0;
618       do {
619         clearState(); // clears hbck state and reset fixes to 0 and.
620         // repair what's on HDFS
621         restoreHdfsIntegrity();
622         curIter++;// limit the number of iterations.
623       } while (fixes > 0 && curIter <= maxIterations);
624 
625       // Repairs should be done in the first iteration and verification in the second.
626       // If there are more than 2 passes, something funny has happened.
627       if (curIter > 2) {
628         if (curIter == maxIterations) {
629           LOG.warn("Exiting integrity repairs after max " + curIter + " iterations. "
630               + "Tables integrity may not be fully repaired!");
631         } else {
632           LOG.info("Successfully exiting integrity repairs after " + curIter + " iterations");
633         }
634       }
635     }
636   }
637 
  /**
   * This repair method requires the cluster to be online since it contacts
   * region servers and the masters.  It makes each region's state in HDFS, in
   * hbase:meta, and deployments consistent.
   *
   * @return If &gt; 0 , number of errors detected, if &lt; 0 there was an unrecoverable
   * error (-1: hbase:meta could not be loaded, -2: hbase:meta is inconsistent).
   * If 0, we have a clean hbase.
   * @throws IOException if cluster or filesystem access fails
   * @throws KeeperException if a ZooKeeper operation fails
   * @throws InterruptedException if interrupted while waiting
   */
  public int onlineConsistencyRepair() throws IOException, KeeperException,
    InterruptedException {

    // get regions according to what is online on each RegionServer
    loadDeployedRegions();
    // check whether hbase:meta is deployed and online
    recordMetaRegion();
    // Check if hbase:meta is found only once and in the right place
    if (!checkMetaRegion()) {
      String errorMsg = "hbase:meta table is not consistent. ";
      if (shouldFixAssignments()) {
        errorMsg += "HBCK will try fixing it. Rerun once hbase:meta is back to consistent state.";
      } else {
        errorMsg += "Run HBCK with proper fix options to fix hbase:meta inconsistency.";
      }
      errors.reportError(errorMsg + " Exiting...");
      return -2;
    }
    // Not going with further consistency check for tables when hbase:meta itself is not consistent.
    LOG.info("Loading regionsinfo from the hbase:meta table");
    boolean success = loadMetaEntries();
    if (!success) return -1;

    // Empty cells in hbase:meta?
    reportEmptyMetaCells();

    // Check if we have to cleanup empty REGIONINFO_QUALIFIER rows from hbase:meta
    if (shouldFixEmptyMetaCells()) {
      fixEmptyMetaCells();
    }

    // get a list of all tables that have not changed recently.
    if (!checkMetaOnly) {
      reportTablesInFlux();
    }

    // load regiondirs and regioninfos from HDFS
    if (shouldCheckHdfs()) {
      LOG.info("Loading region directories from HDFS");
      loadHdfsRegionDirs();
      LOG.info("Loading region information from HDFS");
      loadHdfsRegionInfos();
    }

    // Get disabled tables from ZooKeeper
    loadDisabledTables();

    // fix the orphan tables
    fixOrphanTables();

    LOG.info("Checking and fixing region consistency");
    // Check and fix consistency
    checkAndFixConsistency();

    // Check integrity (does not fix)
    checkIntegrity();
    return errors.getErrorList().size();
  }
704 
705   /**
706    * This method maintains an ephemeral znode. If the creation fails we return false or throw
707    * exception
708    *
709    * @return true if creating znode succeeds; false otherwise
710    * @throws IOException if IO failure occurs
711    */
712   private boolean setMasterInMaintenanceMode() throws IOException {
713     RetryCounter retryCounter = createZNodeRetryCounterFactory.create();
714     hbckEphemeralNodePath = ZKUtil.joinZNode(
715       ZooKeeperWatcher.masterMaintZNode,
716       "hbck-" + Long.toString(EnvironmentEdgeManager.currentTime()));
717     do {
718       try {
719         hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
720         if (hbckZodeCreated) {
721           break;
722         }
723       } catch (KeeperException e) {
724         if (retryCounter.getAttemptTimes() >= retryCounter.getMaxAttempts()) {
725            throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
726         }
727         // fall through and retry
728       }
729 
730       LOG.warn("Fail to create znode " + hbckEphemeralNodePath + ", try=" +
731           (retryCounter.getAttemptTimes() + 1) + " of " + retryCounter.getMaxAttempts());
732 
733       try {
734         retryCounter.sleepUntilNextRetry();
735       } catch (InterruptedException ie) {
736         throw (InterruptedIOException) new InterruptedIOException(
737               "Can't create znode " + hbckEphemeralNodePath).initCause(ie);
738       }
739     } while (retryCounter.shouldRetry());
740     return hbckZodeCreated;
741   }
742 
743   private void cleanupHbckZnode() {
744     try {
745       if (zkw != null && hbckZodeCreated) {
746         ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
747         hbckZodeCreated = false;
748       }
749     } catch (KeeperException e) {
750       // Ignore
751       if (!e.code().equals(KeeperException.Code.NONODE)) {
752         LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
753       }
754     }
755   }
756 
757   /**
758    * Contacts the master and prints out cluster-wide information
759    * @return 0 on success, non-zero on failure
760    */
761   public int onlineHbck()
762       throws IOException, KeeperException, InterruptedException, ServiceException {
763     // print hbase server version
764     errors.print("Version: " + status.getHBaseVersion());
765 
766     // Clean start
767     clearState();
768     // Do offline check and repair first
769     offlineHdfsIntegrityRepair();
770     offlineReferenceFileRepair();
771     offlineHLinkFileRepair();
772     // If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
773     // hbck, it is likely that hbck would be misled and report transient errors.  Therefore, it
774     // is better to set Master into maintenance mode during online hbck.
775     //
776     if (!setMasterInMaintenanceMode()) {
777       LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
778         + "error.  Please run HBCK multiple times to reduce the chance of transient error.");
779     }
780 
781     onlineConsistencyRepair();
782 
783     if (checkRegionBoundaries) {
784       checkRegionBoundaries();
785     }
786 
787     checkAndFixTableLocks();
788 
789     // Check (and fix if requested) orphaned table ZNodes
790     checkAndFixOrphanedTableZNodes();
791 
792     checkAndFixReplication();
793 
794     // Remove the hbck znode
795     cleanupHbckZnode();
796 
797     // Remove the hbck lock
798     unlockHbck();
799 
800     // Print table summary
801     printTableSummary(tablesInfo);
802     return errors.summarize();
803   }
804 
805   public static byte[] keyOnly (byte[] b) {
806     if (b == null)
807       return b;
808     int rowlength = Bytes.toShort(b, 0);
809     byte[] result = new byte[rowlength];
810     System.arraycopy(b, Bytes.SIZEOF_SHORT, result, 0, rowlength);
811     return result;
812   }
813 
814   @Override
815   public void close() throws IOException {
816     try {
817       cleanupHbckZnode();
818       unlockHbck();
819     } catch (Exception io) {
820       LOG.warn(io);
821     } finally {
822       if (zkw != null) {
823         zkw.close();
824         zkw = null;
825       }
826       IOUtils.closeQuietly(admin);
827       IOUtils.closeQuietly(meta);
828       IOUtils.closeQuietly(connection);
829     }
830   }
831 
832   private static class RegionBoundariesInformation {
833     public byte [] regionName;
834     public byte [] metaFirstKey;
835     public byte [] metaLastKey;
836     public byte [] storesFirstKey;
837     public byte [] storesLastKey;
838     @Override
839     public String toString () {
840       return "regionName=" + Bytes.toStringBinary(regionName) +
841              "\nmetaFirstKey=" + Bytes.toStringBinary(metaFirstKey) +
842              "\nmetaLastKey=" + Bytes.toStringBinary(metaLastKey) +
843              "\nstoresFirstKey=" + Bytes.toStringBinary(storesFirstKey) +
844              "\nstoresLastKey=" + Bytes.toStringBinary(storesLastKey);
845     }
846   }
847 
848   public void checkRegionBoundaries() {
849     try {
850       ByteArrayComparator comparator = new ByteArrayComparator();
851       List<HRegionInfo> regions = MetaScanner.listAllRegions(getConf(), connection, false);
852       final RegionBoundariesInformation currentRegionBoundariesInformation =
853           new RegionBoundariesInformation();
854       Path hbaseRoot = FSUtils.getRootDir(getConf());
855       for (HRegionInfo regionInfo : regions) {
856         Path tableDir = FSUtils.getTableDir(hbaseRoot, regionInfo.getTable());
857         currentRegionBoundariesInformation.regionName = regionInfo.getRegionName();
858         // For each region, get the start and stop key from the META and compare them to the
859         // same information from the Stores.
860         Path path = new Path(tableDir, regionInfo.getEncodedName());
861         FileSystem fs = path.getFileSystem(getConf());
862         FileStatus[] files = fs.listStatus(path);
863         // For all the column families in this region...
864         byte[] storeFirstKey = null;
865         byte[] storeLastKey = null;
866         for (FileStatus file : files) {
867           String fileName = file.getPath().toString();
868           fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
869           if (!fileName.startsWith(".") && !fileName.endsWith("recovered.edits")) {
870             FileStatus[] storeFiles = fs.listStatus(file.getPath());
871             // For all the stores in this column family.
872             for (FileStatus storeFile : storeFiles) {
873               HFile.Reader reader = HFile.createReader(fs, storeFile.getPath(), new CacheConfig(
874                   getConf()), getConf());
875               if (reader.getFirstKey() != null) {
876                 byte[] firstKey = keyOnly(reader.getFirstKey());
877                 if (storeFirstKey == null || comparator.compare(storeFirstKey, firstKey) > 0) {
878                   storeFirstKey = firstKey;
879                 }
880               }
881               if (reader.getLastKey() != null) {
882                 byte[] lastKey = keyOnly(reader.getLastKey());
883                 if (storeLastKey == null || comparator.compare(storeLastKey, lastKey) < 0) {
884                   storeLastKey = lastKey;
885                 }
886               }
887               reader.close();
888             }
889           }
890         }
891         currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey();
892         currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey();
893         currentRegionBoundariesInformation.storesFirstKey = storeFirstKey;
894         currentRegionBoundariesInformation.storesLastKey = storeLastKey;
895         if (currentRegionBoundariesInformation.metaFirstKey.length == 0)
896           currentRegionBoundariesInformation.metaFirstKey = null;
897         if (currentRegionBoundariesInformation.metaLastKey.length == 0)
898           currentRegionBoundariesInformation.metaLastKey = null;
899 
900         // For a region to be correct, we need the META start key to be smaller or equal to the
901         // smallest start key from all the stores, and the start key from the next META entry to
902         // be bigger than the last key from all the current stores. First region start key is null;
903         // Last region end key is null; some regions can be empty and not have any store.
904 
905         boolean valid = true;
906         // Checking start key.
907         if ((currentRegionBoundariesInformation.storesFirstKey != null)
908             && (currentRegionBoundariesInformation.metaFirstKey != null)) {
909           valid = valid
910               && comparator.compare(currentRegionBoundariesInformation.storesFirstKey,
911                 currentRegionBoundariesInformation.metaFirstKey) >= 0;
912         }
913         // Checking stop key.
914         if ((currentRegionBoundariesInformation.storesLastKey != null)
915             && (currentRegionBoundariesInformation.metaLastKey != null)) {
916           valid = valid
917               && comparator.compare(currentRegionBoundariesInformation.storesLastKey,
918                 currentRegionBoundariesInformation.metaLastKey) < 0;
919         }
920         if (!valid) {
921           errors.reportError(ERROR_CODE.BOUNDARIES_ERROR, "Found issues with regions boundaries",
922             tablesInfo.get(regionInfo.getTable()));
923           LOG.warn("Region's boundaries not aligned between stores and META for:");
924           LOG.warn(currentRegionBoundariesInformation);
925         }
926       }
927     } catch (IOException e) {
928       LOG.error(e);
929     }
930   }
931 
932   /**
933    * Iterates through the list of all orphan/invalid regiondirs.
934    */
935   private void adoptHdfsOrphans(Collection<HbckInfo> orphanHdfsDirs) throws IOException {
936     for (HbckInfo hi : orphanHdfsDirs) {
937       LOG.info("Attempting to handle orphan hdfs dir: " + hi.getHdfsRegionDir());
938       adoptHdfsOrphan(hi);
939     }
940   }
941 
942   /**
943    * Orphaned regions are regions without a .regioninfo file in them.  We "adopt"
944    * these orphans by creating a new region, and moving the column families,
945    * recovered edits, WALs, into the new region dir.  We determine the region
946    * startkey and endkeys by looking at all of the hfiles inside the column
947    * families to identify the min and max keys. The resulting region will
948    * likely violate table integrity but will be dealt with by merging
949    * overlapping regions.
950    */
951   @SuppressWarnings("deprecation")
952   private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
953     Path p = hi.getHdfsRegionDir();
954     FileSystem fs = p.getFileSystem(getConf());
955     FileStatus[] dirs = fs.listStatus(p);
956     if (dirs == null) {
957       LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " +
958           p + ". This dir could probably be deleted.");
959       return ;
960     }
961 
962     TableName tableName = hi.getTableName();
963     TableInfo tableInfo = tablesInfo.get(tableName);
964     Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
965     HTableDescriptor template = tableInfo.getHTD();
966 
967     // find min and max key values
968     Pair<byte[],byte[]> orphanRegionRange = null;
969     for (FileStatus cf : dirs) {
970       String cfName= cf.getPath().getName();
971       // TODO Figure out what the special dirs are
972       if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME)) continue;
973 
974       FileStatus[] hfiles = fs.listStatus(cf.getPath());
975       for (FileStatus hfile : hfiles) {
976         byte[] start, end;
977         HFile.Reader hf = null;
978         try {
979           CacheConfig cacheConf = new CacheConfig(getConf());
980           hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
981           hf.loadFileInfo();
982           KeyValue startKv = KeyValue.createKeyValueFromKey(hf.getFirstKey());
983           start = startKv.getRow();
984           KeyValue endKv = KeyValue.createKeyValueFromKey(hf.getLastKey());
985           end = endKv.getRow();
986         } catch (IOException ioe) {
987           LOG.warn("Problem reading orphan file " + hfile + ", skipping");
988           continue;
989         } catch (NullPointerException ioe) {
990           LOG.warn("Orphan file " + hfile + " is possibly corrupted HFile, skipping");
991           continue;
992         } finally {
993           if (hf != null) {
994             hf.close();
995           }
996         }
997 
998         // expand the range to include the range of all hfiles
999         if (orphanRegionRange == null) {
1000           // first range
1001           orphanRegionRange = new Pair<byte[], byte[]>(start, end);
1002         } else {
1003           // TODO add test
1004 
1005           // expand range only if the hfile is wider.
1006           if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
1007             orphanRegionRange.setFirst(start);
1008           }
1009           if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0 ) {
1010             orphanRegionRange.setSecond(end);
1011           }
1012         }
1013       }
1014     }
1015     if (orphanRegionRange == null) {
1016       LOG.warn("No data in dir " + p + ", sidelining data");
1017       fixes++;
1018       sidelineRegionDir(fs, hi);
1019       return;
1020     }
1021     LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " +
1022         Bytes.toString(orphanRegionRange.getSecond()) + ")");
1023 
1024     // create new region on hdfs.  move data into place.
1025     HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(), 
1026       Bytes.add(orphanRegionRange.getSecond(), new byte[1]));
1027     LOG.info("Creating new region : " + hri);
1028     HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
1029     Path target = region.getRegionFileSystem().getRegionDir();
1030 
1031     // rename all the data to new region
1032     mergeRegionDirs(target, hi);
1033     fixes++;
1034   }
1035 
1036   /**
1037    * This method determines if there are table integrity errors in HDFS.  If
1038    * there are errors and the appropriate "fix" options are enabled, the method
1039    * will first correct orphan regions making them into legit regiondirs, and
1040    * then reload to merge potentially overlapping regions.
1041    *
1042    * @return number of table integrity errors found
1043    */
1044   private int restoreHdfsIntegrity() throws IOException, InterruptedException {
1045     // Determine what's on HDFS
1046     LOG.info("Loading HBase regioninfo from HDFS...");
1047     loadHdfsRegionDirs(); // populating regioninfo table.
1048 
1049     int errs = errors.getErrorList().size();
1050     // First time just get suggestions.
1051     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1052     checkHdfsIntegrity(false, false);
1053 
1054     if (errors.getErrorList().size() == errs) {
1055       LOG.info("No integrity errors.  We are done with this phase. Glorious.");
1056       return 0;
1057     }
1058 
1059     if (shouldFixHdfsOrphans() && orphanHdfsDirs.size() > 0) {
1060       adoptHdfsOrphans(orphanHdfsDirs);
1061       // TODO optimize by incrementally adding instead of reloading.
1062     }
1063 
1064     // Make sure there are no holes now.
1065     if (shouldFixHdfsHoles()) {
1066       clearState(); // this also resets # fixes.
1067       loadHdfsRegionDirs();
1068       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1069       tablesInfo = checkHdfsIntegrity(shouldFixHdfsHoles(), false);
1070     }
1071 
1072     // Now we fix overlaps
1073     if (shouldFixHdfsOverlaps()) {
1074       // second pass we fix overlaps.
1075       clearState(); // this also resets # fixes.
1076       loadHdfsRegionDirs();
1077       tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1078       tablesInfo = checkHdfsIntegrity(false, shouldFixHdfsOverlaps());
1079     }
1080 
1081     return errors.getErrorList().size();
1082   }
1083 
1084   /**
1085    * Scan all the store file names to find any lingering reference files,
1086    * which refer to some none-exiting files. If "fix" option is enabled,
1087    * any lingering reference file will be sidelined if found.
1088    * <p>
1089    * Lingering reference file prevents a region from opening. It has to
1090    * be fixed before a cluster can start properly.
1091    */
1092   private void offlineReferenceFileRepair() throws IOException, InterruptedException {
1093     clearState();
1094     Configuration conf = getConf();
1095     Path hbaseRoot = FSUtils.getRootDir(conf);
1096     FileSystem fs = hbaseRoot.getFileSystem(conf);
1097     LOG.info("Computing mapping of all store files");
1098     Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot,
1099       new FSUtils.ReferenceFileFilter(fs), executor, errors);
1100     errors.print("");
1101     LOG.info("Validating mapping using HDFS state");
1102     for (Path path: allFiles.values()) {
1103       Path referredToFile = StoreFileInfo.getReferredToFile(path);
1104       if (fs.exists(referredToFile)) continue;  // good, expected
1105 
1106       // Found a lingering reference file
1107       errors.reportError(ERROR_CODE.LINGERING_REFERENCE_HFILE,
1108         "Found lingering reference file " + path);
1109       if (!shouldFixReferenceFiles()) continue;
1110 
1111       // Now, trying to fix it since requested
1112       boolean success = false;
1113       String pathStr = path.toString();
1114 
1115       // A reference file path should be like
1116       // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/referred_file.region_name
1117       // Up 5 directories to get the root folder.
1118       // So the file will be sidelined to a similar folder structure.
1119       int index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR);
1120       for (int i = 0; index > 0 && i < 5; i++) {
1121         index = pathStr.lastIndexOf(Path.SEPARATOR_CHAR, index - 1);
1122       }
1123       if (index > 0) {
1124         Path rootDir = getSidelineDir();
1125         Path dst = new Path(rootDir, pathStr.substring(index + 1));
1126         fs.mkdirs(dst.getParent());
1127         LOG.info("Trying to sideline reference file "
1128           + path + " to " + dst);
1129         setShouldRerun();
1130 
1131         success = fs.rename(path, dst);
1132         debugLsr(dst);
1133 
1134       }
1135       if (!success) {
1136         LOG.error("Failed to sideline reference file " + path);
1137       }
1138     }
1139   }
1140 
1141   /**
1142    * Scan all the store file names to find any lingering HFileLink files,
1143    * which refer to some none-exiting files. If "fix" option is enabled,
1144    * any lingering HFileLink file will be sidelined if found.
1145    */
1146   private void offlineHLinkFileRepair() throws IOException, InterruptedException {
1147     Configuration conf = getConf();
1148     Path hbaseRoot = FSUtils.getRootDir(conf);
1149     FileSystem fs = hbaseRoot.getFileSystem(conf);
1150     LOG.info("Computing mapping of all link files");
1151     Map<String, Path> allFiles = FSUtils
1152         .getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
1153     errors.print("");
1154 
1155     LOG.info("Validating mapping using HDFS state");
1156     for (Path path : allFiles.values()) {
1157       // building HFileLink object to gather locations
1158       HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
1159       if (actualLink.exists(fs)) continue; // good, expected
1160 
1161       // Found a lingering HFileLink
1162       errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
1163       if (!shouldFixHFileLinks()) continue;
1164 
1165       // Now, trying to fix it since requested
1166       setShouldRerun();
1167 
1168       // An HFileLink path should be like
1169       // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
1170       // sidelineing will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1171       boolean success = sidelineFile(fs, hbaseRoot, path);
1172 
1173       if (!success) {
1174         LOG.error("Failed to sideline HFileLink file " + path);
1175       }
1176 
1177       // An HFileLink backreference path should be like
1178       // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
1179       // sidelineing will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
1180       Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil
1181               .getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()),
1182                   HFileLink.getReferencedRegionName(path.getName().toString()),
1183                   path.getParent().getName()),
1184           HFileLink.getReferencedHFileName(path.getName().toString()));
1185       success = sidelineFile(fs, hbaseRoot, backRefPath);
1186 
1187       if (!success) {
1188         LOG.error("Failed to sideline HFileLink backreference file " + path);
1189       }
1190     }
1191   }
1192 
1193   private boolean sidelineFile(FileSystem fs, Path hbaseRoot, Path path) throws IOException {
1194     URI uri = hbaseRoot.toUri().relativize(path.toUri());
1195     if (uri.isAbsolute()) return false;
1196     String relativePath = uri.getPath();
1197     Path rootDir = getSidelineDir();
1198     Path dst = new Path(rootDir, relativePath);
1199     boolean pathCreated = fs.mkdirs(dst.getParent());
1200     if (!pathCreated) {
1201       LOG.error("Failed to create path: " + dst.getParent());
1202       return false;
1203     }
1204     LOG.info("Trying to sideline file " + path + " to " + dst);
1205     return fs.rename(path, dst);
1206   }
1207 
1208   /**
1209    * TODO -- need to add tests for this.
1210    */
1211   private void reportEmptyMetaCells() {
1212     errors.print("Number of empty REGIONINFO_QUALIFIER rows in hbase:meta: " +
1213       emptyRegionInfoQualifiers.size());
1214     if (details) {
1215       for (Result r: emptyRegionInfoQualifiers) {
1216         errors.print("  " + r);
1217       }
1218     }
1219   }
1220 
1221   /**
1222    * TODO -- need to add tests for this.
1223    */
1224   private void reportTablesInFlux() {
1225     AtomicInteger numSkipped = new AtomicInteger(0);
1226     HTableDescriptor[] allTables = getTables(numSkipped);
1227     errors.print("Number of Tables: " + allTables.length);
1228     if (details) {
1229       if (numSkipped.get() > 0) {
1230         errors.detail("Number of Tables in flux: " + numSkipped.get());
1231       }
1232       for (HTableDescriptor td : allTables) {
1233         errors.detail("  Table: " + td.getTableName() + "\t" +
1234                            (td.isReadOnly() ? "ro" : "rw") + "\t" +
1235                             (td.isMetaRegion() ? "META" : "    ") + "\t" +
1236                            " families: " + td.getFamilies().size());
1237       }
1238     }
1239   }
1240 
  /**
   * @return the error reporter accumulating the problems found by this hbck run
   */
  public ErrorReporter getErrors() {
    return errors;
  }
1244 
1245   /**
1246    * Read the .regioninfo file from the file system.  If there is no
1247    * .regioninfo, add it to the orphan hdfs region list.
1248    */
1249   private void loadHdfsRegioninfo(HbckInfo hbi) throws IOException {
1250     Path regionDir = hbi.getHdfsRegionDir();
1251     if (regionDir == null) {
1252       if (hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
1253         // Log warning only for default/ primary replica with no region dir
1254         LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry);
1255       }
1256       return;
1257     }
1258 
1259     if (hbi.hdfsEntry.hri != null) {
1260       // already loaded data
1261       return;
1262     }
1263 
1264     FileSystem fs = FileSystem.get(getConf());
1265     HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
1266     LOG.debug("HRegionInfo read: " + hri.toString());
1267     hbi.hdfsEntry.hri = hri;
1268   }
1269 
1270   /**
1271    * Exception thrown when a integrity repair operation fails in an
1272    * unresolvable way.
1273    */
1274   public static class RegionRepairException extends IOException {
1275     private static final long serialVersionUID = 1L;
1276     final IOException ioe;
1277     public RegionRepairException(String s, IOException ioe) {
1278       super(s);
1279       this.ioe = ioe;
1280     }
1281   }
1282 
1283   /**
1284    * Populate hbi's from regionInfos loaded from file system.
1285    */
1286   private SortedMap<TableName, TableInfo> loadHdfsRegionInfos()
1287       throws IOException, InterruptedException {
1288     tablesInfo.clear(); // regenerating the data
1289     // generate region split structure
1290     Collection<HbckInfo> hbckInfos = regionInfoMap.values();
1291 
1292     // Parallelized read of .regioninfo files.
1293     List<WorkItemHdfsRegionInfo> hbis = new ArrayList<WorkItemHdfsRegionInfo>(hbckInfos.size());
1294     List<Future<Void>> hbiFutures;
1295 
1296     for (HbckInfo hbi : hbckInfos) {
1297       WorkItemHdfsRegionInfo work = new WorkItemHdfsRegionInfo(hbi, this, errors);
1298       hbis.add(work);
1299     }
1300 
1301     // Submit and wait for completion
1302     hbiFutures = executor.invokeAll(hbis);
1303 
1304     for(int i=0; i<hbiFutures.size(); i++) {
1305       WorkItemHdfsRegionInfo work = hbis.get(i);
1306       Future<Void> f = hbiFutures.get(i);
1307       try {
1308         f.get();
1309       } catch(ExecutionException e) {
1310         LOG.warn("Failed to read .regioninfo file for region " +
1311               work.hbi.getRegionNameAsString(), e.getCause());
1312       }
1313     }
1314 
1315     Path hbaseRoot = FSUtils.getRootDir(getConf());
1316     FileSystem fs = hbaseRoot.getFileSystem(getConf());
1317     // serialized table info gathering.
1318     for (HbckInfo hbi: hbckInfos) {
1319 
1320       if (hbi.getHdfsHRI() == null) {
1321         // was an orphan
1322         continue;
1323       }
1324 
1325 
1326       // get table name from hdfs, populate various HBaseFsck tables.
1327       TableName tableName = hbi.getTableName();
1328       if (tableName == null) {
1329         // There was an entry in hbase:meta not in the HDFS?
1330         LOG.warn("tableName was null for: " + hbi);
1331         continue;
1332       }
1333 
1334       TableInfo modTInfo = tablesInfo.get(tableName);
1335       if (modTInfo == null) {
1336         // only executed once per table.
1337         modTInfo = new TableInfo(tableName);
1338         tablesInfo.put(tableName, modTInfo);
1339         try {
1340           HTableDescriptor htd =
1341               FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName);
1342           modTInfo.htds.add(htd);
1343         } catch (IOException ioe) {
1344           if (!orphanTableDirs.containsKey(tableName)) {
1345             LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
1346             //should only report once for each table
1347             errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
1348                 "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
1349             Set<String> columns = new HashSet<String>();
1350             orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
1351           }
1352         }
1353       }
1354       if (!hbi.isSkipChecks()) {
1355         modTInfo.addRegionInfo(hbi);
1356       }
1357     }
1358 
1359     loadTableInfosForTablesWithNoRegion();
1360     errors.print("");
1361 
1362     return tablesInfo;
1363   }
1364 
1365   /**
1366    * To get the column family list according to the column family dirs
1367    * @param columns
1368    * @param hbi
1369    * @return a set of column families
1370    * @throws IOException
1371    */
1372   private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
1373     Path regionDir = hbi.getHdfsRegionDir();
1374     FileSystem fs = regionDir.getFileSystem(getConf());
1375     FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
1376     for (FileStatus subdir : subDirs) {
1377       String columnfamily = subdir.getPath().getName();
1378       columns.add(columnfamily);
1379     }
1380     return columns;
1381   }
1382 
1383   /**
1384    * To fabricate a .tableinfo file with following contents<br>
1385    * 1. the correct tablename <br>
1386    * 2. the correct colfamily list<br>
1387    * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1388    * @throws IOException
1389    */
1390   private boolean fabricateTableInfo(FSTableDescriptors fstd, TableName tableName,
1391       Set<String> columns) throws IOException {
1392     if (columns ==null || columns.isEmpty()) return false;
1393     HTableDescriptor htd = new HTableDescriptor(tableName);
1394     for (String columnfamimly : columns) {
1395       htd.addFamily(new HColumnDescriptor(columnfamimly));
1396     }
1397     fstd.createTableDescriptor(htd, true);
1398     return true;
1399   }
1400 
1401   /**
1402    * To fix the empty REGIONINFO_QUALIFIER rows from hbase:meta <br>
1403    * @throws IOException
1404    */
1405   public void fixEmptyMetaCells() throws IOException {
1406     if (shouldFixEmptyMetaCells() && !emptyRegionInfoQualifiers.isEmpty()) {
1407       LOG.info("Trying to fix empty REGIONINFO_QUALIFIER hbase:meta rows.");
1408       for (Result region : emptyRegionInfoQualifiers) {
1409         deleteMetaRegion(region.getRow());
1410         errors.getErrorList().remove(ERROR_CODE.EMPTY_META_CELL);
1411       }
1412       emptyRegionInfoQualifiers.clear();
1413     }
1414   }
1415 
1416   /**
1417    * To fix orphan table by creating a .tableinfo file under tableDir <br>
1418    * 1. if TableInfo is cached, to recover the .tableinfo accordingly <br>
1419    * 2. else create a default .tableinfo file with following items<br>
1420    * &nbsp;2.1 the correct tablename <br>
1421    * &nbsp;2.2 the correct colfamily list<br>
1422    * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
1423    * @throws IOException
1424    */
1425   public void fixOrphanTables() throws IOException {
1426     if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
1427 
1428       List<TableName> tmpList = new ArrayList<TableName>();
1429       tmpList.addAll(orphanTableDirs.keySet());
1430       HTableDescriptor[] htds = getHTableDescriptors(tmpList);
1431       Iterator<Entry<TableName, Set<String>>> iter =
1432           orphanTableDirs.entrySet().iterator();
1433       int j = 0;
1434       int numFailedCase = 0;
1435       FSTableDescriptors fstd = new FSTableDescriptors(getConf());
1436       while (iter.hasNext()) {
1437         Entry<TableName, Set<String>> entry =
1438             iter.next();
1439         TableName tableName = entry.getKey();
1440         LOG.info("Trying to fix orphan table error: " + tableName);
1441         if (j < htds.length) {
1442           if (tableName.equals(htds[j].getTableName())) {
1443             HTableDescriptor htd = htds[j];
1444             LOG.info("fixing orphan table: " + tableName + " from cache");
1445             fstd.createTableDescriptor(htd, true);
1446             j++;
1447             iter.remove();
1448           }
1449         } else {
1450           if (fabricateTableInfo(fstd, tableName, entry.getValue())) {
1451             LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
1452             LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName);
1453             iter.remove();
1454           } else {
1455             LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
1456             numFailedCase++;
1457           }
1458         }
1459         fixes++;
1460       }
1461 
1462       if (orphanTableDirs.isEmpty()) {
1463         // all orphanTableDirs are luckily recovered
1464         // re-run doFsck after recovering the .tableinfo file
1465         setShouldRerun();
1466         LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
1467       } else if (numFailedCase > 0) {
1468         LOG.error("Failed to fix " + numFailedCase
1469             + " OrphanTables with default .tableinfo files");
1470       }
1471 
1472     }
1473     //cleanup the list
1474     orphanTableDirs.clear();
1475 
1476   }
1477 
1478   /**
1479    * This borrows code from MasterFileSystem.bootstrap()
1480    *
1481    * @return an open hbase:meta HRegion
1482    */
1483   private HRegion createNewMeta() throws IOException {
1484       Path rootdir = FSUtils.getRootDir(getConf());
1485     Configuration c = getConf();
1486     HRegionInfo metaHRI = new HRegionInfo(HRegionInfo.FIRST_META_REGIONINFO);
1487     HTableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
1488     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, false);
1489     HRegion meta = HRegion.createHRegion(metaHRI, rootdir, c, metaDescriptor);
1490     MasterFileSystem.setInfoFamilyCachingForMeta(metaDescriptor, true);
1491     return meta;
1492   }
1493 
1494   /**
1495    * Generate set of puts to add to new meta.  This expects the tables to be
1496    * clean with no overlaps or holes.  If there are any problems it returns null.
1497    *
1498    * @return An array list of puts to do in bulk, null if tables have problems
1499    */
1500   private ArrayList<Put> generatePuts(
1501       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1502     ArrayList<Put> puts = new ArrayList<Put>();
1503     boolean hasProblems = false;
1504     for (Entry<TableName, TableInfo> e : tablesInfo.entrySet()) {
1505       TableName name = e.getKey();
1506 
1507       // skip "hbase:meta"
1508       if (name.compareTo(TableName.META_TABLE_NAME) == 0) {
1509         continue;
1510       }
1511 
1512       TableInfo ti = e.getValue();
1513       for (Entry<byte[], Collection<HbckInfo>> spl : ti.sc.getStarts().asMap()
1514           .entrySet()) {
1515         Collection<HbckInfo> his = spl.getValue();
1516         int sz = his.size();
1517         if (sz != 1) {
1518           // problem
1519           LOG.error("Split starting at " + Bytes.toStringBinary(spl.getKey())
1520               + " had " +  sz + " regions instead of exactly 1." );
1521           hasProblems = true;
1522           continue;
1523         }
1524 
1525         // add the row directly to meta.
1526         HbckInfo hi = his.iterator().next();
1527         HRegionInfo hri = hi.getHdfsHRI(); // hi.metaEntry;
1528         Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
1529         puts.add(p);
1530       }
1531     }
1532     return hasProblems ? null : puts;
1533   }
1534 
1535   /**
1536    * Suggest fixes for each table
1537    */
1538   private void suggestFixes(
1539       SortedMap<TableName, TableInfo> tablesInfo) throws IOException {
1540     logParallelMerge();
1541     for (TableInfo tInfo : tablesInfo.values()) {
1542       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1543       tInfo.checkRegionChain(handler);
1544     }
1545   }
1546 
1547   /**
1548    * Rebuilds meta from information in hdfs/fs.  Depends on configuration
1549    * settings passed into hbck constructor to point to a particular fs/dir.
1550    *
1551    * @param fix flag that determines if method should attempt to fix holes
1552    * @return true if successful, false if attempt failed.
1553    */
1554   public boolean rebuildMeta(boolean fix) throws IOException,
1555       InterruptedException {
1556 
1557     // TODO check to make sure hbase is offline. (or at least the table
1558     // currently being worked on is off line)
1559 
1560     // Determine what's on HDFS
1561     LOG.info("Loading HBase regioninfo from HDFS...");
1562     loadHdfsRegionDirs(); // populating regioninfo table.
1563 
1564     int errs = errors.getErrorList().size();
1565     tablesInfo = loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1566     checkHdfsIntegrity(false, false);
1567 
1568     // make sure ok.
1569     if (errors.getErrorList().size() != errs) {
1570       // While in error state, iterate until no more fixes possible
1571       while(true) {
1572         fixes = 0;
1573         suggestFixes(tablesInfo);
1574         errors.clear();
1575         loadHdfsRegionInfos(); // update tableInfos based on region info in fs.
1576         checkHdfsIntegrity(shouldFixHdfsHoles(), shouldFixHdfsOverlaps());
1577 
1578         int errCount = errors.getErrorList().size();
1579 
1580         if (fixes == 0) {
1581           if (errCount > 0) {
1582             return false; // failed to fix problems.
1583           } else {
1584             break; // no fixes and no problems? drop out and fix stuff!
1585           }
1586         }
1587       }
1588     }
1589 
1590     // we can rebuild, move old meta out of the way and start
1591     LOG.info("HDFS regioninfo's seems good.  Sidelining old hbase:meta");
1592     Path backupDir = sidelineOldMeta();
1593 
1594     LOG.info("Creating new hbase:meta");
1595     HRegion meta = createNewMeta();
1596 
1597     // populate meta
1598     List<Put> puts = generatePuts(tablesInfo);
1599     if (puts == null) {
1600       LOG.fatal("Problem encountered when creating new hbase:meta entries.  " +
1601         "You may need to restore the previously sidelined hbase:meta");
1602       return false;
1603     }
1604     meta.batchMutate(puts.toArray(new Put[puts.size()]));
1605     HRegion.closeHRegion(meta);
1606     // clean up the temporary hbck meta recovery WAL directory
1607     removeHBCKMetaRecoveryWALDir(meta);
1608     LOG.info("Success! hbase:meta table rebuilt.");
1609     LOG.info("Old hbase:meta is moved into " + backupDir);
1610     return true;
1611   }
1612 
1613   /**
1614    * Removes the empty Meta recovery WAL directory.
1615    * @param meta Meta region
1616    */
1617   private void removeHBCKMetaRecoveryWALDir(HRegion meta) throws IOException {
1618     // TODO Since HBASE-11983 not available in this branch, so we don't know the walFactoryId.
1619     // Retrieving WAL directory
1620     Path walLogDir = ((FSHLog) meta.getWAL()).getCurrentFileName().getParent();
1621     FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
1622     FileStatus[] walFiles = FSUtils.listStatus(fs, walLogDir, null);
1623     if (walFiles == null || walFiles.length == 0) {
1624       LOG.info("HBCK meta recovery WAL directory is empty, removing it now.");
1625       if (!FSUtils.deleteDirectory(fs, walLogDir)) {
1626         LOG.warn("Couldn't clear the HBCK Meta recovery WAL directory " + walLogDir);
1627       }
1628     }
1629   }
1630 
1631   /**
1632    * Log an appropriate message about whether or not overlapping merges are computed in parallel.
1633    */
1634   private void logParallelMerge() {
1635     if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
1636       LOG.info("Handling overlap merges in parallel. set hbasefsck.overlap.merge.parallel to" +
1637           " false to run serially.");
1638     } else {
1639       LOG.info("Handling overlap merges serially.  set hbasefsck.overlap.merge.parallel to" +
1640           " true to run in parallel.");
1641     }
1642   }
1643 
1644   private SortedMap<TableName, TableInfo> checkHdfsIntegrity(boolean fixHoles,
1645       boolean fixOverlaps) throws IOException {
1646     LOG.info("Checking HBase region split map from HDFS data...");
1647     logParallelMerge();
1648     for (TableInfo tInfo : tablesInfo.values()) {
1649       TableIntegrityErrorHandler handler;
1650       if (fixHoles || fixOverlaps) {
1651         handler = tInfo.new HDFSIntegrityFixer(tInfo, errors, getConf(),
1652           fixHoles, fixOverlaps);
1653       } else {
1654         handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
1655       }
1656       if (!tInfo.checkRegionChain(handler)) {
1657         // should dump info as well.
1658         errors.report("Found inconsistency in table " + tInfo.getName());
1659       }
1660     }
1661     return tablesInfo;
1662   }
1663 
1664   private Path getSidelineDir() throws IOException {
1665     if (sidelineDir == null) {
1666       Path hbaseDir = FSUtils.getRootDir(getConf());
1667       Path hbckDir = new Path(hbaseDir, HConstants.HBCK_SIDELINEDIR_NAME);
1668       sidelineDir = new Path(hbckDir, hbaseDir.getName() + "-"
1669           + startMillis);
1670     }
1671     return sidelineDir;
1672   }
1673 
1674   /**
1675    * Sideline a region dir (instead of deleting it)
1676    */
1677   Path sidelineRegionDir(FileSystem fs, HbckInfo hi) throws IOException {
1678     return sidelineRegionDir(fs, null, hi);
1679   }
1680 
1681   /**
1682    * Sideline a region dir (instead of deleting it)
1683    *
1684    * @param parentDir if specified, the region will be sidelined to
1685    * folder like .../parentDir/<table name>/<region name>. The purpose
1686    * is to group together similar regions sidelined, for example, those
1687    * regions should be bulk loaded back later on. If null, it is ignored.
1688    */
1689   Path sidelineRegionDir(FileSystem fs,
1690       String parentDir, HbckInfo hi) throws IOException {
1691     TableName tableName = hi.getTableName();
1692     Path regionDir = hi.getHdfsRegionDir();
1693 
1694     if (!fs.exists(regionDir)) {
1695       LOG.warn("No previous " + regionDir + " exists.  Continuing.");
1696       return null;
1697     }
1698 
1699     Path rootDir = getSidelineDir();
1700     if (parentDir != null) {
1701       rootDir = new Path(rootDir, parentDir);
1702     }
1703     Path sidelineTableDir= FSUtils.getTableDir(rootDir, tableName);
1704     Path sidelineRegionDir = new Path(sidelineTableDir, regionDir.getName());
1705     fs.mkdirs(sidelineRegionDir);
1706     boolean success = false;
1707     FileStatus[] cfs =  fs.listStatus(regionDir);
1708     if (cfs == null) {
1709       LOG.info("Region dir is empty: " + regionDir);
1710     } else {
1711       for (FileStatus cf : cfs) {
1712         Path src = cf.getPath();
1713         Path dst =  new Path(sidelineRegionDir, src.getName());
1714         if (fs.isFile(src)) {
1715           // simple file
1716           success = fs.rename(src, dst);
1717           if (!success) {
1718             String msg = "Unable to rename file " + src +  " to " + dst;
1719             LOG.error(msg);
1720             throw new IOException(msg);
1721           }
1722           continue;
1723         }
1724 
1725         // is a directory.
1726         fs.mkdirs(dst);
1727 
1728         LOG.info("Sidelining files from " + src + " into containing region " + dst);
1729         // FileSystem.rename is inconsistent with directories -- if the
1730         // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
1731         // it moves the src into the dst dir resulting in (foo/a/b).  If
1732         // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
1733         FileStatus[] hfiles = fs.listStatus(src);
1734         if (hfiles != null && hfiles.length > 0) {
1735           for (FileStatus hfile : hfiles) {
1736             success = fs.rename(hfile.getPath(), dst);
1737             if (!success) {
1738               String msg = "Unable to rename file " + src +  " to " + dst;
1739               LOG.error(msg);
1740               throw new IOException(msg);
1741             }
1742           }
1743         }
1744         LOG.debug("Sideline directory contents:");
1745         debugLsr(sidelineRegionDir);
1746       }
1747     }
1748 
1749     LOG.info("Removing old region dir: " + regionDir);
1750     success = fs.delete(regionDir, true);
1751     if (!success) {
1752       String msg = "Unable to delete dir " + regionDir;
1753       LOG.error(msg);
1754       throw new IOException(msg);
1755     }
1756     return sidelineRegionDir;
1757   }
1758 
1759   /**
1760    * Side line an entire table.
1761    */
1762   void sidelineTable(FileSystem fs, TableName tableName, Path hbaseDir,
1763       Path backupHbaseDir) throws IOException {
1764     Path tableDir = FSUtils.getTableDir(hbaseDir, tableName);
1765     if (fs.exists(tableDir)) {
1766       Path backupTableDir= FSUtils.getTableDir(backupHbaseDir, tableName);
1767       fs.mkdirs(backupTableDir.getParent());
1768       boolean success = fs.rename(tableDir, backupTableDir);
1769       if (!success) {
1770         throw new IOException("Failed to move  " + tableName + " from "
1771             +  tableDir + " to " + backupTableDir);
1772       }
1773     } else {
1774       LOG.info("No previous " + tableName +  " exists.  Continuing.");
1775     }
1776   }
1777 
1778   /**
1779    * @return Path to backup of original directory
1780    */
1781   Path sidelineOldMeta() throws IOException {
1782     // put current hbase:meta aside.
1783     Path hbaseDir = FSUtils.getRootDir(getConf());
1784     FileSystem fs = hbaseDir.getFileSystem(getConf());
1785     Path backupDir = getSidelineDir();
1786     fs.mkdirs(backupDir);
1787 
1788     try {
1789       sidelineTable(fs, TableName.META_TABLE_NAME, hbaseDir, backupDir);
1790     } catch (IOException e) {
1791         LOG.fatal("... failed to sideline meta. Currently in inconsistent state.  To restore "
1792             + "try to rename hbase:meta in " + backupDir.getName() + " to "
1793             + hbaseDir.getName() + ".", e);
1794       throw e; // throw original exception
1795     }
1796     return backupDir;
1797   }
1798 
1799   /**
1800    * Load the list of disabled tables in ZK into local set.
1801    * @throws ZooKeeperConnectionException
1802    * @throws IOException
1803    */
1804   private void loadDisabledTables()
1805   throws ZooKeeperConnectionException, IOException {
1806     HConnectionManager.execute(new HConnectable<Void>(getConf()) {
1807       @Override
1808       public Void connect(HConnection connection) throws IOException {
1809         try {
1810           for (TableName tableName :
1811               ZKTableStateClientSideReader.getDisabledOrDisablingTables(zkw)) {
1812             disabledTables.add(tableName);
1813           }
1814         } catch (KeeperException ke) {
1815           throw new IOException(ke);
1816         } catch (InterruptedException e) {
1817           throw new InterruptedIOException();
1818         }
1819         return null;
1820       }
1821     });
1822   }
1823 
1824   /**
1825    * Check if the specified region's table is disabled.
1826    */
1827   private boolean isTableDisabled(HRegionInfo regionInfo) {
1828     return disabledTables.contains(regionInfo.getTable());
1829   }
1830 
1831   /**
1832    * Scan HDFS for all regions, recording their information into
1833    * regionInfoMap
1834    */
1835   public void loadHdfsRegionDirs() throws IOException, InterruptedException {
1836     Path rootDir = FSUtils.getRootDir(getConf());
1837     FileSystem fs = rootDir.getFileSystem(getConf());
1838 
1839     // list all tables from HDFS
1840     List<FileStatus> tableDirs = Lists.newArrayList();
1841 
1842     boolean foundVersionFile = fs.exists(new Path(rootDir, HConstants.VERSION_FILE_NAME));
1843 
1844     List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
1845     for (Path path : paths) {
1846       TableName tableName = FSUtils.getTableName(path);
1847        if ((!checkMetaOnly &&
1848            isTableIncluded(tableName)) ||
1849            tableName.equals(TableName.META_TABLE_NAME)) {
1850          tableDirs.add(fs.getFileStatus(path));
1851        }
1852     }
1853 
1854     // verify that version file exists
1855     if (!foundVersionFile) {
1856       errors.reportError(ERROR_CODE.NO_VERSION_FILE,
1857           "Version file does not exist in root dir " + rootDir);
1858       if (shouldFixVersionFile()) {
1859         LOG.info("Trying to create a new " + HConstants.VERSION_FILE_NAME
1860             + " file.");
1861         setShouldRerun();
1862         FSUtils.setVersion(fs, rootDir, getConf().getInt(
1863             HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000), getConf().getInt(
1864             HConstants.VERSION_FILE_WRITE_ATTEMPTS,
1865             HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
1866       }
1867     }
1868 
1869     // Avoid multithreading at table-level because already multithreaded internally at
1870     // region-level.  Additionally multithreading at table-level can lead to deadlock
1871     // if there are many tables in the cluster.  Since there are a limited # of threads
1872     // in the executor's thread pool and if we multithread at the table-level by putting
1873     // WorkItemHdfsDir callables into the executor, then we will have some threads in the
1874     // executor tied up solely in waiting for the tables' region-level calls to complete.
1875     // If there are enough tables then there will be no actual threads in the pool left
1876     // for the region-level callables to be serviced.
1877     for (FileStatus tableDir : tableDirs) {
1878       LOG.debug("Loading region dirs from " +tableDir.getPath());
1879       WorkItemHdfsDir item = new WorkItemHdfsDir(fs, errors, tableDir);
1880       try {
1881         item.call();
1882       } catch (ExecutionException e) {
1883         LOG.warn("Could not completely load table dir " +
1884             tableDir.getPath(), e.getCause());
1885       }
1886     }
1887     errors.print("");
1888   }
1889 
1890   /**
1891    * Record the location of the hbase:meta region as found in ZooKeeper.
1892    */
1893   private boolean recordMetaRegion() throws IOException {
1894     RegionLocations rl = ((ClusterConnection)connection).locateRegion(TableName.META_TABLE_NAME,
1895         HConstants.EMPTY_START_ROW, false, false);
1896     if (rl == null) {
1897       errors.reportError(ERROR_CODE.NULL_META_REGION,
1898           "META region or some of its attributes are null.");
1899       return false;
1900     }
1901     for (HRegionLocation metaLocation : rl.getRegionLocations()) {
1902       // Check if Meta region is valid and existing
1903       if (metaLocation == null || metaLocation.getRegionInfo() == null ||
1904           metaLocation.getHostname() == null) {
1905         errors.reportError(ERROR_CODE.NULL_META_REGION,
1906             "META region or some of its attributes are null.");
1907         return false;
1908       }
1909       ServerName sn = metaLocation.getServerName();
1910       MetaEntry m = new MetaEntry(metaLocation.getRegionInfo(), sn, EnvironmentEdgeManager.currentTime());
1911       HbckInfo hbckInfo = regionInfoMap.get(metaLocation.getRegionInfo().getEncodedName());
1912       if (hbckInfo == null) {
1913         regionInfoMap.put(metaLocation.getRegionInfo().getEncodedName(), new HbckInfo(m));
1914       } else {
1915         hbckInfo.metaEntry = m;
1916       }
1917     }
1918     return true;
1919   }
1920 
  /**
   * Creates the ZooKeeper watcher used by this hbck run.  The supplied
   * Abortable terminates the whole process on abort: hbck is a standalone
   * tool and cannot usefully continue after a ZK-level failure.
   */
  private ZooKeeperWatcher createZooKeeperWatcher() throws IOException {
    return new ZooKeeperWatcher(getConf(), "hbase Fsck", new Abortable() {
      @Override
      public void abort(String why, Throwable e) {
        LOG.error(why, e);
        System.exit(1);
      }

      @Override
      public boolean isAborted() {
        // Never reports aborted: abort() above exits the JVM instead.
        return false;
      }

    });
  }
1936 
1937 
1938   /**
1939    * Contacts each regionserver and fetches metadata about regions.
1940    * @param regionServerList - the list of region servers to connect to
1941    * @throws IOException if a remote or network exception occurs
1942    */
1943   void processRegionServers(Collection<ServerName> regionServerList)
1944     throws IOException, InterruptedException {
1945 
1946     List<WorkItemRegion> workItems = new ArrayList<WorkItemRegion>(regionServerList.size());
1947     List<Future<Void>> workFutures;
1948 
1949     // loop to contact each region server in parallel
1950     for (ServerName rsinfo: regionServerList) {
1951       workItems.add(new WorkItemRegion(this, rsinfo, errors, connection));
1952     }
1953 
1954     workFutures = executor.invokeAll(workItems);
1955 
1956     for(int i=0; i<workFutures.size(); i++) {
1957       WorkItemRegion item = workItems.get(i);
1958       Future<Void> f = workFutures.get(i);
1959       try {
1960         f.get();
1961       } catch(ExecutionException e) {
1962         LOG.warn("Could not process regionserver " + item.rsinfo.getHostAndPort(),
1963             e.getCause());
1964       }
1965     }
1966   }
1967 
1968   /**
1969    * Check consistency of all regions that have been found in previous phases.
1970    */
1971   private void checkAndFixConsistency()
1972   throws IOException, KeeperException, InterruptedException {
1973 	  // Divide the checks in two phases. One for default/primary replicas and another
1974 	  // for the non-primary ones. Keeps code cleaner this way.
1975     List<CheckRegionConsistencyWorkItem> workItems =
1976         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1977     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1978       if (e.getValue().getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
1979         workItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1980       }
1981     }
1982     checkRegionConsistencyConcurrently(workItems);
1983 
1984     boolean prevHdfsCheck = shouldCheckHdfs();
1985     setCheckHdfs(false); //replicas don't have any hdfs data
1986     // Run a pass over the replicas and fix any assignment issues that exist on the currently
1987     // deployed/undeployed replicas.
1988     List<CheckRegionConsistencyWorkItem> replicaWorkItems =
1989         new ArrayList<CheckRegionConsistencyWorkItem>(regionInfoMap.size());
1990     for (java.util.Map.Entry<String, HbckInfo> e: regionInfoMap.entrySet()) {
1991       if (e.getValue().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
1992         replicaWorkItems.add(new CheckRegionConsistencyWorkItem(e.getKey(), e.getValue()));
1993       }
1994     }
1995     checkRegionConsistencyConcurrently(replicaWorkItems);
1996     setCheckHdfs(prevHdfsCheck);
1997 
1998     // If some regions is skipped during checkRegionConsistencyConcurrently() phase, we might
1999     // not get accurate state of the hbase if continuing. The config here allows users to tune
2000     // the tolerance of number of skipped region.
2001     // TODO: evaluate the consequence to continue the hbck operation without config.
2002     int terminateThreshold =  getConf().getInt("hbase.hbck.skipped.regions.limit", 0);
2003     int numOfSkippedRegions = skippedRegions.size();
2004     if (numOfSkippedRegions > 0 && numOfSkippedRegions > terminateThreshold) {
2005       throw new IOException(numOfSkippedRegions
2006         + " region(s) could not be checked or repaired.  See logs for detail.");
2007     }
2008   }
2009 
2010   /**
2011    * Check consistency of all regions using mulitple threads concurrently.
2012    */
2013   private void checkRegionConsistencyConcurrently(
2014     final List<CheckRegionConsistencyWorkItem> workItems)
2015     throws IOException, KeeperException, InterruptedException {
2016     if (workItems.isEmpty()) {
2017       return;  // nothing to check
2018     }
2019 
2020     List<Future<Void>> workFutures = executor.invokeAll(workItems);
2021     for(Future<Void> f: workFutures) {
2022       try {
2023         f.get();
2024       } catch(ExecutionException e1) {
2025         LOG.warn("Could not check region consistency " , e1.getCause());
2026         if (e1.getCause() instanceof IOException) {
2027           throw (IOException)e1.getCause();
2028         } else if (e1.getCause() instanceof KeeperException) {
2029           throw (KeeperException)e1.getCause();
2030         } else if (e1.getCause() instanceof InterruptedException) {
2031           throw (InterruptedException)e1.getCause();
2032         } else {
2033           throw new IOException(e1.getCause());
2034         }
2035       }
2036     }
2037   }
2038 
2039   class CheckRegionConsistencyWorkItem implements Callable<Void> {
2040     private final String key;
2041     private final HbckInfo hbi;
2042 
2043     CheckRegionConsistencyWorkItem(String key, HbckInfo hbi) {
2044       this.key = key;
2045       this.hbi = hbi;
2046     }
2047 
2048     @Override
2049     public synchronized Void call() throws Exception {
2050       try {
2051         checkRegionConsistency(key, hbi);
2052       } catch (Exception e) {
2053         // If the region is non-META region, skip this region and send warning/error message; if
2054         // the region is META region, we should not continue.
2055         LOG.warn("Unable to complete check or repair the region '" + hbi.getRegionNameAsString()
2056           + "'.", e);
2057         if (hbi.getHdfsHRI().isMetaRegion()) {
2058           throw e;
2059         }
2060         LOG.warn("Skip region '" + hbi.getRegionNameAsString() + "'");
2061         addSkippedRegion(hbi);
2062       }
2063       return null;
2064     }
2065   }
2066 
2067   private void addSkippedRegion(final HbckInfo hbi) {
2068     Set<String> skippedRegionNames = skippedRegions.get(hbi.getTableName());
2069     if (skippedRegionNames == null) {
2070       skippedRegionNames = new HashSet<String>();
2071     }
2072     skippedRegionNames.add(hbi.getRegionNameAsString());
2073     skippedRegions.put(hbi.getTableName(), skippedRegionNames);
2074   }
2075 
2076   private void preCheckPermission() throws IOException, AccessDeniedException {
2077     if (shouldIgnorePreCheckPermission()) {
2078       return;
2079     }
2080 
2081     Path hbaseDir = FSUtils.getRootDir(getConf());
2082     FileSystem fs = hbaseDir.getFileSystem(getConf());
2083     UserProvider userProvider = UserProvider.instantiate(getConf());
2084     UserGroupInformation ugi = userProvider.getCurrent().getUGI();
2085     FileStatus[] files = fs.listStatus(hbaseDir);
2086     for (FileStatus file : files) {
2087       try {
2088         FSUtils.checkAccess(ugi, file, FsAction.WRITE);
2089       } catch (AccessDeniedException ace) {
2090         LOG.warn("Got AccessDeniedException when preCheckPermission ", ace);
2091         errors.reportError(ERROR_CODE.WRONG_USAGE, "Current user " + ugi.getUserName()
2092           + " does not have write perms to " + file.getPath()
2093           + ". Please rerun hbck as hdfs user " + file.getOwner());
2094         throw ace;
2095       }
2096     }
2097   }
2098 
2099   /**
2100    * Deletes region from meta table
2101    */
2102   private void deleteMetaRegion(HbckInfo hi) throws IOException {
2103     deleteMetaRegion(hi.metaEntry.getRegionName());
2104   }
2105 
2106   /**
2107    * Deletes region from meta table
2108    */
2109   private void deleteMetaRegion(byte[] metaKey) throws IOException {
2110     Delete d = new Delete(metaKey);
2111     meta.delete(d);
2112     LOG.info("Deleted " + Bytes.toString(metaKey) + " from META" );
2113   }
2114 
2115   /**
2116    * Reset the split parent region info in meta table
2117    */
2118   private void resetSplitParent(HbckInfo hi) throws IOException {
2119     RowMutations mutations = new RowMutations(hi.metaEntry.getRegionName());
2120     Delete d = new Delete(hi.metaEntry.getRegionName());
2121     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITA_QUALIFIER);
2122     d.deleteColumn(HConstants.CATALOG_FAMILY, HConstants.SPLITB_QUALIFIER);
2123     mutations.add(d);
2124 
2125     HRegionInfo hri = new HRegionInfo(hi.metaEntry);
2126     hri.setOffline(false);
2127     hri.setSplit(false);
2128     Put p = MetaTableAccessor.makePutFromRegionInfo(hri);
2129     mutations.add(p);
2130 
2131     meta.mutateRow(mutations);
2132     LOG.info("Reset split parent " + hi.metaEntry.getRegionNameAsString() + " in META" );
2133   }
2134 
2135   /**
2136    * This backwards-compatibility wrapper for permanently offlining a region
2137    * that should not be alive.  If the region server does not support the
2138    * "offline" method, it will use the closest unassign method instead.  This
2139    * will basically work until one attempts to disable or delete the affected
2140    * table.  The problem has to do with in-memory only master state, so
2141    * restarting the HMaster or failing over to another should fix this.
2142    */
2143   private void offline(byte[] regionName) throws IOException {
2144     String regionString = Bytes.toStringBinary(regionName);
2145     if (!rsSupportsOffline) {
2146       LOG.warn("Using unassign region " + regionString
2147           + " instead of using offline method, you should"
2148           + " restart HMaster after these repairs");
2149       admin.unassign(regionName, true);
2150       return;
2151     }
2152 
2153     // first time we assume the rs's supports #offline.
2154     try {
2155       LOG.info("Offlining region " + regionString);
2156       admin.offline(regionName);
2157     } catch (IOException ioe) {
2158       String notFoundMsg = "java.lang.NoSuchMethodException: " +
2159         "org.apache.hadoop.hbase.master.HMaster.offline([B)";
2160       if (ioe.getMessage().contains(notFoundMsg)) {
2161         LOG.warn("Using unassign region " + regionString
2162             + " instead of using offline method, you should"
2163             + " restart HMaster after these repairs");
2164         rsSupportsOffline = false; // in the future just use unassign
2165         admin.unassign(regionName, true);
2166         return;
2167       }
2168       throw ioe;
2169     }
2170   }
2171 
2172   private void undeployRegions(HbckInfo hi) throws IOException, InterruptedException {
2173     undeployRegionsForHbi(hi);
2174     // undeploy replicas of the region (but only if the method is invoked for the primary)
2175     if (hi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2176       return;
2177     }
2178     int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2179     for (int i = 1; i < numReplicas; i++) {
2180       if (hi.getPrimaryHRIForDeployedReplica() == null) continue;
2181       HRegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
2182           hi.getPrimaryHRIForDeployedReplica(), i);
2183       HbckInfo h = regionInfoMap.get(hri.getEncodedName());
2184       if (h != null) {
2185         undeployRegionsForHbi(h);
2186         //set skip checks; we undeployed it, and we don't want to evaluate this anymore
2187         //in consistency checks
2188         h.setSkipChecks(true);
2189       }
2190     }
2191   }
2192 
2193   private void undeployRegionsForHbi(HbckInfo hi) throws IOException, InterruptedException {
2194     for (OnlineEntry rse : hi.deployedEntries) {
2195       LOG.debug("Undeploy region "  + rse.hri + " from " + rse.hsa);
2196       try {
2197         HBaseFsckRepair.closeRegionSilentlyAndWait(connection, rse.hsa, rse.hri);
2198         offline(rse.hri.getRegionName());
2199       } catch (IOException ioe) {
2200         LOG.warn("Got exception when attempting to offline region "
2201             + Bytes.toString(rse.hri.getRegionName()), ioe);
2202       }
2203     }
2204   }
2205 
2206   /**
2207    * Attempts to undeploy a region from a region server based in information in
2208    * META.  Any operations that modify the file system should make sure that
2209    * its corresponding region is not deployed to prevent data races.
2210    *
2211    * A separate call is required to update the master in-memory region state
2212    * kept in the AssignementManager.  Because disable uses this state instead of
2213    * that found in META, we can't seem to cleanly disable/delete tables that
2214    * have been hbck fixed.  When used on a version of HBase that does not have
2215    * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master
2216    * restart or failover may be required.
2217    */
2218   private void closeRegion(HbckInfo hi) throws IOException, InterruptedException {
2219     if (hi.metaEntry == null && hi.hdfsEntry == null) {
2220       undeployRegions(hi);
2221       return;
2222     }
2223 
2224     // get assignment info and hregioninfo from meta.
2225     Get get = new Get(hi.getRegionName());
2226     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2227     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
2228     get.addColumn(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
2229     // also get the locations of the replicas to close if the primary region is being closed
2230     if (hi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2231       int numReplicas = admin.getTableDescriptor(hi.getTableName()).getRegionReplication();
2232       for (int i = 0; i < numReplicas; i++) {
2233         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i));
2234         get.addColumn(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i));
2235       }
2236     }
2237     Result r = meta.get(get);
2238     RegionLocations rl = MetaTableAccessor.getRegionLocations(r);
2239     if (rl == null) {
2240       LOG.warn("Unable to close region " + hi.getRegionNameAsString() +
2241           " since meta does not have handle to reach it");
2242       return;
2243     }
2244     for (HRegionLocation h : rl.getRegionLocations()) {
2245       ServerName serverName = h.getServerName();
2246       if (serverName == null) {
2247         errors.reportError("Unable to close region "
2248             + hi.getRegionNameAsString() +  " because meta does not "
2249             + "have handle to reach it.");
2250         continue;
2251       }
2252       HRegionInfo hri = h.getRegionInfo();
2253       if (hri == null) {
2254         LOG.warn("Unable to close region " + hi.getRegionNameAsString()
2255             + " because hbase:meta had invalid or missing "
2256             + HConstants.CATALOG_FAMILY_STR + ":"
2257             + Bytes.toString(HConstants.REGIONINFO_QUALIFIER)
2258             + " qualifier value.");
2259         continue;
2260       }
2261       // close the region -- close files and remove assignment
2262       HBaseFsckRepair.closeRegionSilentlyAndWait(connection, serverName, hri);
2263     }
2264   }
2265 
  /**
   * If -fixAssignments is enabled: undeploys the region, forces a fresh
   * assignment, and waits for it to take effect.  When invoked for a primary
   * replica, the same repair is then applied to every secondary replica.
   *
   * @param hbi region to repair
   * @param msg message printed to the error reporter before repairing
   */
  private void tryAssignmentRepair(HbckInfo hbi, String msg) throws IOException,
    KeeperException, InterruptedException {
    // If we are trying to fix the errors
    if (shouldFixAssignments()) {
      errors.print(msg);
      undeployRegions(hbi);
      setShouldRerun();
      // Prefer the HRI recovered from HDFS; fall back to the meta entry.
      HRegionInfo hri = hbi.getHdfsHRI();
      if (hri == null) {
        hri = hbi.metaEntry;
      }
      HBaseFsckRepair.fixUnassigned(admin, hri);
      HBaseFsckRepair.waitUntilAssigned(admin, hri);

      // also assign replicas if needed (do it only when this call operates on a primary replica)
      if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) return;
      int replicationCount = admin.getTableDescriptor(hri.getTable()).getRegionReplication();
      for (int i = 1; i < replicationCount; i++) {
        // NOTE(review): hri is reassigned each pass, so replica i is derived
        // from the replica built in pass i-1 rather than from the primary --
        // presumably equivalent since only the replicaId differs; confirm.
        hri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
        HbckInfo h = regionInfoMap.get(hri.getEncodedName());
        if (h != null) {
          undeployRegions(h);
          //set skip checks; we undeploy & deploy it; we don't want to evaluate this hbi anymore
          //in consistency checks
          h.setSkipChecks(true);
        }
        HBaseFsckRepair.fixUnassigned(admin, hri);
        HBaseFsckRepair.waitUntilAssigned(admin, hri);
      }

    }
  }
2298 
2299   /**
2300    * Check a single region for consistency and correct deployment.
2301    */
2302   private void checkRegionConsistency(final String key, final HbckInfo hbi)
2303   throws IOException, KeeperException, InterruptedException {
2304 
2305 	if (hbi.isSkipChecks()) return;
2306 	String descriptiveName = hbi.toString();
2307     boolean inMeta = hbi.metaEntry != null;
2308     // In case not checking HDFS, assume the region is on HDFS
2309     boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
2310     boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
2311     boolean isDeployed = !hbi.deployedOn.isEmpty();
2312     boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
2313     boolean deploymentMatchesMeta =
2314       hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
2315       hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
2316     boolean splitParent =
2317       (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
2318     boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
2319     boolean recentlyModified = inHdfs &&
2320       hbi.getModTime() + timelag > EnvironmentEdgeManager.currentTime();
2321 
2322     // ========== First the healthy cases =============
2323     if (hbi.containsOnlyHdfsEdits()) {
2324       return;
2325     }
2326     if (inMeta && inHdfs && isDeployed && deploymentMatchesMeta && shouldBeDeployed) {
2327       return;
2328     } else if (inMeta && inHdfs && !shouldBeDeployed && !isDeployed) {
2329       LOG.info("Region " + descriptiveName + " is in META, and in a disabled " +
2330         "tabled that is not deployed");
2331       return;
2332     } else if (recentlyModified) {
2333       LOG.warn("Region " + descriptiveName + " was recently modified -- skipping");
2334       return;
2335     }
2336     // ========== Cases where the region is not in hbase:meta =============
2337     else if (!inMeta && !inHdfs && !isDeployed) {
2338       // We shouldn't have record of this region at all then!
2339       assert false : "Entry for region with no data";
2340     } else if (!inMeta && !inHdfs && isDeployed) {
2341       errors.reportError(ERROR_CODE.NOT_IN_META_HDFS, "Region "
2342           + descriptiveName + ", key=" + key + ", not on HDFS or in hbase:meta but " +
2343           "deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2344       if (shouldFixAssignments()) {
2345         undeployRegions(hbi);
2346       }
2347 
2348     } else if (!inMeta && inHdfs && !isDeployed) {
2349       if (hbi.isMerged()) {
2350         // This region has already been merged, the remaining hdfs file will be
2351         // cleaned by CatalogJanitor later
2352         hbi.setSkipChecks(true);
2353         LOG.info("Region " + descriptiveName
2354             + " got merge recently, its file(s) will be cleaned by CatalogJanitor later");
2355         return;
2356       }
2357       errors.reportError(ERROR_CODE.NOT_IN_META_OR_DEPLOYED, "Region "
2358           + descriptiveName + " on HDFS, but not listed in hbase:meta " +
2359           "or deployed on any region server");
2360       // restore region consistency of an adopted orphan
2361       if (shouldFixMeta()) {
2362         if (!hbi.isHdfsRegioninfoPresent()) {
2363           LOG.error("Region " + hbi.getHdfsHRI() + " could have been repaired"
2364               +  " in table integrity repair phase if -fixHdfsOrphans was" +
2365               " used.");
2366           return;
2367         }
2368 
2369         HRegionInfo hri = hbi.getHdfsHRI();
2370         TableInfo tableInfo = tablesInfo.get(hri.getTable());
2371 
2372         for (HRegionInfo region : tableInfo.getRegionsFromMeta()) {
2373           if (Bytes.compareTo(region.getStartKey(), hri.getStartKey()) <= 0
2374               && (region.getEndKey().length == 0 || Bytes.compareTo(region.getEndKey(),
2375                 hri.getEndKey()) >= 0)
2376               && Bytes.compareTo(region.getStartKey(), hri.getEndKey()) <= 0) {
2377             if(region.isSplit() || region.isOffline()) continue;
2378             Path regionDir = hbi.getHdfsRegionDir();
2379             FileSystem fs = regionDir.getFileSystem(getConf());
2380             List<Path> familyDirs = FSUtils.getFamilyDirs(fs, regionDir);
2381             for (Path familyDir : familyDirs) {
2382               List<Path> referenceFilePaths = FSUtils.getReferenceFilePaths(fs, familyDir);
2383               for (Path referenceFilePath : referenceFilePaths) {
2384                 Path parentRegionDir =
2385                     StoreFileInfo.getReferredToFile(referenceFilePath).getParent().getParent();
2386                 if (parentRegionDir.toString().endsWith(region.getEncodedName())) {
2387                   LOG.warn(hri + " start and stop keys are in the range of " + region
2388                       + ". The region might not be cleaned up from hdfs when region " + region
2389                       + " split failed. Hence deleting from hdfs.");
2390                   HRegionFileSystem.deleteRegionFromFileSystem(getConf(), fs,
2391                     regionDir.getParent(), hri);
2392                   return;
2393                 }
2394               }
2395             }
2396           }
2397         }
2398 
2399         LOG.info("Patching hbase:meta with .regioninfo: " + hbi.getHdfsHRI());
2400         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2401         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2402             admin.getClusterStatus().getServers(), numReplicas);
2403 
2404         tryAssignmentRepair(hbi, "Trying to reassign region...");
2405       }
2406 
2407     } else if (!inMeta && inHdfs && isDeployed) {
2408       errors.reportError(ERROR_CODE.NOT_IN_META, "Region " + descriptiveName
2409           + " not in META, but deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2410       debugLsr(hbi.getHdfsRegionDir());
2411       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2412         // for replicas, this means that we should undeploy the region (we would have
2413         // gone over the primaries and fixed meta holes in first phase under
2414         // checkAndFixConsistency; we shouldn't get the condition !inMeta at
2415         // this stage unless unwanted replica)
2416         if (shouldFixAssignments()) {
2417           undeployRegionsForHbi(hbi);
2418         }
2419       }
2420       if (shouldFixMeta() && hbi.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
2421         if (!hbi.isHdfsRegioninfoPresent()) {
2422           LOG.error("This should have been repaired in table integrity repair phase");
2423           return;
2424         }
2425 
2426         LOG.info("Patching hbase:meta with with .regioninfo: " + hbi.getHdfsHRI());
2427         int numReplicas = admin.getTableDescriptor(hbi.getTableName()).getRegionReplication();
2428         HBaseFsckRepair.fixMetaHoleOnlineAndAddReplicas(getConf(), hbi.getHdfsHRI(),
2429             admin.getClusterStatus().getServers(), numReplicas);
2430         tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2431       }
2432 
2433     // ========== Cases where the region is in hbase:meta =============
2434     } else if (inMeta && inHdfs && !isDeployed && splitParent) {
2435       // check whether this is an actual error, or just transient state where parent
2436       // is not cleaned
2437       if (hbi.metaEntry.splitA != null && hbi.metaEntry.splitB != null) {
2438         // check that split daughters are there
2439         HbckInfo infoA = this.regionInfoMap.get(hbi.metaEntry.splitA.getEncodedName());
2440         HbckInfo infoB = this.regionInfoMap.get(hbi.metaEntry.splitB.getEncodedName());
2441         if (infoA != null && infoB != null) {
2442           // we already processed or will process daughters. Move on, nothing to see here.
2443           hbi.setSkipChecks(true);
2444           return;
2445         }
2446       }
2447 
2448       // For Replica region, we need to do a similar check. If replica is not split successfully,
2449       // error is going to be reported against primary daughter region.
2450       if (hbi.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
2451         LOG.info("Region " + descriptiveName + " is a split parent in META, in HDFS, "
2452             + "and not deployed on any region server. This may be transient.");
2453         hbi.setSkipChecks(true);
2454         return;
2455       }
2456 
2457       errors.reportError(ERROR_CODE.LINGERING_SPLIT_PARENT, "Region "
2458           + descriptiveName + " is a split parent in META, in HDFS, "
2459           + "and not deployed on any region server. This could be transient, "
2460           + "consider to run the catalog janitor first!");
2461       if (shouldFixSplitParents()) {
2462         setShouldRerun();
2463         resetSplitParent(hbi);
2464       }
2465     } else if (inMeta && !inHdfs && !isDeployed) {
2466       errors.reportError(ERROR_CODE.NOT_IN_HDFS_OR_DEPLOYED, "Region "
2467           + descriptiveName + " found in META, but not in HDFS "
2468           + "or deployed on any region server.");
2469       if (shouldFixMeta()) {
2470         deleteMetaRegion(hbi);
2471       }
2472     } else if (inMeta && !inHdfs && isDeployed) {
2473       errors.reportError(ERROR_CODE.NOT_IN_HDFS, "Region " + descriptiveName
2474           + " found in META, but not in HDFS, " +
2475           "and deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2476       // We treat HDFS as ground truth.  Any information in meta is transient
2477       // and equivalent data can be regenerated.  So, lets unassign and remove
2478       // these problems from META.
2479       if (shouldFixAssignments()) {
2480         errors.print("Trying to fix unassigned region...");
2481         undeployRegions(hbi);
2482       }
2483       if (shouldFixMeta()) {
2484         // wait for it to complete
2485         deleteMetaRegion(hbi);
2486       }
2487     } else if (inMeta && inHdfs && !isDeployed && shouldBeDeployed) {
2488       errors.reportError(ERROR_CODE.NOT_DEPLOYED, "Region " + descriptiveName
2489           + " not deployed on any region server.");
2490       tryAssignmentRepair(hbi, "Trying to fix unassigned region...");
2491     } else if (inMeta && inHdfs && isDeployed && !shouldBeDeployed) {
2492       errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
2493           "Region " + descriptiveName + " should not be deployed according " +
2494           "to META, but is deployed on " + Joiner.on(", ").join(hbi.deployedOn));
2495       if (shouldFixAssignments()) {
2496         errors.print("Trying to close the region " + descriptiveName);
2497         setShouldRerun();
2498         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2499       }
2500     } else if (inMeta && inHdfs && isMultiplyDeployed) {
2501       errors.reportError(ERROR_CODE.MULTI_DEPLOYED, "Region " + descriptiveName
2502           + " is listed in hbase:meta on region server " + hbi.metaEntry.regionServer
2503           + " but is multiply assigned to region servers " +
2504           Joiner.on(", ").join(hbi.deployedOn));
2505       // If we are trying to fix the errors
2506       if (shouldFixAssignments()) {
2507         errors.print("Trying to fix assignment error...");
2508         setShouldRerun();
2509         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2510       }
2511     } else if (inMeta && inHdfs && isDeployed && !deploymentMatchesMeta) {
2512       errors.reportError(ERROR_CODE.SERVER_DOES_NOT_MATCH_META, "Region "
2513           + descriptiveName + " listed in hbase:meta on region server " +
2514           hbi.metaEntry.regionServer + " but found on region server " +
2515           hbi.deployedOn.get(0));
2516       // If we are trying to fix the errors
2517       if (shouldFixAssignments()) {
2518         errors.print("Trying to fix assignment error...");
2519         setShouldRerun();
2520         HBaseFsckRepair.fixMultiAssignment(connection, hbi.metaEntry, hbi.deployedOn);
2521         HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
2522       }
2523     } else {
2524       errors.reportError(ERROR_CODE.UNKNOWN, "Region " + descriptiveName +
2525           " is in an unforeseen state:" +
2526           " inMeta=" + inMeta +
2527           " inHdfs=" + inHdfs +
2528           " isDeployed=" + isDeployed +
2529           " isMultiplyDeployed=" + isMultiplyDeployed +
2530           " deploymentMatchesMeta=" + deploymentMatchesMeta +
2531           " shouldBeDeployed=" + shouldBeDeployed);
2532     }
2533   }
2534 
2535   /**
2536    * Checks tables integrity. Goes over all regions and scans the tables.
2537    * Collects all the pieces for each table and checks if there are missing,
2538    * repeated or overlapping ones.
2539    * @throws IOException
2540    */
2541   SortedMap<TableName, TableInfo> checkIntegrity() throws IOException {
2542     tablesInfo = new TreeMap<TableName,TableInfo> ();
2543     LOG.debug("There are " + regionInfoMap.size() + " region info entries");
2544     for (HbckInfo hbi : regionInfoMap.values()) {
2545       // Check only valid, working regions
2546       if (hbi.metaEntry == null) {
2547         // this assumes that consistency check has run loadMetaEntry
2548         Path p = hbi.getHdfsRegionDir();
2549         if (p == null) {
2550           errors.report("No regioninfo in Meta or HDFS. " + hbi);
2551         }
2552 
2553         // TODO test.
2554         continue;
2555       }
2556       if (hbi.metaEntry.regionServer == null) {
2557         errors.detail("Skipping region because no region server: " + hbi);
2558         continue;
2559       }
2560       if (hbi.metaEntry.isOffline()) {
2561         errors.detail("Skipping region because it is offline: " + hbi);
2562         continue;
2563       }
2564       if (hbi.containsOnlyHdfsEdits()) {
2565         errors.detail("Skipping region because it only contains edits" + hbi);
2566         continue;
2567       }
2568 
2569       // Missing regionDir or over-deployment is checked elsewhere. Include
2570       // these cases in modTInfo, so we can evaluate those regions as part of
2571       // the region chain in META
2572       //if (hbi.foundRegionDir == null) continue;
2573       //if (hbi.deployedOn.size() != 1) continue;
2574       if (hbi.deployedOn.size() == 0) continue;
2575 
2576       // We should be safe here
2577       TableName tableName = hbi.metaEntry.getTable();
2578       TableInfo modTInfo = tablesInfo.get(tableName);
2579       if (modTInfo == null) {
2580         modTInfo = new TableInfo(tableName);
2581       }
2582       for (ServerName server : hbi.deployedOn) {
2583         modTInfo.addServer(server);
2584       }
2585 
2586       if (!hbi.isSkipChecks()) {
2587         modTInfo.addRegionInfo(hbi);
2588       }
2589 
2590       tablesInfo.put(tableName, modTInfo);
2591     }
2592 
2593     loadTableInfosForTablesWithNoRegion();
2594 
2595     logParallelMerge();
2596     for (TableInfo tInfo : tablesInfo.values()) {
2597       TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors);
2598       if (!tInfo.checkRegionChain(handler)) {
2599         errors.report("Found inconsistency in table " + tInfo.getName());
2600       }
2601     }
2602     return tablesInfo;
2603   }
2604 
2605   /** Loads table info's for tables that may not have been included, since there are no
2606    * regions reported for the table, but table dir is there in hdfs
2607    */
2608   private void loadTableInfosForTablesWithNoRegion() throws IOException {
2609     Map<String, HTableDescriptor> allTables = new FSTableDescriptors(getConf()).getAll();
2610     for (HTableDescriptor htd : allTables.values()) {
2611       if (checkMetaOnly && !htd.isMetaTable()) {
2612         continue;
2613       }
2614 
2615       TableName tableName = htd.getTableName();
2616       if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) {
2617         TableInfo tableInfo = new TableInfo(tableName);
2618         tableInfo.htds.add(htd);
2619         tablesInfo.put(htd.getTableName(), tableInfo);
2620       }
2621     }
2622   }
2623 
2624   /**
2625    * Merge hdfs data by moving from contained HbckInfo into targetRegionDir.
2626    * @return number of file move fixes done to merge regions.
2627    */
2628   public int mergeRegionDirs(Path targetRegionDir, HbckInfo contained) throws IOException {
2629     int fileMoves = 0;
2630     String thread = Thread.currentThread().getName();
2631     LOG.debug("[" + thread + "] Contained region dir after close and pause");
2632     debugLsr(contained.getHdfsRegionDir());
2633 
2634     // rename the contained into the container.
2635     FileSystem fs = targetRegionDir.getFileSystem(getConf());
2636     FileStatus[] dirs = null;
2637     try {
2638       dirs = fs.listStatus(contained.getHdfsRegionDir());
2639     } catch (FileNotFoundException fnfe) {
2640       // region we are attempting to merge in is not present!  Since this is a merge, there is
2641       // no harm skipping this region if it does not exist.
2642       if (!fs.exists(contained.getHdfsRegionDir())) {
2643         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2644             + " is missing. Assuming already sidelined or moved.");
2645       } else {
2646         sidelineRegionDir(fs, contained);
2647       }
2648       return fileMoves;
2649     }
2650 
2651     if (dirs == null) {
2652       if (!fs.exists(contained.getHdfsRegionDir())) {
2653         LOG.warn("[" + thread + "] HDFS region dir " + contained.getHdfsRegionDir()
2654             + " already sidelined.");
2655       } else {
2656         sidelineRegionDir(fs, contained);
2657       }
2658       return fileMoves;
2659     }
2660 
2661     for (FileStatus cf : dirs) {
2662       Path src = cf.getPath();
2663       Path dst =  new Path(targetRegionDir, src.getName());
2664 
2665       if (src.getName().equals(HRegionFileSystem.REGION_INFO_FILE)) {
2666         // do not copy the old .regioninfo file.
2667         continue;
2668       }
2669 
2670       if (src.getName().equals(HConstants.HREGION_OLDLOGDIR_NAME)) {
2671         // do not copy the .oldlogs files
2672         continue;
2673       }
2674 
2675       LOG.info("[" + thread + "] Moving files from " + src + " into containing region " + dst);
2676       // FileSystem.rename is inconsistent with directories -- if the
2677       // dst (foo/a) exists and is a dir, and the src (foo/b) is a dir,
2678       // it moves the src into the dst dir resulting in (foo/a/b).  If
2679       // the dst does not exist, and the src a dir, src becomes dst. (foo/b)
2680       for (FileStatus hfile : fs.listStatus(src)) {
2681         boolean success = fs.rename(hfile.getPath(), dst);
2682         if (success) {
2683           fileMoves++;
2684         }
2685       }
2686       LOG.debug("[" + thread + "] Sideline directory contents:");
2687       debugLsr(targetRegionDir);
2688     }
2689 
2690     // if all success.
2691     sidelineRegionDir(fs, contained);
2692     LOG.info("[" + thread + "] Sidelined region dir "+ contained.getHdfsRegionDir() + " into " +
2693         getSidelineDir());
2694     debugLsr(contained.getHdfsRegionDir());
2695 
2696     return fileMoves;
2697   }
2698 
2699 
2700   static class WorkItemOverlapMerge implements Callable<Void> {
2701     private TableIntegrityErrorHandler handler;
2702     Collection<HbckInfo> overlapgroup;
2703 
2704     WorkItemOverlapMerge(Collection<HbckInfo> overlapgroup, TableIntegrityErrorHandler handler) {
2705       this.handler = handler;
2706       this.overlapgroup = overlapgroup;
2707     }
2708 
2709     @Override
2710     public Void call() throws Exception {
2711       handler.handleOverlapGroup(overlapgroup);
2712       return null;
2713     }
2714   };
2715 
2716 
2717   /**
2718    * Maintain information about a particular table.
2719    */
2720   public class TableInfo {
    // name of the table this TableInfo tracks
    TableName tableName;
    // servers hosting at least one region of this table
    TreeSet <ServerName> deployedOn;

    // backwards regions: regions whose endkey sorts before their startkey
    final List<HbckInfo> backwards = new ArrayList<HbckInfo>();

    // sidelined big overlapped regions
    final Map<Path, HbckInfo> sidelinedRegions = new HashMap<Path, HbckInfo>();

    // region split calculator
    final RegionSplitCalculator<HbckInfo> sc = new RegionSplitCalculator<HbckInfo>(cmp);

    // Histogram of different HTableDescriptors found.  Ideally there is only one!
    final Set<HTableDescriptor> htds = new HashSet<HTableDescriptor>();

    // key = start split, values = set of splits in problem group
    final Multimap<byte[], HbckInfo> overlapGroups =
      TreeMultimap.create(RegionSplitCalculator.BYTES_COMPARATOR, cmp);

    // list of regions derived from meta entries.
    private ImmutableList<HRegionInfo> regionsFromMeta = null;
2742 
2743     TableInfo(TableName name) {
2744       this.tableName = name;
2745       deployedOn = new TreeSet <ServerName>();
2746     }
2747 
2748     /**
2749      * @return descriptor common to all regions.  null if are none or multiple!
2750      */
2751     private HTableDescriptor getHTD() {
2752       if (htds.size() == 1) {
2753         return (HTableDescriptor)htds.toArray()[0];
2754       } else {
2755         LOG.error("None/Multiple table descriptors found for table '"
2756           + tableName + "' regions: " + htds);
2757       }
2758       return null;
2759     }
2760 
2761     public void addRegionInfo(HbckInfo hir) {
2762       if (Bytes.equals(hir.getEndKey(), HConstants.EMPTY_END_ROW)) {
2763         // end key is absolute end key, just add it.
2764         // ignore replicas other than primary for these checks
2765         if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2766         return;
2767       }
2768 
2769       // if not the absolute end key, check for cycle
2770       if (Bytes.compareTo(hir.getStartKey(), hir.getEndKey()) > 0) {
2771         errors.reportError(
2772             ERROR_CODE.REGION_CYCLE,
2773             String.format("The endkey for this region comes before the "
2774                 + "startkey, startkey=%s, endkey=%s",
2775                 Bytes.toStringBinary(hir.getStartKey()),
2776                 Bytes.toStringBinary(hir.getEndKey())), this, hir);
2777         backwards.add(hir);
2778         return;
2779       }
2780 
2781       // main case, add to split calculator
2782       // ignore replicas other than primary for these checks
2783       if (hir.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) sc.add(hir);
2784     }
2785 
2786     public void addServer(ServerName server) {
2787       this.deployedOn.add(server);
2788     }
2789 
2790     public TableName getName() {
2791       return tableName;
2792     }
2793 
2794     public int getNumRegions() {
2795       return sc.getStarts().size() + backwards.size();
2796     }
2797 
2798     public synchronized ImmutableList<HRegionInfo> getRegionsFromMeta() {
2799       // lazy loaded, synchronized to ensure a single load
2800       if (regionsFromMeta == null) {
2801         List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
2802         for (HbckInfo h : HBaseFsck.this.regionInfoMap.values()) {
2803           if (tableName.equals(h.getTableName())) {
2804             if (h.metaEntry != null) {
2805               regions.add((HRegionInfo) h.metaEntry);
2806             }
2807           }
2808         }
2809         regionsFromMeta = Ordering.natural().immutableSortedCopy(regions);
2810       }
2811       
2812       return regionsFromMeta;
2813     }
2814     
2815 
      /**
       * Integrity handler that only reports region-chain problems (holes,
       * overlaps, degenerate/backwards regions) as errors with suggested
       * manual fixes; it performs no repairs itself.
       */
      private class IntegrityFixSuggester extends TableIntegrityErrorHandlerImpl {
      ErrorReporter errors;

      IntegrityFixSuggester(TableInfo ti, ErrorReporter errors) {
        this.errors = errors;
        setTableInfo(ti);
      }

      @Override
      public void handleRegionStartKeyNotEmpty(HbckInfo hi) throws IOException{
        errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
            "First region should start with an empty key.  You need to "
            + " create a new region and regioninfo in HDFS to plug the hole.",
            getTableInfo(), hi);
      }

      @Override
      public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
        errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
            "Last region should end with an empty key. You need to "
                + "create a new region and regioninfo in HDFS to plug the hole.", getTableInfo());
      }

      @Override
      public void handleDegenerateRegion(HbckInfo hi) throws IOException{
        errors.reportError(ERROR_CODE.DEGENERATE_REGION,
            "Region has the same start and end key.", getTableInfo(), hi);
      }

      @Override
      public void handleDuplicateStartKeys(HbckInfo r1, HbckInfo r2) throws IOException{
        byte[] key = r1.getStartKey();
        // dup start key -- report against both offending regions
        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
            "Multiple regions have the same startkey: "
            + Bytes.toStringBinary(key), getTableInfo(), r1);
        errors.reportError(ERROR_CODE.DUPE_STARTKEYS,
            "Multiple regions have the same startkey: "
            + Bytes.toStringBinary(key), getTableInfo(), r2);
      }

      @Override
      public void handleSplit(HbckInfo r1, HbckInfo r2) throws IOException{
        byte[] key = r1.getStartKey();
        // reported as DUPE_ENDKEYS against both regions of the failed split
        errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
          "Multiple regions have the same regionID: "
            + Bytes.toStringBinary(key), getTableInfo(), r1);
        errors.reportError(ERROR_CODE.DUPE_ENDKEYS,
          "Multiple regions have the same regionID: "
            + Bytes.toStringBinary(key), getTableInfo(), r2);
      }

      @Override
      public void handleOverlapInRegionChain(HbckInfo hi1, HbckInfo hi2) throws IOException{
        errors.reportError(ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
            "There is an overlap in the region chain.",
            getTableInfo(), hi1, hi2);
      }

      @Override
      public void handleHoleInRegionChain(byte[] holeStart, byte[] holeStop) throws IOException{
        errors.reportError(
            ERROR_CODE.HOLE_IN_REGION_CHAIN,
            "There is a hole in the region chain between "
                + Bytes.toStringBinary(holeStart) + " and "
                + Bytes.toStringBinary(holeStop)
                + ".  You need to create a new .regioninfo and region "
                + "dir in hdfs to plug the hole.");
      }
    };
2887 
2888     /**
2889      * This handler fixes integrity errors from hdfs information.  There are
2890      * basically three classes of integrity problems 1) holes, 2) overlaps, and
2891      * 3) invalid regions.
2892      *
2893      * This class overrides methods that fix holes and the overlap group case.
2894      * Individual cases of particular overlaps are handled by the general
2895      * overlap group merge repair case.
2896      *
2897      * If hbase is online, this forces regions offline before doing merge
2898      * operations.
2899      */
2900     private class HDFSIntegrityFixer extends IntegrityFixSuggester {
2901       Configuration conf;
2902 
2903       boolean fixOverlaps = true;
2904 
2905       HDFSIntegrityFixer(TableInfo ti, ErrorReporter errors, Configuration conf,
2906           boolean fixHoles, boolean fixOverlaps) {
2907         super(ti, errors);
2908         this.conf = conf;
2909         this.fixOverlaps = fixOverlaps;
2910         // TODO properly use fixHoles
2911       }
2912 
2913       /**
2914        * This is a special case hole -- when the first region of a table is
2915        * missing from META, HBase doesn't acknowledge the existance of the
2916        * table.
2917        */
2918       @Override
2919       public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException {
2920         errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY,
2921             "First region should start with an empty key.  Creating a new " +
2922             "region and regioninfo in HDFS to plug the hole.",
2923             getTableInfo(), next);
2924         HTableDescriptor htd = getTableInfo().getHTD();
2925         // from special EMPTY_START_ROW to next region's startKey
2926         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(),
2927             HConstants.EMPTY_START_ROW, next.getStartKey());
2928 
2929         // TODO test
2930         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2931         LOG.info("Table region start key was not empty.  Created new empty region: "
2932             + newRegion + " " +region);
2933         fixes++;
2934       }
2935 
2936       @Override
2937       public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException {
2938         errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY,
2939             "Last region should end with an empty key.  Creating a new "
2940                 + "region and regioninfo in HDFS to plug the hole.", getTableInfo());
2941         HTableDescriptor htd = getTableInfo().getHTD();
2942         // from curEndKey to EMPTY_START_ROW
2943         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), curEndKey,
2944             HConstants.EMPTY_START_ROW);
2945 
2946         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2947         LOG.info("Table region end key was not empty.  Created new empty region: " + newRegion
2948             + " " + region);
2949         fixes++;
2950       }
2951 
2952       /**
2953        * There is a hole in the hdfs regions that violates the table integrity
2954        * rules.  Create a new empty region that patches the hole.
2955        */
2956       @Override
2957       public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException {
2958         errors.reportError(
2959             ERROR_CODE.HOLE_IN_REGION_CHAIN,
2960             "There is a hole in the region chain between "
2961                 + Bytes.toStringBinary(holeStartKey) + " and "
2962                 + Bytes.toStringBinary(holeStopKey)
2963                 + ".  Creating a new regioninfo and region "
2964                 + "dir in hdfs to plug the hole.");
2965         HTableDescriptor htd = getTableInfo().getHTD();
2966         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), holeStartKey, holeStopKey);
2967         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
2968         LOG.info("Plugged hole by creating new empty region: "+ newRegion + " " +region);
2969         fixes++;
2970       }
2971 
2972       /**
2973        * This takes set of overlapping regions and merges them into a single
2974        * region.  This covers cases like degenerate regions, shared start key,
2975        * general overlaps, duplicate ranges, and partial overlapping regions.
2976        *
2977        * Cases:
2978        * - Clean regions that overlap
2979        * - Only .oldlogs regions (can't find start/stop range, or figure out)
2980        *
2981        * This is basically threadsafe, except for the fixer increment in mergeOverlaps.
2982        */
2983       @Override
2984       public void handleOverlapGroup(Collection<HbckInfo> overlap)
2985           throws IOException {
2986         Preconditions.checkNotNull(overlap);
2987         Preconditions.checkArgument(overlap.size() >0);
2988 
2989         if (!this.fixOverlaps) {
2990           LOG.warn("Not attempting to repair overlaps.");
2991           return;
2992         }
2993 
2994         if (overlap.size() > maxMerge) {
2995           LOG.warn("Overlap group has " + overlap.size() + " overlapping " +
2996             "regions which is greater than " + maxMerge + ", the max number of regions to merge");
2997           if (sidelineBigOverlaps) {
2998             // we only sideline big overlapped groups that exceeds the max number of regions to merge
2999             sidelineBigOverlaps(overlap);
3000           }
3001           return;
3002         }
3003         if (shouldRemoveParents()) {
3004           removeParentsAndFixSplits(overlap);
3005         }
3006         mergeOverlaps(overlap);
3007       }
3008 
3009       void removeParentsAndFixSplits(Collection<HbckInfo> overlap) throws IOException {
3010         Pair<byte[], byte[]> range = null;
3011         HbckInfo parent = null;
3012         HbckInfo daughterA = null;
3013         HbckInfo daughterB = null;
3014         Collection<HbckInfo> daughters = new ArrayList<HbckInfo>(overlap);
3015 
3016         String thread = Thread.currentThread().getName();
3017         LOG.info("== [" + thread + "] Attempting fix splits in overlap state.");
3018 
3019         // we only can handle a single split per group at the time
3020         if (overlap.size() > 3) {
3021           LOG.info("Too many overlaps were found on this group, falling back to regular merge.");
3022           return;
3023         }
3024 
3025         for (HbckInfo hi : overlap) {
3026           if (range == null) {
3027             range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
3028           } else {
3029             if (RegionSplitCalculator.BYTES_COMPARATOR
3030               .compare(hi.getStartKey(), range.getFirst()) < 0) {
3031               range.setFirst(hi.getStartKey());
3032             }
3033             if (RegionSplitCalculator.BYTES_COMPARATOR
3034               .compare(hi.getEndKey(), range.getSecond()) > 0) {
3035               range.setSecond(hi.getEndKey());
3036             }
3037           }
3038         }
3039 
3040         LOG.info("This group range is [" + Bytes.toStringBinary(range.getFirst()) + ", "
3041           + Bytes.toStringBinary(range.getSecond()) + "]");
3042 
3043         // attempt to find a possible parent for the edge case of a split
3044         for (HbckInfo hi : overlap) {
3045           if (Bytes.compareTo(hi.getHdfsHRI().getStartKey(), range.getFirst()) == 0
3046             && Bytes.compareTo(hi.getHdfsHRI().getEndKey(), range.getSecond()) == 0) {
3047             LOG.info("This is a parent for this group: " + hi.toString());
3048             parent = hi;
3049           }
3050         }
3051 
3052         // Remove parent regions from daughters collection
3053         if (parent != null) {
3054           daughters.remove(parent);
3055         }
3056 
3057         // Lets verify that daughters share the regionID at split time and they
3058         // were created after the parent
3059         for (HbckInfo hi : daughters) {
3060           if (Bytes.compareTo(hi.getHdfsHRI().getStartKey(), range.getFirst()) == 0) {
3061             if (parent.getHdfsHRI().getRegionId() < hi.getHdfsHRI().getRegionId()) {
3062               daughterA = hi;
3063             }
3064           }
3065           if (Bytes.compareTo(hi.getHdfsHRI().getEndKey(), range.getSecond()) == 0) {
3066             if (parent.getHdfsHRI().getRegionId() < hi.getHdfsHRI().getRegionId()) {
3067               daughterB = hi;
3068             }
3069           }
3070         }
3071 
3072         // daughters must share the same regionID and we should have a parent too
3073         if (daughterA.getHdfsHRI().getRegionId() != daughterB.getHdfsHRI().getRegionId() || parent == null)
3074           return;
3075 
3076         FileSystem fs = FileSystem.get(conf);
3077         LOG.info("Found parent: " + parent.getRegionNameAsString());
3078         LOG.info("Found potential daughter a: " + daughterA.getRegionNameAsString());
3079         LOG.info("Found potential daughter b: " + daughterB.getRegionNameAsString());
3080         LOG.info("Trying to fix parent in overlap by removing the parent.");
3081         try {
3082           closeRegion(parent);
3083         } catch (IOException ioe) {
3084           LOG.warn("Parent region could not be closed, continuing with regular merge...", ioe);
3085           return;
3086         } catch (InterruptedException ie) {
3087           LOG.warn("Parent region could not be closed, continuing with regular merge...", ie);
3088           return;
3089         }
3090 
3091         try {
3092           offline(parent.getRegionName());
3093         } catch (IOException ioe) {
3094           LOG.warn("Unable to offline parent region: " + parent.getRegionNameAsString()
3095             + ".  Just continuing with regular merge... ", ioe);
3096           return;
3097         }
3098 
3099         try {
3100           HBaseFsckRepair.removeParentInMeta(conf, parent.getHdfsHRI());
3101         } catch (IOException ioe) {
3102           LOG.warn("Unable to remove parent region in META: " + parent.getRegionNameAsString()
3103             + ".  Just continuing with regular merge... ", ioe);
3104           return;
3105         }
3106 
3107         sidelineRegionDir(fs, parent);
3108         LOG.info("[" + thread + "] Sidelined parent region dir "+ parent.getHdfsRegionDir() + " into " +
3109           getSidelineDir());
3110         debugLsr(parent.getHdfsRegionDir());
3111 
3112         // Make sure we don't have the parents and daughters around
3113         overlap.remove(parent);
3114         overlap.remove(daughterA);
3115         overlap.remove(daughterB);
3116 
3117         LOG.info("Done fixing split.");
3118 
3119       }
3120 
3121       void mergeOverlaps(Collection<HbckInfo> overlap)
3122           throws IOException {
3123         String thread = Thread.currentThread().getName();
3124         LOG.info("== [" + thread + "] Merging regions into one region: "
3125           + Joiner.on(",").join(overlap));
3126         // get the min / max range and close all concerned regions
3127         Pair<byte[], byte[]> range = null;
3128         for (HbckInfo hi : overlap) {
3129           if (range == null) {
3130             range = new Pair<byte[], byte[]>(hi.getStartKey(), hi.getEndKey());
3131           } else {
3132             if (RegionSplitCalculator.BYTES_COMPARATOR
3133                 .compare(hi.getStartKey(), range.getFirst()) < 0) {
3134               range.setFirst(hi.getStartKey());
3135             }
3136             if ((RegionSplitCalculator.BYTES_COMPARATOR
3137                 .compare(range.getSecond(), HConstants.EMPTY_END_ROW) == 0)
3138                 || (RegionSplitCalculator.BYTES_COMPARATOR.compare(hi.getEndKey(),
3139                   HConstants.EMPTY_END_ROW) == 0)) {
3140               range.setSecond(HConstants.EMPTY_END_ROW);
3141             } else if (RegionSplitCalculator.BYTES_COMPARATOR
3142                 .compare(hi.getEndKey(), range.getSecond()) > 0) {
3143               range.setSecond(hi.getEndKey());
3144             }
3145           }
3146           // need to close files so delete can happen.
3147           LOG.debug("[" + thread + "] Closing region before moving data around: " +  hi);
3148           LOG.debug("[" + thread + "] Contained region dir before close");
3149           debugLsr(hi.getHdfsRegionDir());
3150           try {
3151             LOG.info("[" + thread + "] Closing region: " + hi);
3152             closeRegion(hi);
3153           } catch (IOException ioe) {
3154             LOG.warn("[" + thread + "] Was unable to close region " + hi
3155               + ".  Just continuing... ", ioe);
3156           } catch (InterruptedException e) {
3157             LOG.warn("[" + thread + "] Was unable to close region " + hi
3158               + ".  Just continuing... ", e);
3159           }
3160 
3161           try {
3162             LOG.info("[" + thread + "] Offlining region: " + hi);
3163             offline(hi.getRegionName());
3164           } catch (IOException ioe) {
3165             LOG.warn("[" + thread + "] Unable to offline region from master: " + hi
3166               + ".  Just continuing... ", ioe);
3167           }
3168         }
3169 
3170         // create new empty container region.
3171         HTableDescriptor htd = getTableInfo().getHTD();
3172         // from start key to end Key
3173         HRegionInfo newRegion = new HRegionInfo(htd.getTableName(), range.getFirst(),
3174             range.getSecond());
3175         HRegion region = HBaseFsckRepair.createHDFSRegionDir(conf, newRegion, htd);
3176         LOG.info("[" + thread + "] Created new empty container region: " +
3177             newRegion + " to contain regions: " + Joiner.on(",").join(overlap));
3178         debugLsr(region.getRegionFileSystem().getRegionDir());
3179 
3180         // all target regions are closed, should be able to safely cleanup.
3181         boolean didFix= false;
3182         Path target = region.getRegionFileSystem().getRegionDir();
3183         for (HbckInfo contained : overlap) {
3184           LOG.info("[" + thread + "] Merging " + contained  + " into " + target );
3185           int merges = mergeRegionDirs(target, contained);
3186           if (merges > 0) {
3187             didFix = true;
3188           }
3189         }
3190         if (didFix) {
3191           fixes++;
3192         }
3193       }
3194 
3195       /**
3196        * Sideline some regions in a big overlap group so that it
3197        * will have fewer regions, and it is easier to merge them later on.
3198        *
3199        * @param bigOverlap the overlapped group with regions more than maxMerge
3200        * @throws IOException
3201        */
3202       void sidelineBigOverlaps(
3203           Collection<HbckInfo> bigOverlap) throws IOException {
3204         int overlapsToSideline = bigOverlap.size() - maxMerge;
3205         if (overlapsToSideline > maxOverlapsToSideline) {
3206           overlapsToSideline = maxOverlapsToSideline;
3207         }
3208         List<HbckInfo> regionsToSideline =
3209           RegionSplitCalculator.findBigRanges(bigOverlap, overlapsToSideline);
3210         FileSystem fs = FileSystem.get(conf);
3211         for (HbckInfo regionToSideline: regionsToSideline) {
3212           try {
3213             LOG.info("Closing region: " + regionToSideline);
3214             closeRegion(regionToSideline);
3215           } catch (IOException ioe) {
3216             LOG.warn("Was unable to close region " + regionToSideline
3217               + ".  Just continuing... ", ioe);
3218           } catch (InterruptedException e) {
3219             LOG.warn("Was unable to close region " + regionToSideline
3220               + ".  Just continuing... ", e);
3221           }
3222 
3223           try {
3224             LOG.info("Offlining region: " + regionToSideline);
3225             offline(regionToSideline.getRegionName());
3226           } catch (IOException ioe) {
3227             LOG.warn("Unable to offline region from master: " + regionToSideline
3228               + ".  Just continuing... ", ioe);
3229           }
3230 
3231           LOG.info("Before sideline big overlapped region: " + regionToSideline.toString());
3232           Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline);
3233           if (sidelineRegionDir != null) {
3234             sidelinedRegions.put(sidelineRegionDir, regionToSideline);
3235             LOG.info("After sidelined big overlapped region: "
3236               + regionToSideline.getRegionNameAsString()
3237               + " to " + sidelineRegionDir.toString());
3238             fixes++;
3239           }
3240         }
3241       }
3242     }
3243 
    /**
     * Check the region chain (from META) of this table.  We are looking for
     * holes, overlaps, and cycles.
     * @param handler receives a callback for each integrity problem found
     * @return false if there are errors
     * @throws IOException
     */
    public boolean checkRegionChain(TableIntegrityErrorHandler handler) throws IOException {
      // When table is disabled no need to check for the region chain. Some of the regions
      // accidently if deployed, this below code might report some issues like missing start
      // or end regions or region hole in chain and may try to fix which is unwanted.
      if (disabledTables.contains(this.tableName)) {
        return true;
      }
      // Snapshot the error count so the return value reflects only errors added here.
      int originalErrorsCount = errors.getErrorList().size();
      Multimap<byte[], HbckInfo> regions = sc.calcCoverage();
      SortedSet<byte[]> splits = sc.getSplits();

      byte[] prevKey = null;
      // Marks the start key of the overlap group currently being accumulated;
      // null while the chain is clean.
      byte[] problemKey = null;

      if (splits.size() == 0) {
        // no region for this table
        handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
      }

      // Walk the split keys in sorted order; 'ranges' holds the regions
      // covering each key.
      for (byte[] key : splits) {
        Collection<HbckInfo> ranges = regions.get(key);
        // The very first key must be the empty start row, otherwise the
        // table's first region is missing from META.
        if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) {
          for (HbckInfo rng : ranges) {
            handler.handleRegionStartKeyNotEmpty(rng);
          }
        }

        // check for degenerate ranges
        for (HbckInfo rng : ranges) {
          // special endkey case converts '' to null
          byte[] endKey = rng.getEndKey();
          endKey = (endKey.length == 0) ? null : endKey;
          if (Bytes.equals(rng.getStartKey(),endKey)) {
            handler.handleDegenerateRegion(rng);
          }
        }

        if (ranges.size() == 1) {
          // this split key is ok -- no overlap, not a hole.
          if (problemKey != null) {
            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
          }
          problemKey = null; // fell through, no more problem.
        } else if (ranges.size() > 1) {
          // set the new problem key group name, if already have problem key, just
          // keep using it.
          if (problemKey == null) {
            // only for overlap regions.
            LOG.warn("Naming new problem group: " + Bytes.toStringBinary(key));
            problemKey = key;
          }
          overlapGroups.putAll(problemKey, ranges);

          // record errors
          ArrayList<HbckInfo> subRange = new ArrayList<HbckInfo>(ranges);
          //  this dumb and n^2 but this shouldn't happen often
          for (HbckInfo r1 : ranges) {
            // Only default replicas participate in chain checking.
            if (r1.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
            // Shrink subRange so each unordered pair is visited exactly once.
            subRange.remove(r1);
            for (HbckInfo r2 : subRange) {
              if (r2.getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) continue;
              // general case of same start key
              if (Bytes.compareTo(r1.getStartKey(), r2.getStartKey())==0) {
                handler.handleDuplicateStartKeys(r1,r2);
              // NOTE(review): the next comparison assumes both regions have
              // HDFS region info; a null getHdfsHRI() here would NPE --
              // confirm upstream guarantees before relying on it.
              } else if (Bytes.compareTo(r1.getEndKey(), r2.getStartKey())==0 &&
                r1.getHdfsHRI().getRegionId() == r2.getHdfsHRI().getRegionId()) {
                LOG.info("this is a split, log to splits");
                handler.handleSplit(r1, r2);
              } else {
                // overlap
                handler.handleOverlapInRegionChain(r1, r2);
              }
            }
          }

        } else if (ranges.size() == 0) {
          if (problemKey != null) {
            LOG.warn("reached end of problem group: " + Bytes.toStringBinary(key));
          }
          problemKey = null;

          byte[] holeStopKey = sc.getSplits().higher(key);
          // if higher key is null we reached the top.
          if (holeStopKey != null) {
            // hole
            handler.handleHoleInRegionChain(key, holeStopKey);
          }
        }
        prevKey = key;
      }

      // When the last region of a table is proper and having an empty end key, 'prevKey'
      // will be null.
      if (prevKey != null) {
        handler.handleRegionEndKeyNotEmpty(prevKey);
      }

      // TODO fold this into the TableIntegrityHandler
      if (getConf().getBoolean("hbasefsck.overlap.merge.parallel", true)) {
        boolean ok = handleOverlapsParallel(handler, prevKey);
        if (!ok) {
          return false;
        }
      } else {
        // Sequential fallback: repair each overlap group in turn.
        for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
          handler.handleOverlapGroup(overlap);
        }
      }

      if (details) {
        // do full region split map dump
        errors.print("---- Table '"  +  this.tableName
            + "': region split map");
        dump(splits, regions);
        errors.print("---- Table '"  +  this.tableName
            + "': overlap groups");
        dumpOverlapProblems(overlapGroups);
        errors.print("There are " + overlapGroups.keySet().size()
            + " overlap groups with " + overlapGroups.size()
            + " overlapping regions");
      }
      if (!sidelinedRegions.isEmpty()) {
        LOG.warn("Sidelined big overlapped regions, please bulk load them!");
        errors.print("---- Table '"  +  this.tableName
            + "': sidelined big overlapped regions");
        dumpSidelinedRegions(sidelinedRegions);
      }
      return errors.getErrorList().size() == originalErrorsCount;
    }
3379 
3380     private boolean handleOverlapsParallel(TableIntegrityErrorHandler handler, byte[] prevKey)
3381         throws IOException {
3382       // we parallelize overlap handler for the case we have lots of groups to fix.  We can
3383       // safely assume each group is independent.
3384       List<WorkItemOverlapMerge> merges = new ArrayList<WorkItemOverlapMerge>(overlapGroups.size());
3385       List<Future<Void>> rets;
3386       for (Collection<HbckInfo> overlap : overlapGroups.asMap().values()) {
3387         //
3388         merges.add(new WorkItemOverlapMerge(overlap, handler));
3389       }
3390       try {
3391         rets = executor.invokeAll(merges);
3392       } catch (InterruptedException e) {
3393         LOG.error("Overlap merges were interrupted", e);
3394         return false;
3395       }
3396       for(int i=0; i<merges.size(); i++) {
3397         WorkItemOverlapMerge work = merges.get(i);
3398         Future<Void> f = rets.get(i);
3399         try {
3400           f.get();
3401         } catch(ExecutionException e) {
3402           LOG.warn("Failed to merge overlap group" + work, e.getCause());
3403         } catch (InterruptedException e) {
3404           LOG.error("Waiting for overlap merges was interrupted", e);
3405           return false;
3406         }
3407       }
3408       return true;
3409     }
3410 
3411     /**
3412      * This dumps data in a visually reasonable way for visual debugging
3413      *
3414      * @param splits
3415      * @param regions
3416      */
3417     void dump(SortedSet<byte[]> splits, Multimap<byte[], HbckInfo> regions) {
3418       // we display this way because the last end key should be displayed as well.
3419       StringBuilder sb = new StringBuilder();
3420       for (byte[] k : splits) {
3421         sb.setLength(0); // clear out existing buffer, if any.
3422         sb.append(Bytes.toStringBinary(k) + ":\t");
3423         for (HbckInfo r : regions.get(k)) {
3424           sb.append("[ "+ r.toString() + ", "
3425               + Bytes.toStringBinary(r.getEndKey())+ "]\t");
3426         }
3427         errors.print(sb.toString());
3428       }
3429     }
3430   }
3431 
3432   public void dumpOverlapProblems(Multimap<byte[], HbckInfo> regions) {
3433     // we display this way because the last end key should be displayed as
3434     // well.
3435     for (byte[] k : regions.keySet()) {
3436       errors.print(Bytes.toStringBinary(k) + ":");
3437       for (HbckInfo r : regions.get(k)) {
3438         errors.print("[ " + r.toString() + ", "
3439             + Bytes.toStringBinary(r.getEndKey()) + "]");
3440       }
3441       errors.print("----");
3442     }
3443   }
3444 
3445   public void dumpSidelinedRegions(Map<Path, HbckInfo> regions) {
3446     for (Map.Entry<Path, HbckInfo> entry: regions.entrySet()) {
3447       TableName tableName = entry.getValue().getTableName();
3448       Path path = entry.getKey();
3449       errors.print("This sidelined region dir should be bulk loaded: "
3450         + path.toString());
3451       errors.print("Bulk load command looks like: "
3452         + "hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles "
3453         + path.toUri().getPath() + " "+ tableName);
3454     }
3455   }
3456 
3457   public Multimap<byte[], HbckInfo> getOverlapGroups(
3458       TableName table) {
3459     TableInfo ti = tablesInfo.get(table);
3460     return ti.overlapGroups;
3461   }
3462 
3463   /**
3464    * Return a list of user-space table names whose metadata have not been
3465    * modified in the last few milliseconds specified by timelag
3466    * if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
3467    * SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
3468    * milliseconds specified by timelag, then the table is a candidate to be returned.
3469    * @return tables that have not been modified recently
3470    * @throws IOException if an error is encountered
3471    */
3472   HTableDescriptor[] getTables(AtomicInteger numSkipped) {
3473     List<TableName> tableNames = new ArrayList<TableName>();
3474     long now = EnvironmentEdgeManager.currentTime();
3475 
3476     for (HbckInfo hbi : regionInfoMap.values()) {
3477       MetaEntry info = hbi.metaEntry;
3478 
3479       // if the start key is zero, then we have found the first region of a table.
3480       // pick only those tables that were not modified in the last few milliseconds.
3481       if (info != null && info.getStartKey().length == 0 && !info.isMetaRegion()) {
3482         if (info.modTime + timelag < now) {
3483           tableNames.add(info.getTable());
3484         } else {
3485           numSkipped.incrementAndGet(); // one more in-flux table
3486         }
3487       }
3488     }
3489     return getHTableDescriptors(tableNames);
3490   }
3491 
3492   HTableDescriptor[] getHTableDescriptors(List<TableName> tableNames) {
3493     HTableDescriptor[] htd = new HTableDescriptor[0];
3494     Admin admin = null;
3495     try {
3496       LOG.info("getHTableDescriptors == tableNames => " + tableNames);
3497       admin = new HBaseAdmin(getConf());
3498       htd = admin.getTableDescriptorsByTableName(tableNames);
3499     } catch (IOException e) {
3500       LOG.debug("Exception getting table descriptors", e);
3501     } finally {
3502       if (admin != null) {
3503         try {
3504           admin.close();
3505         } catch (IOException e) {
3506           LOG.debug("Exception closing HBaseAdmin", e);
3507         }
3508       }
3509     }
3510     return htd;
3511   }
3512 
3513   /**
3514    * Gets the entry in regionInfo corresponding to the the given encoded
3515    * region name. If the region has not been seen yet, a new entry is added
3516    * and returned.
3517    */
3518   private synchronized HbckInfo getOrCreateInfo(String name) {
3519     HbckInfo hbi = regionInfoMap.get(name);
3520     if (hbi == null) {
3521       hbi = new HbckInfo(null);
3522       regionInfoMap.put(name, hbi);
3523     }
3524     return hbi;
3525   }
3526 
3527   private void checkAndFixTableLocks() throws IOException {
3528     TableLockChecker checker = new TableLockChecker(zkw, errors);
3529     checker.checkTableLocks();
3530 
3531     if (this.fixTableLocks) {
3532       checker.fixExpiredTableLocks();
3533     }
3534   }
3535 
3536   private void checkAndFixReplication() throws IOException {
3537     ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, connection, errors);
3538     checker.checkUnDeletedQueues();
3539 
3540     if (checker.hasUnDeletedQueues() && this.fixReplication) {
3541       checker.fixUnDeletedQueues();
3542       setShouldRerun();
3543     }
3544   }
3545 
3546   /**
3547    * Check whether a orphaned table ZNode exists and fix it if requested.
3548    * @throws IOException
3549    * @throws KeeperException
3550    * @throws InterruptedException
3551    */
3552   private void checkAndFixOrphanedTableZNodes()
3553       throws IOException, KeeperException, InterruptedException {
3554     Set<TableName> enablingTables = ZKTableStateClientSideReader.getEnablingTables(zkw);
3555     String msg;
3556     TableInfo tableInfo;
3557 
3558     for (TableName tableName : enablingTables) {
3559       // Check whether the table exists in hbase
3560       tableInfo = tablesInfo.get(tableName);
3561       if (tableInfo != null) {
3562         // Table exists.  This table state is in transit.  No problem for this table.
3563         continue;
3564       }
3565 
3566       msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found.";
3567       LOG.warn(msg);
3568       orphanedTableZNodes.add(tableName);
3569       errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg);
3570     }
3571 
3572     if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) {
3573       ZKTableStateManager zkTableStateMgr = new ZKTableStateManager(zkw);
3574 
3575       for (TableName tableName : orphanedTableZNodes) {
3576         try {
3577           // Set the table state to be disabled so that if we made mistake, we can trace
3578           // the history and figure it out.
3579           // Another choice is to call checkAndRemoveTableState() to delete the orphaned ZNode.
3580           // Both approaches works.
3581           zkTableStateMgr.setTableState(tableName, ZooKeeperProtos.Table.State.DISABLED);
3582         } catch (CoordinatedStateException e) {
3583           // This exception should not happen here
3584           LOG.error(
3585             "Got a CoordinatedStateException while fixing the ENABLING table znode " + tableName,
3586             e);
3587         }
3588       }
3589     }
3590   }
3591 
  /**
   * Check values in regionInfo for hbase:meta.
   * Check if zero or more than one regions with hbase:meta are found.
   * If there are inconsistencies (i.e. zero or more than one regions
   * pretend to be holding the hbase:meta) try to fix that and report an error.
   * @return true if every configured hbase:meta replica is deployed on exactly one
   *         server and nothing needed fixing; false if any inconsistency was found
   *         (hbck should be rerun after the attempted fixes)
   * @throws IOException from HBaseFsckRepair functions
   * @throws KeeperException if removing an excess replica's ZNode fails
   * @throws InterruptedException if interrupted while waiting for an assignment
   */
  boolean checkMetaRegion() throws IOException, KeeperException, InterruptedException {
    // Collect every scanned entry that claims to be hbase:meta, keyed by replicaId.
    Map<Integer, HbckInfo> metaRegions = new HashMap<Integer, HbckInfo>();
    for (HbckInfo value : regionInfoMap.values()) {
      if (value.metaEntry != null && value.metaEntry.isMetaRegion()) {
        metaRegions.put(value.getReplicaId(), value);
      }
    }
    // Number of replicas hbase:meta is configured with.
    int metaReplication = admin.getTableDescriptor(TableName.META_TABLE_NAME)
        .getRegionReplication();
    boolean noProblem = true;
    // There will be always entries in regionInfoMap corresponding to hbase:meta & its replicas
    // Check the deployed servers. It should be exactly one server for each replica.
    for (int i = 0; i < metaReplication; i++) {
      HbckInfo metaHbckInfo = metaRegions.remove(i);
      List<ServerName> servers = new ArrayList<ServerName>();
      if (metaHbckInfo != null) {
        servers = metaHbckInfo.deployedOn;
      }
      if (servers.size() != 1) {
        noProblem = false;
        if (servers.size() == 0) {
          // Replica is not deployed anywhere; report and (in fix mode) assign it.
          assignMetaReplica(i);
        } else if (servers.size() > 1) {
          // Replica is deployed on multiple servers.
          errors
          .reportError(ERROR_CODE.MULTI_META_REGION, "hbase:meta, replicaId " +
                       metaHbckInfo.getReplicaId() + " is found on more than one region.");
          if (shouldFixAssignments()) {
            errors.print("Trying to fix a problem with hbase:meta, replicaId " +
                         metaHbckInfo.getReplicaId() +"..");
            setShouldRerun();
            // try fix it (treat is a dupe assignment)
            HBaseFsckRepair.fixMultiAssignment(connection, metaHbckInfo.metaEntry, servers);
          }
        }
      }
    }
    // unassign whatever is remaining in metaRegions. They are excess replicas.
    for (Map.Entry<Integer, HbckInfo> entry : metaRegions.entrySet()) {
      noProblem = false;
      errors.reportError(ERROR_CODE.SHOULD_NOT_BE_DEPLOYED,
          "hbase:meta replicas are deployed in excess. Configured " + metaReplication +
          ", deployed " + metaRegions.size());
      if (shouldFixAssignments()) {
        errors.print("Trying to undeploy excess replica, replicaId: " + entry.getKey() +
            " of hbase:meta..");
        setShouldRerun();
        unassignMetaReplica(entry.getValue());
      }
    }
    // if noProblem is false, rerun hbck with hopefully fixed META
    // if noProblem is true, no errors, so continue normally
    return noProblem;
  }
3654 
  /**
   * Undeploy an excess hbase:meta replica: close the region on every server it is
   * deployed on, then delete its location ZNode from ZooKeeper.
   * @param hi hbck entry for the excess replica; its metaEntry is expected non-null
   *           (callers only pass entries collected from hbase:meta scans)
   * @throws IOException if closing the region on a server fails
   * @throws InterruptedException if interrupted while undeploying
   * @throws KeeperException if the ZNode deletion fails
   */
  private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException,
  KeeperException {
    undeployRegions(hi);
    ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId()));
  }
3660 
3661   private void assignMetaReplica(int replicaId)
3662       throws IOException, KeeperException, InterruptedException {
3663     errors.reportError(ERROR_CODE.NO_META_REGION, "hbase:meta, replicaId " +
3664         replicaId +" is not found on any region.");
3665     if (shouldFixAssignments()) {
3666       errors.print("Trying to fix a problem with hbase:meta..");
3667       setShouldRerun();
3668       // try to fix it (treat it as unassigned region)
3669       HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
3670           HRegionInfo.FIRST_META_REGIONINFO, replicaId);
3671       HBaseFsckRepair.fixUnassigned(admin, h);
3672       HBaseFsckRepair.waitUntilAssigned(admin, h);
3673     }
3674   }
3675 
  /**
   * Scan hbase:meta, adding all regions found to the regionInfo map.
   * Rows with no parsable region info are collected in emptyRegionInfoQualifiers
   * and reported as EMPTY_META_CELL.  Skipped entirely when checkMetaOnly is set.
   * @return always true (the scan either completes or throws)
   * @throws IOException if an error is encountered
   */
  boolean loadMetaEntries() throws IOException {
    MetaScannerVisitor visitor = new MetaScannerVisitorBase() {
      int countRecord = 1;  // rows processed so far; drives periodic progress output

      // comparator to sort KeyValues with latest modtime
      final Comparator<Cell> comp = new Comparator<Cell>() {
        @Override
        public int compare(Cell k1, Cell k2) {
          return Long.compare(k1.getTimestamp(), k2.getTimestamp());
        }
      };

      @Override
      public boolean processRow(Result result) throws IOException {
        try {

          // record the latest modification of this META record
          long ts =  Collections.max(result.listCells(), comp).getTimestamp();
          RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
          if (rl == null) {
            // Row carries no region locations at all; remember it for later repair.
            emptyRegionInfoQualifiers.add(result);
            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
            return true;
          }
          ServerName sn = null;
          if (rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID) == null ||
              rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo() == null) {
            // No usable primary-replica HRegionInfo for this row.
            emptyRegionInfoQualifiers.add(result);
            errors.reportError(ERROR_CODE.EMPTY_META_CELL,
              "Empty REGIONINFO_QUALIFIER found in hbase:meta");
            return true;
          }
          HRegionInfo hri = rl.getRegionLocation(HRegionInfo.DEFAULT_REPLICA_ID).getRegionInfo();
          if (!(isTableIncluded(hri.getTable())
              || hri.isMetaRegion())) {
            // Table filtered out by the command-line table selection; skip the row.
            return true;
          }
          PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(result);
          for (HRegionLocation h : rl.getRegionLocations()) {
            if (h == null || h.getRegionInfo() == null) {
              continue;
            }
            sn = h.getServerName();
            hri = h.getRegionInfo();

            MetaEntry m = null;
            if (hri.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
              // Only the primary replica carries the split-daughter information.
              m = new MetaEntry(hri, sn, ts, daughters.getFirst(), daughters.getSecond());
            } else {
              m = new MetaEntry(hri, sn, ts, null, null);
            }
            HbckInfo previous = regionInfoMap.get(hri.getEncodedName());
            if (previous == null) {
              regionInfoMap.put(hri.getEncodedName(), new HbckInfo(m));
            } else if (previous.metaEntry == null) {
              previous.metaEntry = m;
            } else {
              // Seeing the same encoded region twice with meta info means duplicate rows.
              throw new IOException("Two entries in hbase:meta are same " + previous);
            }
          }
          PairOfSameType<HRegionInfo> mergeRegions = HRegionInfo.getMergeRegions(result);
          for (HRegionInfo mergeRegion : new HRegionInfo[] {
              mergeRegions.getFirst(), mergeRegions.getSecond() }) {
            if (mergeRegion != null) {
              // This region is already been merged
              HbckInfo hbInfo = getOrCreateInfo(mergeRegion.getEncodedName());
              hbInfo.setMerged(true);
            }
          }

          // show proof of progress to the user, once for every 100 records.
          if (countRecord % 100 == 0) {
            errors.progress();
          }
          countRecord++;
          return true;
        } catch (RuntimeException e) {
          // Log the offending row before rethrowing so it can be located in hbase:meta.
          LOG.error("Result=" + result);
          throw e;
        }
      }
    };
    if (!checkMetaOnly) {
      // Scan hbase:meta to pick up user regions
      MetaScanner.metaScan(connection, visitor);
    }

    errors.print("");
    return true;
  }
3771 
3772   /**
3773    * Stores the regioninfo entries scanned from META
3774    */
3775   static class MetaEntry extends HRegionInfo {
3776     ServerName regionServer;   // server hosting this region
3777     long modTime;          // timestamp of most recent modification metadata
3778     HRegionInfo splitA, splitB; //split daughters
3779 
3780     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime) {
3781       this(rinfo, regionServer, modTime, null, null);
3782     }
3783 
3784     public MetaEntry(HRegionInfo rinfo, ServerName regionServer, long modTime,
3785         HRegionInfo splitA, HRegionInfo splitB) {
3786       super(rinfo);
3787       this.regionServer = regionServer;
3788       this.modTime = modTime;
3789       this.splitA = splitA;
3790       this.splitB = splitB;
3791     }
3792 
3793     @Override
3794     public boolean equals(Object o) {
3795       boolean superEq = super.equals(o);
3796       if (!superEq) {
3797         return superEq;
3798       }
3799 
3800       MetaEntry me = (MetaEntry) o;
3801       if (!regionServer.equals(me.regionServer)) {
3802         return false;
3803       }
3804       return (modTime == me.modTime);
3805     }
3806 
3807     @Override
3808     public int hashCode() {
3809       int hash = Arrays.hashCode(getRegionName());
3810       hash = (int) (hash ^ getRegionId());
3811       hash ^= Arrays.hashCode(getStartKey());
3812       hash ^= Arrays.hashCode(getEndKey());
3813       hash ^= Boolean.valueOf(isOffline()).hashCode();
3814       hash ^= getTable().hashCode();
3815       if (regionServer != null) {
3816         hash ^= regionServer.hashCode();
3817       }
3818       hash = (int) (hash ^ modTime);
3819       return hash;
3820     }
3821   }
3822 
  /**
   * Stores the regioninfo entries from HDFS
   */
  static class HdfsEntry {
    HRegionInfo hri;                            // parsed from .regioninfo when it has been read
    Path hdfsRegionDir = null;                  // region directory on HDFS
    long hdfsRegionDirModTime  = 0;             // modification time of that directory
    boolean hdfsRegioninfoFilePresent = false;  // whether the .regioninfo file exists
    boolean hdfsOnlyEdits = false;              // true if the dir contains only recovered edits
  }
3833 
  /**
   * Stores the regioninfo retrieved from Online region servers.
   */
  static class OnlineEntry {
    HRegionInfo hri;   // region as reported by the hosting region server
    ServerName hsa;    // server currently hosting the region

    @Override
    public String toString() {
      return hsa.toString() + ";" + hri.getRegionNameAsString();
    }
  }
3846 
  /**
   * Maintain information about a particular region.  It gathers information
   * from three places -- HDFS, META, and region servers.
   */
  public static class HbckInfo implements KeyRange {
    private MetaEntry metaEntry = null; // info in META
    private HdfsEntry hdfsEntry = null; // info in HDFS
    private List<OnlineEntry> deployedEntries = Lists.newArrayList(); // on Region Server
    private List<ServerName> deployedOn = Lists.newArrayList(); // info on RS's
    private boolean skipChecks = false; // whether to skip further checks to this region info.
    private boolean isMerged = false;// whether this region has already been merged into another one
    private int deployedReplicaId = HRegionInfo.DEFAULT_REPLICA_ID;
    private HRegionInfo primaryHRIForDeployedReplica = null;

    HbckInfo(MetaEntry metaEntry) {
      this.metaEntry = metaEntry;
    }

    /** Replica id from META if known, otherwise the one observed deployed on a RS. */
    public synchronized int getReplicaId() {
      return metaEntry != null? metaEntry.getReplicaId(): deployedReplicaId;
    }

    /** Record that the given region (replica) was seen deployed on the given server. */
    public synchronized void addServer(HRegionInfo hri, ServerName server) {
      OnlineEntry rse = new OnlineEntry() ;
      rse.hri = hri;
      rse.hsa = server;
      this.deployedEntries.add(rse);
      this.deployedOn.add(server);
      // save the replicaId that we see deployed in the cluster
      this.deployedReplicaId = hri.getReplicaId();
      this.primaryHRIForDeployedReplica =
          RegionReplicaUtil.getRegionInfoForDefaultReplica(hri);
    }

    @Override
    public synchronized String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("{ meta => ");
      sb.append((metaEntry != null)? metaEntry.getRegionNameAsString() : "null");
      sb.append( ", hdfs => " + getHdfsRegionDir());
      sb.append( ", deployed => " + Joiner.on(", ").join(deployedEntries));
      sb.append( ", replicaId => " + getReplicaId());
      sb.append(" }");
      return sb.toString();
    }

    /** Start key from META when present, else from HDFS; null (with a log) otherwise. */
    @Override
    public byte[] getStartKey() {
      if (this.metaEntry != null) {
        return this.metaEntry.getStartKey();
      } else if (this.hdfsEntry != null) {
        return this.hdfsEntry.hri.getStartKey();
      } else {
        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
        return null;
      }
    }

    /** End key from META when present, else from HDFS; null (with a log) otherwise. */
    @Override
    public byte[] getEndKey() {
      if (this.metaEntry != null) {
        return this.metaEntry.getEndKey();
      } else if (this.hdfsEntry != null) {
        return this.hdfsEntry.hri.getEndKey();
      } else {
        // NOTE: message says "start key" but this is the end-key accessor.
        LOG.error("Entry " + this + " has no meta or hdfs region start key.");
        return null;
      }
    }

    /** Table name resolved from META, the HDFS path, or the first deployed entry. */
    public TableName getTableName() {
      if (this.metaEntry != null) {
        return this.metaEntry.getTable();
      } else if (this.hdfsEntry != null) {
        // we are only guaranteed to have a path and not an HRI for hdfsEntry,
        // so we get the name from the Path
        Path tableDir = this.hdfsEntry.hdfsRegionDir.getParent();
        return FSUtils.getTableName(tableDir);
      } else {
        // return the info from the first online/deployed hri
        for (OnlineEntry e : deployedEntries) {
          return e.hri.getTable();
        }
        return null;
      }
    }

    /**
     * Region name resolved from META, HDFS, or the first deployed entry;
     * null when no source has an HRI.
     */
    public String getRegionNameAsString() {
      if (metaEntry != null) {
        return metaEntry.getRegionNameAsString();
      } else if (hdfsEntry != null) {
        if (hdfsEntry.hri != null) {
          return hdfsEntry.hri.getRegionNameAsString();
        }
        // NOTE(review): hdfsEntry present but hri null falls through to return null
        // without consulting deployedEntries, unlike getRegionName() below.
      } else {
        // return the info from the first online/deployed hri
        for (OnlineEntry e : deployedEntries) {
          return e.hri.getRegionNameAsString();
        }
      }
      return null;
    }

    /** Raw region name bytes from META, HDFS, or the first deployed entry. */
    public byte[] getRegionName() {
      if (metaEntry != null) {
        return metaEntry.getRegionName();
      } else if (hdfsEntry != null) {
        // NOTE(review): assumes hdfsEntry.hri is non-null here -- TODO confirm.
        return hdfsEntry.hri.getRegionName();
      } else {
        // return the info from the first online/deployed hri
        for (OnlineEntry e : deployedEntries) {
          return e.hri.getRegionName();
        }
        return null;
      }
    }

    /** Primary-replica HRI derived from the last deployment seen via addServer(). */
    public HRegionInfo getPrimaryHRIForDeployedReplica() {
      return primaryHRIForDeployedReplica;
    }

    /** Region directory on HDFS, or null if no HDFS info was gathered. */
    Path getHdfsRegionDir() {
      if (hdfsEntry == null) {
        return null;
      }
      return hdfsEntry.hdfsRegionDir;
    }

    /** True only when HDFS info exists and the dir holds nothing but recovered edits. */
    boolean containsOnlyHdfsEdits() {
      if (hdfsEntry == null) {
        return false;
      }
      return hdfsEntry.hdfsOnlyEdits;
    }

    /** True only when HDFS info exists and the .regioninfo file was present. */
    boolean isHdfsRegioninfoPresent() {
      if (hdfsEntry == null) {
        return false;
      }
      return hdfsEntry.hdfsRegioninfoFilePresent;
    }

    /** Modification time of the HDFS region dir, or 0 when unknown. */
    long getModTime() {
      if (hdfsEntry == null) {
        return 0;
      }
      return hdfsEntry.hdfsRegionDirModTime;
    }

    /** HRI read from the .regioninfo file on HDFS, or null when unavailable. */
    HRegionInfo getHdfsHRI() {
      if (hdfsEntry == null) {
        return null;
      }
      return hdfsEntry.hri;
    }

    public void setSkipChecks(boolean skipChecks) {
      this.skipChecks = skipChecks;
    }

    public boolean isSkipChecks() {
      return skipChecks;
    }

    public void setMerged(boolean isMerged) {
      this.isMerged = isMerged;
    }

    public boolean isMerged() {
      return this.isMerged;
    }
  }
4019 
4020   final static Comparator<HbckInfo> cmp = new Comparator<HbckInfo>() {
4021     @Override
4022     public int compare(HbckInfo l, HbckInfo r) {
4023       if (l == r) {
4024         // same instance
4025         return 0;
4026       }
4027 
4028       int tableCompare = l.getTableName().compareTo(r.getTableName());
4029       if (tableCompare != 0) {
4030         return tableCompare;
4031       }
4032 
4033       int startComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
4034           l.getStartKey(), r.getStartKey());
4035       if (startComparison != 0) {
4036         return startComparison;
4037       }
4038 
4039       // Special case for absolute endkey
4040       byte[] endKey = r.getEndKey();
4041       endKey = (endKey.length == 0) ? null : endKey;
4042       byte[] endKey2 = l.getEndKey();
4043       endKey2 = (endKey2.length == 0) ? null : endKey2;
4044       int endComparison = RegionSplitCalculator.BYTES_COMPARATOR.compare(
4045           endKey2,  endKey);
4046 
4047       if (endComparison != 0) {
4048         return endComparison;
4049       }
4050 
4051       // use regionId as tiebreaker.
4052       // Null is considered after all possible values so make it bigger.
4053       if (l.hdfsEntry == null && r.hdfsEntry == null) {
4054         return 0;
4055       }
4056       if (l.hdfsEntry == null && r.hdfsEntry != null) {
4057         return 1;
4058       }
4059       // l.hdfsEntry must not be null
4060       if (r.hdfsEntry == null) {
4061         return -1;
4062       }
4063       // both l.hdfsEntry and r.hdfsEntry must not be null.
4064       return (int) (l.hdfsEntry.hri.getRegionId()- r.hdfsEntry.hri.getRegionId());
4065     }
4066   };
4067 
4068   /**
4069    * Prints summary of all tables found on the system.
4070    */
4071   private void printTableSummary(SortedMap<TableName, TableInfo> tablesInfo) {
4072     StringBuilder sb = new StringBuilder();
4073     int numOfSkippedRegions;
4074     errors.print("Summary:");
4075     for (TableInfo tInfo : tablesInfo.values()) {
4076       numOfSkippedRegions = (skippedRegions.containsKey(tInfo.getName())) ?
4077           skippedRegions.get(tInfo.getName()).size() : 0;
4078 
4079       if (errors.tableHasErrors(tInfo)) {
4080         errors.print("Table " + tInfo.getName() + " is inconsistent.");
4081       } else if (numOfSkippedRegions > 0){
4082         errors.print("Table " + tInfo.getName() + " is okay (with "
4083           + numOfSkippedRegions + " skipped regions).");
4084       }
4085       else {
4086         errors.print("Table " + tInfo.getName() + " is okay.");
4087       }
4088       errors.print("    Number of regions: " + tInfo.getNumRegions());
4089       if (numOfSkippedRegions > 0) {
4090         Set<String> skippedRegionStrings = skippedRegions.get(tInfo.getName());
4091         System.out.println("    Number of skipped regions: " + numOfSkippedRegions);
4092         System.out.println("      List of skipped regions:");
4093         for(String sr : skippedRegionStrings) {
4094           System.out.println("        " + sr);
4095         }
4096       }
4097       sb.setLength(0); // clear out existing buffer, if any.
4098       sb.append("    Deployed on: ");
4099       for (ServerName server : tInfo.deployedOn) {
4100         sb.append(" " + server.toString());
4101       }
4102       errors.print(sb.toString());
4103     }
4104   }
4105 
4106   static ErrorReporter getErrorReporter(
4107       final Configuration conf) throws ClassNotFoundException {
4108     Class<? extends ErrorReporter> reporter = conf.getClass("hbasefsck.errorreporter", PrintingErrorReporter.class, ErrorReporter.class);
4109     return ReflectionUtils.newInstance(reporter, conf);
4110   }
4111 
  /**
   * Collects and reports the inconsistencies hbck discovers.  Implementations
   * decide how/where the messages are emitted (see PrintingErrorReporter).
   */
  public interface ErrorReporter {
    /** Codes for every kind of inconsistency hbck can detect. */
    enum ERROR_CODE {
      UNKNOWN, NO_META_REGION, NULL_META_REGION, NO_VERSION_FILE, NOT_IN_META_HDFS, NOT_IN_META,
      NOT_IN_META_OR_DEPLOYED, NOT_IN_HDFS_OR_DEPLOYED, NOT_IN_HDFS, SERVER_DOES_NOT_MATCH_META, NOT_DEPLOYED,
      MULTI_DEPLOYED, SHOULD_NOT_BE_DEPLOYED, MULTI_META_REGION, RS_CONNECT_FAILURE,
      FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
      HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
      ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
      LINGERING_HFILELINK, WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK,
      ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR, UNDELETED_REPLICATION_QUEUE, DUPE_ENDKEYS
    }
    /** Discard all accumulated errors and error tables. */
    void clear();
    /** Report error text without incrementing the error count. */
    void report(String message);
    /** Report an error of unknown kind; increments the error count. */
    void reportError(String message);
    /** Report an error with an explicit code; increments the error count. */
    void reportError(ERROR_CODE errorCode, String message);
    /** Report an error and mark the given table as having errors. */
    void reportError(ERROR_CODE errorCode, String message, TableInfo table);
    /** Report an error tied to a table and one region. */
    void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info);
    /** Report an error tied to a table and a pair of regions. */
    void reportError(
      ERROR_CODE errorCode,
      String message,
      TableInfo table,
      HbckInfo info1,
      HbckInfo info2
    );
    /** Print the final tally; returns 0 when clean, non-zero when inconsistent. */
    int summarize();
    /** Emit detail-level output (only shown when details mode is enabled). */
    void detail(String details);
    /** Error codes reported so far, in order (used by unit tests). */
    ArrayList<ERROR_CODE> getErrorList();
    /** Emit a periodic progress indicator. */
    void progress();
    /** Print an informational line. */
    void print(String message);
    /** Reset the error count (but not the recorded codes/tables). */
    void resetErrors();
    /** Whether any error has been reported against the given table. */
    boolean tableHasErrors(TableInfo table);
  }
4144 
4145   static class PrintingErrorReporter implements ErrorReporter {
4146     public int errorCount = 0;
4147     private int showProgress;
4148     // How frequently calls to progress() will create output
4149     private static final int progressThreshold = 100;
4150 
4151     Set<TableInfo> errorTables = new HashSet<TableInfo>();
4152 
4153     // for use by unit tests to verify which errors were discovered
4154     private ArrayList<ERROR_CODE> errorList = new ArrayList<ERROR_CODE>();
4155 
4156     @Override
4157     public void clear() {
4158       errorTables.clear();
4159       errorList.clear();
4160       errorCount = 0;
4161     }
4162 
4163     @Override
4164     public synchronized void reportError(ERROR_CODE errorCode, String message) {
4165       if (errorCode == ERROR_CODE.WRONG_USAGE) {
4166         System.err.println(message);
4167         return;
4168       }
4169 
4170       errorList.add(errorCode);
4171       if (!getSUMMARY()) {
4172         System.out.println("ERROR: " + message);
4173       }
4174       errorCount++;
4175       showProgress = 0;
4176     }
4177 
4178     @Override
4179     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
4180       errorTables.add(table);
4181       reportError(errorCode, message);
4182     }
4183 
4184     @Override
4185     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
4186                                          HbckInfo info) {
4187       errorTables.add(table);
4188       String reference = "(region " + info.getRegionNameAsString() + ")";
4189       reportError(errorCode, reference + " " + message);
4190     }
4191 
4192     @Override
4193     public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table,
4194                                          HbckInfo info1, HbckInfo info2) {
4195       errorTables.add(table);
4196       String reference = "(regions " + info1.getRegionNameAsString()
4197           + " and " + info2.getRegionNameAsString() + ")";
4198       reportError(errorCode, reference + " " + message);
4199     }
4200 
4201     @Override
4202     public synchronized void reportError(String message) {
4203       reportError(ERROR_CODE.UNKNOWN, message);
4204     }
4205 
4206     /**
4207      * Report error information, but do not increment the error count.  Intended for cases
4208      * where the actual error would have been reported previously.
4209      * @param message
4210      */
4211     @Override
4212     public synchronized void report(String message) {
4213       if (!getSUMMARY()) {
4214         System.out.println("ERROR: " + message);
4215       }
4216       showProgress = 0;
4217     }
4218 
4219     @Override
4220     public synchronized int summarize() {
4221       System.out.println(Integer.toString(errorCount) +
4222                          " inconsistencies detected.");
4223       if (errorCount == 0) {
4224         System.out.println("Status: OK");
4225         return 0;
4226       } else {
4227         System.out.println("Status: INCONSISTENT");
4228         return -1;
4229       }
4230     }
4231 
4232     @Override
4233     public ArrayList<ERROR_CODE> getErrorList() {
4234       return errorList;
4235     }
4236 
4237     @Override
4238     public synchronized void print(String message) {
4239       if (!getSUMMARY()) {
4240         System.out.println(message);
4241       }
4242     }
4243 
4244     private synchronized static boolean getSUMMARY() {
4245       return SUMMARY;
4246     }
4247 
4248     @Override
4249     public boolean tableHasErrors(TableInfo table) {
4250       return errorTables.contains(table);
4251     }
4252 
4253     @Override
4254     public void resetErrors() {
4255       errorCount = 0;
4256     }
4257 
4258     @Override
4259     public synchronized void detail(String message) {
4260       if (details) {
4261         System.out.println(message);
4262       }
4263       showProgress = 0;
4264     }
4265 
4266     @Override
4267     public synchronized void progress() {
4268       if (showProgress++ == progressThreshold) {
4269         if (!getSUMMARY()) {
4270           System.out.print(".");
4271         }
4272         showProgress = 0;
4273       }
4274     }
4275   }
4276 
  /**
   * Contact a region server and get all information from it
   */
  static class WorkItemRegion implements Callable<Void> {
    private HBaseFsck hbck;         // owning hbck instance; receives discovered regions
    private ServerName rsinfo;      // region server to interrogate
    private ErrorReporter errors;   // sink for progress/detail/error output
    private HConnection connection; // cluster connection used to reach the RS

    WorkItemRegion(HBaseFsck hbck, ServerName info,
                   ErrorReporter errors, HConnection connection) {
      this.hbck = hbck;
      this.rsinfo = info;
      this.errors = errors;
      this.connection = connection;
    }

    /**
     * Fetch the list of online regions from this server, filter it down to the
     * tables hbck cares about, and record each region's deployment in hbck's map.
     * @return always null
     * @throws IOException if the region server cannot be contacted (also reported
     *         as RS_CONNECT_FAILURE before rethrowing)
     */
    @Override
    public synchronized Void call() throws IOException {
      errors.progress();
      try {
        BlockingInterface server = connection.getAdmin(rsinfo);

        // list all online regions from this region server
        List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
        regions = filterRegions(regions);

        if (details) {
          errors.detail("RegionServer: " + rsinfo.getServerName() +
                           " number of regions: " + regions.size());
          for (HRegionInfo rinfo: regions) {
            errors.detail("  " + rinfo.getRegionNameAsString() +
                             " id: " + rinfo.getRegionId() +
                             " encoded_name: " + rinfo.getEncodedName() +
                             " start: " + Bytes.toStringBinary(rinfo.getStartKey()) +
                             " end: " + Bytes.toStringBinary(rinfo.getEndKey()));
          }
        }

        // check to see if the existence of this region matches the region in META
        for (HRegionInfo r:regions) {
          HbckInfo hbi = hbck.getOrCreateInfo(r.getEncodedName());
          hbi.addServer(r, rsinfo);
        }
      } catch (IOException e) {          // unable to connect to the region server.
        errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "RegionServer: " + rsinfo.getServerName() +
          " Unable to fetch region information. " + e);
        throw e;
      }
      return null;
    }

    /**
     * Keep meta regions always; other regions only when not in meta-only mode and
     * their table passes hbck's include filter.
     */
    private List<HRegionInfo> filterRegions(List<HRegionInfo> regions) {
      List<HRegionInfo> ret = Lists.newArrayList();
      for (HRegionInfo hri : regions) {
        if (hri.isMetaTable() || (!hbck.checkMetaOnly
            && hbck.isTableIncluded(hri.getTable()))) {
          ret.add(hri);
        }
      }
      return ret;
    }
  }
4340 
4341   /**
4342    * Contact hdfs and get all information about specified table directory into
4343    * regioninfo list.
4344    */
4345   class WorkItemHdfsDir implements Callable<Void> {
4346     private FileStatus tableDir;
4347     private ErrorReporter errors;
4348     private FileSystem fs;
4349 
4350     WorkItemHdfsDir(FileSystem fs, ErrorReporter errors,
4351                     FileStatus status) {
4352       this.fs = fs;
4353       this.tableDir = status;
4354       this.errors = errors;
4355     }
4356 
4357     @Override
4358     public synchronized Void call() throws InterruptedException, ExecutionException {
4359       final Vector<Exception> exceptions = new Vector<Exception>();
4360 
4361       try {
4362         final FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
4363         final List<Future<?>> futures = new ArrayList<Future<?>>(regionDirs.length);
4364 
4365         for (final FileStatus regionDir : regionDirs) {
4366           errors.progress();
4367           final String encodedName = regionDir.getPath().getName();
4368           // ignore directories that aren't hexadecimal
4369           if (!encodedName.toLowerCase(Locale.ROOT).matches("[0-9a-f]+")) {
4370             continue;
4371           }
4372 
4373           if (!exceptions.isEmpty()) {
4374             break;
4375           }
4376 
4377           futures.add(executor.submit(new Runnable() {
4378             @Override
4379             public void run() {
4380               try {
4381                 LOG.debug("Loading region info from hdfs:"+ regionDir.getPath());
4382 
4383                 Path regioninfoFile = new Path(regionDir.getPath(), HRegionFileSystem.REGION_INFO_FILE);
4384                 boolean regioninfoFileExists = fs.exists(regioninfoFile);
4385 
4386                 if (!regioninfoFileExists) {
4387                   // As tables become larger it is more and more likely that by the time you
4388                   // reach a given region that it will be gone due to region splits/merges.
4389                   if (!fs.exists(regionDir.getPath())) {
4390                     LOG.warn("By the time we tried to process this region dir it was already gone: "
4391                         + regionDir.getPath());
4392                     return;
4393                   }
4394                 }
4395 
4396                 HbckInfo hbi = HBaseFsck.this.getOrCreateInfo(encodedName);
4397                 HdfsEntry he = new HdfsEntry();
4398                 synchronized (hbi) {
4399                   if (hbi.getHdfsRegionDir() != null) {
4400                     errors.print("Directory " + encodedName + " duplicate??" +
4401                                  hbi.getHdfsRegionDir());
4402                   }
4403 
4404                   he.hdfsRegionDir = regionDir.getPath();
4405                   he.hdfsRegionDirModTime = regionDir.getModificationTime();
4406                   he.hdfsRegioninfoFilePresent = regioninfoFileExists;
4407                   // we add to orphan list when we attempt to read .regioninfo
4408 
4409                   // Set a flag if this region contains only edits
4410                   // This is special case if a region is left after split
4411                   he.hdfsOnlyEdits = true;
4412                   FileStatus[] subDirs = fs.listStatus(regionDir.getPath());
4413                   Path ePath = WALSplitter.getRegionDirRecoveredEditsDir(regionDir.getPath());
4414                   for (FileStatus subDir : subDirs) {
4415                     errors.progress();
4416                     String sdName = subDir.getPath().getName();
4417                     if (!sdName.startsWith(".") && !sdName.equals(ePath.getName())) {
4418                       he.hdfsOnlyEdits = false;
4419                       break;
4420                     }
4421                   }
4422                   hbi.hdfsEntry = he;
4423                 }
4424               } catch (Exception e) {
4425                 LOG.error("Could not load region dir", e);
4426                 exceptions.add(e);
4427               }
4428             }
4429           }));
4430         }
4431 
4432         // Ensure all pending tasks are complete (or that we run into an exception)
4433         for (Future<?> f : futures) {
4434           if (!exceptions.isEmpty()) {
4435             break;
4436           }
4437           try {
4438             f.get();
4439           } catch (ExecutionException e) {
4440             LOG.error("Unexpected exec exception!  Should've been caught already.  (Bug?)", e);
4441             // Shouldn't happen, we already logged/caught any exceptions in the Runnable
4442           };
4443         }
4444       } catch (IOException e) {
4445         LOG.error("Cannot execute WorkItemHdfsDir for " + tableDir, e);
4446         exceptions.add(e);
4447       } finally {
4448         if (!exceptions.isEmpty()) {
4449           errors.reportError(ERROR_CODE.RS_CONNECT_FAILURE, "Table Directory: "
4450               + tableDir.getPath().getName()
4451               + " Unable to fetch all HDFS region information. ");
4452           // Just throw the first exception as an indication something bad happened
4453           // Don't need to propagate all the exceptions, we already logged them all anyway
4454           throw new ExecutionException("First exception in WorkItemHdfsDir", exceptions.firstElement());
4455         }
4456       }
4457       return null;
4458     }
4459   }
4460 
4461   /**
4462    * Contact hdfs and get all information about specified table directory into
4463    * regioninfo list.
4464    */
4465   static class WorkItemHdfsRegionInfo implements Callable<Void> {
4466     private HbckInfo hbi;
4467     private HBaseFsck hbck;
4468     private ErrorReporter errors;
4469 
4470     WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) {
4471       this.hbi = hbi;
4472       this.hbck = hbck;
4473       this.errors = errors;
4474     }
4475 
4476     @Override
4477     public synchronized Void call() throws IOException {
4478       // only load entries that haven't been loaded yet.
4479       if (hbi.getHdfsHRI() == null) {
4480         try {
4481           errors.progress();
4482           hbck.loadHdfsRegioninfo(hbi);
4483         } catch (IOException ioe) {
4484           String msg = "Orphan region in HDFS: Unable to load .regioninfo from table "
4485               + hbi.getTableName() + " in hdfs dir "
4486               + hbi.getHdfsRegionDir()
4487               + "!  It may be an invalid format or version file.  Treating as "
4488               + "an orphaned regiondir.";
4489           errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg);
4490           try {
4491             hbck.debugLsr(hbi.getHdfsRegionDir());
4492           } catch (IOException ioe2) {
4493             LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2);
4494             throw ioe2;
4495           }
4496           hbck.orphanHdfsDirs.add(hbi);
4497           throw ioe;
4498         }
4499       }
4500       return null;
4501     }
4502   };
4503 
4504   /**
4505    * Display the full report from fsck. This displays all live and dead region
4506    * servers, and all known regions.
4507    */
4508   public static void setDisplayFullReport() {
4509     details = true;
4510   }
4511 
4512   /**
4513    * Set exclusive mode.
4514    */
4515   public static void setForceExclusive() {
4516     forceExclusive = true;
4517   }
4518 
4519   /**
4520    * Only one instance of hbck can modify HBase at a time.
4521    */
4522   public boolean isExclusive() {
4523     return fixAny || forceExclusive;
4524   }
4525 
4526   /**
4527    * Set summary mode.
4528    * Print only summary of the tables and status (OK or INCONSISTENT)
4529    */
4530   synchronized static void setSummary() {
4531     SUMMARY = true;
4532   }
4533 
4534   /**
4535    * Set hbase:meta check mode.
4536    * Print only info about hbase:meta table deployment/state
4537    */
4538   void setCheckMetaOnly() {
4539     checkMetaOnly = true;
4540   }
4541 
4542   /**
4543    * Set region boundaries check mode.
4544    */
4545   void setRegionBoundariesCheck() {
4546     checkRegionBoundaries = true;
4547   }
4548 
4549   /**
4550    * Set table locks fix mode.
4551    * Delete table locks held for a long time
4552    */
4553   public void setFixTableLocks(boolean shouldFix) {
4554     fixTableLocks = shouldFix;
4555     fixAny |= shouldFix;
4556   }
4557 
4558   /**
4559    * Set replication fix mode.
4560    */
4561   public void setFixReplication(boolean shouldFix) {
4562     fixReplication = shouldFix;
4563     fixAny |= shouldFix;
4564   }
4565 
4566   /**
4567    * Set orphaned table ZNodes fix mode.
4568    * Set the table state to disable in the orphaned table ZNode.
4569    */
4570   public void setFixTableZNodes(boolean shouldFix) {
4571     fixTableZNodes = shouldFix;
4572     fixAny |= shouldFix;
4573   }
4574 
4575   /**
4576    * Check if we should rerun fsck again. This checks if we've tried to
4577    * fix something and we should rerun fsck tool again.
4578    * Display the full report from fsck. This displays all live and dead
4579    * region servers, and all known regions.
4580    */
4581   void setShouldRerun() {
4582     rerun = true;
4583   }
4584 
  /** @return true if a repair was attempted and fsck should run once more */
  boolean shouldRerun() {
    return rerun;
  }
4588 
4589   /**
4590    * Fix inconsistencies found by fsck. This should try to fix errors (if any)
4591    * found by fsck utility.
4592    */
4593   public void setFixAssignments(boolean shouldFix) {
4594     fixAssignments = shouldFix;
4595     fixAny |= shouldFix;
4596   }
4597 
  /** @return true if region assignment fixing is enabled */
  boolean shouldFixAssignments() {
    return fixAssignments;
  }
4601 
  /**
   * Set hbase:meta fix mode.  Assumes the HDFS region info is authoritative.
   * @param shouldFix if true, meta problems will be repaired
   */
  public void setFixMeta(boolean shouldFix) {
    fixMeta = shouldFix;
    fixAny |= shouldFix; // any fix mode makes hbck require exclusive access
  }
4606 
  /** @return true if hbase:meta fixing is enabled */
  boolean shouldFixMeta() {
    return fixMeta;
  }
4610 
  /**
   * Set fix mode for hbase:meta rows with empty REGIONINFO_QUALIFIER cells.
   * @param shouldFix if true, such empty meta cells will be repaired
   */
  public void setFixEmptyMetaCells(boolean shouldFix) {
    fixEmptyMetaCells = shouldFix;
    fixAny |= shouldFix; // any fix mode makes hbck require exclusive access
  }
4615 
  /** @return true if empty hbase:meta cell fixing is enabled */
  boolean shouldFixEmptyMetaCells() {
    return fixEmptyMetaCells;
  }
4619 
  /**
   * Enable/disable loading and checking region info from HDFS
   * (disabled by -noHdfsChecking).
   * @param checking if false, no HDFS-side checks or fixes will be done
   */
  public void setCheckHdfs(boolean checking) {
    checkHdfs = checking;
  }
4623 
  /** @return true if HDFS-side region info checking is enabled */
  boolean shouldCheckHdfs() {
    return checkHdfs;
  }
4627 
  /**
   * Set fix mode for region holes in HDFS.
   * @param shouldFix if true, region holes in hdfs will be repaired
   */
  public void setFixHdfsHoles(boolean shouldFix) {
    fixHdfsHoles = shouldFix;
    fixAny |= shouldFix; // any fix mode makes hbck require exclusive access
  }
4632 
  /** @return true if HDFS region hole fixing is enabled */
  boolean shouldFixHdfsHoles() {
    return fixHdfsHoles;
  }
4636 
  /**
   * Set fix mode for table dirs with no .tableinfo file (online mode only).
   * @param shouldFix if true, orphaned table dirs will be repaired
   */
  public void setFixTableOrphans(boolean shouldFix) {
    fixTableOrphans = shouldFix;
    fixAny |= shouldFix; // any fix mode makes hbck require exclusive access
  }
4641 
  /** @return true if orphaned table dir fixing is enabled */
  boolean shouldFixTableOrphans() {
    return fixTableOrphans;
  }
4645 
  /**
   * Set fix mode for region overlaps in HDFS.
   * @param shouldFix if true, overlapping regions in hdfs will be repaired
   */
  public void setFixHdfsOverlaps(boolean shouldFix) {
    fixHdfsOverlaps = shouldFix;
    fixAny |= shouldFix; // any fix mode makes hbck require exclusive access
  }
4650 
  /** @return true if HDFS region overlap fixing is enabled */
  boolean shouldFixHdfsOverlaps() {
    return fixHdfsOverlaps;
  }
4654 
  /**
   * Set fix mode for region dirs with no .regioninfo file in HDFS.
   * @param shouldFix if true, orphaned region dirs will be repaired
   */
  public void setFixHdfsOrphans(boolean shouldFix) {
    fixHdfsOrphans = shouldFix;
    fixAny |= shouldFix; // any fix mode makes hbck require exclusive access
  }
4659 
  /** @return true if orphaned HDFS region dir fixing is enabled */
  boolean shouldFixHdfsOrphans() {
    return fixHdfsOrphans;
  }
4663 
  /**
   * Set fix mode for a missing hbase.version file in HDFS.
   * @param shouldFix if true, a missing version file will be recreated
   */
  public void setFixVersionFile(boolean shouldFix) {
    fixVersionFile = shouldFix;
    fixAny |= shouldFix; // any fix mode makes hbck require exclusive access
  }
4668 
  /** @return true if hbase.version file fixing is enabled */
  public boolean shouldFixVersionFile() {
    return fixVersionFile;
  }
4672 
  /**
   * Allow sidelining of big overlaps when fixing region overlaps.
   * @param sbo if true, big overlapping regions may be sidelined
   */
  public void setSidelineBigOverlaps(boolean sbo) {
    this.sidelineBigOverlaps = sbo;
  }
4676 
  /** @return true if big overlaps may be sidelined during overlap fixing */
  public boolean shouldSidelineBigOverlaps() {
    return sidelineBigOverlaps;
  }
4680 
  /**
   * Set fix mode for offline split parents (force them back online).
   * @param shouldFix if true, offline split parents will be brought online
   */
  public void setFixSplitParents(boolean shouldFix) {
    fixSplitParents = shouldFix;
    fixAny |= shouldFix; // any fix mode makes hbck require exclusive access
  }
4685 
  /**
   * Set fix mode for lingering split parents: offline and sideline them,
   * keeping the daughter regions.
   * @param shouldFix if true, lingering parents will be removed
   */
  public void setRemoveParents(boolean shouldFix) {
    removeParents = shouldFix;
    fixAny |= shouldFix; // any fix mode makes hbck require exclusive access
  }
4690 
  /** @return true if offline split parent fixing is enabled */
  boolean shouldFixSplitParents() {
    return fixSplitParents;
  }
4694 
  /** @return true if lingering split parent removal is enabled */
  boolean shouldRemoveParents() {
    return removeParents;
  }
4698 
  /**
   * Set fix mode for lingering reference store files (offline them).
   * @param shouldFix if true, lingering reference files will be offlined
   */
  public void setFixReferenceFiles(boolean shouldFix) {
    fixReferenceFiles = shouldFix;
    fixAny |= shouldFix; // any fix mode makes hbck require exclusive access
  }
4703 
  /** @return true if lingering reference file fixing is enabled */
  boolean shouldFixReferenceFiles() {
    return fixReferenceFiles;
  }
4707 
  /**
   * Set fix mode for lingering HFileLinks (offline them).
   * @param shouldFix if true, lingering HFileLinks will be offlined
   */
  public void setFixHFileLinks(boolean shouldFix) {
    fixHFileLinks = shouldFix;
    fixAny |= shouldFix; // any fix mode makes hbck require exclusive access
  }
4712 
  /** @return true if lingering HFileLink fixing is enabled */
  boolean shouldFixHFileLinks() {
    return fixHFileLinks;
  }
4716 
  /**
   * @return true if the filesystem permission pre-check should be skipped:
   *         either no fix option is enabled, or the check was explicitly
   *         disabled via -ignorePreCheckPermission
   */
  public boolean shouldIgnorePreCheckPermission() {
    return !fixAny || ignorePreCheckPermission;
  }
4720 
  /**
   * Enable/disable skipping the filesystem permission pre-check.
   * @param ignorePreCheckPermission if true, the pre-check is skipped
   */
  public void setIgnorePreCheckPermission(boolean ignorePreCheckPermission) {
    this.ignorePreCheckPermission = ignorePreCheckPermission;
  }
4724 
4725   /**
4726    * @param mm maximum number of regions to merge into a single region.
4727    */
4728   public void setMaxMerge(int mm) {
4729     this.maxMerge = mm;
4730   }
4731 
  /** @return maximum number of regions to merge into a single region */
  public int getMaxMerge() {
    return maxMerge;
  }
4735 
  /**
   * @param mo maximum number of overlapping regions to sideline per group
   *           when fixing region overlaps
   */
  public void setMaxOverlapsToSideline(int mo) {
    this.maxOverlapsToSideline = mo;
  }
4739 
  /** @return maximum number of overlapping regions to sideline per group */
  public int getMaxOverlapsToSideline() {
    return maxOverlapsToSideline;
  }
4743 
4744   /**
4745    * Only check/fix tables specified by the list,
4746    * Empty list means all tables are included.
4747    */
4748   boolean isTableIncluded(TableName table) {
4749     return (tablesIncluded.size() == 0) || tablesIncluded.contains(table);
4750   }
4751 
  /**
   * Add a table to the set of tables to check/fix.
   * @param table table to include
   */
  public void includeTable(TableName table) {
    tablesIncluded.add(table);
  }
4755 
  /** @return a defensive copy of the set of explicitly included tables */
  Set<TableName> getIncludedTables() {
    return new HashSet<TableName>(tablesIncluded);
  }
4759 
4760   /**
4761    * We are interested in only those tables that have not changed their state in
4762    * hbase:meta during the last few seconds specified by hbase.admin.fsck.timelag
4763    * @param seconds - the time in seconds
4764    */
4765   public void setTimeLag(long seconds) {
4766     timelag = seconds * 1000; // convert to milliseconds
4767   }
4768 
4769   /**
4770    *
4771    * @param sidelineDir - HDFS path to sideline data
4772    */
4773   public void setSidelineDir(String sidelineDir) {
4774     this.sidelineDir = new Path(sidelineDir);
4775   }
4776 
  /**
   * Create the checker used to scan HFiles for corruption.
   * @param sidelineCorruptHFiles if true, corrupted HFiles are quarantined
   * @return a new HFileCorruptionChecker using this instance's conf/executor
   * @throws IOException if the checker cannot be created
   */
  protected HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
    return new HFileCorruptionChecker(getConf(), executor, sidelineCorruptHFiles);
  }
4780 
  /**
   * @return the HFile corruption checker set on this instance, if any.
   * NOTE(review): method name has a lowercase 'c'; kept as-is for
   * API compatibility.
   */
  public HFileCorruptionChecker getHFilecorruptionChecker() {
    return hfcc;
  }
4784 
  /**
   * Store the HFile corruption checker so results can be retrieved later.
   * @param hfcc the checker to keep
   */
  public void setHFileCorruptionChecker(HFileCorruptionChecker hfcc) {
    this.hfcc = hfcc;
  }
4788 
  /**
   * Set the process return code reported by {@code getRetCode()}.
   * @param code exit status to report
   */
  public void setRetCode(int code) {
    this.retcode = code;
  }
4792 
  /** @return the exit status of the last hbck run */
  public int getRetCode() {
    return retcode;
  }
4796 
  /**
   * Build the usage text, report it through the error reporter as a
   * WRONG_USAGE error, and set the return code to -2.  Despite its name, this
   * method does not call System.exit(); it returns this instance so callers
   * can simply {@code return printUsageAndExit();}.
   * @return this instance, with retcode set to -2
   */
  protected HBaseFsck printUsageAndExit() {
    StringWriter sw = new StringWriter(2048);
    PrintWriter out = new PrintWriter(sw);
    out.println("Usage: fsck [opts] {only tables}");
    out.println(" where [opts] are:");
    out.println("   -help Display help options (this)");
    out.println("   -details Display full report of all regions.");
    out.println("   -timelag <timeInSeconds>  Process only regions that " +
                       " have not experienced any metadata updates in the last " +
                       " <timeInSeconds> seconds.");
    out.println("   -sleepBeforeRerun <timeInSeconds> Sleep this many seconds" +
        " before checking if the fix worked if run with -fix");
    out.println("   -summary Print only summary of the tables and status.");
    out.println("   -metaonly Only check the state of the hbase:meta table.");
    out.println("   -sidelineDir <hdfs://> HDFS path to backup existing meta.");
    out.println("   -boundaries Verify that regions boundaries are the same between META and store files.");
    out.println("   -exclusive Abort if another hbck is exclusive or fixing.");

    out.println("");
    out.println("  Metadata Repair options: (expert features, use with caution!)");
    out.println("   -fix              Try to fix region assignments.  This is for backwards compatiblity");
    out.println("   -fixAssignments   Try to fix region assignments.  Replaces the old -fix");
    out.println("   -fixMeta          Try to fix meta problems.  This assumes HDFS region info is good.");
    out.println("   -noHdfsChecking   Don't load/check region info from HDFS."
        + " Assumes hbase:meta region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
    out.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
    out.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
    out.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
    out.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
    out.println("   -fixVersionFile   Try to fix missing hbase.version file in hdfs.");
    out.println("   -maxMerge <n>     When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
    out.println("   -sidelineBigOverlaps  When fixing region overlaps, allow to sideline big overlaps");
    out.println("   -maxOverlapsToSideline <n>  When fixing region overlaps, allow at most <n> regions to sideline per group. (n=" + DEFAULT_OVERLAPS_TO_SIDELINE +" by default)");
    out.println("   -fixSplitParents  Try to force offline split parents to be online.");
    out.println("   -removeParents    Try to offline and sideline lingering parents and keep daughter regions.");
    out.println("   -ignorePreCheckPermission  ignore filesystem permission pre-check");
    out.println("   -fixReferenceFiles  Try to offline lingering reference store files");
    out.println("   -fixHFileLinks  Try to offline lingering HFileLinks");
    out.println("   -fixEmptyMetaCells  Try to fix hbase:meta entries not referencing any region"
        + " (empty REGIONINFO_QUALIFIER rows)");

    out.println("");
    out.println("  Datafile Repair options: (expert features, use with caution!)");
    out.println("   -checkCorruptHFiles     Check all Hfiles by opening them to make sure they are valid");
    out.println("   -sidelineCorruptHFiles  Quarantine corrupted HFiles.  implies -checkCorruptHFiles");

    out.println("");
    out.println("  Metadata Repair shortcuts");
    out.println("   -repair           Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
        "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " +
        "-fixReferenceFiles -fixHFileLinks -fixTableLocks -fixOrphanedTableZnodes");

    out.println("   -repairHoles      Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");

    out.println("");
    out.println("  Table lock options");
    out.println("   -fixTableLocks    Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");

    out.println("");
    out.println("  Table Znode options");
    out.println("   -fixOrphanedTableZnodes    Set table state in ZNode to disabled if table does not exists");

    out.println("");
    out.println(" Replication options");
    out.println("   -fixReplication   Deletes replication queues for removed peers");

    out.flush();
    errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());

    setRetCode(-2);
    return this;
  }
4869 
4870   /**
4871    * Main program
4872    *
4873    * @param args
4874    * @throws Exception
4875    */
4876   public static void main(String[] args) throws Exception {
4877     // create a fsck object
4878     Configuration conf = HBaseConfiguration.create();
4879     Path hbasedir = FSUtils.getRootDir(conf);
4880     URI defaultFs = hbasedir.getFileSystem(conf).getUri();
4881     FSUtils.setFsDefault(conf, new Path(defaultFs));
4882     int ret = ToolRunner.run(new HBaseFsckTool(conf), args);
4883     System.exit(ret);
4884   }
4885 
4886   /**
4887    * This is a Tool wrapper that gathers -Dxxx=yyy configuration settings from the command line.
4888    */
4889   static class HBaseFsckTool extends Configured implements Tool {
4890     HBaseFsckTool(Configuration conf) { super(conf); }
4891     @Override
4892     public int run(String[] args) throws Exception {
4893       HBaseFsck hbck = new HBaseFsck(getConf());
4894       hbck.exec(hbck.executor, args);
4895       hbck.close();
4896       return hbck.getRetCode();
4897     }
4898   };
4899 
4900 
4901   public HBaseFsck exec(ExecutorService exec, String[] args) throws KeeperException, IOException,
4902     ServiceException, InterruptedException {
4903     long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
4904 
4905     boolean checkCorruptHFiles = false;
4906     boolean sidelineCorruptHFiles = false;
4907 
4908     // Process command-line args.
4909     for (int i = 0; i < args.length; i++) {
4910       String cmd = args[i];
4911       if (cmd.equals("-help") || cmd.equals("-h")) {
4912         return printUsageAndExit();
4913       } else if (cmd.equals("-details")) {
4914         setDisplayFullReport();
4915       } else if (cmd.equals("-exclusive")) {
4916         setForceExclusive();
4917       } else if (cmd.equals("-timelag")) {
4918         if (i == args.length - 1) {
4919           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");
4920           return printUsageAndExit();
4921         }
4922         try {
4923           long timelag = Long.parseLong(args[i+1]);
4924           setTimeLag(timelag);
4925         } catch (NumberFormatException e) {
4926           errors.reportError(ERROR_CODE.WRONG_USAGE, "-timelag needs a numeric value.");
4927           return printUsageAndExit();
4928         }
4929         i++;
4930       } else if (cmd.equals("-sleepBeforeRerun")) {
4931         if (i == args.length - 1) {
4932           errors.reportError(ERROR_CODE.WRONG_USAGE,
4933             "HBaseFsck: -sleepBeforeRerun needs a value.");
4934           return printUsageAndExit();
4935         }
4936         try {
4937           sleepBeforeRerun = Long.parseLong(args[i+1]);
4938         } catch (NumberFormatException e) {
4939           errors.reportError(ERROR_CODE.WRONG_USAGE, "-sleepBeforeRerun needs a numeric value.");
4940           return printUsageAndExit();
4941         }
4942         i++;
4943       } else if (cmd.equals("-sidelineDir")) {
4944         if (i == args.length - 1) {
4945           errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -sidelineDir needs a value.");
4946           return printUsageAndExit();
4947         }
4948         i++;
4949         setSidelineDir(args[i]);
4950       } else if (cmd.equals("-fix")) {
4951         errors.reportError(ERROR_CODE.WRONG_USAGE,
4952           "This option is deprecated, please use  -fixAssignments instead.");
4953         setFixAssignments(true);
4954       } else if (cmd.equals("-fixAssignments")) {
4955         setFixAssignments(true);
4956       } else if (cmd.equals("-fixMeta")) {
4957         setFixMeta(true);
4958       } else if (cmd.equals("-noHdfsChecking")) {
4959         setCheckHdfs(false);
4960       } else if (cmd.equals("-fixHdfsHoles")) {
4961         setFixHdfsHoles(true);
4962       } else if (cmd.equals("-fixHdfsOrphans")) {
4963         setFixHdfsOrphans(true);
4964       } else if (cmd.equals("-fixTableOrphans")) {
4965         setFixTableOrphans(true);
4966       } else if (cmd.equals("-fixHdfsOverlaps")) {
4967         setFixHdfsOverlaps(true);
4968       } else if (cmd.equals("-fixVersionFile")) {
4969         setFixVersionFile(true);
4970       } else if (cmd.equals("-sidelineBigOverlaps")) {
4971         setSidelineBigOverlaps(true);
4972       } else if (cmd.equals("-fixSplitParents")) {
4973         setFixSplitParents(true);
4974       } else if (cmd.equals("-removeParents")) {
4975         setRemoveParents(true);
4976       } else if (cmd.equals("-ignorePreCheckPermission")) {
4977         setIgnorePreCheckPermission(true);
4978       } else if (cmd.equals("-checkCorruptHFiles")) {
4979         checkCorruptHFiles = true;
4980       } else if (cmd.equals("-sidelineCorruptHFiles")) {
4981         sidelineCorruptHFiles = true;
4982       } else if (cmd.equals("-fixReferenceFiles")) {
4983         setFixReferenceFiles(true);
4984       } else if (cmd.equals("-fixHFileLinks")) {
4985         setFixHFileLinks(true);
4986       } else if (cmd.equals("-fixEmptyMetaCells")) {
4987         setFixEmptyMetaCells(true);
4988       } else if (cmd.equals("-repair")) {
4989         // this attempts to merge overlapping hdfs regions, needs testing
4990         // under load
4991         setFixHdfsHoles(true);
4992         setFixHdfsOrphans(true);
4993         setFixMeta(true);
4994         setFixAssignments(true);
4995         setFixHdfsOverlaps(true);
4996         setFixVersionFile(true);
4997         setSidelineBigOverlaps(true);
4998         setFixSplitParents(false);
4999         setCheckHdfs(true);
5000         setFixReferenceFiles(true);
5001         setFixHFileLinks(true);
5002         setFixTableLocks(true);
5003         setFixTableZNodes(true);
5004       } else if (cmd.equals("-repairHoles")) {
5005         // this will make all missing hdfs regions available but may lose data
5006         setFixHdfsHoles(true);
5007         setFixHdfsOrphans(false);
5008         setFixMeta(true);
5009         setFixAssignments(true);
5010         setFixHdfsOverlaps(false);
5011         setSidelineBigOverlaps(false);
5012         setFixSplitParents(false);
5013         setCheckHdfs(true);
5014       } else if (cmd.equals("-maxOverlapsToSideline")) {
5015         if (i == args.length - 1) {
5016           errors.reportError(ERROR_CODE.WRONG_USAGE,
5017             "-maxOverlapsToSideline needs a numeric value argument.");
5018           return printUsageAndExit();
5019         }
5020         try {
5021           int maxOverlapsToSideline = Integer.parseInt(args[i+1]);
5022           setMaxOverlapsToSideline(maxOverlapsToSideline);
5023         } catch (NumberFormatException e) {
5024           errors.reportError(ERROR_CODE.WRONG_USAGE,
5025             "-maxOverlapsToSideline needs a numeric value argument.");
5026           return printUsageAndExit();
5027         }
5028         i++;
5029       } else if (cmd.equals("-maxMerge")) {
5030         if (i == args.length - 1) {
5031           errors.reportError(ERROR_CODE.WRONG_USAGE,
5032             "-maxMerge needs a numeric value argument.");
5033           return printUsageAndExit();
5034         }
5035         try {
5036           int maxMerge = Integer.parseInt(args[i+1]);
5037           setMaxMerge(maxMerge);
5038         } catch (NumberFormatException e) {
5039           errors.reportError(ERROR_CODE.WRONG_USAGE,
5040             "-maxMerge needs a numeric value argument.");
5041           return printUsageAndExit();
5042         }
5043         i++;
5044       } else if (cmd.equals("-summary")) {
5045         setSummary();
5046       } else if (cmd.equals("-metaonly")) {
5047         setCheckMetaOnly();
5048       } else if (cmd.equals("-boundaries")) {
5049         setRegionBoundariesCheck();
5050       } else if (cmd.equals("-fixTableLocks")) {
5051         setFixTableLocks(true);
5052       } else if (cmd.equals("-fixReplication")) {
5053         setFixReplication(true);
5054       } else if (cmd.equals("-fixOrphanedTableZnodes")) {
5055         setFixTableZNodes(true);
5056       } else if (cmd.startsWith("-")) {
5057         errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
5058         return printUsageAndExit();
5059       } else {
5060         includeTable(TableName.valueOf(cmd));
5061         errors.print("Allow checking/fixes for table: " + cmd);
5062       }
5063     }
5064 
5065     errors.print("HBaseFsck command line options: " + StringUtils.join(args, " "));
5066 
5067     // pre-check current user has FS write permission or not
5068     try {
5069       preCheckPermission();
5070     } catch (AccessDeniedException ace) {
5071       Runtime.getRuntime().exit(-1);
5072     } catch (IOException ioe) {
5073       Runtime.getRuntime().exit(-1);
5074     }
5075 
5076     // do the real work of hbck
5077     connect();
5078 
5079     try {
5080       // if corrupt file mode is on, first fix them since they may be opened later
5081       if (checkCorruptHFiles || sidelineCorruptHFiles) {
5082         LOG.info("Checking all hfiles for corruption");
5083         HFileCorruptionChecker hfcc = createHFileCorruptionChecker(sidelineCorruptHFiles);
5084         setHFileCorruptionChecker(hfcc); // so we can get result
5085         Collection<TableName> tables = getIncludedTables();
5086         Collection<Path> tableDirs = new ArrayList<Path>();
5087         Path rootdir = FSUtils.getRootDir(getConf());
5088         if (tables.size() > 0) {
5089           for (TableName t : tables) {
5090             tableDirs.add(FSUtils.getTableDir(rootdir, t));
5091           }
5092         } else {
5093           tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(getConf()), rootdir);
5094         }
5095         hfcc.checkTables(tableDirs);
5096         hfcc.report(errors);
5097       }
5098 
5099       // check and fix table integrity, region consistency.
5100       int code = onlineHbck();
5101       setRetCode(code);
5102       // If we have changed the HBase state it is better to run hbck again
5103       // to see if we haven't broken something else in the process.
5104       // We run it only once more because otherwise we can easily fall into
5105       // an infinite loop.
5106       if (shouldRerun()) {
5107         try {
5108           LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
5109           Thread.sleep(sleepBeforeRerun);
5110         } catch (InterruptedException ie) {
5111           LOG.warn("Interrupted while sleeping");
5112           return this;
5113         }
5114         // Just report
5115         setFixAssignments(false);
5116         setFixMeta(false);
5117         setFixHdfsHoles(false);
5118         setFixHdfsOverlaps(false);
5119         setFixVersionFile(false);
5120         setFixTableOrphans(false);
5121         errors.resetErrors();
5122         code = onlineHbck();
5123         setRetCode(code);
5124       }
5125     } finally {
5126       IOUtils.closeQuietly(this);
5127     }
5128     return this;
5129   }
5130 
5131   /**
5132    * ls -r for debugging purposes
5133    */
5134   void debugLsr(Path p) throws IOException {
5135     debugLsr(getConf(), p, errors);
5136   }
5137 
5138   /**
5139    * ls -r for debugging purposes
5140    */
5141   public static void debugLsr(Configuration conf,
5142       Path p) throws IOException {
5143     debugLsr(conf, p, new PrintingErrorReporter());
5144   }
5145 
5146   /**
5147    * ls -r for debugging purposes
5148    */
5149   public static void debugLsr(Configuration conf,
5150       Path p, ErrorReporter errors) throws IOException {
5151     if (!LOG.isDebugEnabled() || p == null) {
5152       return;
5153     }
5154     FileSystem fs = p.getFileSystem(conf);
5155 
5156     if (!fs.exists(p)) {
5157       // nothing
5158       return;
5159     }
5160     errors.print(p.toString());
5161 
5162     if (fs.isFile(p)) {
5163       return;
5164     }
5165 
5166     if (fs.getFileStatus(p).isDirectory()) {
5167       FileStatus[] fss= fs.listStatus(p);
5168       for (FileStatus status : fss) {
5169         debugLsr(conf, status.getPath(), errors);
5170       }
5171     }
5172   }
5173 }