1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.util;
20
21 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
23 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.checkRegionBoundaries;
24 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
25 import static org.junit.Assert.assertEquals;
26 import static org.junit.Assert.assertFalse;
27 import static org.junit.Assert.assertNotEquals;
28 import static org.junit.Assert.assertNotNull;
29 import static org.junit.Assert.assertTrue;
30 import static org.junit.Assert.fail;
31
32 import java.io.IOException;
33 import java.util.ArrayList;
34 import java.util.Arrays;
35 import java.util.Collection;
36 import java.util.HashMap;
37 import java.util.HashSet;
38 import java.util.LinkedList;
39 import java.util.List;
40 import java.util.Map;
41 import java.util.NavigableMap;
42 import java.util.Set;
43 import java.util.concurrent.Callable;
44 import java.util.concurrent.CountDownLatch;
45 import java.util.concurrent.ExecutorService;
46 import java.util.concurrent.Executors;
47 import java.util.concurrent.Future;
48 import java.util.concurrent.ScheduledThreadPoolExecutor;
49 import java.util.concurrent.SynchronousQueue;
50 import java.util.concurrent.ThreadPoolExecutor;
51 import java.util.concurrent.TimeUnit;
52 import java.util.concurrent.atomic.AtomicBoolean;
53
54 import org.apache.commons.io.IOUtils;
55 import org.apache.commons.logging.Log;
56 import org.apache.commons.logging.LogFactory;
57 import org.apache.hadoop.conf.Configuration;
58 import org.apache.hadoop.fs.FileStatus;
59 import org.apache.hadoop.fs.FileSystem;
60 import org.apache.hadoop.fs.Path;
61 import org.apache.hadoop.hbase.ClusterStatus;
62 import org.apache.hadoop.hbase.HBaseTestingUtility;
63 import org.apache.hadoop.hbase.HColumnDescriptor;
64 import org.apache.hadoop.hbase.HConstants;
65 import org.apache.hadoop.hbase.HRegionInfo;
66 import org.apache.hadoop.hbase.HRegionLocation;
67 import org.apache.hadoop.hbase.HTableDescriptor;
68 import org.apache.hadoop.hbase.MetaTableAccessor;
69 import org.apache.hadoop.hbase.MiniHBaseCluster;
70 import org.apache.hadoop.hbase.ServerName;
71 import org.apache.hadoop.hbase.TableExistsException;
72 import org.apache.hadoop.hbase.TableName;
73 import org.apache.hadoop.hbase.Waiter.Predicate;
74 import org.apache.hadoop.hbase.client.Admin;
75 import org.apache.hadoop.hbase.client.ClusterConnection;
76 import org.apache.hadoop.hbase.client.Connection;
77 import org.apache.hadoop.hbase.client.ConnectionFactory;
78 import org.apache.hadoop.hbase.client.Delete;
79 import org.apache.hadoop.hbase.client.Durability;
80 import org.apache.hadoop.hbase.client.Get;
81 import org.apache.hadoop.hbase.client.HBaseAdmin;
82 import org.apache.hadoop.hbase.client.HConnection;
83 import org.apache.hadoop.hbase.client.HTable;
84 import org.apache.hadoop.hbase.client.MetaScanner;
85 import org.apache.hadoop.hbase.client.Put;
86 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
87 import org.apache.hadoop.hbase.client.Result;
88 import org.apache.hadoop.hbase.client.ResultScanner;
89 import org.apache.hadoop.hbase.client.Scan;
90 import org.apache.hadoop.hbase.client.Table;
91 import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
92 import org.apache.hadoop.hbase.coprocessor.BaseMasterObserver;
93 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
94 import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
95 import org.apache.hadoop.hbase.coprocessor.ObserverContext;
96 import org.apache.hadoop.hbase.io.HFileLink;
97 import org.apache.hadoop.hbase.io.hfile.HFile;
98 import org.apache.hadoop.hbase.io.hfile.HFileContext;
99 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
100 import org.apache.hadoop.hbase.io.hfile.TestHFile;
101 import org.apache.hadoop.hbase.master.AssignmentManager;
102 import org.apache.hadoop.hbase.master.HMaster;
103 import org.apache.hadoop.hbase.master.RegionState;
104 import org.apache.hadoop.hbase.master.RegionStates;
105 import org.apache.hadoop.hbase.master.TableLockManager;
106 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
107 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
108 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
109 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
110 import org.apache.hadoop.hbase.regionserver.HRegion;
111 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
112 import org.apache.hadoop.hbase.regionserver.HRegionServer;
113 import org.apache.hadoop.hbase.regionserver.SplitTransactionImpl;
114 import org.apache.hadoop.hbase.replication.ReplicationFactory;
115 import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
116 import org.apache.hadoop.hbase.replication.ReplicationQueues;
117 import org.apache.hadoop.hbase.testclassification.LargeTests;
118 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
119 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
120 import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
121 import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
122 import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
123 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
124 import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
125 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
126 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
127 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
128 import org.apache.zookeeper.KeeperException;
129 import org.junit.AfterClass;
130 import org.junit.Assert;
131 import org.junit.Before;
132 import org.junit.BeforeClass;
133 import org.junit.Ignore;
134 import org.junit.Test;
135 import org.junit.experimental.categories.Category;
136 import org.junit.rules.TestName;
137
138 import com.google.common.collect.Multimap;
139
140
141
142
143 @Category(LargeTests.class)
144 public class TestHBaseFsck {
// Shared fixtures for the whole class: the mini cluster, connection, and
// executors are created once in setUpBeforeClass() and torn down in
// tearDownAfterClass().
static final int POOL_SIZE = 7; // thread-pool size for table and hbck executors
private static final Log LOG = LogFactory.getLog(TestHBaseFsck.class);
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private final static Configuration conf = TEST_UTIL.getConfiguration();
private final static String FAM_STR = "fam";
private final static byte[] FAM = Bytes.toBytes(FAM_STR);
// Milliseconds to wait for a region to show up on a server in the
// assertRegionOnServer() checks below.
private final static int REGION_ONLINE_TIMEOUT = 800;
private static RegionStates regionStates;
private static ExecutorService tableExecutorService;
private static ScheduledThreadPoolExecutor hbfsckExecutorService;
private static ClusterConnection connection;
private static Admin admin;


// Handle to the table created by the current test (see setupTable*).
private HTable tbl;
// Split keys for test tables: regions [,A) [A,B) [B,C) [C,).
private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
  Bytes.toBytes("B"), Bytes.toBytes("C") };

// Two rows per region, so countRows() == 8 when a test table is healthy.
private final static byte[][] ROWKEYS= new byte[][] {
  Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
  Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
166
/**
 * Boots a three-node mini cluster once for the whole class, installs the
 * MasterSyncObserver coprocessor, sizes the thread pools, disables the
 * balancer, and waits for meta and namespace to be assigned.
 */
@BeforeClass
public static void setUpBeforeClass() throws Exception {
  // Master coprocessor used to synchronize on table create/delete.
  TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
    MasterSyncObserver.class.getName());

  conf.setInt("hbase.regionserver.handler.count", 2);
  conf.setInt("hbase.regionserver.metahandler.count", 30);

  conf.setInt("hbase.htable.threads.max", POOL_SIZE);
  conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
  conf.setInt("hbase.hconnection.threads.core", POOL_SIZE);
  conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
  conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
  TEST_UTIL.startMiniCluster(3);

  // Pool backing Table instances obtained via connection.getTable(...).
  tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
    new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));

  // Pool handed to HBaseFsck instances created by the tests.
  hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);

  AssignmentManager assignmentManager =
    TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
  regionStates = assignmentManager.getRegionStates();

  connection = (ClusterConnection) TEST_UTIL.getConnection();

  // Balancer off so tests can pin regions to specific servers.
  admin = connection.getAdmin();
  admin.setBalancerRunning(false, true);

  TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
  TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
}
199
/**
 * Stops the executors and the shared admin, then shuts down the mini
 * cluster. Order matters: executors first, cluster last.
 */
@AfterClass
public static void tearDownAfterClass() throws Exception {
  tableExecutorService.shutdown();
  hbfsckExecutorService.shutdown();
  admin.close();
  TEST_UTIL.shutdownMiniCluster();
}
207
/**
 * Resets any injected environment-edge (clock) so each test starts with
 * the default time source.
 */
@Before
public void setUp() {
  EnvironmentEdgeManager.reset();
}
212
213
214
215
216
217 @Test public void testHbckReportReplicaLingeringSplitParent() throws Exception {
218 TableName table = TableName.valueOf("testHbckReportReplicaLingeringSplitParent");
219
220 try {
221 setupTableWithRegionReplica(table, 2);
222 TEST_UTIL.getHBaseAdmin().flush(table.getName());
223
224
225 TEST_UTIL.getHBaseAdmin().enableCatalogJanitor(false);
226 admin.split(table, Bytes.toBytes("A1"));
227
228 Thread.sleep(1000);
229
230 assertNoErrors(doFsck(conf, false));
231 } finally {
232 cleanupTable(table);
233
234 TEST_UTIL.getHBaseAdmin().enableCatalogJanitor(true);
235 }
236 }
237
/**
 * End-to-end smoke test: creates a table, corrupts its meta location by
 * pointing the region row at a different live region server, verifies hbck
 * reports SERVER_DOES_NOT_MATCH_META and that -fix repairs the assignment,
 * then confirms the table is scannable again.
 */
@Test (timeout=180000)
public void testHBaseFsck() throws Exception {
  assertNoErrors(doFsck(conf, false));
  TableName table = TableName.valueOf("tableBadMetaAssign");
  HTableDescriptor desc = new HTableDescriptor(table);
  HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
  desc.addFamily(hcd);
  createTable(TEST_UTIL, desc, null);

  // A fresh single-region table must be clean.
  assertNoErrors(doFsck(conf, false));

  // Locate the table's meta row; "<table>,," sorts just before the
  // table's first region row.
  Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
  Scan scan = new Scan();
  scan.setStartRow(Bytes.toBytes(table+",,"));
  ResultScanner scanner = meta.getScanner(scan);
  HRegionInfo hri = null;

  // Current server/startcode recorded in meta for the region.
  Result res = scanner.next();
  ServerName currServer =
    ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
      HConstants.SERVER_QUALIFIER));
  long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
    HConstants.STARTCODE_QUALIFIER));

  // Rewrite the location columns to point at some OTHER region server,
  // making meta disagree with reality.
  for (JVMClusterUtil.RegionServerThread rs :
      TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {

    ServerName sn = rs.getRegionServer().getServerName();

    // Pick the first server that differs by host:port or start code.
    if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
        startCode != sn.getStartcode()) {
      Put put = new Put(res.getRow());
      put.setDurability(Durability.SKIP_WAL);
      put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
        Bytes.toBytes(sn.getHostAndPort()));
      put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
        Bytes.toBytes(sn.getStartcode()));
      meta.put(put);
      hri = MetaTableAccessor.getHRegionInfo(res);
      break;
    }
  }

  // hbck -fix should detect the stale location and reassign the region.
  assertErrors(doFsck(conf, true), new ERROR_CODE[]{
    ERROR_CODE.SERVER_DOES_NOT_MATCH_META});

  // NOTE(review): hri stays null if no differing server was found; with a
  // 3-server cluster one always differs — confirm if cluster size changes.
  TEST_UTIL.getHBaseCluster().getMaster()
    .getAssignmentManager().waitForAssignment(hri);

  // Should be fixed by now.
  assertNoErrors(doFsck(conf, false));

  // Confirm the region is actually reachable by opening a scanner on it.
  Table t = connection.getTable(table, tableExecutorService);
  ResultScanner s = t.getScanner(new Scan());
  s.close();
  t.close();

  scanner.close();
  meta.close();
}
304
/**
 * Knocks hbase:meta offline (closes the region, marks it offline in the
 * master, deletes its ZK location) and verifies hbck -fix can bring it
 * back: the -fix run reports the missing meta region, the follow-up run
 * is clean.
 */
@Test(timeout=180000)
public void testFixAssignmentsWhenMETAinTransition() throws Exception {
  MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
  admin.closeRegion(cluster.getServerHoldingMeta(), HRegionInfo.FIRST_META_REGIONINFO);
  regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
  new MetaTableLocator().deleteMetaLocation(cluster.getMaster().getZooKeeper());
  assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
  HBaseFsck hbck = doFsck(conf, true); // -fix reassigns meta
  assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
    ERROR_CODE.NULL_META_REGION });
  assertNoErrors(doFsck(conf, false));
}
317
318
319
320
321
322 @Test (timeout=180000)
323 public void testSplitAndDupeRegionWithRegionReplica() throws Exception {
324 TableName table =
325 TableName.valueOf("testSplitAndDupeRegionWithRegionReplica");
326 Table meta = null;
327
328 try {
329 setupTableWithRegionReplica(table, 2);
330
331 assertNoErrors(doFsck(conf, false));
332 assertEquals(ROWKEYS.length, countRows());
333
334
335 admin.enableCatalogJanitor(false);
336 meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
337 HRegionLocation loc = this.connection.getRegionLocation(table, SPLITS[0], false);
338 HRegionInfo hriParent = loc.getRegionInfo();
339
340
341 this.connection.getAdmin().split(table, Bytes.toBytes("A@"));
342 Thread.sleep(1000);
343
344
345 regionStates.updateRegionState(hriParent, RegionState.State.CLOSED);
346 TEST_UTIL.assignRegion(hriParent);
347 MetaTableAccessor.addRegionToMeta(meta, hriParent);
348 ServerName server = regionStates.getRegionServerOfRegion(hriParent);
349
350 if (server != null)
351 TEST_UTIL.assertRegionOnServer(hriParent, server, REGION_ONLINE_TIMEOUT);
352
353 while (findDeployedHSI(getDeployedHRIs((HBaseAdmin) admin), hriParent) == null) {
354 Thread.sleep(250);
355 }
356
357 LOG.debug("Finished assignment of parent region");
358
359
360 HBaseFsck hbck = doFsck(conf, false);
361 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.NOT_DEPLOYED,
362 HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS,
363 HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS, HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
364 assertEquals(3, hbck.getOverlapGroups(table).size());
365
366
367 hbck = new HBaseFsck(conf, hbfsckExecutorService);
368 hbck.setDisplayFullReport();
369 hbck.setTimeLag(0);
370 hbck.setFixHdfsOverlaps(true);
371 hbck.setRemoveParents(true);
372 hbck.setFixReferenceFiles(true);
373 hbck.setFixHFileLinks(true);
374 hbck.connect();
375 hbck.onlineHbck();
376 hbck.close();
377
378 hbck = doFsck(conf, false);
379
380 assertNoErrors(hbck);
381 assertEquals(0, hbck.getOverlapGroups(table).size());
382 assertEquals(ROWKEYS.length, countRows());
383 } finally {
384 cleanupTable(table);
385 }
386 }
387
388
389
390
391
392 @Test (timeout=180000)
393 public void testSplitAndDupeRegion() throws Exception {
394 TableName table =
395 TableName.valueOf("testSplitAndDupeRegion");
396 Table meta = null;
397
398 try {
399 setupTable(table);
400
401 assertNoErrors(doFsck(conf, false));
402 assertEquals(ROWKEYS.length, countRows());
403
404
405 admin.enableCatalogJanitor(false);
406 meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
407 HRegionLocation loc = this.connection.getRegionLocation(table, SPLITS[0], false);
408 HRegionInfo hriParent = loc.getRegionInfo();
409
410
411 this.connection.getAdmin().split(table, Bytes.toBytes("A@"));
412 Thread.sleep(1000);
413
414
415 regionStates.updateRegionState(hriParent, RegionState.State.CLOSED);
416 TEST_UTIL.assignRegion(hriParent);
417 MetaTableAccessor.addRegionToMeta(meta, hriParent);
418 ServerName server = regionStates.getRegionServerOfRegion(hriParent);
419
420 if (server != null)
421 TEST_UTIL.assertRegionOnServer(hriParent, server, REGION_ONLINE_TIMEOUT);
422
423 while (findDeployedHSI(getDeployedHRIs((HBaseAdmin) admin), hriParent) == null) {
424 Thread.sleep(250);
425 }
426
427 LOG.debug("Finished assignment of parent region");
428
429
430 HBaseFsck hbck = doFsck(conf, false);
431 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS,
432 HBaseFsck.ErrorReporter.ERROR_CODE.DUPE_STARTKEYS, HBaseFsck.ErrorReporter.ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
433 assertEquals(3, hbck.getOverlapGroups(table).size());
434
435
436 hbck = new HBaseFsck(conf, hbfsckExecutorService);
437 hbck.setDisplayFullReport();
438 hbck.setTimeLag(0);
439 hbck.setFixHdfsOverlaps(true);
440 hbck.setRemoveParents(true);
441 hbck.setFixReferenceFiles(true);
442 hbck.setFixHFileLinks(true);
443 hbck.connect();
444 hbck.onlineHbck();
445 hbck.close();
446
447 hbck = doFsck(conf, false);
448
449 assertNoErrors(hbck);
450 assertEquals(0, hbck.getOverlapGroups(table).size());
451 assertEquals(ROWKEYS.length, countRows());
452 } finally {
453 cleanupTable(table);
454 }
455 }
456
457
458
459
460 private HRegionInfo createRegion(final HTableDescriptor
461 htd, byte[] startKey, byte[] endKey)
462 throws IOException {
463 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
464 HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
465 MetaTableAccessor.addRegionToMeta(meta, hri);
466 meta.close();
467 return hri;
468 }
469
470
471
472
473 private void dumpMeta(TableName tableName) throws IOException {
474 List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
475 for (byte[] row : metaRows) {
476 LOG.info(Bytes.toString(row));
477 }
478 }
479
480
481
482
483
/**
 * Closes the given region on server {@code sn} and, for non-meta regions,
 * marks it offline in the master. IOExceptions are logged and swallowed on
 * purpose — this is best-effort so tests can force inconsistent states.
 */
private void undeployRegion(Connection conn, ServerName sn,
    HRegionInfo hri) throws IOException, InterruptedException {
  try {
    HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) conn, sn, hri);
    if (!hri.isMetaTable()) {
      admin.offline(hri.getRegionName());
    }
  } catch (IOException ioe) {
    LOG.warn("Got exception when attempting to offline region "
      + Bytes.toString(hri.getRegionName()), ioe);
  }
}
496
497
498
499
500
501
/**
 * Convenience overload of the nine-argument deleteRegion that keeps the
 * .regioninfo file and targets the default (primary) replica.
 */
private void deleteRegion(Configuration conf, final HTableDescriptor htd,
    byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
    boolean hdfs) throws IOException, InterruptedException {
  deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false, HRegionInfo.DEFAULT_REPLICA_ID);
}
507
508
509
510
511
512
513
514
515
/**
 * Surgically damages the region of {@code htd} whose key range is exactly
 * [startKey, endKey) and whose replica id matches, in any combination of:
 *
 * @param unassign       close the region and offline it in the master
 * @param metaRow        delete the region's row from hbase:meta
 * @param hdfs           delete the region's whole directory on HDFS
 * @param regionInfoOnly delete only the .regioninfo file on HDFS
 * @param replicaId      which replica of the region to target
 */
private void deleteRegion(Configuration conf, final HTableDescriptor htd,
    byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
    boolean hdfs, boolean regionInfoOnly, int replicaId)
    throws IOException, InterruptedException {
  LOG.info("** Before delete:");
  dumpMeta(htd.getTableName());

  List<HRegionLocation> locations = tbl.getAllRegionLocations();
  for (HRegionLocation location : locations) {
    HRegionInfo hri = location.getRegionInfo();
    ServerName hsa = location.getServerName();
    // Only the region with the exact requested boundaries and replica id.
    if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
        && Bytes.compareTo(hri.getEndKey(), endKey) == 0
        && hri.getReplicaId() == replicaId) {

      LOG.info("RegionName: " +hri.getRegionNameAsString());
      byte[] deleteRow = hri.getRegionName();

      if (unassign) {
        LOG.info("Undeploying region " + hri + " from server " + hsa);
        undeployRegion(connection, hsa, hri);
      }

      if (regionInfoOnly) {
        // Remove just the .regioninfo file, leaving the data files intact.
        LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
        Path rootDir = FSUtils.getRootDir(conf);
        FileSystem fs = rootDir.getFileSystem(conf);
        Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
          hri.getEncodedName());
        Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
        fs.delete(hriPath, true);
      }

      if (hdfs) {
        // Remove the region's entire directory tree.
        LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
        Path rootDir = FSUtils.getRootDir(conf);
        FileSystem fs = rootDir.getFileSystem(conf);
        Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
          hri.getEncodedName());
        HBaseFsck.debugLsr(conf, p);
        boolean success = fs.delete(p, true);
        LOG.info("Deleted " + p + " sucessfully? " + success);
        HBaseFsck.debugLsr(conf, p);
      }

      if (metaRow) {
        // Delete the region's catalog row.
        try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
          Delete delete = new Delete(deleteRow);
          meta.delete(delete);
        }
      }
    }
    LOG.info(hri.toString() + hsa.toString());
  }

  TEST_UTIL.getMetaTableRows(htd.getTableName());
  LOG.info("*** After delete:");
  dumpMeta(htd.getTableName());
}
575
576
577
578
579
580
581
582
583
584
/**
 * Creates a pre-split, single-replica test table and loads the standard
 * ROWKEYS rows; see {@link #setupTableWithRegionReplica}.
 */
void setupTable(TableName tablename) throws Exception {
  setupTableWithRegionReplica(tablename, 1);
}
588
589
590
591
592
593
594
595
596
597
598 void setupTableWithRegionReplica(TableName tablename, int replicaCount) throws Exception {
599 HTableDescriptor desc = new HTableDescriptor(tablename);
600 desc.setRegionReplication(replicaCount);
601 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
602 desc.addFamily(hcd);
603 createTable(TEST_UTIL, desc, SPLITS);
604
605 tbl = (HTable) connection.getTable(tablename, tableExecutorService);
606 List<Put> puts = new ArrayList<Put>();
607 for (byte[] row : ROWKEYS) {
608 Put p = new Put(row);
609 p.add(FAM, Bytes.toBytes("val"), row);
610 puts.add(p);
611 }
612 tbl.put(puts);
613 tbl.flushCommits();
614 }
615
616
617
618
619 int countRows() throws IOException {
620 Scan s = new Scan();
621 ResultScanner rs = tbl.getScanner(s);
622 int i = 0;
623 while(rs.next() !=null) {
624 i++;
625 }
626 return i;
627 }
628
629
630
631
632 int countRows(byte[] start, byte[] end) throws IOException {
633 Scan s = new Scan(start, end);
634 ResultScanner rs = tbl.getScanner(s);
635 int i = 0;
636 while (rs.next() != null) {
637 i++;
638 }
639 return i;
640 }
641
642
643
644
645
646
647 void cleanupTable(TableName tablename) throws Exception {
648 if (tbl != null) {
649 tbl.close();
650 tbl = null;
651 }
652
653 ((ClusterConnection) connection).clearRegionCache();
654 deleteTable(TEST_UTIL, tablename);
655 }
656
657
658
659
660 @Test (timeout=180000)
661 public void testHBaseFsckClean() throws Exception {
662 assertNoErrors(doFsck(conf, false));
663 TableName table = TableName.valueOf("tableClean");
664 try {
665 HBaseFsck hbck = doFsck(conf, false);
666 assertNoErrors(hbck);
667
668 setupTable(table);
669 assertEquals(ROWKEYS.length, countRows());
670
671
672 hbck = doFsck(conf, false);
673 assertNoErrors(hbck);
674 assertEquals(0, hbck.getOverlapGroups(table).size());
675 assertEquals(ROWKEYS.length, countRows());
676 } finally {
677 cleanupTable(table);
678 }
679 }
680
681
682
683
684 @Test (timeout=180000)
685 public void testHbckThreadpooling() throws Exception {
686 TableName table =
687 TableName.valueOf("tableDupeStartKey");
688 try {
689
690 setupTable(table);
691
692
693 Configuration newconf = new Configuration(conf);
694 newconf.setInt("hbasefsck.numthreads", 1);
695 assertNoErrors(doFsck(newconf, false));
696
697
698 } finally {
699 cleanupTable(table);
700 }
701 }
702
703 @Test (timeout=180000)
704 public void testHbckFixOrphanTable() throws Exception {
705 TableName table = TableName.valueOf("tableInfo");
706 FileSystem fs = null;
707 Path tableinfo = null;
708 try {
709 setupTable(table);
710
711 Path hbaseTableDir = FSUtils.getTableDir(
712 FSUtils.getRootDir(conf), table);
713 fs = hbaseTableDir.getFileSystem(conf);
714 FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
715 tableinfo = status.getPath();
716 fs.rename(tableinfo, new Path("/.tableinfo"));
717
718
719 HBaseFsck hbck = doFsck(conf, false);
720 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
721
722
723 hbck = doFsck(conf, true);
724 assertNoErrors(hbck);
725 status = null;
726 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
727 assertNotNull(status);
728
729 HTableDescriptor htd = admin.getTableDescriptor(table);
730 htd.setValue("NOT_DEFAULT", "true");
731 admin.disableTable(table);
732 admin.modifyTable(table, htd);
733 admin.enableTable(table);
734 fs.delete(status.getPath(), true);
735
736
737 htd = admin.getTableDescriptor(table);
738 hbck = doFsck(conf, true);
739 assertNoErrors(hbck);
740 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
741 assertNotNull(status);
742 htd = admin.getTableDescriptor(table);
743 assertEquals(htd.getValue("NOT_DEFAULT"), "true");
744 } finally {
745 if (fs != null) {
746 fs.rename(new Path("/.tableinfo"), tableinfo);
747 }
748 cleanupTable(table);
749 }
750 }
751
752
753
754
755
756
757 @Test (timeout=180000)
758 public void testParallelHbck() throws Exception {
759 final ExecutorService service;
760 final Future<HBaseFsck> hbck1,hbck2;
761
762 class RunHbck implements Callable<HBaseFsck>{
763 boolean fail = true;
764 @Override
765 public HBaseFsck call(){
766 Configuration c = new Configuration(conf);
767 c.setInt("hbase.hbck.lockfile.attempts", 1);
768
769
770 c.setInt("hbase.hbck.lockfile.maxwaittime", 3);
771 try{
772 return doFsck(c, true);
773 } catch(Exception e){
774 if (e.getMessage().contains("Duplicate hbck")) {
775 fail = false;
776 }
777 }
778
779 if (fail) fail();
780 return null;
781 }
782 }
783 service = Executors.newFixedThreadPool(2);
784 hbck1 = service.submit(new RunHbck());
785 hbck2 = service.submit(new RunHbck());
786 service.shutdown();
787
788 service.awaitTermination(15, TimeUnit.SECONDS);
789 HBaseFsck h1 = hbck1.get();
790 HBaseFsck h2 = hbck2.get();
791
792 assert(h1 == null || h2 == null);
793 if (h1 != null) {
794 assert(h1.getRetCode() >= 0);
795 }
796 if (h2 != null) {
797 assert(h2.getRetCode() >= 0);
798 }
799 }
800
801
802
803
804
805
806
/**
 * Two concurrent hbck runs where BOTH are configured to retry the lock
 * file with backoff: unlike {@link #testParallelHbck()}, both should
 * eventually get the lock (one after the other) and succeed.
 */
@Test (timeout=180000)
public void testParallelWithRetriesHbck() throws Exception {
  final ExecutorService service;
  final Future<HBaseFsck> hbck1,hbck2;

  // Generous retry budget: up to 15 attempts, 200ms–6s sleeps, 80s max
  // wait — enough for the second run to outlast the first run's lock.
  final int timeoutInSeconds = 80;
  final int sleepIntervalInMilliseconds = 200;
  final int maxSleepTimeInMilliseconds = 6000;
  final int maxRetryAttempts = 15;

  class RunHbck implements Callable<HBaseFsck>{

    @Override
    public HBaseFsck call() throws Exception {
      // Configure hbck to keep retrying the lock instead of failing fast.
      Configuration c = new Configuration(conf);
      c.setInt("hbase.hbck.lockfile.maxwaittime", timeoutInSeconds);
      c.setInt("hbase.hbck.lockfile.attempt.sleep.interval", sleepIntervalInMilliseconds);
      c.setInt("hbase.hbck.lockfile.attempt.maxsleeptime", maxSleepTimeInMilliseconds);
      c.setInt("hbase.hbck.lockfile.attempts", maxRetryAttempts);
      return doFsck(c, false);
    }
  }

  service = Executors.newFixedThreadPool(2);
  hbck1 = service.submit(new RunHbck());
  hbck2 = service.submit(new RunHbck());
  service.shutdown();
  // Allow up to twice the per-run wait for both runs to complete.
  service.awaitTermination(timeoutInSeconds * 2, TimeUnit.SECONDS);
  HBaseFsck h1 = hbck1.get();
  HBaseFsck h2 = hbck2.get();

  // Both runs must have completed and succeeded.
  assertNotNull(h1);
  assertNotNull(h2);
  assert(h1.getRetCode() >= 0);
  assert(h2.getRetCode() >= 0);

}
852
853
854
855
856
/**
 * Fabricates an extra region [A,A2) sharing its start key with the
 * existing [A,B) region, verifies hbck reports DUPE_STARTKEYS for both
 * regions and that -fix resolves the overlap without losing rows, then
 * checks the cluster recovers after stopping the duplicate's server.
 */
@Test (timeout=180000)
public void testDupeStartKey() throws Exception {
  TableName table =
    TableName.valueOf("tableDupeStartKey");
  try {
    setupTable(table);
    assertNoErrors(doFsck(conf, false));
    assertEquals(ROWKEYS.length, countRows());

    // Insert and deploy a region that duplicates the "A" start key.
    HRegionInfo hriDupe =
      createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("A2"));
    TEST_UTIL.assignRegion(hriDupe);
    ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
    TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);

    // One DUPE_STARTKEYS per affected region.
    HBaseFsck hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
      ERROR_CODE.DUPE_STARTKEYS});
    assertEquals(2, hbck.getOverlapGroups(table).size());
    assertEquals(ROWKEYS.length, countRows());

    // Repair.
    doFsck(conf,true);

    // Clean again, with no rows lost.
    HBaseFsck hbck2 = doFsck(conf,false);
    assertNoErrors(hbck2);
    assertEquals(0, hbck2.getOverlapGroups(table).size());
    assertEquals(ROWKEYS.length, countRows());

    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    long totalRegions = cluster.countServedRegions();

    // Kill the server that hosted the duplicate region...
    cluster.stopRegionServer(server);
    cluster.waitForRegionServerToStop(server, 60);

    // ...and wait for its regions to be served elsewhere.
    while (cluster.countServedRegions() < totalRegions) {
      Thread.sleep(100);
    }

    // The cluster must still be consistent.
    HBaseFsck hbck3 = doFsck(conf,false);
    assertNoErrors(hbck3);
  } finally {
    cleanupTable(table);
  }
}
907
908
909
910
/**
 * Fabricates a region [A2,B2) overlapping two existing regions, verifies
 * hbck reports OVERLAP_IN_REGION_CHAIN and that -fix resolves it without
 * losing rows, then checks the cluster recovers after stopping the
 * overlapping region's server.
 */
@Test(timeout=180000)
public void testOverlapRegions() throws Exception {
  MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
  TableName table =
    TableName.valueOf("tableOverlapRegions");
  HRegionInfo hri;
  ServerName server;
  try {
    setupTable(table);
    assertNoErrors(doFsck(conf, false));
    assertEquals(ROWKEYS.length, countRows());

    // Insert and deploy a region straddling the A/B boundary.
    hri = createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2"));
    TEST_UTIL.assignRegion(hri);
    server = regionStates.getRegionServerOfRegion(hri);
    TEST_UTIL.assertRegionOnServer(hri, server, REGION_ONLINE_TIMEOUT);

    HBaseFsck hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
      ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
    assertEquals(3, hbck.getOverlapGroups(table).size());
    assertEquals(ROWKEYS.length, countRows());

    // Repair.
    doFsck(conf, true);

    // Clean again, with no rows lost.
    HBaseFsck hbck2 = doFsck(conf,false);
    assertNoErrors(hbck2);
    assertEquals(0, hbck2.getOverlapGroups(table).size());
    assertEquals(ROWKEYS.length, countRows());

    long totalRegions = cluster.countServedRegions();

    // Kill the server that hosted the overlapping region...
    cluster.stopRegionServer(server);
    cluster.waitForRegionServerToStop(server, 60);

    // ...and wait for its regions to be served elsewhere.
    while (cluster.countServedRegions() < totalRegions) {
      Thread.sleep(100);
    }

    // The cluster must still be consistent.
    HBaseFsck hbck3 = doFsck(conf,false);
    assertNoErrors(hbck3);
  } finally {
    cleanupTable(table);
  }
}
961
962
963
964
965
966 @Test (timeout=180000)
967 public void testHbckWithRegionReplica() throws Exception {
968 TableName table =
969 TableName.valueOf("testHbckWithRegionReplica");
970 try {
971 setupTableWithRegionReplica(table, 2);
972 TEST_UTIL.getHBaseAdmin().flush(table.getName());
973 assertNoErrors(doFsck(conf, false));
974 } finally {
975 cleanupTable(table);
976 }
977 }
978
979
980
981
982
983 @Test
984 public void testHbckReplicaRegionAsKeyInMeta() throws Exception {
985 TableName table = TableName.valueOf("testHbckReplicaRegionAsKeyInMeta");
986 try {
987 setupTableWithRegionReplica(table, 2);
988 TEST_UTIL.getHBaseAdmin().flush(table.getName());
989
990 HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
991 HRegionInfo hri = new HRegionInfo(table, SPLITS[0], SPLITS[2], false, 1500328224175L, 1);
992 Put put = MetaTableAccessor.makePutFromRegionInfo(hri);
993 meta.put(put);
994
995 assertErrors(doFsck(conf, false),
996 new HBaseFsck.ErrorReporter.ERROR_CODE[] {
997 HBaseFsck.ErrorReporter.ERROR_CODE.EMPTY_META_CELL });
998
999
1000 doFsck(conf, true);
1001
1002
1003 assertNoErrors(doFsck(conf, false));
1004 } finally {
1005 cleanupTable(table);
1006 }
1007 }
1008
/**
 * Unassigns replica 1 of the [B,C) region so meta still references it but
 * it is not deployed; hbck must report NOT_DEPLOYED and -fix must
 * reassign it.
 */
@Test
public void testHbckWithFewerReplica() throws Exception {
  TableName table =
    TableName.valueOf("testHbckWithFewerReplica");
  try {
    setupTableWithRegionReplica(table, 2);
    TEST_UTIL.getHBaseAdmin().flush(table.getName());
    assertNoErrors(doFsck(conf, false));
    assertEquals(ROWKEYS.length, countRows());
    // Unassign only (no meta/hdfs deletion) for replicaId 1.
    deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
      Bytes.toBytes("C"), true, false, false, false, 1);

    HBaseFsck hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.NOT_DEPLOYED});
    // Fix the undeployed replica...
    hbck = doFsck(conf, true);
    // ...and verify nothing is left to report.
    hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[]{});
  } finally {
    cleanupTable(table);
  }
}
1032
/**
 * Fabricates an EXTRA replica (replicaId 2) of the [B,C) region: writes a
 * bogus location for it into meta, gets it deployed via HBaseFsckRepair,
 * then deletes its meta columns again so a region is online that meta
 * knows nothing about. hbck must report NOT_IN_META and -fix must
 * reconcile it.
 */
@Test
public void testHbckWithExcessReplica() throws Exception {
  TableName table =
    TableName.valueOf("testHbckWithExcessReplica");
  try {
    setupTableWithRegionReplica(table, 2);
    TEST_UTIL.getHBaseAdmin().flush(table.getName());
    assertNoErrors(doFsck(conf, false));
    assertEquals(ROWKEYS.length, countRows());

    // Find the primary [B,C) region; its meta row is where the extra
    // replica's location columns get written.
    HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
    List<HRegionInfo> regions = TEST_UTIL.getHBaseAdmin().getTableRegions(table);
    byte[] startKey = Bytes.toBytes("B");
    byte[] endKey = Bytes.toBytes("C");
    byte[] metaKey = null;
    HRegionInfo newHri = null;
    for (HRegionInfo h : regions) {
      if (Bytes.compareTo(h.getStartKey(), startKey) == 0 &&
          Bytes.compareTo(h.getEndKey(), endKey) == 0 &&
          h.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
        metaKey = h.getRegionName();
        // The phantom third replica (replicaId 2).
        newHri = RegionReplicaUtil.getRegionInfoForReplica(h, 2);
        break;
      }
    }
    // NOTE(review): metaKey/newHri stay null if no matching region was
    // found, which would NPE below; the fixed SPLITS guarantee a [B,C)
    // primary exists.
    Put put = new Put(metaKey);
    ServerName sn = TEST_UTIL.getHBaseAdmin().getClusterStatus().getServers()
      .toArray(new ServerName[0])[0];
    // Write location columns for replicaId 2 pointing at an arbitrary server.
    MetaTableAccessor.addLocation(put, sn, sn.getStartcode(), -1, 2);
    meta.put(put);
    meta.flushCommits();
    // Actually deploy the phantom replica.
    HBaseFsckRepair.fixUnassigned((HBaseAdmin)TEST_UTIL.getHBaseAdmin(), newHri);
    HBaseFsckRepair.waitUntilAssigned((HBaseAdmin)TEST_UTIL.getHBaseAdmin(), newHri);
    // Now erase its meta columns: the replica is online but unknown to meta.
    Delete delete = new Delete(metaKey);
    delete.deleteColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(2));
    delete.deleteColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(2));
    delete.deleteColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getSeqNumColumn(2));
    meta.delete(delete);
    meta.flushCommits();
    meta.close();

    HBaseFsck hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.NOT_IN_META});
    // Fix the orphan deployment...
    hbck = doFsck(conf, true);
    // ...and verify nothing is left to report.
    hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[]{});
  } finally {
    cleanupTable(table);
  }
}
1092
1093
1094
1095 Map<ServerName, List<String>> getDeployedHRIs(final HBaseAdmin admin) throws IOException {
1096 ClusterStatus status = admin.getClusterStatus();
1097 Collection<ServerName> regionServers = status.getServers();
1098 Map<ServerName, List<String>> mm =
1099 new HashMap<ServerName, List<String>>();
1100 for (ServerName hsi : regionServers) {
1101 AdminProtos.AdminService.BlockingInterface server = ((HConnection) connection).getAdmin(hsi);
1102
1103
1104 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
1105 List<String> regionNames = new ArrayList<String>();
1106 for (HRegionInfo hri : regions) {
1107 regionNames.add(hri.getRegionNameAsString());
1108 }
1109 mm.put(hsi, regionNames);
1110 }
1111 return mm;
1112 }
1113
1114
1115
1116
1117 ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
1118 for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
1119 if (e.getValue().contains(hri.getRegionNameAsString())) {
1120 return e.getKey();
1121 }
1122 }
1123 return null;
1124 }
1125
1126
1127
1128
1129
  /**
   * Creates a second region with the same [A, B) key range as an existing region,
   * waits until it is really deployed on a region server, and verifies hbck reports
   * two DUPE_STARTKEYS errors and can repair the overlap without data loss.
   */
  @Test (timeout=180000)
  public void testDupeRegion() throws Exception {
    TableName table =
        TableName.valueOf("tableDupeRegion");
    try {
      setupTable(table);
      assertNoErrors(doFsck(conf, false));
      assertEquals(ROWKEYS.length, countRows());

      // Region [A, B) already exists; create an exact duplicate of its key range.
      HRegionInfo hriDupe =
          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"));

      TEST_UTIL.assignRegion(hriDupe);
      ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
      TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);

      // Poll the region servers directly until the dupe shows up in some server's
      // online list. NOTE(review): presumably this guards against the master's view
      // (checked above) racing ahead of the region servers — confirm.
      while (findDeployedHSI(getDeployedHRIs((HBaseAdmin) admin), hriDupe) == null) {
        Thread.sleep(250);
      }

      LOG.debug("Finished assignment of dupe region");

      // Both regions share an identical start key, hence two DUPE_STARTKEYS.
      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
          ERROR_CODE.DUPE_STARTKEYS});
      assertEquals(2, hbck.getOverlapGroups(table).size());
      assertEquals(ROWKEYS.length, countRows()); // all rows still readable

      // Repair the duplicate/overlap.
      doFsck(conf,true);

      // Post-repair: no errors, no overlap groups, no lost rows.
      HBaseFsck hbck2 = doFsck(conf,false);
      assertNoErrors(hbck2);
      assertEquals(0, hbck2.getOverlapGroups(table).size());
      assertEquals(ROWKEYS.length, countRows());
    } finally {
      cleanupTable(table);
    }
  }
1176
1177
1178
1179
1180 @Test (timeout=180000)
1181 public void testDegenerateRegions() throws Exception {
1182 TableName table = TableName.valueOf("tableDegenerateRegions");
1183 try {
1184 setupTable(table);
1185 assertNoErrors(doFsck(conf,false));
1186 assertEquals(ROWKEYS.length, countRows());
1187
1188
1189 HRegionInfo hriDupe =
1190 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("B"));
1191 TEST_UTIL.assignRegion(hriDupe);
1192 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
1193 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
1194
1195 HBaseFsck hbck = doFsck(conf,false);
1196 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION, ERROR_CODE.DUPE_STARTKEYS,
1197 ERROR_CODE.DUPE_STARTKEYS });
1198 assertEquals(2, hbck.getOverlapGroups(table).size());
1199 assertEquals(ROWKEYS.length, countRows());
1200
1201
1202 doFsck(conf,true);
1203
1204
1205 HBaseFsck hbck2 = doFsck(conf,false);
1206 assertNoErrors(hbck2);
1207 assertEquals(0, hbck2.getOverlapGroups(table).size());
1208 assertEquals(ROWKEYS.length, countRows());
1209 } finally {
1210 cleanupTable(table);
1211 }
1212 }
1213
1214
1215
1216
1217
1218 @Test (timeout=180000)
1219 public void testContainedRegionOverlap() throws Exception {
1220 TableName table =
1221 TableName.valueOf("tableContainedRegionOverlap");
1222 try {
1223 setupTable(table);
1224 assertEquals(ROWKEYS.length, countRows());
1225
1226
1227 HRegionInfo hriOverlap =
1228 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
1229 TEST_UTIL.assignRegion(hriOverlap);
1230
1231 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1232 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1233
1234 HBaseFsck hbck = doFsck(conf, false);
1235 assertErrors(hbck, new ERROR_CODE[] {
1236 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
1237 assertEquals(2, hbck.getOverlapGroups(table).size());
1238 assertEquals(ROWKEYS.length, countRows());
1239
1240
1241 doFsck(conf, true);
1242
1243
1244 HBaseFsck hbck2 = doFsck(conf,false);
1245 assertNoErrors(hbck2);
1246 assertEquals(0, hbck2.getOverlapGroups(table).size());
1247 assertEquals(ROWKEYS.length, countRows());
1248 } finally {
1249 cleanupTable(table);
1250 }
1251 }
1252
1253
1254
1255
1256
1257
1258
  /**
   * Creates two regions [A, AB) and [AB, B) overlapping the table's existing [A, B)
   * region, then corrupts the [A, B) meta entry so its server column points at a
   * server that does not host it. Runs hbck with -sidelineBigOverlaps and maxMerge=2
   * so the large overlap is sidelined rather than merged; sidelined data is offline,
   * hence fewer rows are expected afterwards.
   */
  @Test (timeout=180000)
  public void testSidelineOverlapRegion() throws Exception {
    TableName table =
        TableName.valueOf("testSidelineOverlapRegion");
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Manufacture the overlap: two extra regions covering [A, B) in two halves.
      MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
      HMaster master = cluster.getMaster(); // NOTE(review): unused below
      HRegionInfo hriOverlap1 =
          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("AB"));
      TEST_UTIL.assignRegion(hriOverlap1);
      HRegionInfo hriOverlap2 =
          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("AB"), Bytes.toBytes("B"));
      TEST_UTIL.assignRegion(hriOverlap2);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
          ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
      assertEquals(3, hbck.getOverlapGroups(table).size());
      assertEquals(ROWKEYS.length, countRows());

      // Find the original [A, B) region among the overlap groups; close it on its
      // real host and pick a *different* server to point meta at below.
      Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
      ServerName serverName = null;
      byte[] regionName = null;
      for (HbckInfo hbi: overlapGroups.values()) {
        if ("A".equals(Bytes.toString(hbi.getStartKey()))
            && "B".equals(Bytes.toString(hbi.getEndKey()))) {
          regionName = hbi.getRegionName();

          // Pick any region server other than the one actually hosting the region.
          int k = cluster.getServerWith(regionName);
          for (int i = 0; i < 3; i++) {
            if (i != k) {
              HRegionServer rs = cluster.getRegionServer(i);
              serverName = rs.getServerName();
              break;
            }
          }
          // Close the region on its real host and take it out of the assignment.
          HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) connection,
              cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
          admin.offline(regionName);
          break;
        }
      }

      assertNotNull(regionName);
      assertNotNull(serverName);
      // Corrupt meta: record the wrong server as the region's location.
      try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
        Put put = new Put(regionName);
        put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
            Bytes.toBytes(serverName.getHostAndPort()));
        meta.put(put);
      }

      // Repair with sidelineBigOverlaps and a merge limit of 2, forcing the big
      // overlap to be sidelined instead of merged.
      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
      fsck.connect();
      fsck.setDisplayFullReport(); // i.e. -details
      fsck.setTimeLag(0);
      fsck.setFixAssignments(true);
      fsck.setFixMeta(true);
      fsck.setFixHdfsHoles(true);
      fsck.setFixHdfsOverlaps(true);
      fsck.setFixHdfsOrphans(true);
      fsck.setFixVersionFile(true);
      fsck.setSidelineBigOverlaps(true);
      fsck.setMaxMerge(2);
      fsck.onlineHbck();
      fsck.close();

      // Consistent again, but the sidelined region's rows are no longer served,
      // hence strictly fewer rows than before.
      HBaseFsck hbck2 = doFsck(conf,false);
      assertNoErrors(hbck2);
      assertEquals(0, hbck2.getOverlapGroups(table).size());
      assertTrue(ROWKEYS.length > countRows());
    } finally {
      cleanupTable(table);
    }
  }
1344
1345
1346
1347
1348
1349 @Test (timeout=180000)
1350 public void testOverlapAndOrphan() throws Exception {
1351 TableName table =
1352 TableName.valueOf("tableOverlapAndOrphan");
1353 try {
1354 setupTable(table);
1355 assertEquals(ROWKEYS.length, countRows());
1356
1357
1358 admin.disableTable(table);
1359 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1360 Bytes.toBytes("B"), true, true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
1361 TEST_UTIL.getHBaseAdmin().enableTable(table);
1362
1363 HRegionInfo hriOverlap =
1364 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
1365 TEST_UTIL.assignRegion(hriOverlap);
1366 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1367 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1368
1369 HBaseFsck hbck = doFsck(conf, false);
1370 assertErrors(hbck, new ERROR_CODE[] {
1371 ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1372 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1373
1374
1375 doFsck(conf, true);
1376
1377
1378 HBaseFsck hbck2 = doFsck(conf,false);
1379 assertNoErrors(hbck2);
1380 assertEquals(0, hbck2.getOverlapGroups(table).size());
1381 assertEquals(ROWKEYS.length, countRows());
1382 } finally {
1383 cleanupTable(table);
1384 }
1385 }
1386
1387
1388
1389
1390
1391
1392 @Test (timeout=180000)
1393 public void testCoveredStartKey() throws Exception {
1394 TableName table =
1395 TableName.valueOf("tableCoveredStartKey");
1396 try {
1397 setupTable(table);
1398 assertEquals(ROWKEYS.length, countRows());
1399
1400
1401 HRegionInfo hriOverlap =
1402 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2"));
1403 TEST_UTIL.assignRegion(hriOverlap);
1404 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1405 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1406
1407 HBaseFsck hbck = doFsck(conf, false);
1408 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
1409 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
1410 assertEquals(3, hbck.getOverlapGroups(table).size());
1411 assertEquals(ROWKEYS.length, countRows());
1412
1413
1414 doFsck(conf, true);
1415
1416
1417 HBaseFsck hbck2 = doFsck(conf, false);
1418 assertErrors(hbck2, new ERROR_CODE[0]);
1419 assertEquals(0, hbck2.getOverlapGroups(table).size());
1420 assertEquals(ROWKEYS.length, countRows());
1421 } finally {
1422 cleanupTable(table);
1423 }
1424 }
1425
1426
1427
1428
1429
1430 @Test (timeout=180000)
1431 public void testRegionHole() throws Exception {
1432 TableName table =
1433 TableName.valueOf("tableRegionHole");
1434 try {
1435 setupTable(table);
1436 assertEquals(ROWKEYS.length, countRows());
1437
1438
1439 admin.disableTable(table);
1440 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1441 Bytes.toBytes("C"), true, true, true);
1442 admin.enableTable(table);
1443
1444 HBaseFsck hbck = doFsck(conf, false);
1445 assertErrors(hbck, new ERROR_CODE[] {
1446 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1447
1448 assertEquals(0, hbck.getOverlapGroups(table).size());
1449
1450
1451 doFsck(conf, true);
1452
1453
1454 assertNoErrors(doFsck(conf,false));
1455 assertEquals(ROWKEYS.length - 2 , countRows());
1456 } finally {
1457 cleanupTable(table);
1458 }
1459 }
1460
1461
1462
1463
1464
1465 @Test (timeout=180000)
1466 public void testHDFSRegioninfoMissing() throws Exception {
1467 TableName table = TableName.valueOf("tableHDFSRegioninfoMissing");
1468 try {
1469 setupTable(table);
1470 assertEquals(ROWKEYS.length, countRows());
1471
1472
1473 admin.disableTable(table);
1474 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1475 Bytes.toBytes("C"), true, true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
1476 TEST_UTIL.getHBaseAdmin().enableTable(table);
1477
1478 HBaseFsck hbck = doFsck(conf, false);
1479 assertErrors(hbck, new ERROR_CODE[] {
1480 ERROR_CODE.ORPHAN_HDFS_REGION,
1481 ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1482 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1483
1484 assertEquals(0, hbck.getOverlapGroups(table).size());
1485
1486
1487 doFsck(conf, true);
1488
1489
1490 assertNoErrors(doFsck(conf, false));
1491 assertEquals(ROWKEYS.length, countRows());
1492 } finally {
1493 cleanupTable(table);
1494 }
1495 }
1496
1497
1498
1499
1500
1501
  /**
   * Same scenario as testHDFSRegioninfoMissing — the [B, C) region loses its meta
   * entry, assignment, and .regioninfo file — but after the repair this test
   * additionally verifies the reconstructed region boundaries by counting rows per
   * split range.
   */
  @Test(timeout = 180000)
  public void testHDFSRegioninfoMissingAndCheckRegionBoundary() throws Exception {
    TableName table = TableName.valueOf("testHDFSRegioninfoMissingAndCheckRegionBoundary");
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Unassign [B, C), drop its meta row, keep the data but delete .regioninfo.
      admin.disableTable(table);
      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("C"), true,
        true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
      admin.enableTable(table);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck,
        new HBaseFsck.ErrorReporter.ERROR_CODE[] {
            HBaseFsck.ErrorReporter.ERROR_CODE.ORPHAN_HDFS_REGION,
            HBaseFsck.ErrorReporter.ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
            HBaseFsck.ErrorReporter.ERROR_CODE.HOLE_IN_REGION_CHAIN });
      // Orphan plus hole only; no overlaps expected.
      assertEquals(0, hbck.getOverlapGroups(table).size());

      // Adopt the orphan and restore the meta entry.
      doFsck(conf, true);

      assertNoErrors(doFsck(conf, false));

      // Each [ROWKEYS[i], ROWKEYS[i+1]) range (and the open-ended last range) must
      // hold exactly one row — presumably one test row per range; this checks that
      // the repaired region boundaries still cover the keyspace correctly.
      for (int i = 0; i < ROWKEYS.length; i++) {
        if (i != ROWKEYS.length - 1) {
          assertEquals(1, countRows(ROWKEYS[i], ROWKEYS[i + 1]));
        } else {
          assertEquals(1, countRows(ROWKEYS[i], null));
        }
      }

    } finally {
      cleanupTable(table);
    }
  }
1543
1544
1545
1546
1547
1548 @Test (timeout=180000)
1549 public void testNotInMetaOrDeployedHole() throws Exception {
1550 TableName table =
1551 TableName.valueOf("tableNotInMetaOrDeployedHole");
1552 try {
1553 setupTable(table);
1554 assertEquals(ROWKEYS.length, countRows());
1555
1556
1557 admin.disableTable(table);
1558 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1559 Bytes.toBytes("C"), true, true, false);
1560 admin.enableTable(table);
1561
1562 HBaseFsck hbck = doFsck(conf, false);
1563 assertErrors(hbck, new ERROR_CODE[] {
1564 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1565
1566 assertEquals(0, hbck.getOverlapGroups(table).size());
1567
1568
1569 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1570 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1571
1572
1573 assertNoErrors(doFsck(conf,false));
1574 assertEquals(ROWKEYS.length, countRows());
1575 } finally {
1576 cleanupTable(table);
1577 }
1578 }
1579
1580
1581
1582
1583 @Test (timeout=180000)
1584 public void testNotInMetaHole() throws Exception {
1585 TableName table =
1586 TableName.valueOf("tableNotInMetaHole");
1587 try {
1588 setupTable(table);
1589 assertEquals(ROWKEYS.length, countRows());
1590
1591
1592 admin.disableTable(table);
1593 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1594 Bytes.toBytes("C"), false, true, false);
1595 admin.enableTable(table);
1596
1597 HBaseFsck hbck = doFsck(conf, false);
1598 assertErrors(hbck, new ERROR_CODE[] {
1599 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1600
1601 assertEquals(0, hbck.getOverlapGroups(table).size());
1602
1603
1604 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1605 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1606
1607
1608 assertNoErrors(doFsck(conf,false));
1609 assertEquals(ROWKEYS.length, countRows());
1610 } finally {
1611 cleanupTable(table);
1612 }
1613 }
1614
1615
1616
1617
1618
1619 @Test (timeout=180000)
1620 public void testNotInHdfs() throws Exception {
1621 TableName table =
1622 TableName.valueOf("tableNotInHdfs");
1623 try {
1624 setupTable(table);
1625 assertEquals(ROWKEYS.length, countRows());
1626
1627
1628 admin.flush(table);
1629
1630
1631 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1632 Bytes.toBytes("C"), false, false, true);
1633
1634 HBaseFsck hbck = doFsck(conf, false);
1635 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1636
1637 assertEquals(0, hbck.getOverlapGroups(table).size());
1638
1639
1640 doFsck(conf, true);
1641
1642
1643 assertNoErrors(doFsck(conf,false));
1644 assertEquals(ROWKEYS.length - 2, countRows());
1645 } finally {
1646 cleanupTable(table);
1647 }
1648 }
1649
1650
1651
1652
1653
1654 @Test (timeout=180000)
1655 public void testNotInHdfsWithReplicas() throws Exception {
1656 TableName table =
1657 TableName.valueOf("tableNotInHdfs");
1658 HBaseAdmin admin = new HBaseAdmin(conf);
1659 try {
1660 HRegionInfo[] oldHris = new HRegionInfo[2];
1661 setupTableWithRegionReplica(table, 2);
1662 assertEquals(ROWKEYS.length, countRows());
1663 NavigableMap<HRegionInfo, ServerName> map = MetaScanner.allTableRegions(TEST_UTIL.getConnection(),
1664 tbl.getName());
1665 int i = 0;
1666
1667 for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
1668 if (m.getKey().getStartKey().length > 0 &&
1669 m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
1670 LOG.debug("Initially server hosting " + m.getKey() + " is " + m.getValue());
1671 oldHris[i++] = m.getKey();
1672 }
1673 }
1674
1675 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1676
1677
1678 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1679 Bytes.toBytes("C"), false, false, true);
1680
1681 HBaseFsck hbck = doFsck(conf, false);
1682 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1683
1684
1685 doFsck(conf, true);
1686
1687
1688 assertNoErrors(doFsck(conf,false));
1689 assertEquals(ROWKEYS.length - 2, countRows());
1690
1691
1692
1693 i = 0;
1694 HRegionInfo[] newHris = new HRegionInfo[2];
1695
1696 map = MetaScanner.allTableRegions(TEST_UTIL.getConnection(), tbl.getName());
1697
1698 for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
1699 if (m.getKey().getStartKey().length > 0 &&
1700 m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
1701 newHris[i++] = m.getKey();
1702 }
1703 }
1704
1705 Collection<ServerName> servers = admin.getClusterStatus().getServers();
1706 Set<HRegionInfo> onlineRegions = new HashSet<HRegionInfo>();
1707 for (ServerName s : servers) {
1708 List<HRegionInfo> list = admin.getOnlineRegions(s);
1709 onlineRegions.addAll(list);
1710 }
1711
1712 assertTrue(onlineRegions.containsAll(Arrays.asList(newHris)));
1713
1714
1715 assertFalse(onlineRegions.removeAll(Arrays.asList(oldHris)));
1716 } finally {
1717 cleanupTable(table);
1718 admin.close();
1719 }
1720 }
1721
1722
1723
1724
1725
1726
1727 @Test (timeout=180000)
1728 public void testNoHdfsTable() throws Exception {
1729 TableName table = TableName.valueOf("NoHdfsTable");
1730 setupTable(table);
1731 assertEquals(ROWKEYS.length, countRows());
1732
1733
1734 admin.flush(table);
1735
1736
1737 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
1738 Bytes.toBytes("A"), false, false, true);
1739 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1740 Bytes.toBytes("B"), false, false, true);
1741 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1742 Bytes.toBytes("C"), false, false, true);
1743 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
1744 Bytes.toBytes(""), false, false, true);
1745
1746
1747 deleteTableDir(table);
1748
1749 HBaseFsck hbck = doFsck(conf, false);
1750 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
1751 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
1752 ERROR_CODE.NOT_IN_HDFS,});
1753
1754 assertEquals(0, hbck.getOverlapGroups(table).size());
1755
1756
1757 doFsck(conf, true);
1758
1759
1760 assertNoErrors(doFsck(conf,false));
1761 assertFalse("Table " + table + " should have been deleted", admin.tableExists(table));
1762 }
1763
1764 public void deleteTableDir(TableName table) throws IOException {
1765 Path rootDir = FSUtils.getRootDir(conf);
1766 FileSystem fs = rootDir.getFileSystem(conf);
1767 Path p = FSUtils.getTableDir(rootDir, table);
1768 HBaseFsck.debugLsr(conf, p);
1769 boolean success = fs.delete(p, true);
1770 LOG.info("Deleted " + p + " sucessfully? " + success);
1771 }
1772
1773
1774
1775
1776 @Test (timeout=180000)
1777 public void testNoVersionFile() throws Exception {
1778
1779 Path rootDir = FSUtils.getRootDir(conf);
1780 FileSystem fs = rootDir.getFileSystem(conf);
1781 Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
1782 fs.delete(versionFile, true);
1783
1784
1785 HBaseFsck hbck = doFsck(conf, false);
1786 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
1787
1788 doFsck(conf, true);
1789
1790
1791 assertNoErrors(doFsck(conf, false));
1792 }
1793
1794
1795
1796
  /**
   * Force-opens a region of a disabled table directly on a region server, bypassing
   * the master. hbck must flag it as SHOULD_NOT_BE_DEPLOYED and, in repair mode,
   * close it again.
   */
  @Test (timeout=180000)
  public void testRegionShouldNotBeDeployed() throws Exception {
    TableName table =
        TableName.valueOf("tableRegionShouldNotBeDeployed");
    try {
      LOG.info("Starting testRegionShouldNotBeDeployed.");
      MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
      assertTrue(cluster.waitForActiveAndReadyMaster());

      // Build the table "by hand": write the table descriptor to the filesystem and
      // insert the region rows directly into meta instead of using createTable.
      byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
          Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
      HTableDescriptor htdDisabled = new HTableDescriptor(table);
      htdDisabled.addFamily(new HColumnDescriptor(FAM));

      // Write the .tableinfo file.
      FSTableDescriptors fstd = new FSTableDescriptors(conf);
      fstd.createTableDescriptor(htdDisabled);
      List<HRegionInfo> disabledRegions =
          TEST_UTIL.createMultiRegionsInMeta(conf, htdDisabled, SPLIT_KEYS);

      // The region server that will later host the rogue region.
      HRegionServer hrs = cluster.getRegionServer(0);

      // Cycle the table once so the hand-made regions go through a normal
      // assign/unassign round ...
      admin.disableTable(table);
      admin.enableTable(table);

      // ... then disable for real: all regions should now be closed.
      admin.disableTable(table);
      HRegionInfo region = disabledRegions.remove(0);
      byte[] regionName = region.getRegionName();

      // Sanity check: the region must not be assigned anywhere right now.
      assertTrue(cluster.getServerWith(regionName) == -1);

      // Open the region directly on a region server, bypassing the master (which
      // would otherwise refuse to deploy a region of a disabled table).
      HRegion r = HRegion.openHRegion(
        region, htdDisabled, hrs.getWAL(region), conf);
      hrs.addToOnlineRegions(r);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });

      // Repair: hbck closes the rogue region.
      doFsck(conf, true);

      // Should be fixed by now.
      assertNoErrors(doFsck(conf, false));
    } finally {
      admin.enableTable(table);
      cleanupTable(table);
    }
  }
1854
1855
1856
1857
1858 @Test (timeout=180000)
1859 public void testFixByTable() throws Exception {
1860 TableName table1 =
1861 TableName.valueOf("testFixByTable1");
1862 TableName table2 =
1863 TableName.valueOf("testFixByTable2");
1864 try {
1865 setupTable(table1);
1866
1867 admin.flush(table1);
1868
1869 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1870 Bytes.toBytes("C"), false, false, true);
1871
1872 setupTable(table2);
1873
1874 admin.flush(table2);
1875
1876 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1877 Bytes.toBytes("C"), false, false, true);
1878
1879 HBaseFsck hbck = doFsck(conf, false);
1880 assertErrors(hbck, new ERROR_CODE[] {
1881 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS});
1882
1883
1884 doFsck(conf, true, table1);
1885
1886 assertNoErrors(doFsck(conf, false, table1));
1887
1888 assertErrors(doFsck(conf, false, table2),
1889 new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1890
1891
1892 doFsck(conf, true, table2);
1893
1894 assertNoErrors(doFsck(conf, false));
1895 assertEquals(ROWKEYS.length - 2, countRows());
1896 } finally {
1897 cleanupTable(table1);
1898 cleanupTable(table2);
1899 }
1900 }
1901
1902
1903
  /**
   * Simulates a lingering split parent: the [B, C) region is deleted from the
   * servers and HDFS but its meta row is kept and marked offline/split with two
   * fabricated daughters [B, BM) and [BM, C) that never existed. A plain -fix run
   * must NOT touch the lingering parent; only -fixSplitParents may resolve it.
   */
  @Test (timeout=180000)
  public void testLingeringSplitParent() throws Exception {
    TableName table =
        TableName.valueOf("testLingeringSplitParent");
    Table meta = null;
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Flush so the region's data reaches HDFS before we manipulate it.
      admin.flush(table);
      HRegionLocation location = tbl.getRegionLocation("B");

      // Delete [B, C) from the region server and from HDFS, keeping its meta row.
      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
          Bytes.toBytes("C"), true, true, false);

      // Fabricate a split in meta: mark the parent offline+split and register two
      // daughters that were never actually created.
      meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
      HRegionInfo hri = location.getRegionInfo();

      HRegionInfo a = new HRegionInfo(tbl.getName(),
          Bytes.toBytes("B"), Bytes.toBytes("BM"));
      HRegionInfo b = new HRegionInfo(tbl.getName(),
          Bytes.toBytes("BM"), Bytes.toBytes("C"));

      hri.setOffline(true);
      hri.setSplit(true);

      MetaTableAccessor.addRegionToMeta(meta, hri, a, b);
      // NOTE(review): meta is closed here but reused for a Get further below;
      // this appears to work with this client, but looks fragile — confirm.
      meta.close();
      admin.flush(TableName.META_TABLE_NAME);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] {
        ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});

      // A regular repair run must refuse to touch the lingering parent: same
      // errors, and no rerun requested.
      hbck = doFsck(conf, true);
      assertErrors(hbck, new ERROR_CODE[] {
        ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN });
      assertFalse(hbck.shouldRerun());
      hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] {
        ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});

      // With -fixSplitParents enabled the split state is reset and hbck asks for
      // a rerun.
      hbck = new HBaseFsck(conf, hbfsckExecutorService);
      hbck.connect();
      hbck.setDisplayFullReport(); // i.e. -details
      hbck.setTimeLag(0);
      hbck.setFixSplitParents(true);
      hbck.onlineHbck();
      assertTrue(hbck.shouldRerun());
      hbck.close();

      // The fabricated daughter references must be gone from the parent's row.
      Get get = new Get(hri.getRegionName());
      Result result = meta.get(get);
      assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
        HConstants.SPLITA_QUALIFIER).isEmpty());
      assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
        HConstants.SPLITB_QUALIFIER).isEmpty());
      admin.flush(TableName.META_TABLE_NAME);

      // Now the remaining hole can be repaired normally.
      doFsck(conf, true);

      // Should be fixed by now, with no data loss.
      assertNoErrors(doFsck(conf, false));
      assertEquals(ROWKEYS.length, countRows());
    } finally {
      cleanupTable(table);
      IOUtils.closeQuietly(meta);
    }
  }
1979
1980
1981
1982
1983
1984 @Test (timeout=180000)
1985 public void testValidLingeringSplitParent() throws Exception {
1986 final TableName table =
1987 TableName.valueOf("testLingeringSplitParent");
1988 Table meta = null;
1989 try {
1990 setupTable(table);
1991 assertEquals(ROWKEYS.length, countRows());
1992
1993
1994 admin.flush(table);
1995 HRegionLocation location = tbl.getRegionLocation(Bytes.toBytes("B"));
1996
1997 meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
1998 HRegionInfo hri = location.getRegionInfo();
1999
2000 splitAndWait(table, location);
2001
2002
2003
2004
2005 HBaseFsck hbck = doFsck(
2006 conf, true, true, false, false, false, true, true, true, false, false, false, false, false, null);
2007 assertErrors(hbck, new ERROR_CODE[] {});
2008
2009
2010 Get get = new Get(hri.getRegionName());
2011 Result result = meta.get(get);
2012 assertNotNull(result);
2013 assertNotNull(MetaTableAccessor.getHRegionInfo(result));
2014
2015 assertEquals(ROWKEYS.length, countRows());
2016
2017
2018 assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
2019 assertNoErrors(doFsck(conf, false));
2020 } finally {
2021 cleanupTable(table);
2022 IOUtils.closeQuietly(meta);
2023 }
2024 }
2025
2026 private byte[] splitAndWait(final TableName table, HRegionLocation location)
2027 throws IOException, Exception {
2028
2029
2030 final List<HRegion> regions = TEST_UTIL.getMiniHBaseCluster().getRegions(table);
2031 byte[] regionName = location.getRegionInfo().getRegionName();
2032 admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
2033 TEST_UTIL.waitFor(60000, new Predicate<Exception>() {
2034 @Override
2035 public boolean evaluate() throws Exception {
2036 List<HRegion> regions1 = TEST_UTIL.getMiniHBaseCluster().getRegions(table);
2037 regions1.removeAll(regions);
2038 return regions1.size() == 2;
2039 }
2040 });
2041
2042 return regionName;
2043 }
2044
2045
2046
2047
2048
  /**
   * Splits a region, then deletes both daughters from hbase:meta, undeploys them,
   * and purges them from the master's region states — leaving only the split parent
   * in meta. The parent must remain valid, the daughter removal must be reported,
   * and no data may be lost.
   */
  @Test(timeout=75000)
  public void testSplitDaughtersNotInMeta() throws Exception {
    TableName table = TableName.valueOf("testSplitdaughtersNotInMeta");
    Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Flush so the parent has store files to split.
      admin.flush(table);
      HRegionLocation location = tbl.getRegionLocation(Bytes.toBytes("B"));

      HRegionInfo hri = location.getRegionInfo();

      // Disable CatalogJanitor so the split parent's row is not garbage-collected
      // while this test manipulates meta.
      admin.enableCatalogJanitor(false);

      byte[] regionName = splitAndWait(table, location);
      PairOfSameType<HRegionInfo> daughters =
          MetaTableAccessor.getDaughterRegions(meta.get(new Get(regionName)));

      // Undeploy both daughters ...
      Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
      undeployRegion(connection, hris.get(daughters.getFirst()), daughters.getFirst());
      undeployRegion(connection, hris.get(daughters.getSecond()), daughters.getSecond());

      // ... delete their meta rows ...
      List<Delete> deletes = new ArrayList<>();
      deletes.add(new Delete(daughters.getFirst().getRegionName()));
      deletes.add(new Delete(daughters.getSecond().getRegionName()));
      meta.delete(deletes);

      // ... and erase them from the master's in-memory state as well.
      RegionStates regionStates = TEST_UTIL.getMiniHBaseCluster().getMaster().
          getAssignmentManager().getRegionStates();
      regionStates.deleteRegion(daughters.getFirst());
      regionStates.deleteRegion(daughters.getSecond());

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck,
          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
              ERROR_CODE.HOLE_IN_REGION_CHAIN });

      // Run repair with a restricted set of fix flags. NOTE(review): the boolean
      // list presumably enables only assignment-level fixes — confirm the overload's
      // flag order. The same errors must still be reported afterwards.
      hbck = doFsck(
        conf, true, true, false, false, false, false, false, false, false, false, false, false,false,null);
      assertErrors(hbck,
          new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
              ERROR_CODE.HOLE_IN_REGION_CHAIN });

      // The split parent's meta row must survive untouched.
      Get get = new Get(hri.getRegionName());
      Result result = meta.get(get);
      assertNotNull(result);
      assertNotNull(MetaTableAccessor.getHRegionInfo(result));

      assertEquals(ROWKEYS.length, countRows());

      // Original split points plus the two daughters of the split region.
      assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
      assertNoErrors(doFsck(conf, false));
    } finally {
      admin.enableCatalogJanitor(true);
      meta.close();
      cleanupTable(table);
    }
  }
2116
2117
2118
2119
2120
  /**
   * hbck should flag a table whose first region (start key "" to "A") was
   * removed from assignment, META and HDFS as FIRST_REGION_STARTKEY_NOT_EMPTY,
   * and a fix run should repair the hole.
   */
  @Test(timeout=120000)
  public void testMissingFirstRegion() throws Exception {
    TableName table = TableName.valueOf("testMissingFirstRegion");
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Mess it up: drop the first region everywhere (unassign, meta, hdfs).
      admin.disableTable(table);
      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
          true, true);
      admin.enableTable(table);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
      // fix the missing first region
      doFsck(conf, true);
      // verify the repair took
      assertNoErrors(doFsck(conf, false));
    } finally {
      cleanupTable(table);
    }
  }
2144
2145
2146
2147
2148
  /**
   * A region deleted from HDFS only (assertion shows NOT_IN_HDFS, so META and
   * the deployment are still in place) must be reported and be repairable.
   */
  @Test(timeout=120000)
  public void testRegionDeployedNotInHdfs() throws Exception {
    TableName table =
        TableName.valueOf("testSingleRegionDeployedNotInHdfs");
    try {
      setupTable(table);
      admin.flush(table); // persist memstores so region data exists on disk

      // Delete only the region's HDFS data (last deleteRegion flag).
      deleteRegion(conf, tbl.getTableDescriptor(),
        HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
        false, true);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
      // fix the missing HDFS region
      doFsck(conf, true);
      // verify the repair took
      assertNoErrors(doFsck(conf, false));
    } finally {
      cleanupTable(table);
    }
  }
2172
2173
2174
2175
2176
  /**
   * hbck should flag a table whose last region ("C" to empty end key) was
   * removed from assignment, META and HDFS as LAST_REGION_ENDKEY_NOT_EMPTY,
   * and a fix run should repair it.
   */
  @Test(timeout=120000)
  public void testMissingLastRegion() throws Exception {
    TableName table =
        TableName.valueOf("testMissingLastRegion");
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Mess it up: drop the last region everywhere (unassign, meta, hdfs).
      admin.disableTable(table);
      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
          true, true);
      admin.enableTable(table);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
      // fix the missing last region
      doFsck(conf, true);
      // verify the repair took
      assertNoErrors(doFsck(conf, false));
    } finally {
      cleanupTable(table);
    }
  }
2201
2202
2203
2204
  /**
   * -fixAssignments must still work when HDFS checking is disabled: an
   * unassigned region (NOT_DEPLOYED + HOLE_IN_REGION_CHAIN) gets redeployed
   * even with setCheckHdfs(false).
   */
  @Test (timeout=180000)
  public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
    TableName table =
        TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Unassign region [A,B); NOT_DEPLOYED below implies META/HDFS stay
      // intact (flag order per deleteRegion — TODO confirm).
      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
          Bytes.toBytes("B"), true, false, false, false, HRegionInfo.DEFAULT_REPLICA_ID);

      // Plain check sees the unassigned region and the resulting chain hole.
      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] {
        ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});

      // Same result with HDFS checking disabled.
      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
      fsck.connect();
      fsck.setDisplayFullReport();
      fsck.setTimeLag(0);
      fsck.setCheckHdfs(false);
      fsck.onlineHbck();
      assertErrors(fsck, new ERROR_CODE[] {
        ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
      fsck.close();

      // fixAssignments with checkHdfs=false: schedules a rerun and, after the
      // second pass, all errors are gone.
      fsck = new HBaseFsck(conf, hbfsckExecutorService);
      fsck.connect();
      fsck.setDisplayFullReport();
      fsck.setTimeLag(0);
      fsck.setCheckHdfs(false);
      fsck.setFixAssignments(true);
      fsck.onlineHbck();
      assertTrue(fsck.shouldRerun());
      fsck.onlineHbck();
      assertNoErrors(fsck);

      // No data was lost in the process.
      assertEquals(ROWKEYS.length, countRows());

      fsck.close();
    } finally {
      cleanupTable(table);
    }
  }
2252
2253
2254
2255
2256
2257
  /**
   * -fixMeta must be a no-op when HDFS checking is disabled: with a region
   * missing from META (NOT_IN_META + HOLE_IN_REGION_CHAIN), the restricted
   * run schedules no rerun and leaves the errors; a full fix run repairs them.
   */
  @Test (timeout=180000)
  public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
    TableName table =
        TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Delete only the META entry for region [A,B) (flag order per
      // deleteRegion — TODO confirm); NOT_IN_META below confirms the effect.
      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
          Bytes.toBytes("B"), false, true, false, false, HRegionInfo.DEFAULT_REPLICA_ID);

      // Plain check sees the missing META row and the chain hole.
      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck,
        new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });

      // Same result with HDFS checking disabled.
      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
      fsck.connect();
      fsck.setDisplayFullReport();
      fsck.setTimeLag(0);
      fsck.setCheckHdfs(false);
      fsck.onlineHbck();
      assertErrors(fsck,
        new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
      fsck.close();

      // fixMeta + fixAssignments with checkHdfs=false: no rerun scheduled and
      // the errors persist — META is not touched without HDFS information.
      fsck = new HBaseFsck(conf, hbfsckExecutorService);
      fsck.connect();
      fsck.setDisplayFullReport();
      fsck.setTimeLag(0);
      fsck.setCheckHdfs(false);
      fsck.setFixAssignments(true);
      fsck.setFixMeta(true);
      fsck.onlineHbck();
      assertFalse(fsck.shouldRerun());
      assertErrors(fsck,
        new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
      fsck.close();

      // Full fix run (HDFS checking on) repairs META; second pass is clean.
      fsck = doFsck(conf, true);
      assertTrue(fsck.shouldRerun());
      fsck = doFsck(conf, true);
      assertNoErrors(fsck);
    } finally {
      cleanupTable(table);
    }
  }
2309
2310
2311
2312
2313
  /**
   * The HDFS fixers (-fixHdfsHoles/-fixHdfsOverlaps/-fixHdfsOrphans) must be
   * no-ops when HDFS checking is disabled: the region-chain hole remains and
   * no rerun is scheduled.
   */
  @Test (timeout=180000)
  public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
    TableName table =
        TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Mess up region [A,B) (flag order per deleteRegion — TODO confirm);
      // the ORPHAN_HDFS_REGION assertion below shows its .regioninfo is gone.
      admin.disableTable(table);
      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
          Bytes.toBytes("B"), true, true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
      TEST_UTIL.getHBaseAdmin().enableTable(table);

      // Add an overlapping region [A2,B) and wait for it to be online.
      HRegionInfo hriOverlap =
          createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
      TEST_UTIL.assignRegion(hriOverlap);
      ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
      TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);

      // Plain check sees all three problems.
      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] {
        ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
        ERROR_CODE.HOLE_IN_REGION_CHAIN});

      // With HDFS checking disabled only the chain hole is visible.
      HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
      fsck.connect();
      fsck.setDisplayFullReport();
      fsck.setTimeLag(0);
      fsck.setCheckHdfs(false);
      fsck.onlineHbck();
      assertErrors(fsck, new ERROR_CODE[] {
        ERROR_CODE.HOLE_IN_REGION_CHAIN});
      fsck.close();

      // HDFS fixers with checkHdfs=false: no rerun, hole still reported.
      fsck = new HBaseFsck(conf, hbfsckExecutorService);
      fsck.connect();
      fsck.setDisplayFullReport();
      fsck.setTimeLag(0);
      fsck.setCheckHdfs(false);
      fsck.setFixHdfsHoles(true);
      fsck.setFixHdfsOverlaps(true);
      fsck.setFixHdfsOrphans(true);
      fsck.onlineHbck();
      assertFalse(fsck.shouldRerun());
      assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN});
      fsck.close();
    } finally {
      // The table may have been left disabled by the mess-up steps.
      if (admin.isTableDisabled(table)) {
        admin.enableTable(table);
      }
      cleanupTable(table);
    }
  }
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379 Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
2380 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
2381 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
2382 Path famDir = new Path(regionDir, FAM_STR);
2383
2384
2385 while (true) {
2386 FileStatus[] hfFss = fs.listStatus(famDir);
2387 if (hfFss.length == 0) {
2388 continue;
2389 }
2390 for (FileStatus hfs : hfFss) {
2391 if (!hfs.isDirectory()) {
2392 return hfs.getPath();
2393 }
2394 }
2395 }
2396 }
2397
2398
2399
2400
2401 @Test(timeout=180000)
2402 public void testQuarantineCorruptHFile() throws Exception {
2403 TableName table = TableName.valueOf(name.getMethodName());
2404 try {
2405 setupTable(table);
2406 assertEquals(ROWKEYS.length, countRows());
2407 admin.flush(table);
2408
2409 FileSystem fs = FileSystem.get(conf);
2410 Path hfile = getFlushedHFile(fs, table);
2411
2412
2413 admin.disableTable(table);
2414
2415
2416 Path corrupt = new Path(hfile.getParent(), "deadbeef");
2417 TestHFile.truncateFile(fs, hfile, corrupt);
2418 LOG.info("Created corrupted file " + corrupt);
2419 HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
2420
2421
2422 HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
2423 assertEquals(res.getRetCode(), 0);
2424 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
2425 assertEquals(hfcc.getHFilesChecked(), 5);
2426 assertEquals(hfcc.getCorrupted().size(), 1);
2427 assertEquals(hfcc.getFailures().size(), 0);
2428 assertEquals(hfcc.getQuarantined().size(), 1);
2429 assertEquals(hfcc.getMissing().size(), 0);
2430
2431
2432 admin.enableTable(table);
2433 } finally {
2434 cleanupTable(table);
2435 }
2436 }
2437
2438
2439
2440
2441 private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
2442 int corrupt, int fail, int quar, int missing) throws Exception {
2443 try {
2444 setupTable(table);
2445 assertEquals(ROWKEYS.length, countRows());
2446 admin.flush(table);
2447
2448
2449 admin.disableTable(table);
2450
2451 String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
2452 table.getNameAsString()};
2453 HBaseFsck res = hbck.exec(hbfsckExecutorService, args);
2454
2455 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
2456 assertEquals(hfcc.getHFilesChecked(), check);
2457 assertEquals(hfcc.getCorrupted().size(), corrupt);
2458 assertEquals(hfcc.getFailures().size(), fail);
2459 assertEquals(hfcc.getQuarantined().size(), quar);
2460 assertEquals(hfcc.getMissing().size(), missing);
2461
2462
2463 admin.enableTableAsync(table);
2464 while (!admin.isTableEnabled(table)) {
2465 try {
2466 Thread.sleep(250);
2467 } catch (InterruptedException e) {
2468 e.printStackTrace();
2469 fail("Interrupted when trying to enable table " + table);
2470 }
2471 }
2472 } finally {
2473 cleanupTable(table);
2474 }
2475 }
2476
2477
2478
2479
2480
  /**
   * An HFile that vanishes between directory listing and checking must be
   * counted as "missing". The checker subclass deletes the first HFile it
   * visits just before checking it.
   */
  @Test(timeout=180000)
  public void testQuarantineMissingHFile() throws Exception {
    TableName table = TableName.valueOf(name.getMethodName());

    // Inject a checker that removes exactly one HFile mid-run.
    final FileSystem fs = FileSystem.get(conf);
    HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
      @Override
      public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
        return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
          // flips once so only the first file seen is deleted
          AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
          @Override
          protected void checkHFile(Path p) throws IOException {
            if (attemptedFirstHFile.compareAndSet(false, true)) {
              assertTrue(fs.delete(p, true)); // delete right before the check
            }
            super.checkHFile(p);
          }
        };
      }
    };
    // expected counters: 4 checked, 0 corrupt, 0 failed, 0 quarantined, 1 missing
    doQuarantineTest(table, hbck, 4, 0, 0, 0, 1);
    hbck.close();
  }
2505
2506
2507
2508
2509
2510
2511
  /**
   * A column family directory that vanishes mid-check must be counted as
   * "missing". The checker subclass deletes the first family dir it visits
   * just before checking it.
   * NOTE: currently disabled via @Ignore.
   */
  @Ignore @Test(timeout=180000)
  public void testQuarantineMissingFamdir() throws Exception {
    TableName table = TableName.valueOf(name.getMethodName());
    // Inject a checker that removes exactly one family directory mid-run.
    final FileSystem fs = FileSystem.get(conf);
    HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
      @Override
      public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
        return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
          // flips once so only the first directory seen is deleted
          AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
          @Override
          protected void checkColFamDir(Path p) throws IOException {
            if (attemptedFirstHFile.compareAndSet(false, true)) {
              assertTrue(fs.delete(p, true)); // delete right before the check
            }
            super.checkColFamDir(p);
          }
        };
      }
    };
    // expected counters: 3 checked, 0 corrupt, 0 failed, 0 quarantined, 1 missing
    doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
    hbck.close();
  }
2535
2536 @Test(timeout=60000)
2537 public void testCheckReplication() throws Exception {
2538
2539 HBaseFsck hbck = doFsck(conf, false);
2540 assertNoErrors(hbck);
2541
2542
2543 ReplicationAdmin replicationAdmin = new ReplicationAdmin(conf);
2544 Assert.assertEquals(0, replicationAdmin.getPeersCount());
2545 String zkPort = conf.get(HConstants.ZOOKEEPER_CLIENT_PORT);
2546 ReplicationPeerConfig rpc = new ReplicationPeerConfig();
2547 rpc.setClusterKey("127.0.0.1:2181" + zkPort + ":/hbase");
2548 replicationAdmin.addPeer("1", rpc);
2549 replicationAdmin.getPeersCount();
2550 Assert.assertEquals(1, replicationAdmin.getPeersCount());
2551
2552
2553 ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "Test Hbase Fsck", connection);
2554 ReplicationQueues repQueues =
2555 ReplicationFactory.getReplicationQueues(zkw, conf, connection);
2556 repQueues.init("server1");
2557
2558 repQueues.addLog("1", "file1");
2559 repQueues.addLog("1-server2", "file1");
2560 Assert.assertEquals(2, repQueues.getAllQueues().size());
2561 hbck = doFsck(conf, false);
2562 assertNoErrors(hbck);
2563
2564
2565 repQueues.addLog("2", "file1");
2566 repQueues.addLog("2-server2", "file1");
2567 Assert.assertEquals(4, repQueues.getAllQueues().size());
2568 hbck = doFsck(conf, false);
2569 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNDELETED_REPLICATION_QUEUE,
2570 ERROR_CODE.UNDELETED_REPLICATION_QUEUE });
2571
2572
2573 hbck = doFsck(conf, true);
2574 hbck = doFsck(conf, false);
2575 assertNoErrors(hbck);
2576
2577 Assert.assertEquals(2, repQueues.getAllQueues().size());
2578 Assert.assertNull(repQueues.getLogsInQueue("2"));
2579 Assert.assertNull(repQueues.getLogsInQueue("2-sever2"));
2580
2581 replicationAdmin.removePeer("1");
2582 repQueues.removeAllQueues();
2583 zkw.close();
2584 replicationAdmin.close();
2585 }
2586
2587
2588
2589
2590
  /**
   * A region directory that vanishes mid-check must be counted as "missing".
   * The checker subclass deletes the first region dir it visits just before
   * checking it.
   */
  @Test(timeout=180000)
  public void testQuarantineMissingRegionDir() throws Exception {
    TableName table = TableName.valueOf(name.getMethodName());
    // Inject a checker that removes exactly one region directory mid-run.
    final FileSystem fs = FileSystem.get(conf);
    HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
      @Override
      public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
          throws IOException {
        return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
          // flips once so only the first directory seen is deleted
          AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
          @Override
          protected void checkRegionDir(Path p) throws IOException {
            if (attemptedFirstHFile.compareAndSet(false, true)) {
              assertTrue(fs.delete(p, true)); // delete right before the check
            }
            super.checkRegionDir(p);
          }
        };
      }
    };
    // expected counters: 3 checked, 0 corrupt, 0 failed, 0 quarantined, 1 missing
    doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
    hbck.close();
  }
2615
2616
2617
2618
  /**
   * A dangling reference file (split-reference-style "hash.parentRegion" name
   * with no matching parent) must be reported as LINGERING_REFERENCE_HFILE
   * and cleaned up by a fix run.
   */
  @Test (timeout=180000)
  public void testLingeringReferenceFile() throws Exception {
    TableName table =
        TableName.valueOf("testLingeringReferenceFile");
    try {
      setupTable(table);
      assertEquals(ROWKEYS.length, countRows());

      // Drop a fake reference file into the first region's family dir; the
      // dotted "name.parent" form makes it look like a split reference.
      FileSystem fs = FileSystem.get(conf);
      Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
      Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
      Path famDir = new Path(regionDir, FAM_STR);
      Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
      fs.create(fakeReferenceFile);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
      // fix the lingering reference
      doFsck(conf, true);
      // verify the repair took
      assertNoErrors(doFsck(conf, false));
    } finally {
      cleanupTable(table);
    }
  }
2645
2646
2647
2648
  /**
   * An HFileLink whose target exists is healthy; once the target HFile is
   * deleted the dangling link must be reported as LINGERING_HFILELINK and be
   * gone after a fix run.
   */
  @Test(timeout = 180000)
  public void testLingeringHFileLinks() throws Exception {
    TableName table = TableName.valueOf("testLingeringHFileLinks");
    try {
      setupTable(table);

      FileSystem fs = FileSystem.get(conf);
      Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
      Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
      String regionName = regionDir.getName();
      Path famDir = new Path(regionDir, FAM_STR);
      String HFILE_NAME = "01234567abcd";
      Path hFilePath = new Path(famDir, HFILE_NAME);

      // Write an empty but structurally valid HFile to serve as link target.
      HFileContext context = new HFileContextBuilder().withIncludesTags(false).build();
      HFile.Writer w =
          HFile.getWriterFactoryNoCache(conf).withPath(fs, hFilePath).withFileContext(context)
              .create();
      w.close();

      // Create a link pointing at the HFile written above.
      HFileLink.create(conf, fs, famDir, table, regionName, HFILE_NAME);

      // Link with a live target: no errors.
      HBaseFsck hbck = doFsck(conf, false);
      assertNoErrors(hbck);

      // Delete the target so the link dangles.
      fs.delete(hFilePath, true);

      // The dangling link is detected ...
      hbck = doFsck(conf, false);
      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
          HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });

      // ... still reported during the fixing run itself ...
      hbck = doFsck(conf, true);
      assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
          HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });

      // ... and gone on the next check.
      hbck = doFsck(conf, false);
      assertNoErrors(hbck);
    } finally {
      cleanupTable(table);
    }
  }
2696
2697 @Test(timeout = 180000)
2698 public void testCorruptLinkDirectory() throws Exception {
2699 TableName table = TableName.valueOf("testLingeringHFileLinks");
2700 try {
2701 setupTable(table);
2702 FileSystem fs = FileSystem.get(conf);
2703
2704 Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
2705 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
2706 Path famDir = new Path(regionDir, FAM_STR);
2707 String regionName = regionDir.getName();
2708 String HFILE_NAME = "01234567abcd";
2709 String link = HFileLink.createHFileLinkName(table, regionName, HFILE_NAME);
2710
2711
2712 HBaseFsck hbck = doFsck(conf, false);
2713 assertNoErrors(hbck);
2714
2715
2716 fs.mkdirs(new Path(famDir, link));
2717 fs.create(new Path(new Path(famDir, link), "somefile"));
2718
2719
2720 hbck = doFsck(conf, false);
2721 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
2722 HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
2723
2724
2725 hbck = doFsck(conf, true);
2726 assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] {
2727 HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
2728
2729
2730 hbck = doFsck(conf, false);
2731 assertNoErrors(hbck);
2732 } finally {
2733 cleanupTable(table);
2734 }
2735 }
2736
2737
2738
2739
2740 @Test (timeout=180000)
2741 public void testMissingRegionInfoQualifier() throws Exception {
2742 Connection connection = ConnectionFactory.createConnection(conf);
2743 TableName table = TableName.valueOf("testMissingRegionInfoQualifier");
2744 try {
2745 setupTable(table);
2746
2747
2748 final List<Delete> deletes = new LinkedList<Delete>();
2749 Table meta = connection.getTable(TableName.META_TABLE_NAME, hbfsckExecutorService);
2750 MetaScanner.metaScan(connection, new MetaScanner.MetaScannerVisitor() {
2751
2752 @Override
2753 public boolean processRow(Result rowResult) throws IOException {
2754 HRegionInfo hri = MetaTableAccessor.getHRegionInfo(rowResult);
2755 if (hri != null && !hri.getTable().isSystemTable()) {
2756 Delete delete = new Delete(rowResult.getRow());
2757 delete.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2758 deletes.add(delete);
2759 }
2760 return true;
2761 }
2762
2763 @Override
2764 public void close() throws IOException {
2765 }
2766 });
2767 meta.delete(deletes);
2768
2769
2770 meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2771 HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
2772 meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2773 HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
2774 meta.close();
2775
2776 HBaseFsck hbck = doFsck(conf, false);
2777 assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2778
2779
2780 hbck = doFsck(conf, true);
2781
2782
2783 assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2784 } finally {
2785 cleanupTable(table);
2786 }
2787 connection.close();
2788 }
2789
2790
2791
2792
2793
  /**
   * hbck must instantiate the reporter class named by the
   * "hbasefsck.errorreporter" config key; the MockErrorReporter call counter
   * proves the configured class was actually used.
   */
  @Test (timeout=180000)
  public void testErrorReporter() throws Exception {
    try {
      MockErrorReporter.calledCount = 0;
      // Default reporter in place: the mock must not be touched.
      doFsck(conf, false);
      assertEquals(MockErrorReporter.calledCount, 0);

      // Swap in the mock and verify hbck routes many callbacks through it.
      conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
      doFsck(conf, false);
      assertTrue(MockErrorReporter.calledCount > 20);
    } finally {
      // Restore the printing reporter so later tests are unaffected.
      conf.set("hbasefsck.errorreporter",
        PrintingErrorReporter.class.getName());
      MockErrorReporter.calledCount = 0;
    }
  }
2810
  /**
   * ErrorReporter stub that increments a shared counter on every callback.
   * Used by testErrorReporter to verify hbck instantiates and invokes the
   * reporter configured via "hbasefsck.errorreporter".
   */
  static class MockErrorReporter implements ErrorReporter {
    // Total reporter callbacks observed; reset by the test between runs.
    static int calledCount = 0;

    @Override
    public void clear() {
      calledCount++;
    }

    @Override
    public void report(String message) {
      calledCount++;
    }

    @Override
    public void reportError(String message) {
      calledCount++;
    }

    @Override
    public void reportError(ERROR_CODE errorCode, String message) {
      calledCount++;
    }

    @Override
    public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
      calledCount++;
    }

    @Override
    public void reportError(ERROR_CODE errorCode,
        String message, TableInfo table, HbckInfo info) {
      calledCount++;
    }

    @Override
    public void reportError(ERROR_CODE errorCode, String message,
        TableInfo table, HbckInfo info1, HbckInfo info2) {
      calledCount++;
    }

    @Override
    public int summarize() {
      // Pre-increment so the returned value reflects this call as well.
      return ++calledCount;
    }

    @Override
    public void detail(String details) {
      calledCount++;
    }

    @Override
    public ArrayList<ERROR_CODE> getErrorList() {
      calledCount++;
      // Always empty: the mock records call counts, not actual errors.
      return new ArrayList<ERROR_CODE>();
    }

    @Override
    public void progress() {
      calledCount++;
    }

    @Override
    public void print(String message) {
      calledCount++;
    }

    @Override
    public void resetErrors() {
      calledCount++;
    }

    @Override
    public boolean tableHasErrors(TableInfo table) {
      calledCount++;
      return false;
    }
  }
2888
  /**
   * hbck should report table locks that outlive the expiration timeout as
   * EXPIRED_TABLE_LOCK and a fix run should reap them so new locks can be
   * taken. Time is advanced via an injected IncrementingEnvironmentEdge
   * rather than real sleeping.
   */
  @Test(timeout=180000)
  public void testCheckTableLocks() throws Exception {
    IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
    EnvironmentEdgeManager.injectEdge(edge);

    HBaseFsck hbck = doFsck(conf, false);
    assertNoErrors(hbck);

    ServerName mockName = ServerName.valueOf("localhost", 60000, 1);
    final TableName tableName = TableName.valueOf("foo");

    // Take a write lock on "foo"; a freshly acquired lock is not an error.
    final TableLockManager tableLockManager =
        TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
    TableLock writeLock = tableLockManager.writeLock(tableName, "testCheckTableLocks");
    writeLock.acquire();
    hbck = doFsck(conf, false);
    assertNoErrors(hbck);

    // Advance past the lock expiration timeout: the held lock is now stale.
    edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
        TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));

    hbck = doFsck(conf, false);
    assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});

    // Start a second locker that blocks behind the held lock.
    // NOTE(review): the local is named readLock but writeLock() is called —
    // misleading name. Also fail() here throws in the background thread, so
    // it cannot fail the JUnit test directly; the hbck assertions below are
    // what actually verify the scenario.
    final CountDownLatch latch = new CountDownLatch(1);
    new Thread() {
      @Override
      public void run() {
        TableLock readLock = tableLockManager.writeLock(tableName, "testCheckTableLocks");
        try {
          latch.countDown();
          readLock.acquire();
        } catch (IOException ex) {
          fail();
        } catch (IllegalStateException ex) {
          return;
        }
        fail("should not have come here");
      };
    }.start();

    latch.await();      // thread is about to call acquire()
    Threads.sleep(300); // give it real time to block on the lock

    hbck = doFsck(conf, false);
    // Only the first (expired) lock is reported; the pending one is young.
    assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});

    // Expire the pending lock as well.
    edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
        TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));

    hbck = doFsck(conf, false);
    // Both the held and the pending lock are now expired.
    assertErrors(
      hbck,
      new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK, ERROR_CODE.EXPIRED_TABLE_LOCK});

    Configuration localConf = new Configuration(conf);
    // Use a tiny expiration timeout for the fix run so it treats the locks
    // as expired — presumably needed for the reaping path; confirm.
    localConf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1);
    Threads.sleep(10);
    hbck = doFsck(localConf, true); // fix run reaps the expired locks

    hbck = doFsck(localConf, false);
    assertNoErrors(hbck);

    // After reaping, a fresh write lock is acquirable without blocking.
    writeLock = tableLockManager.writeLock(tableName, "should acquire without blocking");
    writeLock.acquire();
    writeLock.release();
    tableLockManager.tableDeleted(tableName);
  }
2963
2964
2965
2966
  /**
   * An orphaned table znode stuck in ENABLING makes table creation fail with
   * TableExistsException; hbck must report ORPHANED_ZK_TABLE_ENTRY and a fix
   * run must clear it so the table can be created normally afterwards.
   */
  @Test
  public void testOrphanedTableZNode() throws Exception {
    TableName table = TableName.valueOf("testOrphanedZKTableEntry");

    try {
      // Fake an in-progress create by forcing the table state to ENABLING.
      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getTableStateManager()
        .setTableState(table, ZooKeeperProtos.Table.State.ENABLING);

      try {
        setupTable(table);
        Assert.fail(
          "Create table should fail when its ZNode has already existed with ENABLING state.");
      } catch(TableExistsException t) {
        // expected: the orphaned znode makes the table appear to exist
      }

      // The table was never really created, so cleanup may legitimately fail.
      try {
        cleanupTable(table);
      } catch (IOException e) {
        // intentionally ignored (best-effort cleanup of a half-created table)
      }

      HBaseFsck hbck = doFsck(conf, false);
      assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));

      // Repair the orphaned entry ...
      hbck = doFsck(conf, true);

      // ... and verify it is no longer reported.
      hbck = doFsck(conf, false);
      assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));

      // Table creation now succeeds.
      setupTable(table);
    } finally {
      // Best-effort cleanup: the table may or may not exist at this point.
      try {
        cleanupTable(table);
      } catch (IOException e) {
        // intentionally ignored
      }
    }
  }
3012
  /**
   * With hbase:meta unassigned, hbck must report NO_META_REGION (plus an
   * UNKNOWN error — see assertion) and a fix run must redeploy meta.
   */
  @Test (timeout=180000)
  public void testMetaOffline() throws Exception {
    // Healthy cluster to start with.
    HBaseFsck hbck = doFsck(conf, false);
    assertNoErrors(hbck);
    // Unassign meta only (keep its HDFS data and .regioninfo).
    deleteMetaRegion(conf, true, false, false);
    hbck = doFsck(conf, false);
    // NO_META_REGION accompanied by UNKNOWN — presumably because hbck cannot
    // complete its inspection without meta online; confirm.
    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
    // The fixing run still reports what it found ...
    hbck = doFsck(conf, true);
    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
    // ... but afterwards meta is back and the cluster is clean.
    hbck = doFsck(conf, false);
    assertNoErrors(hbck);
  }
3028
3029 private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
3030 boolean regionInfoOnly) throws IOException, InterruptedException {
3031 HRegionLocation metaLocation = connection.getRegionLocator(TableName.META_TABLE_NAME)
3032 .getRegionLocation(HConstants.EMPTY_START_ROW);
3033 ServerName hsa = metaLocation.getServerName();
3034 HRegionInfo hri = metaLocation.getRegionInfo();
3035 if (unassign) {
3036 LOG.info("Undeploying meta region " + hri + " from server " + hsa);
3037 try (Connection unmanagedConnection = ConnectionFactory.createConnection(conf)) {
3038 undeployRegion(unmanagedConnection, hsa, hri);
3039 }
3040 }
3041
3042 if (regionInfoOnly) {
3043 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
3044 Path rootDir = FSUtils.getRootDir(conf);
3045 FileSystem fs = rootDir.getFileSystem(conf);
3046 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
3047 hri.getEncodedName());
3048 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
3049 fs.delete(hriPath, true);
3050 }
3051
3052 if (hdfs) {
3053 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
3054 Path rootDir = FSUtils.getRootDir(conf);
3055 FileSystem fs = rootDir.getFileSystem(conf);
3056 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
3057 hri.getEncodedName());
3058 HBaseFsck.debugLsr(conf, p);
3059 boolean success = fs.delete(p, true);
3060 LOG.info("Deleted " + p + " sucessfully? " + success);
3061 HBaseFsck.debugLsr(conf, p);
3062 }
3063 }
3064
  /**
   * A table created with no split keys has a single region spanning the whole
   * key space; deleting its data in HDFS only must surface as NOT_IN_HDFS and
   * be repairable over two fix passes.
   */
  @Test (timeout=180000)
  public void testTableWithNoRegions() throws Exception {
    // create a table whose single region covers (EMPTY_START, EMPTY_END)
    TableName table =
        TableName.valueOf(name.getMethodName());
    try {
      // null split keys => exactly one region
      HTableDescriptor desc = new HTableDescriptor(table);
      HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
      desc.addFamily(hcd);
      createTable(TEST_UTIL, desc, null);
      tbl = (HTable) connection.getTable(table, tableExecutorService);

      // Delete only the region's HDFS data (last deleteRegion flag); META and
      // the deployment stay in place, matching NOT_IN_HDFS below.
      deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW,
          HConstants.EMPTY_END_ROW, false, false, true);

      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });

      doFsck(conf, true);

      // Second fix pass — presumably the first leaves follow-up work; confirm.
      doFsck(conf, true);

      // The cluster should now be clean.
      assertNoErrors(doFsck(conf, false));
    } finally {
      cleanupTable(table);
    }

  }
3098
3099 @Test (timeout=180000)
3100 public void testHbckAfterRegionMerge() throws Exception {
3101 TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
3102 Table meta = null;
3103 try {
3104
3105 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
3106 setupTable(table);
3107 assertEquals(ROWKEYS.length, countRows());
3108
3109
3110 admin.flush(table);
3111 HRegionInfo region1 = tbl.getRegionLocation(Bytes.toBytes("A")).getRegionInfo();
3112 HRegionInfo region2 = tbl.getRegionLocation(Bytes.toBytes("B")).getRegionInfo();
3113
3114 int regionCountBeforeMerge = tbl.getRegionLocations().size();
3115
3116 assertNotEquals(region1, region2);
3117
3118
3119 admin.mergeRegions(region1.getEncodedNameAsBytes(),
3120 region2.getEncodedNameAsBytes(), false);
3121
3122
3123 long timeout = System.currentTimeMillis() + 30 * 1000;
3124 while (true) {
3125 if (tbl.getRegionLocations().size() < regionCountBeforeMerge) {
3126 break;
3127 } else if (System.currentTimeMillis() > timeout) {
3128 fail("Time out waiting on region " + region1.getEncodedName()
3129 + " and " + region2.getEncodedName() + " be merged");
3130 }
3131 Thread.sleep(10);
3132 }
3133
3134 assertEquals(ROWKEYS.length, countRows());
3135
3136 HBaseFsck hbck = doFsck(conf, false);
3137 assertNoErrors(hbck);
3138
3139 } finally {
3140 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
3141 cleanupTable(table);
3142 IOUtils.closeQuietly(meta);
3143 }
3144 }
3145
  /**
   * Region-boundary check: store files matching their region's key range are
   * clean; moving the second region's store file into the first region's
   * family dir puts keys outside that region's range and must be flagged as
   * BOUNDARIES_ERROR.
   */
  @Test (timeout = 180000)
  public void testRegionBoundariesCheck() throws Exception {
    TableName tableName = TableName.valueOf("testRegionBoundariesCheck");

    // Pre-split table using the shared SPLITS keys.
    HTableDescriptor desc = new HTableDescriptor(tableName);
    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
    desc.addFamily(hcd);
    createTable(TEST_UTIL, desc, SPLITS);

    Table table = connection.getTable(tableName, tableExecutorService);
    List<Put> puts = new ArrayList<>();

    // Rows landing in the first region (digits sort before "A").
    puts.add(new Put(Bytes.toBytes("0")).addColumn(FAM, Bytes.toBytes("col"),
        Bytes.toBytes("val")));
    puts.add(new Put(Bytes.toBytes("999")).addColumn(FAM, Bytes.toBytes("col"),
        Bytes.toBytes("val")));

    // Rows landing in the second region ("A"-prefixed keys).
    puts.add(new Put(Bytes.toBytes("AA")).addColumn(FAM, Bytes.toBytes("col"),
        Bytes.toBytes("val")));
    puts.add(new Put(Bytes.toBytes("AZ")).addColumn(FAM, Bytes.toBytes("col"),
        Bytes.toBytes("val")));

    table.put(puts);

    // Disable/enable cycle — presumably to flush the puts into HFiles before
    // moving files around; confirm.
    admin.disableTable(tableName);
    admin.enableTable(tableName);

    // All store files agree with their region boundaries.
    HBaseFsck hbck = checkRegionBoundaries(conf);
    assertNoErrors(hbck);

    // Offline the table so its store files can be moved.
    admin.disableTable(tableName);

    List<HRegionInfo> tableRegions = admin.getTableRegions(tableName);
    HRegionInfo firstRegion = tableRegions.get(0);
    HRegionInfo secondRegion = tableRegions.get(1);

    // Locate one HFile in each of the first two regions.
    FileSystem fs = FileSystem.get(conf);
    Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), tableName);
    Path firstRegionFamDir = new Path(new Path(tableDir, firstRegion.getEncodedName()), FAM_STR);
    Path hfileInFirstRegion = getHFilePath(fs, firstRegionFamDir);
    Path secondRegionFamDir = new Path(new Path(tableDir, secondRegion.getEncodedName()), FAM_STR);
    Path hfileInSecondRegion = getHFilePath(fs, secondRegionFamDir);

    // Move BOTH files into the first region's family dir: the second
    // region's file now holds keys outside the first region's boundaries.
    fs.rename(hfileInFirstRegion, new Path(firstRegionFamDir, "0"));
    fs.rename(hfileInSecondRegion, new Path(firstRegionFamDir, "1"));

    admin.enableTable(tableName);

    // The boundary check must now flag the out-of-range store file.
    // NOTE(review): this test never cleans up tableName even though the
    // cluster is shared with later tests — confirm this is intentional.
    hbck = checkRegionBoundaries(conf);
    assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.BOUNDARIES_ERROR });
  }
3206
3207 private static Path getHFilePath(FileSystem fs, Path famDir) throws IOException {
3208 FileStatus[] hfFss = fs.listStatus(famDir);
3209 for (FileStatus hfs : hfFss) {
3210 if (!hfs.isDirectory()) {
3211 return hfs.getPath();
3212 }
3213 }
3214 return null;
3215 }
3216
// JUnit rule exposing the name of the currently-running test method.
@org.junit.Rule
public TestName name = new TestName();
3219
3220 @Test (timeout=180000)
3221 public void testReadOnlyProperty() throws Exception {
3222 HBaseFsck hbck = doFsck(conf, false);
3223 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
3224 hbck.shouldIgnorePreCheckPermission());
3225
3226 hbck = doFsck(conf, true);
3227 Assert.assertEquals("shouldIgnorePreCheckPermission", false,
3228 hbck.shouldIgnorePreCheckPermission());
3229
3230 hbck = doFsck(conf, true);
3231 hbck.setIgnorePreCheckPermission(true);
3232 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
3233 hbck.shouldIgnorePreCheckPermission());
3234 }
3235
3236 @Test (timeout=180000)
3237 public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
3238 TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
3239 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
3240 try {
3241 HTableDescriptor desc = new HTableDescriptor(table);
3242 desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
3243 createTable(TEST_UTIL, desc, null);
3244 tbl = new HTable(cluster.getConfiguration(), desc.getTableName());
3245 for (int i = 0; i < 5; i++) {
3246 Put p1 = new Put(("r" + i).getBytes());
3247 p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
3248 tbl.put(p1);
3249 }
3250 admin.flush(desc.getTableName());
3251 List<HRegion> regions = cluster.getRegions(desc.getTableName());
3252 int serverWith = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
3253 HRegionServer regionServer = cluster.getRegionServer(serverWith);
3254 cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
3255 SplitTransactionImpl st = new SplitTransactionImpl(regions.get(0), Bytes.toBytes("r3"));
3256 st.prepare();
3257 st.stepsBeforePONR(regionServer, regionServer, false);
3258 AssignmentManager am = cluster.getMaster().getAssignmentManager();
3259 Set<RegionState> regionsInTransition = am.getRegionStates().getRegionsInTransition();
3260 for (RegionState state : regionsInTransition) {
3261 am.regionOffline(state.getRegion());
3262 }
3263 ZKAssign.deleteNodeFailSilent(regionServer.getZooKeeper(), regions.get(0).getRegionInfo());
3264 Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>();
3265 regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName());
3266 am.assign(regionsMap);
3267 am.waitForAssignment(regions.get(0).getRegionInfo());
3268 HBaseFsck hbck = doFsck(conf, false);
3269 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
3270 ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
3271
3272 assertEquals(0, hbck.getOverlapGroups(table).size());
3273
3274
3275 assertErrors(
3276 doFsck(
3277 conf, false, true, false, false, false, false, false, false, false, false, false, false,
3278 false, null),
3279 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
3280 ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
3281
3282
3283 assertNoErrors(doFsck(conf, false));
3284 assertEquals(5, countRows());
3285 } finally {
3286 if (tbl != null) {
3287 tbl.close();
3288 tbl = null;
3289 }
3290 cleanupTable(table);
3291 }
3292 }
3293
3294
3295
3296
3297
3298
3299 @Test(timeout = 180000)
3300 public void testNoDataLossAfterRegionOverlapFix() throws Exception {
3301 int startRow = 0;
3302 int endRow = 5;
3303 TableName table = TableName.valueOf("testNoDataLossAfterRegionOverlapFix");
3304 try {
3305 TEST_UTIL.createTable(table, FAM);
3306 tbl = new HTable(TEST_UTIL.getConfiguration(), table);
3307
3308 TEST_UTIL.loadNumericRows(tbl, FAM, startRow, endRow);
3309 admin.flush(table);
3310
3311 HRegionInfo hriOverlap =
3312 createRegion(tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW, Bytes.toBytes("3"));
3313 TEST_UTIL.assignRegion(hriOverlap);
3314
3315 HBaseFsck hbck = doFsck(conf, false);
3316 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.DUPE_STARTKEYS });
3317 assertEquals(2, hbck.getOverlapGroups(table).size());
3318
3319 doFsck(conf, true);
3320
3321 HBaseFsck hbck2 = doFsck(conf, false);
3322 assertNoErrors(hbck2);
3323 assertEquals(0, hbck2.getOverlapGroups(table).size());
3324
3325 for (int i = startRow; i < endRow; i++) {
3326 assertEquals(endRow - i,
3327 countRows(Bytes.toBytes(String.valueOf(i)), HConstants.EMPTY_BYTE_ARRAY));
3328 }
3329 } finally {
3330 if (tbl != null) {
3331 tbl.close();
3332 tbl = null;
3333 }
3334 cleanupTable(table);
3335 }
3336 }
3337
3338 public static class MasterSyncObserver extends BaseMasterObserver {
3339 volatile CountDownLatch tableCreationLatch = null;
3340 volatile CountDownLatch tableDeletionLatch = null;
3341
3342 @Override
3343 public void postCreateTableHandler(final ObserverContext<MasterCoprocessorEnvironment> ctx,
3344 HTableDescriptor desc, HRegionInfo[] regions) throws IOException {
3345
3346 if (tableCreationLatch != null) {
3347 tableCreationLatch.countDown();
3348 }
3349 }
3350
3351 @Override
3352 public void postDeleteTableHandler(final ObserverContext<MasterCoprocessorEnvironment> ctx,
3353 TableName tableName)
3354 throws IOException {
3355
3356 if (tableDeletionLatch != null) {
3357 tableDeletionLatch.countDown();
3358 }
3359 }
3360 }
3361
3362 public static void createTable(HBaseTestingUtility testUtil, HTableDescriptor htd,
3363 byte [][] splitKeys) throws Exception {
3364
3365
3366 MasterSyncObserver observer = (MasterSyncObserver)testUtil.getHBaseCluster().getMaster()
3367 .getMasterCoprocessorHost().findCoprocessor(MasterSyncObserver.class.getName());
3368 observer.tableCreationLatch = new CountDownLatch(1);
3369 if (splitKeys != null) {
3370 admin.createTable(htd, splitKeys);
3371 } else {
3372 admin.createTable(htd);
3373 }
3374 observer.tableCreationLatch.await();
3375 observer.tableCreationLatch = null;
3376 testUtil.waitUntilAllRegionsAssigned(htd.getTableName());
3377 }
3378
3379 public static void deleteTable(HBaseTestingUtility testUtil, TableName tableName)
3380 throws Exception {
3381
3382
3383 MasterSyncObserver observer = (MasterSyncObserver)testUtil.getHBaseCluster().getMaster()
3384 .getMasterCoprocessorHost().findCoprocessor(MasterSyncObserver.class.getName());
3385 observer.tableDeletionLatch = new CountDownLatch(1);
3386 try {
3387 admin.disableTable(tableName);
3388 } catch (Exception e) {
3389 LOG.debug("Table: " + tableName + " already disabled, so just deleting it.");
3390 }
3391 admin.deleteTable(tableName);
3392 observer.tableDeletionLatch.await();
3393 observer.tableDeletionLatch = null;
3394 }
3395 }