1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master;
20
21 import static org.apache.hadoop.hbase.SplitLogCounters.tot_mgr_wait_for_zk_delete;
22 import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_final_transition_failed;
23 import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_preempt_task;
24 import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_acquired;
25 import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_done;
26 import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_err;
27 import static org.apache.hadoop.hbase.SplitLogCounters.tot_wkr_task_resigned;
28 import static org.junit.Assert.assertEquals;
29 import static org.junit.Assert.assertFalse;
30 import static org.junit.Assert.assertNotNull;
31 import static org.junit.Assert.assertTrue;
32 import static org.junit.Assert.fail;
33
34 import java.io.IOException;
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.HashSet;
38 import java.util.Iterator;
39 import java.util.LinkedList;
40 import java.util.List;
41 import java.util.NavigableSet;
42 import java.util.Set;
43 import java.util.concurrent.ExecutorService;
44 import java.util.concurrent.Executors;
45 import java.util.concurrent.Future;
46 import java.util.concurrent.TimeUnit;
47 import java.util.concurrent.TimeoutException;
48 import java.util.concurrent.atomic.AtomicLong;
49
50 import org.apache.commons.logging.Log;
51 import org.apache.commons.logging.LogFactory;
52 import org.apache.hadoop.conf.Configuration;
53 import org.apache.hadoop.fs.FSDataOutputStream;
54 import org.apache.hadoop.fs.FileStatus;
55 import org.apache.hadoop.fs.FileSystem;
56 import org.apache.hadoop.fs.Path;
57 import org.apache.hadoop.fs.PathFilter;
58 import org.apache.hadoop.hbase.HBaseConfiguration;
59 import org.apache.hadoop.hbase.HBaseTestingUtility;
60 import org.apache.hadoop.hbase.HColumnDescriptor;
61 import org.apache.hadoop.hbase.HConstants;
62 import org.apache.hadoop.hbase.HRegionInfo;
63 import org.apache.hadoop.hbase.HTableDescriptor;
64 import org.apache.hadoop.hbase.KeyValue;
65 import org.apache.hadoop.hbase.MiniHBaseCluster;
66 import org.apache.hadoop.hbase.NamespaceDescriptor;
67 import org.apache.hadoop.hbase.ServerName;
68 import org.apache.hadoop.hbase.SplitLogCounters;
69 import org.apache.hadoop.hbase.TableName;
70 import org.apache.hadoop.hbase.Waiter;
71 import org.apache.hadoop.hbase.client.ClusterConnection;
72 import org.apache.hadoop.hbase.client.ConnectionUtils;
73 import org.apache.hadoop.hbase.client.Delete;
74 import org.apache.hadoop.hbase.client.Get;
75 import org.apache.hadoop.hbase.client.Increment;
76 import org.apache.hadoop.hbase.client.NonceGenerator;
77 import org.apache.hadoop.hbase.client.PerClientRandomNonceGenerator;
78 import org.apache.hadoop.hbase.client.Put;
79 import org.apache.hadoop.hbase.client.RegionLocator;
80 import org.apache.hadoop.hbase.client.Result;
81 import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
82 import org.apache.hadoop.hbase.client.Table;
83 import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
84 import org.apache.hadoop.hbase.coordination.ZKSplitLogManagerCoordination;
85 import org.apache.hadoop.hbase.exceptions.OperationConflictException;
86 import org.apache.hadoop.hbase.exceptions.RegionInRecoveryException;
87 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
88 import org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch;
89 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
90 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
91 import org.apache.hadoop.hbase.regionserver.HRegionServer;
92 import org.apache.hadoop.hbase.regionserver.Region;
93 import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
94 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
95 import org.apache.hadoop.hbase.testclassification.LargeTests;
96 import org.apache.hadoop.hbase.util.Bytes;
97 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
98 import org.apache.hadoop.hbase.util.FSUtils;
99 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
100 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
101 import org.apache.hadoop.hbase.util.Threads;
102 import org.apache.hadoop.hbase.wal.DefaultWALProvider;
103 import org.apache.hadoop.hbase.wal.WAL;
104 import org.apache.hadoop.hbase.wal.WALFactory;
105 import org.apache.hadoop.hbase.wal.WALSplitter;
106 import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
107 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
108 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
109 import org.apache.hadoop.hdfs.MiniDFSCluster;
110 import org.junit.After;
111 import org.junit.AfterClass;
112 import org.junit.Assert;
113 import org.junit.Before;
114 import org.junit.BeforeClass;
115 import org.junit.Ignore;
116 import org.junit.Test;
117 import org.junit.experimental.categories.Category;
118
119 @Category({LargeTests.class})
120 @SuppressWarnings("deprecation")
121 public class TestDistributedLogSplitting {
122 private static final Log LOG = LogFactory.getLog(TestSplitLogManager.class);
  // Class initializer: runs once when the class is loaded, before any @BeforeClass.
  static {
    // Disable HDFS short-circuit reads for these tests. NOTE(review): presumably
    // to avoid flakiness with the in-process mini DFS cluster -- the original
    // motivation is not recorded here; confirm before removing.
    System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
  }
133
134
  // Cluster shape used by startCluster(): two masters (one active + one backup
  // for the master-failover tests) and up to six region servers.
  static final int NUM_MASTERS = 2;
  static final int NUM_RS = 6;

  MiniHBaseCluster cluster;              // live mini cluster; set in startCluster()
  HMaster master;                        // active master of the mini cluster
  Configuration conf;                    // per-test configuration; reset in before()
  static Configuration originalConf;     // pristine configuration captured in setup()
  static HBaseTestingUtility TEST_UTIL;  // shared utility; replaced by startCluster()
  static MiniDFSCluster dfsCluster;      // mini DFS cluster shared by all tests
  static MiniZooKeeperCluster zkCluster; // mini ZooKeeper cluster shared by all tests
145
  /**
   * One-time setup: starts a single-node mini DFS cluster and a mini ZooKeeper
   * cluster that are reused by every test, and captures the pristine
   * configuration so each test can start from a clean copy (see before()).
   */
  @BeforeClass
  public static void setup() throws Exception {
    TEST_UTIL = new HBaseTestingUtility(HBaseConfiguration.create());
    dfsCluster = TEST_UTIL.startMiniDFSCluster(1);
    zkCluster = TEST_UTIL.startMiniZKCluster();
    originalConf = TEST_UTIL.getConfiguration();
  }
153
  /**
   * One-time teardown: stops the shared mini clusters.
   * NOTE(review): ZK and DFS are stopped before the HBase cluster, the reverse
   * of the dependency order. This matches the original statement order -- the
   * HBase shutdown here is a last-resort cleanup since each test already stops
   * its HBase cluster in after(); confirm before reordering.
   */
  @AfterClass
  public static void tearDown() throws IOException {
    TEST_UTIL.shutdownMiniZKCluster();
    TEST_UTIL.shutdownMiniDFSCluster();
    TEST_UTIL.shutdownMiniHBaseCluster();
  }
160
161 private void startCluster(int num_rs) throws Exception {
162 SplitLogCounters.resetCounters();
163 LOG.info("Starting cluster");
164 conf.getLong("hbase.splitlog.max.resubmit", 0);
165
166 conf.setInt("zookeeper.recovery.retry", 0);
167 conf.setInt(HConstants.REGIONSERVER_INFO_PORT, -1);
168 conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, (float) 100.0);
169 conf.setInt("hbase.regionserver.wal.max.splitters", 3);
170 conf.setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
171 TEST_UTIL.shutdownMiniHBaseCluster();
172 TEST_UTIL = new HBaseTestingUtility(conf);
173 TEST_UTIL.setDFSCluster(dfsCluster);
174 TEST_UTIL.setZkCluster(zkCluster);
175 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, num_rs);
176 cluster = TEST_UTIL.getHBaseCluster();
177 LOG.info("Waiting for active/ready master");
178 cluster.waitForActiveAndReadyMaster();
179 master = cluster.getMaster();
180 while (cluster.getLiveRegionServerThreads().size() < num_rs) {
181 Threads.sleep(10);
182 }
183 }
184
  /**
   * Per-test setup: start from a fresh copy of the pristine configuration
   * captured in setup(), so settings made by one test do not leak into the next.
   */
  @Before
  public void before() throws Exception {
    conf = HBaseConfiguration.create(originalConf);
  }
190
  /**
   * Per-test teardown: abort all masters, stop the mini HBase cluster, then
   * wipe the HBase root directory and the /hbase znode so the next test starts
   * from a clean slate.
   */
  @After
  public void after() throws Exception {
    try {
      if (TEST_UTIL.getHBaseCluster() != null) {
        // Abort (not gracefully stop) the masters so shutdown cannot hang on
        // in-flight recovery work started by the test.
        for (MasterThread mt : TEST_UTIL.getHBaseCluster().getLiveMasterThreads()) {
          mt.getMaster().abort("closing...", null);
        }
      }
      TEST_UTIL.shutdownMiniHBaseCluster();
    } finally {
      // Always clear on-disk and ZK state, even if the cluster shutdown failed.
      TEST_UTIL.getTestFileSystem().delete(FSUtils.getRootDir(TEST_UTIL.getConfiguration()), true);
      ZKUtil.deleteNodeRecursively(TEST_UTIL.getZooKeeperWatcher(), "/hbase");
    }
  }
205
  /**
   * Classic (non-replay) distributed log splitting: writes NUM_LOG_LINES edits
   * into the WALs of one region server, runs distributed splitting over that
   * server's WAL directory, and verifies every edit lands in some region's
   * recovered.edits files and that the WAL directory is removed afterwards.
   */
  @Ignore("DLR is broken by HBASE-12751") @Test (timeout=300000)
  public void testRecoveredEdits() throws Exception {
    LOG.info("testRecoveredEdits");
    // Small block size so makeWAL() produces more than one WAL file.
    conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024);
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
    startCluster(NUM_RS);

    final int NUM_LOG_LINES = 1000;
    final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
    // Turn off the balancer so regions do not move while edits are written.
    master.balanceSwitch(false);
    FileSystem fs = master.getMasterFileSystem().getFileSystem();

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();

    Path rootdir = FSUtils.getRootDir(conf);

    Table t = installTable(new ZooKeeperWatcher(conf, "table-creation", null),
        "table", "family", 40);
    try {
      TableName table = t.getName();
      List<HRegionInfo> regions = null;
      HRegionServer hrs = null;
      // Pick the first RS that hosts at least one region of the test table.
      for (int i = 0; i < NUM_RS; i++) {
        boolean foundRs = false;
        hrs = rsts.get(i).getRegionServer();
        regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
        for (HRegionInfo region : regions) {
          if (region.getTable().getNameAsString().equalsIgnoreCase("table")) {
            foundRs = true;
            break;
          }
        }
        if (foundRs) break;
      }
      final Path logDir = new Path(rootdir, DefaultWALProvider.getWALDirectoryName(hrs
          .getServerName().toString()));

      LOG.info("#regions = " + regions.size());
      // Drop system-namespace regions: only the test table's edits are counted.
      Iterator<HRegionInfo> it = regions.iterator();
      while (it.hasNext()) {
        HRegionInfo region = it.next();
        if (region.getTable().getNamespaceAsString()
            .equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
          it.remove();
        }
      }

      makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);

      // Run the distributed split over the chosen server's WAL directory.
      slm.splitLogDistributed(logDir);

      int count = 0;
      for (HRegionInfo hri : regions) {
        // Locate this region's recovered.edits directory.
        Path tdir = FSUtils.getTableDir(rootdir, table);
        Path editsdir =
            WALSplitter.getRegionDirRecoveredEditsDir(
                FSUtils.getRegionDirFromTableDir(tdir, hri));
        LOG.debug("checking edits dir " + editsdir);
        FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
          @Override
          public boolean accept(Path p) {
            // Skip sequence-id marker files; they carry no edits.
            if (WALSplitter.isSequenceIdFile(p)) {
              return false;
            }
            return true;
          }
        });
        assertTrue(
            "edits dir should have more than a single file in it. instead has " + files.length,
            files.length > 1);
        for (int i = 0; i < files.length; i++) {
          int c = countWAL(files[i].getPath(), fs, conf);
          count += c;
        }
        LOG.info(count + " edits in " + files.length + " recovered edits files.");
      }

      // The dead server's WAL directory must be gone after a successful split.
      assertFalse(fs.exists(logDir));
      // Every written edit must appear in exactly one recovered.edits file.
      assertEquals(NUM_LOG_LINES, count);
    } finally {
      if (t != null) t.close();
    }
  }
294
  /**
   * Distributed log replay with a non-meta-carrying RS going down: writes edits
   * to one region server, aborts it, and verifies all edits are visible again
   * (row count matches) once recovery completes.
   */
  @Ignore("DLR is broken by HBASE-12751") @Test(timeout = 300000)
  public void testLogReplayWithNonMetaRSDown() throws Exception {
    LOG.info("testLogReplayWithNonMetaRSDown");
    // Small block size so makeWAL() produces more than one WAL file.
    conf.setLong("hbase.regionserver.hlog.blocksize", 30 * 1024);
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;
    // Turn off the balancer so regions stay where the WAL edits were written.
    master.balanceSwitch(false);

    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    try {
      // false => choose an RS that is NOT carrying hbase:meta.
      HRegionServer hrs = findRSToKill(false, "table");
      List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
      makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);

      // Kill the RS and check that all edits come back after recovery.
      this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
    } finally {
      if (ht != null) ht.close();
      if (zkw != null) zkw.close();
    }
  }
321
322 private static class NonceGeneratorWithDups extends PerClientRandomNonceGenerator {
323 private boolean isDups = false;
324 private LinkedList<Long> nonces = new LinkedList<Long>();
325
326 public void startDups() {
327 isDups = true;
328 }
329
330 @Override
331 public long newNonce() {
332 long nonce = isDups ? nonces.removeFirst() : super.newNonce();
333 if (!isDups) {
334 nonces.add(nonce);
335 }
336 return nonce;
337 }
338 }
339
  /**
   * Verifies nonces survive distributed log replay: performs one increment per
   * region of the test table, kills an RS, then re-issues the exact same
   * increments with the same nonces (via NonceGeneratorWithDups) and expects
   * every one to be rejected as a duplicate operation.
   */
  @Ignore("DLR is broken by HBASE-12751") @Test(timeout = 300000)
  public void testNonceRecovery() throws Exception {
    LOG.info("testNonceRecovery");
    final String TABLE_NAME = "table";
    final String FAMILY_NAME = "family";
    final int NUM_REGIONS_TO_CREATE = 40;

    conf.setLong("hbase.regionserver.hlog.blocksize", 100*1024);
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    master.balanceSwitch(false);

    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, TABLE_NAME, FAMILY_NAME, NUM_REGIONS_TO_CREATE);
    // Swap in the recording nonce generator; keep the old one to restore later.
    NonceGeneratorWithDups ng = new NonceGeneratorWithDups();
    NonceGenerator oldNg =
        ConnectionUtils.injectNonceGeneratorForTesting(
            (ClusterConnection)TEST_UTIL.getConnection(), ng);

    try {
      List<Increment> reqs = new ArrayList<Increment>();
      // One increment per region of the test table, keyed inside that region.
      for (RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
        HRegionServer hrs = rst.getRegionServer();
        List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
        for (HRegionInfo hri : hris) {
          if (TABLE_NAME.equalsIgnoreCase(hri.getTable().getNameAsString())) {
            byte[] key = hri.getStartKey();
            if (key == null || key.length == 0) {
              // First region (empty start key): derive a key just below its end key.
              key = Bytes.copy(hri.getEndKey());
              --(key[key.length - 1]);
            }
            Increment incr = new Increment(key);
            incr.addColumn(Bytes.toBytes(FAMILY_NAME), Bytes.toBytes("q"), 1);
            ht.increment(incr);
            reqs.add(incr);
          }
        }
      }

      HRegionServer hrs = findRSToKill(false, "table");
      abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
      // Replay the same increments with the same nonces: all must conflict.
      ng.startDups();
      for (Increment incr : reqs) {
        try {
          ht.increment(incr);
          fail("should have thrown");
        } catch (OperationConflictException ope) {
          LOG.debug("Caught as expected: " + ope.getMessage());
        }
      }
    } finally {
      // Restore the original nonce generator regardless of test outcome.
      ConnectionUtils.injectNonceGeneratorForTesting((ClusterConnection)
          TEST_UTIL.getConnection(), oldNg);
      if (ht != null) ht.close();
      if (zkw != null) zkw.close();
    }
  }
397
  /**
   * Distributed log replay when the dead RS also carried hbase:meta: writes
   * edits, aborts the meta-carrying region server, and verifies all edits are
   * visible again once recovery completes.
   */
  @Ignore("DLR is broken by HBASE-12751") @Test(timeout = 300000)
  public void testLogReplayWithMetaRSDown() throws Exception {
    LOG.info("testRecoveredEditsReplayWithMetaRSDown");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;
    // Turn off the balancer so regions stay put during the test.
    master.balanceSwitch(false);

    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    try {
      // true => choose the RS that IS carrying hbase:meta.
      HRegionServer hrs = findRSToKill(true, "table");
      List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
      makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);

      this.abortRSAndVerifyRecovery(hrs, ht, zkw, NUM_REGIONS_TO_CREATE, NUM_LOG_LINES);
    } finally {
      if (ht != null) ht.close();
      if (zkw != null) zkw.close();
    }
  }
422
  /**
   * Aborts the given region server, waits for full recovery (see
   * {@link #abortRSAndWaitForRecovery}), then asserts the table still holds
   * {@code numofLines} rows.
   */
  private void abortRSAndVerifyRecovery(HRegionServer hrs, Table ht, final ZooKeeperWatcher zkw,
      final int numRegions, final int numofLines) throws Exception {

    abortRSAndWaitForRecovery(hrs, zkw, numRegions);
    assertEquals(numofLines, TEST_UTIL.countRows(ht));
  }
429
  /**
   * Aborts the given region server and blocks until recovery completes:
   * (1) the RS thread is gone, (2) at least numRegions + 1 regions are back
   * online (+1 presumably accounts for hbase:meta -- TODO confirm), and
   * (3) nothing remains under the recovering-regions znode.
   */
  private void abortRSAndWaitForRecovery(HRegionServer hrs, final ZooKeeperWatcher zkw,
      final int numRegions) throws Exception {
    final MiniHBaseCluster tmpCluster = this.cluster;

    // Abort the region server.
    LOG.info("Aborting region server: " + hrs.getServerName());
    hrs.abort("testing");

    // Wait for the abort to complete (one fewer live RS thread).
    TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (tmpCluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
      }
    });

    // Wait for all regions to come back online somewhere in the cluster.
    TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return (HBaseTestingUtility.getAllOnlineRegions(tmpCluster).size()
            >= (numRegions + 1));
      }
    });

    // Wait until the recovering-regions znode is empty, i.e. replay finished.
    TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
            zkw.recoveringRegionsZNode, false);
        return (recoveringRegions != null && recoveringRegions.size() == 0);
      }
    });
  }
465
  /**
   * Master failover with pending log-SPLITTING work (distributed log replay
   * off): aborts the active master, then an RS holding unflushed edits, and
   * verifies the remaining/backup master completes the split so all edits
   * become visible.
   */
  @Ignore("DLR is broken by HBASE-12751") @Test(timeout = 300000)
  public void testMasterStartsUpWithLogSplittingWork() throws Exception {
    LOG.info("testMasterStartsUpWithLogSplittingWork");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false);
    conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
    startCluster(NUM_RS);

    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;
    // Turn off the balancer so regions stay where the edits were written.
    master.balanceSwitch(false);

    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    try {
      HRegionServer hrs = findRSToKill(false, "table");
      List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
      makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);

      // Abort the active master first so the RS death below leaves splitting
      // work for the next master to discover on startup.
      abortMaster(cluster);

      // Abort the region server holding the edits.
      LOG.info("Aborting region server: " + hrs.getServerName());
      hrs.abort("testing");

      // Wait for the RS abort to complete.
      TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
        }
      });

      Thread.sleep(2000);
      LOG.info("Current Open Regions:"
          + HBaseTestingUtility.getAllOnlineRegions(cluster).size());

      // Wait for all regions to come back online.
      TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
              >= (NUM_REGIONS_TO_CREATE + 1));
        }
      });

      LOG.info("Current Open Regions After Master Node Starts Up:"
          + HBaseTestingUtility.getAllOnlineRegions(cluster).size());

      // All written edits must have been recovered by the new master.
      assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
    } finally {
      if (ht != null) ht.close();
      if (zkw != null) zkw.close();
    }
  }
523
  /**
   * Master failover with pending log-REPLAY work (distributed log replay on):
   * aborts the active master, then an RS holding unflushed edits, and verifies
   * the next master drives replay to completion (recovering-regions znode
   * drains) so all edits become visible.
   */
  @Ignore("DLR is broken by HBASE-12751") @Test(timeout = 300000)
  public void testMasterStartsUpWithLogReplayWork() throws Exception {
    LOG.info("testMasterStartsUpWithLogReplayWork");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, NUM_RS - 1);
    startCluster(NUM_RS);

    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;
    // Turn off the balancer so regions stay where the edits were written.
    master.balanceSwitch(false);

    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    try {
      HRegionServer hrs = findRSToKill(false, "table");
      List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
      makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);

      // Abort the active master first so replay work is pending at startup.
      abortMaster(cluster);

      // Abort the region server holding the edits.
      LOG.info("Aborting region server: " + hrs.getServerName());
      hrs.abort("testing");

      // Wait for the RS abort to complete.
      TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
        }
      });

      Thread.sleep(2000);
      LOG.info("Current Open Regions:" + HBaseTestingUtility.getAllOnlineRegions(cluster).size());

      // Wait until the recovering-regions znode is empty, i.e. replay finished.
      TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
              zkw.recoveringRegionsZNode, false);
          boolean done = recoveringRegions != null && recoveringRegions.size() == 0;
          if (!done) {
            LOG.info("Recovering regions: " + recoveringRegions);
          }
          return done;
        }
      });

      LOG.info("Current Open Regions After Master Node Starts Up:"
          + HBaseTestingUtility.getAllOnlineRegions(cluster).size());

      // All written edits must have been replayed by the new master.
      assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
    } finally {
      if (ht != null) ht.close();
      if (zkw != null) zkw.close();
    }
  }
585
586
  /**
   * Two sequential RS failures under distributed log replay: kill the RS
   * holding the test WAL edits, wait for its regions to come back online, then
   * kill a second RS and verify all edits survive both recoveries.
   */
  @Ignore("DLR is broken by HBASE-12751") @Test(timeout = 300000)
  public void testLogReplayTwoSequentialRSDown() throws Exception {
    LOG.info("testRecoveredEditsReplayTwoSequentialRSDown");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;
    // Turn off the balancer so regions stay where the edits were written.
    master.balanceSwitch(false);

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    try {
      List<HRegionInfo> regions = null;
      HRegionServer hrs1 = findRSToKill(false, "table");
      regions = ProtobufUtil.getOnlineRegions(hrs1.getRSRpcServices());

      makeWAL(hrs1, regions, "table", "family", NUM_LOG_LINES, 100);

      // Abort the first region server.
      LOG.info("Aborting region server: " + hrs1.getServerName());
      hrs1.abort("testing");

      // Wait for the first abort to complete.
      TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
        }
      });

      // Wait for all regions to come back online.
      TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
              >= (NUM_REGIONS_TO_CREATE + 1));
        }
      });

      // Brief pause before the second kill.
      Thread.sleep(300);

      // Kill one of the remaining region servers.
      rsts = cluster.getLiveRegionServerThreads();
      HRegionServer hrs2 = rsts.get(0).getRegionServer();
      LOG.info("Aborting one more region server: " + hrs2.getServerName());
      hrs2.abort("testing");

      // Wait for the second abort to complete.
      TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 2));
        }
      });

      // Wait for all regions to come back online again.
      TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
              >= (NUM_REGIONS_TO_CREATE + 1));
        }
      });

      // Wait until no regions remain marked as recovering.
      TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
              zkw.recoveringRegionsZNode, false);
          return (recoveringRegions != null && recoveringRegions.size() == 0);
        }
      });

      assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));
    } finally {
      if (ht != null) ht.close();
      if (zkw != null) zkw.close();
    }
  }
670
671 @Ignore("DLR is broken by HBASE-12751") @Test(timeout = 300000)
672 public void testMarkRegionsRecoveringInZK() throws Exception {
673 LOG.info("testMarkRegionsRecoveringInZK");
674 conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
675 startCluster(NUM_RS);
676 master.balanceSwitch(false);
677 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
678 final ZooKeeperWatcher zkw = master.getZooKeeper();
679 Table ht = installTable(zkw, "table", "family", 40);
680 try {
681 final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
682
683 Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
684 HRegionInfo region = null;
685 HRegionServer hrs = null;
686 ServerName firstFailedServer = null;
687 ServerName secondFailedServer = null;
688 for (int i = 0; i < NUM_RS; i++) {
689 hrs = rsts.get(i).getRegionServer();
690 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
691 if (regions.isEmpty()) continue;
692 region = regions.get(0);
693 regionSet.add(region);
694 firstFailedServer = hrs.getServerName();
695 secondFailedServer = rsts.get((i + 1) % NUM_RS).getRegionServer().getServerName();
696 break;
697 }
698
699 slm.markRegionsRecovering(firstFailedServer, regionSet);
700 slm.markRegionsRecovering(secondFailedServer, regionSet);
701
702 assertNotNull(zkw.recoveringRegionsZNode);
703 List<String> recoveringRegions = ZKUtil.listChildrenNoWatch(zkw,
704 ZKUtil.joinZNode(zkw.recoveringRegionsZNode, region.getEncodedName()));
705
706 assertEquals(recoveringRegions.size(), 2);
707
708
709 final HRegionServer tmphrs = hrs;
710 TEST_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
711 @Override
712 public boolean evaluate() throws Exception {
713 return (tmphrs.getRecoveringRegions().size() == 0);
714 }
715 });
716 } finally {
717 if (ht != null) ht.close();
718 if (zkw != null) zkw.close();
719 }
720 }
721
  /**
   * Replay path data-integrity check: loads data via prepareData(), records the
   * table checksum, aborts a non-meta region server, and verifies the checksum
   * is unchanged after the regions are recovered and reopened.
   */
  @Ignore("DLR is broken by HBASE-12751") @Test(timeout = 300000)
  public void testReplayCmd() throws Exception {
    LOG.info("testReplayCmd");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    // Turn off the balancer so regions stay put during the test.
    master.balanceSwitch(false);

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    try {
      List<HRegionInfo> regions = null;
      HRegionServer hrs = null;
      // Pick an RS that hosts user regions but does NOT carry hbase:meta.
      for (int i = 0; i < NUM_RS; i++) {
        boolean isCarryingMeta = false;
        hrs = rsts.get(i).getRegionServer();
        regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
        for (HRegionInfo region : regions) {
          if (region.isMetaRegion()) {
            isCarryingMeta = true;
            break;
          }
        }
        if (isCarryingMeta) {
          continue;
        }
        if (regions.size() > 0) break;
      }

      this.prepareData(ht, Bytes.toBytes("family"), Bytes.toBytes("c1"));
      String originalCheckSum = TEST_UTIL.checksumRows(ht);

      // Abort the RS and wait until its regions are fully recovered elsewhere.
      abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);

      assertEquals("Data should remain after reopening of regions", originalCheckSum,
          TEST_UTIL.checksumRows(ht));
    } finally {
      if (ht != null) ht.close();
      if (zkw != null) zkw.close();
    }
  }
767
  /**
   * A dead RS carried regions of two tables, one of which is disabled before
   * recovery. Verifies edits for the disabled table are preserved as
   * recovered.edits files (not replayed) while edits for the enabled table are
   * replayed and readable.
   */
  @Ignore("DLR is broken by HBASE-12751") @Test(timeout = 300000)
  public void testLogReplayForDisablingTable() throws Exception {
    LOG.info("testLogReplayForDisablingTable");
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table disablingHT = installTable(zkw, "disableTable", "family", NUM_REGIONS_TO_CREATE);
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE, NUM_REGIONS_TO_CREATE);
    try {
      // Turn off the balancer so regions stay put during the test.
      master.balanceSwitch(false);

      List<HRegionInfo> regions = null;
      HRegionServer hrs = null;
      boolean hasRegionsForBothTables = false;
      String tableName = null;
      // Find an RS hosting regions of BOTH tables and no system tables.
      for (int i = 0; i < NUM_RS; i++) {
        tableName = null;
        hasRegionsForBothTables = false;
        boolean isCarryingSystem = false;
        hrs = rsts.get(i).getRegionServer();
        regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
        for (HRegionInfo region : regions) {
          if (region.getTable().isSystemTable()) {
            isCarryingSystem = true;
            break;
          }
          if (tableName != null &&
              !tableName.equalsIgnoreCase(region.getTable().getNameAsString())) {
            // This RS hosts a second, different table.
            hasRegionsForBothTables = true;
            break;
          } else if (tableName == null) {
            tableName = region.getTable().getNameAsString();
          }
        }
        if (isCarryingSystem) {
          continue;
        }
        if (hasRegionsForBothTables) {
          break;
        }
      }

      // The rest of the test requires such an RS to exist.
      Assert.assertTrue(hasRegionsForBothTables);

      LOG.info("#regions = " + regions.size());
      // Drop meta regions; only user-table edits are written and counted.
      Iterator<HRegionInfo> it = regions.iterator();
      while (it.hasNext()) {
        HRegionInfo region = it.next();
        if (region.isMetaTable()) {
          it.remove();
        }
      }
      makeWAL(hrs, regions, "disableTable", "family", NUM_LOG_LINES, 100, false);
      makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);

      LOG.info("Disabling table\n");
      TEST_UTIL.getHBaseAdmin().disableTable(TableName.valueOf("disableTable"));
      TEST_UTIL.waitTableDisabled(TableName.valueOf("disableTable").getName());

      // Abort the region server holding both tables' edits.
      LOG.info("Aborting region server: " + hrs.getServerName());
      hrs.abort("testing");

      // Wait for the abort to complete.
      TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return (cluster.getLiveRegionServerThreads().size() <= (NUM_RS - 1));
        }
      });

      // Wait for the enabled table's regions to come back online.
      TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          return (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
              >= (NUM_REGIONS_TO_CREATE + 1));
        }
      });

      // Wait until dead-server processing finishes and nothing is recovering.
      TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() throws Exception {
          List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
              zkw.recoveringRegionsZNode, false);
          ServerManager serverManager = master.getServerManager();
          return (!serverManager.areDeadServersInProgress() &&
              recoveringRegions != null && recoveringRegions.size() == 0);
        }
      });

      // Count edits preserved as recovered.edits for the DISABLED table.
      int count = 0;
      FileSystem fs = master.getMasterFileSystem().getFileSystem();
      Path rootdir = FSUtils.getRootDir(conf);
      Path tdir = FSUtils.getTableDir(rootdir, TableName.valueOf("disableTable"));
      for (HRegionInfo hri : regions) {
        Path editsdir =
            WALSplitter.getRegionDirRecoveredEditsDir(
                FSUtils.getRegionDirFromTableDir(tdir, hri));
        LOG.debug("checking edits dir " + editsdir);
        if(!fs.exists(editsdir)) continue;
        FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
          @Override
          public boolean accept(Path p) {
            // Skip sequence-id marker files; they carry no edits.
            if (WALSplitter.isSequenceIdFile(p)) {
              return false;
            }
            return true;
          }
        });
        if(files != null) {
          for(FileStatus file : files) {
            int c = countWAL(file.getPath(), fs, conf);
            count += c;
            LOG.info(c + " edits in " + file.getPath());
          }
        }
      }

      LOG.info("Verify edits in recovered.edits files");
      assertEquals(NUM_LOG_LINES, count);
      LOG.info("Verify replayed edits");
      assertEquals(NUM_LOG_LINES, TEST_UTIL.countRows(ht));

      // Remove the recovered.edits directories that were inspected above.
      for (HRegionInfo hri : regions) {
        Path editsdir =
            WALSplitter.getRegionDirRecoveredEditsDir(
                FSUtils.getRegionDirFromTableDir(tdir, hri));
        fs.delete(editsdir, true);
      }
      disablingHT.close();
    } finally {
      if (ht != null) ht.close();
      if (zkw != null) zkw.close();
    }
  }
914
915 @Ignore("DLR is broken by HBASE-12751") @Test(timeout = 300000)
916 public void testDisallowWritesInRecovering() throws Exception {
917 LOG.info("testDisallowWritesInRecovering");
918 conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
919 conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3);
920 conf.setBoolean(HConstants.DISALLOW_WRITES_IN_RECOVERING, true);
921 startCluster(NUM_RS);
922 final int NUM_REGIONS_TO_CREATE = 40;
923
924
925 master.balanceSwitch(false);
926
927 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
928 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
929 Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
930 try {
931 final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
932
933 Set<HRegionInfo> regionSet = new HashSet<HRegionInfo>();
934 HRegionInfo region = null;
935 HRegionServer hrs = null;
936 HRegionServer dstRS = null;
937 for (int i = 0; i < NUM_RS; i++) {
938 hrs = rsts.get(i).getRegionServer();
939 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
940 if (regions.isEmpty()) continue;
941 region = regions.get(0);
942 regionSet.add(region);
943 dstRS = rsts.get((i+1) % NUM_RS).getRegionServer();
944 break;
945 }
946
947 assertNotNull(region);
948 assertNotNull(dstRS);
949
950 slm.markRegionsRecovering(hrs.getServerName(), regionSet);
951
952 final HRegionInfo hri = region;
953 final HRegionServer tmpRS = dstRS;
954 TEST_UTIL.getHBaseAdmin().move(region.getEncodedNameAsBytes(),
955 Bytes.toBytes(dstRS.getServerName().getServerName()));
956
957 final RegionStates regionStates =
958 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
959 TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
960 @Override
961 public boolean evaluate() throws Exception {
962 ServerName sn = regionStates.getRegionServerOfRegion(hri);
963 return (sn != null && sn.equals(tmpRS.getServerName()));
964 }
965 });
966
967 try {
968 byte[] key = region.getStartKey();
969 if (key == null || key.length == 0) {
970 key = new byte[] { 0, 0, 0, 0, 1 };
971 }
972 Put put = new Put(key);
973 put.add(Bytes.toBytes("family"), Bytes.toBytes("c1"), new byte[]{'b'});
974 ht.put(put);
975 } catch (IOException ioe) {
976 Assert.assertTrue(ioe instanceof RetriesExhaustedWithDetailsException);
977 RetriesExhaustedWithDetailsException re = (RetriesExhaustedWithDetailsException) ioe;
978 boolean foundRegionInRecoveryException = false;
979 for (Throwable t : re.getCauses()) {
980 if (t instanceof RegionInRecoveryException) {
981 foundRegionInRecoveryException = true;
982 break;
983 }
984 }
985 Assert.assertTrue(
986 "No RegionInRecoveryException. Following exceptions returned=" + re.getCauses(),
987 foundRegionInRecoveryException);
988 }
989 } finally {
990 if (ht != null) ht.close();
991 if (ht != null) zkw.close();
992 }
993 }
994
995
996
997
998
999
1000
1001
1002
1003
  /**
   * Kills a region server while a split-log worker is processing a task and
   * checks that at least one of the worker termination counters
   * (resigned / err / final_transition_failed / done / preempt) moves.
   */
  @Ignore ("Disabled because flakey") @Test (timeout=300000)
  public void testWorkerAbort() throws Exception {
    LOG.info("testWorkerAbort");
    startCluster(3);
    final int NUM_LOG_LINES = 10000;
    final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
    FileSystem fs = master.getMasterFileSystem().getFileSystem();

    final List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    HRegionServer hrs = findRSToKill(false, "table");
    Path rootdir = FSUtils.getRootDir(conf);
    final Path logDir = new Path(rootdir,
        DefaultWALProvider.getWALDirectoryName(hrs.getServerName().toString()));

    Table t = installTable(new ZooKeeperWatcher(conf, "table-creation", null),
        "table", "family", 40);
    try {
      makeWAL(hrs, ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()),
          "table", "family", NUM_LOG_LINES, 100);

      // Abort the first live RS as soon as any worker acquires a split task.
      new Thread() {
        @Override
        public void run() {
          waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
          for (RegionServerThread rst : rsts) {
            rst.getRegionServer().abort("testing");
            break;
          }
        }
      }.start();

      // Enqueue a split task for the first WAL file of the chosen RS.
      FileStatus[] logfiles = fs.listStatus(logDir);
      TaskBatch batch = new TaskBatch();
      slm.enqueueSplitTask(logfiles[0].getPath().toString(), batch);

      // Busy-wait up to 80s for any of the worker termination counters to move.
      long curt = System.currentTimeMillis();
      long waitTime = 80000;
      long endt = curt + waitTime;
      while (curt < endt) {
        if ((tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
            tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
            tot_wkr_preempt_task.get()) == 0) {
          Thread.yield();
          curt = System.currentTimeMillis();
        } else {
          assertTrue(1 <= (tot_wkr_task_resigned.get() + tot_wkr_task_err.get() +
              tot_wkr_final_transition_failed.get() + tot_wkr_task_done.get() +
              tot_wkr_preempt_task.get()));
          return;
        }
      }
      fail("none of the following counters went up in " + waitTime +
          " milliseconds - " +
          "tot_wkr_task_resigned, tot_wkr_task_err, " +
          "tot_wkr_final_transition_failed, tot_wkr_task_done, " +
          "tot_wkr_preempt_task");
    } finally {
      if (t != null) t.close();
    }
  }
1064
1065 @Test (timeout=300000)
1066 public void testThreeRSAbort() throws Exception {
1067 LOG.info("testThreeRSAbort");
1068 final int NUM_REGIONS_TO_CREATE = 40;
1069 final int NUM_ROWS_PER_REGION = 100;
1070
1071 startCluster(NUM_RS);
1072
1073 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf,
1074 "distributed log splitting test", null);
1075
1076 Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
1077 try {
1078 populateDataInTable(NUM_ROWS_PER_REGION, "family");
1079
1080
1081 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1082 assertEquals(NUM_RS, rsts.size());
1083 rsts.get(0).getRegionServer().abort("testing");
1084 rsts.get(1).getRegionServer().abort("testing");
1085 rsts.get(2).getRegionServer().abort("testing");
1086
1087 long start = EnvironmentEdgeManager.currentTime();
1088 while (cluster.getLiveRegionServerThreads().size() > (NUM_RS - 3)) {
1089 if (EnvironmentEdgeManager.currentTime() - start > 60000) {
1090 assertTrue(false);
1091 }
1092 Thread.sleep(200);
1093 }
1094
1095 start = EnvironmentEdgeManager.currentTime();
1096 while (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
1097 < (NUM_REGIONS_TO_CREATE + 1)) {
1098 if (EnvironmentEdgeManager.currentTime() - start > 60000) {
1099 assertTrue("Timedout", false);
1100 }
1101 Thread.sleep(200);
1102 }
1103
1104
1105 TEST_UTIL.waitFor(180000, 200, new Waiter.Predicate<Exception>() {
1106 @Override
1107 public boolean evaluate() throws Exception {
1108 List<String> recoveringRegions = zkw.getRecoverableZooKeeper().getChildren(
1109 zkw.recoveringRegionsZNode, false);
1110 return (recoveringRegions != null && recoveringRegions.size() == 0);
1111 }
1112 });
1113
1114 assertEquals(NUM_REGIONS_TO_CREATE * NUM_ROWS_PER_REGION,
1115 TEST_UTIL.countRows(ht));
1116 } finally {
1117 if (ht != null) ht.close();
1118 if (zkw != null) zkw.close();
1119 }
1120 }
1121
1122
1123
  /**
   * Verifies that when ZK task deletion is suppressed, a second split attempt
   * on a failed (corrupted) log dir blocks until the splitter thread is
   * interrupted, and that the manager waits on the pending ZK delete.
   */
  @Test(timeout=30000)
  public void testDelayedDeleteOnFailure() throws Exception {
    LOG.info("testDelayedDeleteOnFailure");
    startCluster(1);
    final SplitLogManager slm = master.getMasterFileSystem().splitLogManager;
    final FileSystem fs = master.getMasterFileSystem().getFileSystem();
    final Path logDir = new Path(FSUtils.getRootDir(conf), "x");
    fs.mkdirs(logDir);
    ExecutorService executor = null;
    try {
      // create a deliberately corrupted "log" file inside the fake log dir
      final Path corruptedLogFile = new Path(logDir, "x");
      FSDataOutputStream out;
      out = fs.create(corruptedLogFile);
      out.write(0);
      out.write(Bytes.toBytes("corrupted bytes"));
      out.close();
      ZKSplitLogManagerCoordination coordination =
          (ZKSplitLogManagerCoordination) ((BaseCoordinatedStateManager) master
              .getCoordinatedStateManager()).getSplitLogManagerCoordination();
      // suppress the ZK task-node delete so the task stays in the tasks map
      coordination.setIgnoreDeleteForTesting(true);
      executor = Executors.newSingleThreadExecutor();
      Runnable runnable = new Runnable() {
        @Override
        public void run() {
          try {
            // the corrupted file makes the split fail fast, so this first
            // call is expected to throw IOException
            slm.splitLogDistributed(logDir);
          } catch (IOException ioe) {
            try {
              // the corrupted file must still exist (delete was suppressed)
              assertTrue(fs.exists(corruptedLogFile));
              // second attempt blocks waiting for the (never-happening) task
              // removal, until this thread is interrupted by shutdownNow()
              slm.splitLogDistributed(logDir);
            } catch (IOException e) {
              assertTrue(Thread.currentThread().isInterrupted());
              return;
            }
            fail("did not get the expected IOException from the 2nd call");
          }
          fail("did not get the expected IOException from the 1st call");
        }
      };
      Future<?> result = executor.submit(runnable);
      try {
        result.get(2000, TimeUnit.MILLISECONDS);
      } catch (TimeoutException te) {
        // expected: the runnable is blocked in the 2nd splitLogDistributed call
      }
      waitForCounter(tot_mgr_wait_for_zk_delete, 0, 1, 10000);
      executor.shutdownNow();
      executor = null;

      // rethrows any assertion error raised inside the runnable
      result.get();
    } finally {
      if (executor != null) {
        // interrupt the worker thread if the test failed midway;
        // harmless if it has already terminated
        executor.shutdownNow();
      }
      fs.delete(logDir, true);
    }
  }
1190
1191 @Ignore("DLR is broken by HBASE-12751") @Test(timeout = 300000)
1192 public void testMetaRecoveryInZK() throws Exception {
1193 LOG.info("testMetaRecoveryInZK");
1194 conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1195 startCluster(NUM_RS);
1196
1197
1198
1199 master.balanceSwitch(false);
1200 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1201
1202
1203 HRegionServer hrs = findRSToKill(true, null);
1204 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1205
1206 LOG.info("#regions = " + regions.size());
1207 Set<HRegionInfo> tmpRegions = new HashSet<HRegionInfo>();
1208 tmpRegions.add(HRegionInfo.FIRST_META_REGIONINFO);
1209 master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), tmpRegions);
1210 Set<HRegionInfo> userRegionSet = new HashSet<HRegionInfo>();
1211 userRegionSet.addAll(regions);
1212 master.getMasterFileSystem().prepareLogReplay(hrs.getServerName(), userRegionSet);
1213 boolean isMetaRegionInRecovery = false;
1214 List<String> recoveringRegions =
1215 zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1216 for (String curEncodedRegionName : recoveringRegions) {
1217 if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1218 isMetaRegionInRecovery = true;
1219 break;
1220 }
1221 }
1222 assertTrue(isMetaRegionInRecovery);
1223
1224 master.getMasterFileSystem().splitMetaLog(hrs.getServerName());
1225
1226 isMetaRegionInRecovery = false;
1227 recoveringRegions =
1228 zkw.getRecoverableZooKeeper().getChildren(zkw.recoveringRegionsZNode, false);
1229 for (String curEncodedRegionName : recoveringRegions) {
1230 if (curEncodedRegionName.equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1231 isMetaRegionInRecovery = true;
1232 break;
1233 }
1234 }
1235
1236 assertFalse(isMetaRegionInRecovery);
1237 zkw.close();
1238 }
1239
  /**
   * Writes many edits to the same cell with an identical timestamp directly
   * into one region's WAL, aborts the hosting RS, and checks that log replay
   * recovers the last written value (before and after a flush).
   */
  @Ignore("DLR is broken by HBASE-12751") @Test(timeout = 300000)
  public void testSameVersionUpdatesRecovery() throws Exception {
    LOG.info("testSameVersionUpdatesRecovery");
    conf.setLong("hbase.regionserver.hlog.blocksize", 15 * 1024);
    conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
    startCluster(NUM_RS);
    final int NUM_REGIONS_TO_CREATE = 40;
    final int NUM_LOG_LINES = 1000;

    // turn off the balancer so the target region stays on the chosen RS
    master.balanceSwitch(false);

    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
    try {
      // pick a region server that is not carrying hbase:meta
      List<HRegionInfo> regions = null;
      HRegionServer hrs = null;
      for (int i = 0; i < NUM_RS; i++) {
        boolean isCarryingMeta = false;
        hrs = rsts.get(i).getRegionServer();
        regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
        for (HRegionInfo region : regions) {
          if (region.isMetaRegion()) {
            isCarryingMeta = true;
            break;
          }
        }
        if (isCarryingMeta) {
          continue;
        }
        break;
      }

      LOG.info("#regions = " + regions.size());
      // drop any meta regions from the candidate list
      Iterator<HRegionInfo> it = regions.iterator();
      while (it.hasNext()) {
        HRegionInfo region = it.next();
        if (region.isMetaTable()
            || region.getEncodedName().equals(
              HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
          it.remove();
        }
      }
      if (regions.size() == 0) return;
      HRegionInfo curRegionInfo = regions.get(0);
      byte[] startRow = curRegionInfo.getStartKey();
      if (startRow == null || startRow.length == 0) {
        startRow = new byte[] { 0, 0, 0, 0, 1 };
      }
      byte[] row = Bytes.incrementBytes(startRow, 1);
      // use a shorter row key to stay inside the chosen region
      row = Arrays.copyOfRange(row, 3, 8);
      long value = 0;
      TableName tableName = TableName.valueOf("table");
      byte[] family = Bytes.toBytes("family");
      byte[] qualifier = Bytes.toBytes("c1");
      // all edits share one timestamp: same cell version updated repeatedly
      long timeStamp = System.currentTimeMillis();
      HTableDescriptor htd = new HTableDescriptor(tableName);
      htd.addFamily(new HColumnDescriptor(family));
      final WAL wal = hrs.getWAL(curRegionInfo);
      for (int i = 0; i < NUM_LOG_LINES; i += 1) {
        WALEdit e = new WALEdit();
        value++;
        e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(value)));
        wal.append(htd, curRegionInfo,
          new HLogKey(curRegionInfo.getEncodedNameAsBytes(), tableName, System.currentTimeMillis()),
          e, true);
      }
      wal.sync();
      wal.shutdown();

      // abort the RS hosting the region and wait for recovery to complete
      this.abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);

      // the recovered cell must hold the last value written to the WAL
      LOG.info("Verification Starts...");
      Get g = new Get(row);
      Result r = ht.get(g);
      long theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
      assertEquals(value, theStoredVal);

      // the value must survive a flush as well
      LOG.info("Verification after flush...");
      TEST_UTIL.getHBaseAdmin().flush(tableName);
      r = ht.get(g);
      theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
      assertEquals(value, theStoredVal);
    } finally {
      if (ht != null) ht.close();
      if (zkw != null) zkw.close();
    }
  }
1333
1334 @Ignore("DLR is broken by HBASE-12751") @Test(timeout = 300000)
1335 public void testSameVersionUpdatesRecoveryWithCompaction() throws Exception {
1336 LOG.info("testSameVersionUpdatesRecoveryWithWrites");
1337 conf.setLong("hbase.regionserver.hlog.blocksize", 15 * 1024);
1338 conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true);
1339 conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 30 * 1024);
1340 conf.setInt("hbase.hstore.compactionThreshold", 3);
1341 startCluster(NUM_RS);
1342 final int NUM_REGIONS_TO_CREATE = 40;
1343 final int NUM_LOG_LINES = 2000;
1344
1345
1346 master.balanceSwitch(false);
1347
1348 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1349 final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
1350 Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
1351 try {
1352 List<HRegionInfo> regions = null;
1353 HRegionServer hrs = null;
1354 for (int i = 0; i < NUM_RS; i++) {
1355 boolean isCarryingMeta = false;
1356 hrs = rsts.get(i).getRegionServer();
1357 regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1358 for (HRegionInfo region : regions) {
1359 if (region.isMetaRegion()) {
1360 isCarryingMeta = true;
1361 break;
1362 }
1363 }
1364 if (isCarryingMeta) {
1365 continue;
1366 }
1367 break;
1368 }
1369
1370 LOG.info("#regions = " + regions.size());
1371 Iterator<HRegionInfo> it = regions.iterator();
1372 while (it.hasNext()) {
1373 HRegionInfo region = it.next();
1374 if (region.isMetaTable()
1375 || region.getEncodedName().equals(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())) {
1376 it.remove();
1377 }
1378 }
1379 if (regions.size() == 0) return;
1380 HRegionInfo curRegionInfo = regions.get(0);
1381 byte[] startRow = curRegionInfo.getStartKey();
1382 if (startRow == null || startRow.length == 0) {
1383 startRow = new byte[] { 0, 0, 0, 0, 1 };
1384 }
1385 byte[] row = Bytes.incrementBytes(startRow, 1);
1386
1387 row = Arrays.copyOfRange(row, 3, 8);
1388 long value = 0;
1389 final TableName tableName = TableName.valueOf("table");
1390 byte[] family = Bytes.toBytes("family");
1391 byte[] qualifier = Bytes.toBytes("c1");
1392 long timeStamp = System.currentTimeMillis();
1393 HTableDescriptor htd = new HTableDescriptor(tableName);
1394 htd.addFamily(new HColumnDescriptor(family));
1395 final WAL wal = hrs.getWAL(curRegionInfo);
1396 for (int i = 0; i < NUM_LOG_LINES; i += 1) {
1397 WALEdit e = new WALEdit();
1398 value++;
1399 e.add(new KeyValue(row, family, qualifier, timeStamp, Bytes.toBytes(value)));
1400 wal.append(htd, curRegionInfo, new HLogKey(curRegionInfo.getEncodedNameAsBytes(),
1401 tableName, System.currentTimeMillis()), e, true);
1402 }
1403 wal.sync();
1404 wal.shutdown();
1405
1406
1407 this.abortRSAndWaitForRecovery(hrs, zkw, NUM_REGIONS_TO_CREATE);
1408
1409
1410 LOG.info("Verification Starts...");
1411 Get g = new Get(row);
1412 Result r = ht.get(g);
1413 long theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1414 assertEquals(value, theStoredVal);
1415
1416
1417 LOG.info("Verification after flush...");
1418 TEST_UTIL.getHBaseAdmin().flush(tableName);
1419 TEST_UTIL.getHBaseAdmin().compact(tableName);
1420
1421
1422 TEST_UTIL.waitFor(30000, 200, new Waiter.Predicate<Exception>() {
1423 @Override
1424 public boolean evaluate() throws Exception {
1425 return (TEST_UTIL.getHBaseAdmin().getCompactionState(tableName) == CompactionState.NONE);
1426 }
1427 });
1428
1429 r = ht.get(g);
1430 theStoredVal = Bytes.toLong(r.getValue(family, qualifier));
1431 assertEquals(value, theStoredVal);
1432 } finally {
1433 if (ht != null) ht.close();
1434 if (zkw != null) zkw.close();
1435 }
1436 }
1437
  /**
   * Exercises WALSplitter's region sequence-id file read/write: repeated and
   * higher-id writes, single surviving seqid file, and that seqid files are
   * excluded from the sorted recovered-edits listing.
   */
  @Test(timeout = 300000)
  public void testReadWriteSeqIdFiles() throws Exception {
    LOG.info("testReadWriteSeqIdFiles");
    startCluster(2);
    final ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-creation", null);
    Table ht = installTable(zkw, "table", "family", 10);
    try {
      FileSystem fs = master.getMasterFileSystem().getFileSystem();
      Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), TableName.valueOf("table"));
      List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
      // write the seqid file twice with the same id, then once with a higher
      // id; per the assertion, the third write yields newSeqId + 2000
      long newSeqId = WALSplitter.writeRegionSequenceIdFile(fs, regionDirs.get(0), 1L, 1000L);
      WALSplitter.writeRegionSequenceIdFile(fs, regionDirs.get(0) , 1L, 1000L);
      assertEquals(newSeqId + 2000,
        WALSplitter.writeRegionSequenceIdFile(fs, regionDirs.get(0), 3L, 1000L));

      // exactly one seqid file should remain in the recovered.edits dir
      Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(regionDirs.get(0));
      FileStatus[] files = FSUtils.listStatus(fs, editsdir, new PathFilter() {
        @Override
        public boolean accept(Path p) {
          return WALSplitter.isSequenceIdFile(p);
        }
      });
      assertEquals(1, files.length);

      // seqid files must not show up as recovered edits
      NavigableSet<Path> recoveredEdits =
          WALSplitter.getSplitEditFilesSorted(fs, regionDirs.get(0));
      assertEquals(0, recoveredEdits.size());
    } finally {
      if (ht != null) ht.close();
      if (zkw != null) zkw.close();
    }
  }
1472
  /**
   * Convenience overload of {@link #installTable(ZooKeeperWatcher, String, String, int, int)}
   * for a cluster with no pre-existing user regions.
   */
  Table installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs) throws Exception {
    return installTable(zkw, tname, fname, nrs, 0);
  }
1476
  /**
   * Creates a multi-region table, then disables and re-enables it, verifying
   * the expected region counts at each step (the "+ 2" accounts for the
   * catalog and namespace regions; existingRegions for user regions already
   * present on the cluster).
   *
   * @return the created Table; callers are responsible for closing it
   */
  Table installTable(ZooKeeperWatcher zkw, String tname, String fname, int nrs,
      int existingRegions) throws Exception {
    // Create a table with regions
    TableName table = TableName.valueOf(tname);
    byte [] family = Bytes.toBytes(fname);
    LOG.info("Creating table with " + nrs + " regions");
    Table ht = TEST_UTIL.createMultiRegionTable(table, family, nrs);
    int numRegions = -1;
    try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(table)) {
      numRegions = r.getStartKeys().length;
    }
    assertEquals(nrs, numRegions);
    LOG.info("Waiting for no more RIT\n");
    blockUntilNoRIT(zkw, master);
    // disable-then-enable cycles the regions through unassignment/assignment
    LOG.debug("Disabling table\n");
    TEST_UTIL.getHBaseAdmin().disableTable(table);
    LOG.debug("Waiting for no more RIT\n");
    blockUntilNoRIT(zkw, master);
    NavigableSet<String> regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
    LOG.debug("Verifying only catalog and namespace regions are assigned\n");
    if (regions.size() != 2) {
      for (String oregion : regions)
        LOG.debug("Region still online: " + oregion);
    }
    assertEquals(2 + existingRegions, regions.size());
    LOG.debug("Enabling table\n");
    TEST_UTIL.getHBaseAdmin().enableTable(table);
    LOG.debug("Waiting for no more RIT\n");
    blockUntilNoRIT(zkw, master);
    LOG.debug("Verifying there are " + numRegions + " assigned on cluster\n");
    regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
    assertEquals(numRegions + 2 + existingRegions, regions.size());
    return ht;
  }
1513
1514 void populateDataInTable(int nrows, String fname) throws Exception {
1515 byte [] family = Bytes.toBytes(fname);
1516
1517 List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
1518 assertEquals(NUM_RS, rsts.size());
1519
1520 for (RegionServerThread rst : rsts) {
1521 HRegionServer hrs = rst.getRegionServer();
1522 List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1523 for (HRegionInfo hri : hris) {
1524 if (hri.getTable().isSystemTable()) {
1525 continue;
1526 }
1527 LOG.debug("adding data to rs = " + rst.getName() +
1528 " region = "+ hri.getRegionNameAsString());
1529 Region region = hrs.getOnlineRegion(hri.getRegionName());
1530 assertTrue(region != null);
1531 putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
1532 }
1533 }
1534
1535 for (MasterThread mt : cluster.getLiveMasterThreads()) {
1536 HRegionServer hrs = mt.getMaster();
1537 List<HRegionInfo> hris;
1538 try {
1539 hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1540 } catch (ServerNotRunningYetException e) {
1541
1542 continue;
1543 }
1544 for (HRegionInfo hri : hris) {
1545 if (hri.getTable().isSystemTable()) {
1546 continue;
1547 }
1548 LOG.debug("adding data to rs = " + mt.getName() +
1549 " region = "+ hri.getRegionNameAsString());
1550 Region region = hrs.getOnlineRegion(hri.getRegionName());
1551 assertTrue(region != null);
1552 putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
1553 }
1554 }
1555 }
1556
  /**
   * Convenience overload of
   * {@link #makeWAL(HRegionServer, List, String, String, int, int, boolean)}
   * that shuts the WAL down cleanly after writing.
   */
  public void makeWAL(HRegionServer hrs, List<HRegionInfo> regions, String tname, String fname,
      int num_edits, int edit_size) throws IOException {
    makeWAL(hrs, regions, tname, fname, num_edits, edit_size, true);
  }
1561
1562 public void makeWAL(HRegionServer hrs, List<HRegionInfo> regions, String tname, String fname,
1563 int num_edits, int edit_size, boolean cleanShutdown) throws IOException {
1564 TableName fullTName = TableName.valueOf(tname);
1565
1566 regions.remove(HRegionInfo.FIRST_META_REGIONINFO);
1567
1568
1569 for(Iterator<HRegionInfo> iter = regions.iterator(); iter.hasNext(); ) {
1570 HRegionInfo regionInfo = iter.next();
1571 if(regionInfo.getTable().isSystemTable()) {
1572 iter.remove();
1573 }
1574 }
1575 HTableDescriptor htd = new HTableDescriptor(fullTName);
1576 byte[] family = Bytes.toBytes(fname);
1577 htd.addFamily(new HColumnDescriptor(family));
1578 byte[] value = new byte[edit_size];
1579
1580 List<HRegionInfo> hris = new ArrayList<HRegionInfo>();
1581 for (HRegionInfo region : regions) {
1582 if (!region.getTable().getNameAsString().equalsIgnoreCase(tname)) {
1583 continue;
1584 }
1585 hris.add(region);
1586 }
1587 LOG.info("Creating wal edits across " + hris.size() + " regions.");
1588 for (int i = 0; i < edit_size; i++) {
1589 value[i] = (byte) ('a' + (i % 26));
1590 }
1591 int n = hris.size();
1592 int[] counts = new int[n];
1593
1594 final int syncEvery = 30 * 1024 / edit_size;
1595 if (n > 0) {
1596 for (int i = 0; i < num_edits; i += 1) {
1597 WALEdit e = new WALEdit();
1598 HRegionInfo curRegionInfo = hris.get(i % n);
1599 final WAL log = hrs.getWAL(curRegionInfo);
1600 byte[] startRow = curRegionInfo.getStartKey();
1601 if (startRow == null || startRow.length == 0) {
1602 startRow = new byte[] { 0, 0, 0, 0, 1 };
1603 }
1604 byte[] row = Bytes.incrementBytes(startRow, counts[i % n]);
1605 row = Arrays.copyOfRange(row, 3, 8);
1606
1607
1608 byte[] qualifier = Bytes.toBytes("c" + Integer.toString(i));
1609 e.add(new KeyValue(row, family, qualifier, System.currentTimeMillis(), value));
1610 log.append(htd, curRegionInfo,
1611 new HLogKey(curRegionInfo.getEncodedNameAsBytes(), fullTName,
1612 System.currentTimeMillis()), e, true);
1613 if (0 == i % syncEvery) {
1614 log.sync();
1615 }
1616 counts[i % n] += 1;
1617 }
1618 }
1619
1620
1621 for (HRegionInfo info : hris) {
1622 final WAL log = hrs.getWAL(info);
1623 log.sync();
1624 }
1625 if (cleanShutdown) {
1626 for (HRegionInfo info : hris) {
1627 final WAL log = hrs.getWAL(info);
1628 log.shutdown();
1629 }
1630 }
1631 for (int i = 0; i < n; i++) {
1632 LOG.info("region " + hris.get(i).getRegionNameAsString() + " has " + counts[i] + " edits");
1633 }
1634 return;
1635 }
1636
  /**
   * Counts the entries in the WAL file at {@code log}, skipping entries whose
   * first cell belongs to the WAL metadata family.
   */
  private int countWAL(Path log, FileSystem fs, Configuration conf)
      throws IOException {
    int count = 0;
    WAL.Reader in = WALFactory.createReader(fs, log, conf);
    try {
      WAL.Entry e;
      while ((e = in.next()) != null) {
        if (!WALEdit.isMetaEditFamily(e.getEdit().getCells().get(0))) {
          count++;
        }
      }
    } finally {
      // close is best-effort: a close failure is logged rather than allowed
      // to mask the count (or an exception) from the read loop above
      try {
        in.close();
      } catch (IOException exception) {
        LOG.warn("Problem closing wal: " + exception.getMessage());
        LOG.debug("exception details.", exception);
      }
    }
    return count;
  }
1658
  /**
   * Blocks until no regions are in transition (up to 60s). The zkw/master
   * parameters are currently unused; kept for call-site compatibility.
   */
  private void blockUntilNoRIT(ZooKeeperWatcher zkw, HMaster master) throws Exception {
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
  }
1662
1663 private void putData(Region region, byte[] startRow, int numRows, byte [] qf,
1664 byte [] ...families)
1665 throws IOException {
1666 for(int i = 0; i < numRows; i++) {
1667 Put put = new Put(Bytes.add(startRow, Bytes.toBytes(i)));
1668 for(byte [] family : families) {
1669 put.add(family, qf, null);
1670 }
1671 region.put(put);
1672 }
1673 }
1674
1675
1676
1677
1678 private void prepareData(final Table t, final byte[] f, final byte[] column) throws IOException {
1679 byte[] k = new byte[3];
1680
1681
1682 List<Put> puts = new ArrayList<>();
1683 for (byte b1 = 'a'; b1 <= 'z'; b1++) {
1684 for (byte b2 = 'a'; b2 <= 'z'; b2++) {
1685 for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1686 k[0] = b1;
1687 k[1] = b2;
1688 k[2] = b3;
1689 Put put = new Put(k);
1690 put.add(f, column, k);
1691 puts.add(put);
1692 }
1693 }
1694 }
1695 t.put(puts);
1696
1697 for (byte b3 = 'a'; b3 <= 'z'; b3++) {
1698 k[0] = 'a';
1699 k[1] = 'a';
1700 k[2] = b3;
1701 Delete del = new Delete(k);
1702 t.delete(del);
1703 }
1704 }
1705
1706 private void waitForCounter(AtomicLong ctr, long oldval, long newval,
1707 long timems) {
1708 long curt = System.currentTimeMillis();
1709 long endt = curt + timems;
1710 while (curt < endt) {
1711 if (ctr.get() == oldval) {
1712 Thread.yield();
1713 curt = System.currentTimeMillis();
1714 } else {
1715 assertEquals(newval, ctr.get());
1716 return;
1717 }
1718 }
1719 assertTrue(false);
1720 }
1721
1722 private void abortMaster(MiniHBaseCluster cluster) throws InterruptedException {
1723 for (MasterThread mt : cluster.getLiveMasterThreads()) {
1724 if (mt.getMaster().isActiveMaster()) {
1725 mt.getMaster().abort("Aborting for tests", new Exception("Trace info"));
1726 mt.join();
1727 break;
1728 }
1729 }
1730 LOG.debug("Master is aborted");
1731 }
1732
1733
1734
1735
1736
1737
1738
1739
  /**
   * Picks a region server to abort for a test.
   *
   * If {@code hasMetaRegion} is true, returns the RS carrying hbase:meta,
   * first moving one region of {@code tableName} onto it if it has none.
   * Otherwise returns an RS that hosts a region of {@code tableName} (or of
   * any table when {@code tableName} is null) but does not carry meta.
   * Falls through to the last live RS examined if no exact match is found.
   */
  private HRegionServer findRSToKill(boolean hasMetaRegion, String tableName) throws Exception {
    List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
    List<HRegionInfo> regions = null;
    HRegionServer hrs = null;

    for (RegionServerThread rst: rsts) {
      hrs = rst.getRegionServer();
      // wait for the RS to come online (or its thread to die)
      while (rst.isAlive() && !hrs.isOnline()) {
        Thread.sleep(100);
      }
      if (!rst.isAlive()) {
        continue;
      }
      boolean isCarryingMeta = false;
      boolean foundTableRegion = false;
      regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
      for (HRegionInfo region : regions) {
        if (region.isMetaRegion()) {
          isCarryingMeta = true;
        }
        if (tableName == null || region.getTable().getNameAsString().equals(tableName)) {
          foundTableRegion = true;
        }
        if (foundTableRegion && (isCarryingMeta || !hasMetaRegion)) {
          break;
        }
      }
      if (isCarryingMeta && hasMetaRegion) {
        // this RS carries meta — the one we want
        if (!foundTableRegion) {
          final HRegionServer destRS = hrs;
          // the meta carrier has no region of the table yet: move one onto it
          List<HRegionInfo> tableRegions =
              TEST_UTIL.getHBaseAdmin().getTableRegions(TableName.valueOf(tableName));
          final HRegionInfo hri = tableRegions.get(0);
          TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
              Bytes.toBytes(destRS.getServerName().getServerName()));
          // wait for the move to complete before handing the RS back
          final RegionStates regionStates =
              TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
          TEST_UTIL.waitFor(45000, 200, new Waiter.Predicate<Exception>() {
            @Override
            public boolean evaluate() throws Exception {
              ServerName sn = regionStates.getRegionServerOfRegion(hri);
              return (sn != null && sn.equals(destRS.getServerName()));
            }
          });
        }
        return hrs;
      } else if (hasMetaRegion || isCarryingMeta) {
        // wrong RS: either we want meta and this one lacks it, or we don't
        // want meta and this one carries it — keep looking
        continue;
      }
      if (foundTableRegion) break;
    }

    return hrs;
  }
1797 }