1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master;
20
21 import static org.junit.Assert.assertEquals;
22 import static org.junit.Assert.assertFalse;
23 import static org.junit.Assert.assertNotNull;
24 import static org.junit.Assert.assertTrue;
25
26 import java.io.IOException;
27 import java.util.ArrayList;
28 import java.util.Iterator;
29 import java.util.List;
30 import java.util.Set;
31 import java.util.TreeSet;
32
33 import java.util.concurrent.TimeUnit;
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.fs.FileSystem;
38 import org.apache.hadoop.fs.Path;
39 import org.apache.hadoop.hbase.Abortable;
40 import org.apache.hadoop.hbase.ClusterStatus;
41 import org.apache.hadoop.hbase.HBaseConfiguration;
42 import org.apache.hadoop.hbase.HBaseTestingUtility;
43 import org.apache.hadoop.hbase.HColumnDescriptor;
44 import org.apache.hadoop.hbase.HConstants;
45 import org.apache.hadoop.hbase.HRegionInfo;
46 import org.apache.hadoop.hbase.HTableDescriptor;
47 import org.apache.hadoop.hbase.Waiter;
48 import org.apache.hadoop.hbase.testclassification.LargeTests;
49 import org.apache.hadoop.hbase.MetaTableAccessor;
50 import org.apache.hadoop.hbase.MiniHBaseCluster;
51 import org.apache.hadoop.hbase.RegionTransition;
52 import org.apache.hadoop.hbase.ServerName;
53 import org.apache.hadoop.hbase.TableName;
54 import org.apache.hadoop.hbase.TableStateManager;
55 import org.apache.hadoop.hbase.client.RegionLocator;
56 import org.apache.hadoop.hbase.client.Table;
57 import org.apache.hadoop.hbase.executor.EventType;
58 import org.apache.hadoop.hbase.master.RegionState.State;
59 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
60 import org.apache.hadoop.hbase.protobuf.RequestConverter;
61 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
62 import org.apache.hadoop.hbase.regionserver.HRegion;
63 import org.apache.hadoop.hbase.regionserver.HRegionServer;
64 import org.apache.hadoop.hbase.regionserver.Region;
65 import org.apache.hadoop.hbase.regionserver.RegionMergeTransactionImpl;
66 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
67 import org.apache.hadoop.hbase.util.Bytes;
68 import org.apache.hadoop.hbase.util.FSTableDescriptors;
69 import org.apache.hadoop.hbase.util.FSUtils;
70 import org.apache.hadoop.hbase.util.JVMClusterUtil;
71 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
72 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
73 import org.apache.hadoop.hbase.util.Threads;
74 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
75 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
76 import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
77 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
78 import org.apache.zookeeper.data.Stat;
79 import org.junit.Ignore;
80 import org.junit.Test;
81 import org.junit.experimental.categories.Category;
82
83 @Category(LargeTests.class)
84 public class TestMasterFailover {
85 private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166 @Test (timeout=240000)
167 public void testMasterFailoverWithMockedRIT() throws Exception {
168
169 final int NUM_MASTERS = 1;
170 final int NUM_RS = 3;
171
172
173 Configuration conf = HBaseConfiguration.create();
174 conf.setBoolean("hbase.assignment.usezk", true);
175
176
177 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
178 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
179 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
180 log("Cluster started");
181
182
183 ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
184
185
186 List<MasterThread> masterThreads = cluster.getMasterThreads();
187 assertEquals(1, masterThreads.size());
188
189
190 assertTrue(cluster.waitForActiveAndReadyMaster());
191 HMaster master = masterThreads.get(0).getMaster();
192 assertTrue(master.isActiveMaster());
193 assertTrue(master.isInitialized());
194
195
196 master.balanceSwitch(false);
197
198
199 byte [] FAMILY = Bytes.toBytes("family");
200 byte [][] SPLIT_KEYS = new byte [][] {
201 new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
202 Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
203 Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
204 Bytes.toBytes("iii"), Bytes.toBytes("jjj")
205 };
206
207 byte [] enabledTable = Bytes.toBytes("enabledTable");
208 HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
209 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
210
211 FileSystem filesystem = FileSystem.get(conf);
212 Path rootdir = FSUtils.getRootDir(conf);
213 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
214
215 fstd.createTableDescriptor(htdEnabled);
216
217 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), null, null);
218 createRegion(hriEnabled, rootdir, conf, htdEnabled);
219
220 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
221 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
222
223 TableName disabledTable = TableName.valueOf("disabledTable");
224 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
225 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
226
227 fstd.createTableDescriptor(htdDisabled);
228 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
229 createRegion(hriDisabled, rootdir, conf, htdDisabled);
230 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
231 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
232
233 TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
234 TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});
235
236 log("Regions in hbase:meta and namespace have been created");
237
238
239
240 assertEquals(4, cluster.countServedRegions());
241
242
243 AssignmentManager am = master.getAssignmentManager();
244 RegionStates regionStates = am.getRegionStates();
245 List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
246 assertEquals(2, mergingRegions.size());
247 HRegionInfo a = mergingRegions.get(0);
248 HRegionInfo b = mergingRegions.get(1);
249 HRegionInfo newRegion = RegionMergeTransactionImpl.getMergedRegionInfo(a, b);
250 ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
251 ServerName serverB = regionStates.getRegionServerOfRegion(b);
252 if (!serverB.equals(mergingServer)) {
253 RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
254 am.balance(plan);
255 assertTrue(am.waitForAssignment(b));
256 }
257
258
259 HRegionServer hrs = cluster.getRegionServer(0);
260 ServerName serverName = hrs.getServerName();
261 HRegionInfo closingRegion = enabledRegions.remove(0);
262
263 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
264 enabledAndAssignedRegions.add(enabledRegions.remove(0));
265 enabledAndAssignedRegions.add(enabledRegions.remove(0));
266 enabledAndAssignedRegions.add(closingRegion);
267
268 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
269 disabledAndAssignedRegions.add(disabledRegions.remove(0));
270 disabledAndAssignedRegions.add(disabledRegions.remove(0));
271
272
273 for (HRegionInfo hri : enabledAndAssignedRegions) {
274 master.assignmentManager.addPlan(hri.getEncodedName(),
275 new RegionPlan(hri, null, serverName));
276 master.assignmentManager.assign(hri, true);
277 }
278
279 for (HRegionInfo hri : disabledAndAssignedRegions) {
280 master.assignmentManager.addPlan(hri.getEncodedName(),
281 new RegionPlan(hri, null, serverName));
282 master.assignmentManager.assign(hri, true);
283 }
284
285
286 log("Waiting for assignment to finish");
287 ZKAssign.blockUntilNoRIT(zkw);
288 log("Assignment completed");
289
290
291 log("Aborting master");
292 cluster.abortMaster(0);
293 cluster.waitOnMaster(0);
294 log("Master has aborted");
295
296
297
298
299
300
301 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
302 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
303
304 log("Beginning to mock scenarios");
305
306
307 TableStateManager zktable = new ZKTableStateManager(zkw);
308 zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
309
310
311
312
313
314
315
316
317 HRegionInfo region = enabledRegions.remove(0);
318 regionsThatShouldBeOnline.add(region);
319 ZKAssign.createNodeOffline(zkw, region, serverName);
320
321
322
323
324
325 regionsThatShouldBeOnline.add(closingRegion);
326 ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
327
328
329
330
331
332
333
334 region = enabledRegions.remove(0);
335 regionsThatShouldBeOnline.add(region);
336 int version = ZKAssign.createNodeClosing(zkw, region, serverName);
337 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
338
339
340 region = disabledRegions.remove(0);
341 regionsThatShouldBeOffline.add(region);
342 version = ZKAssign.createNodeClosing(zkw, region, serverName);
343 ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
344
345
346
347
348
349
350
351 region = enabledRegions.remove(0);
352 regionsThatShouldBeOnline.add(region);
353 ZKAssign.createNodeOffline(zkw, region, serverName);
354 ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
355 while (true) {
356 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
357 RegionTransition rt = RegionTransition.parseFrom(bytes);
358 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
359 break;
360 }
361 Thread.sleep(100);
362 }
363
364
365
366 region = disabledRegions.remove(0);
367 regionsThatShouldBeOffline.add(region);
368 ZKAssign.createNodeOffline(zkw, region, serverName);
369 ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
370 while (true) {
371 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
372 RegionTransition rt = RegionTransition.parseFrom(bytes);
373 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
374 break;
375 }
376 Thread.sleep(100);
377 }
378
379
380
381
382
383
384
385 hrs.getCoordinatedStateManager().
386 getRegionMergeCoordination().startRegionMergeTransaction(newRegion, mergingServer, a, b);
387
388
389
390
391
392
393
394
395
396 log("Done mocking data up in ZK");
397
398
399 log("Starting up a new master");
400 master = cluster.startMaster().getMaster();
401 log("Waiting for master to be ready");
402 cluster.waitForActiveAndReadyMaster();
403 log("Master is ready");
404
405
406 regionStates = master.getAssignmentManager().getRegionStates();
407
408 assertTrue(regionStates.isRegionInState(a, State.MERGING));
409 assertTrue(regionStates.isRegionInState(b, State.MERGING));
410 assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
411
412
413 ZKAssign.deleteNodeFailSilent(zkw, newRegion);
414
415
416 log("Waiting for no more RIT");
417 ZKAssign.blockUntilNoRIT(zkw);
418 log("No more RIT in ZK, now doing final test verification");
419
420
421 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
422 for (JVMClusterUtil.RegionServerThread rst :
423 cluster.getRegionServerThreads()) {
424 onlineRegions.addAll(ProtobufUtil.getOnlineRegions(
425 rst.getRegionServer().getRSRpcServices()));
426 }
427
428
429 for (HRegionInfo hri : regionsThatShouldBeOnline) {
430 assertTrue(onlineRegions.contains(hri));
431 }
432
433
434 for (HRegionInfo hri : regionsThatShouldBeOffline) {
435 if (onlineRegions.contains(hri)) {
436 LOG.debug(hri);
437 }
438 assertFalse(onlineRegions.contains(hri));
439 }
440
441 log("Done with verification, all passed, shutting down cluster");
442
443
444 TEST_UTIL.shutdownMiniCluster();
445 }
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503 @Test (timeout=180000)
504 public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
505
506 final int NUM_MASTERS = 1;
507 final int NUM_RS = 2;
508
509
510 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
511 Configuration conf = TEST_UTIL.getConfiguration();
512 conf.setBoolean("hbase.assignment.usezk", true);
513
514 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
515 conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
516 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
517 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
518 log("Cluster started");
519
520
521 ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
522 "unittest", new Abortable() {
523
524 @Override
525 public void abort(String why, Throwable e) {
526 LOG.error("Fatal ZK Error: " + why, e);
527 org.junit.Assert.assertFalse("Fatal ZK error", true);
528 }
529
530 @Override
531 public boolean isAborted() {
532 return false;
533 }
534
535 });
536
537
538 List<MasterThread> masterThreads = cluster.getMasterThreads();
539 assertEquals(1, masterThreads.size());
540
541
542 assertTrue(cluster.waitForActiveAndReadyMaster());
543 HMaster master = masterThreads.get(0).getMaster();
544 assertTrue(master.isActiveMaster());
545 assertTrue(master.isInitialized());
546
547
548 master.balanceSwitch(false);
549
550
551 byte [] FAMILY = Bytes.toBytes("family");
552 byte[][] SPLIT_KEYS =
553 TEST_UTIL.getRegionSplitStartKeys(Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 30);
554
555 byte [] enabledTable = Bytes.toBytes("enabledTable");
556 HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
557 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
558 FileSystem filesystem = FileSystem.get(conf);
559 Path rootdir = FSUtils.getRootDir(conf);
560 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
561
562 fstd.createTableDescriptor(htdEnabled);
563 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(),
564 null, null);
565 createRegion(hriEnabled, rootdir, conf, htdEnabled);
566
567 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
568 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
569
570 TableName disabledTable =
571 TableName.valueOf("disabledTable");
572 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
573 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
574
575 fstd.createTableDescriptor(htdDisabled);
576 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
577 createRegion(hriDisabled, rootdir, conf, htdDisabled);
578
579 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
580 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
581
582 log("Regions in hbase:meta and Namespace have been created");
583
584
585 assertEquals(2, cluster.countServedRegions());
586
587
588 List<RegionServerThread> regionservers =
589 cluster.getRegionServerThreads();
590 HRegionServer hrs = regionservers.get(0).getRegionServer();
591
592
593 RegionServerThread hrsDeadThread = regionservers.get(1);
594 HRegionServer hrsDead = hrsDeadThread.getRegionServer();
595 ServerName deadServerName = hrsDead.getServerName();
596
597
598 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
599 enabledAndAssignedRegions.addAll(enabledRegions.subList(0, 6));
600 enabledRegions.removeAll(enabledAndAssignedRegions);
601 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
602 disabledAndAssignedRegions.addAll(disabledRegions.subList(0, 6));
603 disabledRegions.removeAll(disabledAndAssignedRegions);
604
605
606 for (HRegionInfo hri : enabledAndAssignedRegions) {
607 master.assignmentManager.addPlan(hri.getEncodedName(),
608 new RegionPlan(hri, null, hrs.getServerName()));
609 master.assignmentManager.assign(hri, true);
610 }
611 for (HRegionInfo hri : disabledAndAssignedRegions) {
612 master.assignmentManager.addPlan(hri.getEncodedName(),
613 new RegionPlan(hri, null, hrs.getServerName()));
614 master.assignmentManager.assign(hri, true);
615 }
616
617 log("Waiting for assignment to finish");
618 ZKAssign.blockUntilNoRIT(zkw);
619 master.assignmentManager.waitUntilNoRegionsInTransition(60000);
620 log("Assignment completed");
621
622 assertTrue(" Table must be enabled.", master.getAssignmentManager()
623 .getTableStateManager().isTableState(TableName.valueOf("enabledTable"),
624 ZooKeeperProtos.Table.State.ENABLED));
625
626 List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
627 enabledAndOnDeadRegions.addAll(enabledRegions.subList(0, 6));
628 enabledRegions.removeAll(enabledAndOnDeadRegions);
629 List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
630 disabledAndOnDeadRegions.addAll(disabledRegions.subList(0, 6));
631 disabledRegions.removeAll(disabledAndOnDeadRegions);
632
633
634 for (HRegionInfo hri : enabledAndOnDeadRegions) {
635 master.assignmentManager.addPlan(hri.getEncodedName(),
636 new RegionPlan(hri, null, deadServerName));
637 master.assignmentManager.assign(hri, true);
638 }
639 for (HRegionInfo hri : disabledAndOnDeadRegions) {
640 master.assignmentManager.addPlan(hri.getEncodedName(),
641 new RegionPlan(hri, null, deadServerName));
642 master.assignmentManager.assign(hri, true);
643 }
644
645
646 log("Waiting for assignment to finish");
647 ZKAssign.blockUntilNoRIT(zkw);
648 master.assignmentManager.waitUntilNoRegionsInTransition(60000);
649 log("Assignment completed");
650
651
652
653 verifyRegionLocation(hrs, enabledAndAssignedRegions);
654 verifyRegionLocation(hrs, disabledAndAssignedRegions);
655 verifyRegionLocation(hrsDead, enabledAndOnDeadRegions);
656 verifyRegionLocation(hrsDead, disabledAndOnDeadRegions);
657
658 assertTrue(" Didn't get enough regions of enabledTalbe on live rs.",
659 enabledAndAssignedRegions.size() >= 2);
660 assertTrue(" Didn't get enough regions of disalbedTable on live rs.",
661 disabledAndAssignedRegions.size() >= 2);
662 assertTrue(" Didn't get enough regions of enabledTalbe on dead rs.",
663 enabledAndOnDeadRegions.size() >= 2);
664 assertTrue(" Didn't get enough regions of disalbedTable on dead rs.",
665 disabledAndOnDeadRegions.size() >= 2);
666
667
668 log("Aborting master");
669 cluster.abortMaster(0);
670 cluster.waitOnMaster(0);
671 log("Master has aborted");
672
673
674
675
676
677
678 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
679 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
680
681 log("Beginning to mock scenarios");
682
683
684 TableStateManager zktable = new ZKTableStateManager(zkw);
685 zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
686
687 assertTrue(" The enabled table should be identified on master fail over.",
688 zktable.isTableState(TableName.valueOf("enabledTable"),
689 ZooKeeperProtos.Table.State.ENABLED));
690
691
692
693
694
695
696 HRegionInfo region = enabledAndOnDeadRegions.remove(0);
697 regionsThatShouldBeOnline.add(region);
698 ZKAssign.createNodeClosing(zkw, region, deadServerName);
699 LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
700 region + "\n\n");
701
702
703 region = disabledAndOnDeadRegions.remove(0);
704 regionsThatShouldBeOffline.add(region);
705 ZKAssign.createNodeClosing(zkw, region, deadServerName);
706 LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
707 region + "\n\n");
708
709
710
711
712
713
714 region = enabledAndOnDeadRegions.remove(0);
715 regionsThatShouldBeOnline.add(region);
716 int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
717 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
718 LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
719 region + "\n\n");
720
721
722 region = disabledAndOnDeadRegions.remove(0);
723 regionsThatShouldBeOffline.add(region);
724 version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
725 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
726 LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
727 region + "\n\n");
728
729
730
731
732
733
734 region = enabledRegions.remove(0);
735 regionsThatShouldBeOnline.add(region);
736 ZKAssign.createNodeOffline(zkw, region, deadServerName);
737 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
738 LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
739 region + "\n\n");
740
741
742 region = disabledRegions.remove(0);
743 regionsThatShouldBeOffline.add(region);
744 ZKAssign.createNodeOffline(zkw, region, deadServerName);
745 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
746 LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
747 region + "\n\n");
748
749
750
751
752
753
754 region = enabledRegions.remove(0);
755 regionsThatShouldBeOnline.add(region);
756 ZKAssign.createNodeOffline(zkw, region, deadServerName);
757 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
758 hrsDead.getServerName(), region);
759 while (true) {
760 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
761 RegionTransition rt = RegionTransition.parseFrom(bytes);
762 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
763 break;
764 }
765 Thread.sleep(100);
766 }
767 LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" + region + "\n\n");
768
769
770 region = disabledRegions.remove(0);
771 regionsThatShouldBeOffline.add(region);
772 ZKAssign.createNodeOffline(zkw, region, deadServerName);
773 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
774 hrsDead.getServerName(), region);
775 while (true) {
776 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
777 RegionTransition rt = RegionTransition.parseFrom(bytes);
778 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
779 break;
780 }
781 Thread.sleep(100);
782 }
783 LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" + region + "\n\n");
784
785
786
787
788
789
790 region = enabledRegions.remove(0);
791 regionsThatShouldBeOnline.add(region);
792 ZKAssign.createNodeOffline(zkw, region, deadServerName);
793 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
794 hrsDead.getServerName(), region);
795 while (true) {
796 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
797 RegionTransition rt = RegionTransition.parseFrom(bytes);
798 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
799 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
800 LOG.debug("DELETED " + rt);
801 break;
802 }
803 Thread.sleep(100);
804 }
805 LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
806 + "\n" + region + "\n\n");
807
808
809 region = disabledRegions.remove(0);
810 regionsThatShouldBeOffline.add(region);
811 ZKAssign.createNodeOffline(zkw, region, deadServerName);
812 ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
813 hrsDead.getServerName(), region);
814 while (true) {
815 byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
816 RegionTransition rt = RegionTransition.parseFrom(bytes);
817 if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
818 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
819 break;
820 }
821 Thread.sleep(100);
822 }
823 LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
824 + "\n" + region + "\n\n");
825
826
827
828
829
830 log("Done mocking data up in ZK");
831
832
833 log("Killing RS " + deadServerName);
834 hrsDead.abort("Killing for unit test");
835 log("RS " + deadServerName + " killed");
836
837
838
839 while (hrsDeadThread.isAlive()) {
840 Threads.sleep(10);
841 }
842 log("Starting up a new master");
843 master = cluster.startMaster().getMaster();
844 log("Waiting for master to be ready");
845 assertTrue(cluster.waitForActiveAndReadyMaster());
846 log("Master is ready");
847
848
849 while (master.getServerManager().areDeadServersInProgress()) {
850 Thread.sleep(10);
851 }
852
853
854 log("Waiting for no more RIT");
855 ZKAssign.blockUntilNoRIT(zkw);
856 log("No more RIT in ZK");
857 long now = System.currentTimeMillis();
858 long maxTime = 120000;
859 boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
860 if (!done) {
861 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
862 LOG.info("rit=" + regionStates.getRegionsInTransition());
863 }
864 long elapsed = System.currentTimeMillis() - now;
865 assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
866 elapsed < maxTime);
867 log("No more RIT in RIT map, doing final test verification");
868
869
870 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
871 now = System.currentTimeMillis();
872 maxTime = 30000;
873 for (JVMClusterUtil.RegionServerThread rst :
874 cluster.getRegionServerThreads()) {
875 try {
876 HRegionServer rs = rst.getRegionServer();
877 while (!rs.getRegionsInTransitionInRS().isEmpty()) {
878 elapsed = System.currentTimeMillis() - now;
879 assertTrue("Test timed out in getting online regions", elapsed < maxTime);
880 if (rs.isAborted() || rs.isStopped()) {
881
882 break;
883 }
884 Thread.sleep(100);
885 }
886 onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rs.getRSRpcServices()));
887 } catch (RegionServerStoppedException e) {
888 LOG.info("Got RegionServerStoppedException", e);
889 }
890 }
891
892
893 for (HRegionInfo hri : regionsThatShouldBeOnline) {
894 assertTrue("region=" + hri.getRegionNameAsString() + ", " + onlineRegions.toString(),
895 onlineRegions.contains(hri));
896 }
897
898
899 for (HRegionInfo hri : regionsThatShouldBeOffline) {
900 assertFalse(onlineRegions.contains(hri));
901 }
902
903 log("Done with verification, all passed, shutting down cluster");
904
905
906 TEST_UTIL.shutdownMiniCluster();
907 }
908
909
910
911
912 private void verifyRegionLocation(HRegionServer hrs, List<HRegionInfo> regions)
913 throws IOException {
914 List<HRegionInfo> tmpOnlineRegions =
915 ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
916 Iterator<HRegionInfo> itr = regions.iterator();
917 while (itr.hasNext()) {
918 HRegionInfo tmp = itr.next();
919 if (!tmpOnlineRegions.contains(tmp)) {
920 itr.remove();
921 }
922 }
923 }
924
925 HRegion createRegion(final HRegionInfo hri, final Path rootdir, final Configuration c,
926 final HTableDescriptor htd)
927 throws IOException {
928 HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
929
930
931
932
933
934 HRegion.closeHRegion(r);
935 return r;
936 }
937
938
939
940
941 private void log(String string) {
942 LOG.info("\n\n" + string + " \n\n");
943 }
944
945 @Test (timeout=180000)
946 public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
947 throws Exception {
948 LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
949 final int NUM_MASTERS = 1;
950 final int NUM_RS = 2;
951
952
953 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
954 Configuration conf = TEST_UTIL.getConfiguration();
955 conf.setInt("hbase.master.info.port", -1);
956 conf.setBoolean("hbase.assignment.usezk", true);
957
958 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
959 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
960
961
962 List<RegionServerThread> regionServerThreads =
963 cluster.getRegionServerThreads();
964 Region metaRegion = null;
965 HRegionServer metaRegionServer = null;
966 for (RegionServerThread regionServerThread : regionServerThreads) {
967 HRegionServer regionServer = regionServerThread.getRegionServer();
968 metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
969 regionServer.abort("");
970 if (null != metaRegion) {
971 metaRegionServer = regionServer;
972 break;
973 }
974 }
975
976 assertNotNull(metaRegion);
977 assertNotNull(metaRegionServer);
978
979 TEST_UTIL.shutdownMiniHBaseCluster();
980
981
982 ZooKeeperWatcher zkw =
983 HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
984 metaRegion, metaRegionServer.getServerName());
985
986 LOG.info("Staring cluster for second time");
987 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
988
989 HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
990 while (!master.isInitialized()) {
991 Thread.sleep(100);
992 }
993
994 log("Waiting for no more RIT");
995 ZKAssign.blockUntilNoRIT(zkw);
996
997 zkw.close();
998
999 TEST_UTIL.shutdownMiniCluster();
1000 }
1001
1002
1003
1004
1005 @Test(timeout=240000)
1006 public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
1007 final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
1008 final int NUM_MASTERS = 1;
1009 final int NUM_RS = 2;
1010
1011
1012 Configuration conf = HBaseConfiguration.create();
1013 conf.setBoolean("hbase.assignment.usezk", true);
1014
1015
1016 final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1017 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1018 log("Cluster started");
1019
1020 TEST_UTIL.createTable(table, Bytes.toBytes("family"));
1021 HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1022 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1023 HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
1024 ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1025 TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
1026
1027 ServerName dstName = null;
1028 for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
1029 if (!tmpServer.equals(serverName)) {
1030 dstName = tmpServer;
1031 break;
1032 }
1033 }
1034
1035 assertTrue(dstName != null);
1036
1037 TEST_UTIL.shutdownMiniHBaseCluster();
1038
1039 ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
1040 ZKAssign.createNodeOffline(zkw, hri, dstName);
1041 Stat stat = new Stat();
1042 byte[] data =
1043 ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
1044 assertTrue(data != null);
1045 RegionTransition rt = RegionTransition.parseFrom(data);
1046 assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
1047
1048 LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
1049 + " and dst server=" + dstName);
1050
1051
1052 TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
1053
1054 while (true) {
1055 master = TEST_UTIL.getHBaseCluster().getMaster();
1056 if (master != null && master.isInitialized()) {
1057 ServerManager serverManager = master.getServerManager();
1058 if (!serverManager.areDeadServersInProgress()) {
1059 break;
1060 }
1061 }
1062 Thread.sleep(200);
1063 }
1064
1065
1066 master = TEST_UTIL.getHBaseCluster().getMaster();
1067 master.getAssignmentManager().waitForAssignment(hri);
1068 regionStates = master.getAssignmentManager().getRegionStates();
1069 RegionState newState = regionStates.getRegionState(hri);
1070 assertTrue(newState.isOpened());
1071 }
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081 @Test (timeout=240000)
1082 public void testSimpleMasterFailover() throws Exception {
1083
1084 final int NUM_MASTERS = 3;
1085 final int NUM_RS = 3;
1086
1087
1088 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
1089
1090 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1091 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1092
1093
1094 List<MasterThread> masterThreads = cluster.getMasterThreads();
1095
1096
1097 for (MasterThread mt : masterThreads) {
1098 assertTrue(mt.isAlive());
1099 }
1100
1101
1102 int numActive = 0;
1103 int activeIndex = -1;
1104 ServerName activeName = null;
1105 HMaster active = null;
1106 for (int i = 0; i < masterThreads.size(); i++) {
1107 if (masterThreads.get(i).getMaster().isActiveMaster()) {
1108 numActive++;
1109 activeIndex = i;
1110 active = masterThreads.get(activeIndex).getMaster();
1111 activeName = active.getServerName();
1112 }
1113 }
1114 assertEquals(1, numActive);
1115 assertEquals(NUM_MASTERS, masterThreads.size());
1116 LOG.info("Active master " + activeName);
1117
1118
1119 assertNotNull(active);
1120 ClusterStatus status = active.getClusterStatus();
1121 assertEquals(status.getMaster(), activeName);
1122 assertEquals(2, status.getBackupMastersSize());
1123 assertEquals(2, status.getBackupMasters().size());
1124
1125
1126 int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
1127 HMaster master = cluster.getMaster(backupIndex);
1128 LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
1129 cluster.stopMaster(backupIndex, false);
1130 cluster.waitOnMaster(backupIndex);
1131
1132
1133 for (int i = 0; i < masterThreads.size(); i++) {
1134 if (masterThreads.get(i).getMaster().isActiveMaster()) {
1135 assertEquals(activeName, masterThreads.get(i).getMaster().getServerName());
1136 activeIndex = i;
1137 active = masterThreads.get(activeIndex).getMaster();
1138 }
1139 }
1140 assertEquals(1, numActive);
1141 assertEquals(2, masterThreads.size());
1142 int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
1143 LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " regions servers");
1144 assertEquals(3, rsCount);
1145
1146
1147 final HMaster activeFinal = active;
1148 TEST_UTIL.waitFor(TimeUnit.SECONDS.toMillis(30), new Waiter.Predicate<Exception>() {
1149 @Override public boolean evaluate() {
1150 return activeFinal.getBackupMasters().size() == 1;
1151 }
1152 });
1153
1154
1155 assertNotNull(active);
1156 final HMaster finalActive = active;
1157 TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
1158 @Override
1159 public boolean evaluate() throws Exception {
1160 ClusterStatus status = finalActive.getClusterStatus();
1161 return status.getBackupMastersSize() == 1 && status.getBackupMasters().size() == 1;
1162 }
1163 });
1164 status = active.getClusterStatus();
1165 assertEquals(activeName, status.getMaster());
1166
1167
1168 LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
1169 cluster.stopMaster(activeIndex, false);
1170 cluster.waitOnMaster(activeIndex);
1171
1172
1173 assertTrue(cluster.waitForActiveAndReadyMaster());
1174
1175 LOG.debug("\n\nVerifying backup master is now active\n");
1176
1177 assertEquals(1, masterThreads.size());
1178
1179
1180 active = masterThreads.get(0).getMaster();
1181 assertNotNull(active);
1182 status = active.getClusterStatus();
1183 ServerName masterName = status.getMaster();
1184 assertNotNull(masterName);
1185 assertEquals(active.getServerName(), masterName);
1186 assertTrue(active.isActiveMaster());
1187 assertEquals(0, status.getBackupMastersSize());
1188 assertEquals(0, status.getBackupMasters().size());
1189 int rss = status.getServersSize();
1190 LOG.info("Active master " + masterName.getServerName() + " managing " +
1191 rss + " region servers");
1192 assertEquals(3, rss);
1193
1194
1195 TEST_UTIL.shutdownMiniCluster();
1196 }
1197
1198
1199
1200
1201 @Test (timeout=180000)
1202 @SuppressWarnings("deprecation")
1203 public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
1204 final int NUM_MASTERS = 1;
1205 final int NUM_RS = 1;
1206
1207
1208 Configuration conf = HBaseConfiguration.create();
1209 conf.setBoolean("hbase.assignment.usezk", false);
1210
1211
1212 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1213 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1214 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1215 log("Cluster started");
1216
1217
1218 List<MasterThread> masterThreads = cluster.getMasterThreads();
1219 assertEquals(1, masterThreads.size());
1220
1221
1222 assertTrue(cluster.waitForActiveAndReadyMaster());
1223 HMaster master = masterThreads.get(0).getMaster();
1224 assertTrue(master.isActiveMaster());
1225 assertTrue(master.isInitialized());
1226
1227
1228 Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
1229 onlineTable.close();
1230
1231 HTableDescriptor offlineTable = new HTableDescriptor(
1232 TableName.valueOf(Bytes.toBytes("offlineTable")));
1233 offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
1234
1235 FileSystem filesystem = FileSystem.get(conf);
1236 Path rootdir = FSUtils.getRootDir(conf);
1237 FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
1238 fstd.createTableDescriptor(offlineTable);
1239
1240 HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
1241 createRegion(hriOffline, rootdir, conf, offlineTable);
1242 MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
1243
1244 log("Regions in hbase:meta and namespace have been created");
1245
1246
1247
1248 assertEquals(3, cluster.countServedRegions());
1249 HRegionInfo hriOnline = null;
1250 try (RegionLocator locator =
1251 TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
1252 hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
1253 }
1254 RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1255 RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
1256
1257
1258
1259 RegionState oldState = regionStates.getRegionState(hriOnline);
1260 RegionState newState = new RegionState(
1261 hriOnline, State.PENDING_CLOSE, oldState.getServerName());
1262 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1263
1264
1265
1266 oldState = new RegionState(hriOffline, State.OFFLINE);
1267 newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
1268 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1269
1270 HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
1271 createRegion(failedClose, rootdir, conf, offlineTable);
1272 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
1273
1274 oldState = new RegionState(failedClose, State.PENDING_CLOSE);
1275 newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
1276 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1277
1278
1279 HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
1280 createRegion(failedOpen, rootdir, conf, offlineTable);
1281 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
1282
1283
1284
1285 oldState = new RegionState(failedOpen, State.PENDING_OPEN);
1286 newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
1287 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1288
1289 HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
1290 createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
1291 MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
1292
1293
1294
1295 oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
1296 newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
1297 stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1298
1299
1300
1301
1302 log("Aborting master");
1303 cluster.abortMaster(0);
1304 cluster.waitOnMaster(0);
1305 log("Master has aborted");
1306
1307
1308 log("Starting up a new master");
1309 master = cluster.startMaster().getMaster();
1310 log("Waiting for master to be ready");
1311 cluster.waitForActiveAndReadyMaster();
1312 log("Master is ready");
1313
1314
1315 master.getAssignmentManager().waitUntilNoRegionsInTransition(60000);
1316
1317
1318 regionStates = master.getAssignmentManager().getRegionStates();
1319
1320
1321 assertTrue(regionStates.isRegionOnline(hriOffline));
1322 assertTrue(regionStates.isRegionOnline(hriOnline));
1323 assertTrue(regionStates.isRegionOnline(failedClose));
1324 assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
1325 assertTrue(regionStates.isRegionOnline(failedOpen));
1326
1327 log("Done with verification, shutting down cluster");
1328
1329
1330 TEST_UTIL.shutdownMiniCluster();
1331 }
1332
1333
1334
1335
1336 @Test(timeout = 180000)
1337 public void testMetaInTransitionWhenMasterFailover() throws Exception {
1338 final int NUM_MASTERS = 1;
1339 final int NUM_RS = 1;
1340
1341
1342 Configuration conf = HBaseConfiguration.create();
1343 conf.setBoolean("hbase.assignment.usezk", false);
1344 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1345 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1346 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1347 log("Cluster started");
1348
1349 log("Moving meta off the master");
1350 HMaster activeMaster = cluster.getMaster();
1351 HRegionServer rs = cluster.getRegionServer(0);
1352 ServerName metaServerName = cluster.getLiveRegionServerThreads()
1353 .get(0).getRegionServer().getServerName();
1354 activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
1355 Bytes.toBytes(metaServerName.getServerName()));
1356 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1357 assertEquals("Meta should be assigned on expected regionserver",
1358 metaServerName, activeMaster.getMetaTableLocator()
1359 .getMetaRegionLocation(activeMaster.getZooKeeper()));
1360
1361
1362 log("Aborting master");
1363 activeMaster.abort("test-kill");
1364 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1365 log("Master has aborted");
1366
1367
1368 RegionState metaState =
1369 MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
1370 assertEquals("hbase:meta should be onlined on RS",
1371 metaState.getServerName(), rs.getServerName());
1372 assertEquals("hbase:meta should be onlined on RS",
1373 metaState.getState(), State.OPEN);
1374
1375
1376 log("Starting up a new master");
1377 activeMaster = cluster.startMaster().getMaster();
1378 log("Waiting for master to be ready");
1379 cluster.waitForActiveAndReadyMaster();
1380 log("Master is ready");
1381
1382
1383 metaState =
1384 MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1385 assertEquals("hbase:meta should be onlined on RS",
1386 metaState.getServerName(), rs.getServerName());
1387 assertEquals("hbase:meta should be onlined on RS",
1388 metaState.getState(), State.OPEN);
1389
1390
1391
1392
1393
1394 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1395 rs.getServerName(), State.PENDING_OPEN);
1396 Region meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1397 rs.removeFromOnlineRegions(meta, null);
1398 ((HRegion)meta).close();
1399
1400 log("Aborting master");
1401 activeMaster.abort("test-kill");
1402 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1403 log("Master has aborted");
1404
1405
1406 log("Starting up a new master");
1407 activeMaster = cluster.startMaster().getMaster();
1408 log("Waiting for master to be ready");
1409 cluster.waitForActiveAndReadyMaster();
1410 log("Master is ready");
1411
1412 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1413 log("Meta was assigned");
1414
1415 metaState =
1416 MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1417 assertEquals("hbase:meta should be onlined on RS",
1418 metaState.getServerName(), rs.getServerName());
1419 assertEquals("hbase:meta should be onlined on RS",
1420 metaState.getState(), State.OPEN);
1421
1422
1423
1424
1425
1426 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1427 rs.getServerName(), State.PENDING_CLOSE);
1428
1429 log("Aborting master");
1430 activeMaster.abort("test-kill");
1431 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1432 log("Master has aborted");
1433
1434 rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
1435 rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1436
1437
1438 log("Starting up a new master");
1439 activeMaster = cluster.startMaster().getMaster();
1440 log("Waiting for master to be ready");
1441 cluster.waitForActiveAndReadyMaster();
1442 log("Master is ready");
1443
1444 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1445 log("Meta was assigned");
1446
1447 rs.getRSRpcServices().closeRegion(
1448 null,
1449 RequestConverter.buildCloseRegionRequest(rs.getServerName(),
1450 HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1451
1452
1453 MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1454 ServerName.valueOf("dummyserver.example.org", 1234, -1L), State.OPEN);
1455
1456 log("Aborting master");
1457 activeMaster.stop("test-kill");
1458
1459 cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1460 log("Master has aborted");
1461
1462
1463 log("Starting up a new master");
1464 activeMaster = cluster.startMaster().getMaster();
1465 log("Waiting for master to be ready");
1466 cluster.waitForActiveAndReadyMaster();
1467 log("Master is ready");
1468
1469 TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1470 log("Meta was assigned");
1471
1472
1473 TEST_UTIL.shutdownMiniCluster();
1474 }
1475 }
1476