View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertTrue;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Set;
31  import java.util.TreeSet;
32  
33  import java.util.concurrent.TimeUnit;
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FileSystem;
38  import org.apache.hadoop.fs.Path;
39  import org.apache.hadoop.hbase.Abortable;
40  import org.apache.hadoop.hbase.ClusterStatus;
41  import org.apache.hadoop.hbase.HBaseConfiguration;
42  import org.apache.hadoop.hbase.HBaseTestingUtility;
43  import org.apache.hadoop.hbase.HColumnDescriptor;
44  import org.apache.hadoop.hbase.HConstants;
45  import org.apache.hadoop.hbase.HRegionInfo;
46  import org.apache.hadoop.hbase.HTableDescriptor;
47  import org.apache.hadoop.hbase.Waiter;
48  import org.apache.hadoop.hbase.testclassification.LargeTests;
49  import org.apache.hadoop.hbase.MetaTableAccessor;
50  import org.apache.hadoop.hbase.MiniHBaseCluster;
51  import org.apache.hadoop.hbase.RegionTransition;
52  import org.apache.hadoop.hbase.ServerName;
53  import org.apache.hadoop.hbase.TableName;
54  import org.apache.hadoop.hbase.TableStateManager;
55  import org.apache.hadoop.hbase.client.RegionLocator;
56  import org.apache.hadoop.hbase.client.Table;
57  import org.apache.hadoop.hbase.executor.EventType;
58  import org.apache.hadoop.hbase.master.RegionState.State;
59  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
60  import org.apache.hadoop.hbase.protobuf.RequestConverter;
61  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
62  import org.apache.hadoop.hbase.regionserver.HRegion;
63  import org.apache.hadoop.hbase.regionserver.HRegionServer;
64  import org.apache.hadoop.hbase.regionserver.Region;
65  import org.apache.hadoop.hbase.regionserver.RegionMergeTransactionImpl;
66  import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
67  import org.apache.hadoop.hbase.util.Bytes;
68  import org.apache.hadoop.hbase.util.FSTableDescriptors;
69  import org.apache.hadoop.hbase.util.FSUtils;
70  import org.apache.hadoop.hbase.util.JVMClusterUtil;
71  import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
72  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
73  import org.apache.hadoop.hbase.util.Threads;
74  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
75  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
76  import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
77  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
78  import org.apache.zookeeper.data.Stat;
79  import org.junit.Ignore;
80  import org.junit.Test;
81  import org.junit.experimental.categories.Category;
82  
/**
 * Integration tests for HMaster failover while regions are in transition.
 * Runs against a mini-cluster; each test mocks region-in-transition state
 * directly in ZooKeeper, kills the active master, and verifies that the
 * newly elected master recovers the cluster to the expected assignment.
 */
@Category(LargeTests.class)
public class TestMasterFailover {
  // Shared logger for all test methods in this class.
  private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
86  
87    /**
88     * Complex test of master failover that tests as many permutations of the
89     * different possible states that regions in transition could be in within ZK.
90     * <p>
91     * This tests the proper handling of these states by the failed-over master
92     * and includes a thorough testing of the timeout code as well.
93     * <p>
94     * Starts with a single master and three regionservers.
95     * <p>
96   * Creates two tables, enabledTable and disabledTable, each pre-split into
97   * multiple regions.  The disabledTable is then disabled.
98     * <p>
99     * After reaching steady-state, the master is killed.  We then mock several
100    * states in ZK.
101    * <p>
102    * After mocking them, we will startup a new master which should become the
103    * active master and also detect that it is a failover.  The primary test
104    * passing condition will be that all regions of the enabled table are
105    * assigned and all the regions of the disabled table are not assigned.
106    * <p>
107    * The different scenarios to be tested are below:
108    * <p>
109    * <b>ZK State:  OFFLINE</b>
110    * <p>A node can get into OFFLINE state if</p>
111    * <ul>
112    * <li>An RS fails to open a region, so it reverts the state back to OFFLINE
113    * <li>The Master is assigning the region to a RS before it sends RPC
114    * </ul>
115    * <p>We will mock the scenarios</p>
116    * <ul>
117    * <li>Master has assigned an enabled region but RS failed so a region is
118    *     not assigned anywhere and is sitting in ZK as OFFLINE</li>
119    * <li>This seems to cover both cases?</li>
120    * </ul>
121    * <p>
122    * <b>ZK State:  CLOSING</b>
123    * <p>A node can get into CLOSING state if</p>
124    * <ul>
125    * <li>An RS has begun to close a region
126    * </ul>
127    * <p>We will mock the scenarios</p>
128    * <ul>
129    * <li>Region of enabled table was being closed but did not complete
130    * <li>Region of disabled table was being closed but did not complete
131    * </ul>
132    * <p>
133    * <b>ZK State:  CLOSED</b>
134    * <p>A node can get into CLOSED state if</p>
135    * <ul>
136    * <li>An RS has completed closing a region but not acknowledged by master yet
137    * </ul>
138    * <p>We will mock the scenarios</p>
139    * <ul>
140    * <li>Region of a table that should be enabled was closed on an RS
141    * <li>Region of a table that should be disabled was closed on an RS
142    * </ul>
143    * <p>
144    * <b>ZK State:  OPENING</b>
145    * <p>A node can get into OPENING state if</p>
146    * <ul>
147    * <li>An RS has begun to open a region
148    * </ul>
149    * <p>We will mock the scenarios</p>
150    * <ul>
151    * <li>RS was opening a region of enabled table but never finishes
152    * </ul>
153    * <p>
154    * <b>ZK State:  OPENED</b>
155    * <p>A node can get into OPENED state if</p>
156    * <ul>
157    * <li>An RS has finished opening a region but not acknowledged by master yet
158    * </ul>
159    * <p>We will mock the scenarios</p>
160    * <ul>
161    * <li>Region of a table that should be enabled was opened on an RS
162    * <li>Region of a table that should be disabled was opened on an RS
163    * </ul>
164    * @throws Exception
165    */
  @Test (timeout=240000)
  public void testMasterFailoverWithMockedRIT() throws Exception {

    final int NUM_MASTERS = 1;
    final int NUM_RS = 3;

    // Create config to use for this cluster; ZK-based assignment is required
    // because this test fakes region transitions by writing znodes directly.
    Configuration conf = HBaseConfiguration.create();
    conf.setBoolean("hbase.assignment.usezk", true);

    // Start the cluster
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Cluster started");

    // Create a ZKW to use in the test
    ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);

    // get all the master threads
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    assertEquals(1, masterThreads.size());

    // only one master thread, let's wait for it to be initialized
    assertTrue(cluster.waitForActiveAndReadyMaster());
    HMaster master = masterThreads.get(0).getMaster();
    assertTrue(master.isActiveMaster());
    assertTrue(master.isInitialized());

    // disable load balancing on this master so the balancer cannot move
    // regions out from under the hand-crafted assignments below
    master.balanceSwitch(false);

    // create two tables in META, pre-split on SPLIT_KEYS
    // NOTE(review): 11 start keys are listed here while the javadoc above says
    // 5 regions per table -- confirm the intended region count.
    byte [] FAMILY = Bytes.toBytes("family");
    byte [][] SPLIT_KEYS = new byte [][] {
        new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
        Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
        Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
        Bytes.toBytes("iii"), Bytes.toBytes("jjj")
    };

    byte [] enabledTable = Bytes.toBytes("enabledTable");
    HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
    htdEnabled.addFamily(new HColumnDescriptor(FAMILY));

    FileSystem filesystem = FileSystem.get(conf);
    Path rootdir = FSUtils.getRootDir(conf);
    FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
    // Write the .tableinfo
    fstd.createTableDescriptor(htdEnabled);

    HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(), null, null);
    createRegion(hriEnabled, rootdir, conf, htdEnabled);

    // Regions exist only as meta entries; none of them are opened on an RS yet.
    List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
        TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);

    TableName disabledTable = TableName.valueOf("disabledTable");
    HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
    htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
    // Write the .tableinfo
    fstd.createTableDescriptor(htdDisabled);
    HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
    createRegion(hriDisabled, rootdir, conf, htdDisabled);
    List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
        TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);

    // This table is created the normal way (assigned immediately) and is used
    // only to fake an in-progress merge across the failover.
    TableName tableWithMergingRegions = TableName.valueOf("tableWithMergingRegions");
    TEST_UTIL.createTable(tableWithMergingRegions, FAMILY, new byte [][] {Bytes.toBytes("m")});

    log("Regions in hbase:meta and namespace have been created");

    // at this point we only expect 4 regions to be assigned out
    // (catalogs and namespace, + 2 merging regions)
    assertEquals(4, cluster.countServedRegions());

    // Move merging regions to the same region server -- a merge transaction
    // requires both parents to be co-located.
    AssignmentManager am = master.getAssignmentManager();
    RegionStates regionStates = am.getRegionStates();
    List<HRegionInfo> mergingRegions = regionStates.getRegionsOfTable(tableWithMergingRegions);
    assertEquals(2, mergingRegions.size());
    HRegionInfo a = mergingRegions.get(0);
    HRegionInfo b = mergingRegions.get(1);
    HRegionInfo newRegion = RegionMergeTransactionImpl.getMergedRegionInfo(a, b);
    ServerName mergingServer = regionStates.getRegionServerOfRegion(a);
    ServerName serverB = regionStates.getRegionServerOfRegion(b);
    if (!serverB.equals(mergingServer)) {
      RegionPlan plan = new RegionPlan(b, serverB, mergingServer);
      am.balance(plan);
      assertTrue(am.waitForAssignment(b));
    }

    // Let's just assign everything to first RS
    HRegionServer hrs = cluster.getRegionServer(0);
    ServerName serverName = hrs.getServerName();
    // closingRegion is assigned now and later mocked as CLOSING in ZK.
    HRegionInfo closingRegion = enabledRegions.remove(0);
    // we'll need some regions to already be assigned out properly on live RS
    List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
    enabledAndAssignedRegions.add(enabledRegions.remove(0));
    enabledAndAssignedRegions.add(enabledRegions.remove(0));
    enabledAndAssignedRegions.add(closingRegion);

    List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
    disabledAndAssignedRegions.add(disabledRegions.remove(0));
    disabledAndAssignedRegions.add(disabledRegions.remove(0));

    // now actually assign them: force each region onto the first RS via an
    // explicit plan, then trigger the assignment
    for (HRegionInfo hri : enabledAndAssignedRegions) {
      master.assignmentManager.addPlan(hri.getEncodedName(),
          new RegionPlan(hri, null, serverName));
      master.assignmentManager.assign(hri, true);
    }

    for (HRegionInfo hri : disabledAndAssignedRegions) {
      master.assignmentManager.addPlan(hri.getEncodedName(),
          new RegionPlan(hri, null, serverName));
      master.assignmentManager.assign(hri, true);
    }

    // wait for no more RIT
    log("Waiting for assignment to finish");
    ZKAssign.blockUntilNoRIT(zkw);
    log("Assignment completed");

    // Stop the master so the mocked ZK states below are seen for the first
    // time by the failed-over master, not handled by the current one.
    log("Aborting master");
    cluster.abortMaster(0);
    cluster.waitOnMaster(0);
    log("Master has aborted");

    /*
     * Now, let's start mocking up some weird states as described in the method
     * javadoc.  Each scenario adds its region to one of the two expectation
     * lists that are verified at the end of the test.
     */

    List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
    List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();

    log("Beginning to mock scenarios");

    // Disable the disabledTable in ZK (state flag only; regions untouched)
    TableStateManager zktable = new ZKTableStateManager(zkw);
    zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);

    /*
     *  ZK = OFFLINE
     */

    // Region that should be assigned but is not and is in ZK as OFFLINE
    // Cause: This can happen if the master crashed after creating the znode but before sending the
    //  request to the region server
    HRegionInfo region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);

    /*
     * ZK = CLOSING
     */
    // Cause: Same as offline.
    // closingRegion is actually open on the live RS; only the znode says CLOSING.
    regionsThatShouldBeOnline.add(closingRegion);
    ZKAssign.createNodeClosing(zkw, closingRegion, serverName);

    /*
     * ZK = CLOSED
     */

    // Region of enabled table closed but not ack
    // Cause: Master was down while the region server updated the ZK status.
    region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    int version = ZKAssign.createNodeClosing(zkw, region, serverName);
    ZKAssign.transitionNodeClosed(zkw, region, serverName, version);

    // Region of disabled table closed but not ack
    region = disabledRegions.remove(0);
    regionsThatShouldBeOffline.add(region);
    version = ZKAssign.createNodeClosing(zkw, region, serverName);
    ZKAssign.transitionNodeClosed(zkw, region, serverName, version);

    /*
     * ZK = OPENED
     */

    // Region of enabled table was opened on RS
    // Cause: as offline
    region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);
    // Really open the region on the RS, then poll ZK until the RS has
    // transitioned the znode to OPENED (the ack the dead master never saw).
    ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
    while (true) {
      byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
      RegionTransition rt = RegionTransition.parseFrom(bytes);
      if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
        break;
      }
      Thread.sleep(100);
    }

    // Region of disabled table was opened on RS
    // Cause: Master failed while updating the status for this region server.
    region = disabledRegions.remove(0);
    regionsThatShouldBeOffline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);
    ProtobufUtil.openRegion(null, hrs.getRSRpcServices(), hrs.getServerName(), region);
    while (true) {
      byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
      RegionTransition rt = RegionTransition.parseFrom(bytes);
      if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
        break;
      }
      Thread.sleep(100);
    }

    /*
     * ZK = MERGING
     */

    // Regions of table of merging regions
    // Cause: Master was down while merging was going on
    hrs.getCoordinatedStateManager().
      getRegionMergeCoordination().startRegionMergeTransaction(newRegion, mergingServer, a, b);

    /*
     * ZK = NONE
     */

    /*
     * DONE MOCKING
     */

    log("Done mocking data up in ZK");

    // Start up a new master; it should detect this is a failover and process
    // every mocked transition above during initialization
    log("Starting up a new master");
    master = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    cluster.waitForActiveAndReadyMaster();
    log("Master is ready");

    // Get new region states since master restarted
    regionStates = master.getAssignmentManager().getRegionStates();
    // Merging region should remain merging
    assertTrue(regionStates.isRegionInState(a, State.MERGING));
    assertTrue(regionStates.isRegionInState(b, State.MERGING));
    assertTrue(regionStates.isRegionInState(newRegion, State.MERGING_NEW));
    // Now remove the faked merging znode, merging regions should be
    // offlined automatically, otherwise it is a bug in AM.
    ZKAssign.deleteNodeFailSilent(zkw, newRegion);

    // Failover should be completed, now wait for no RIT
    log("Waiting for no more RIT");
    ZKAssign.blockUntilNoRIT(zkw);
    log("No more RIT in ZK, now doing final test verification");

    // Grab all the regions that are online across RSs
    Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
    for (JVMClusterUtil.RegionServerThread rst :
      cluster.getRegionServerThreads()) {
      onlineRegions.addAll(ProtobufUtil.getOnlineRegions(
        rst.getRegionServer().getRSRpcServices()));
    }

    // Now, everything that should be online should be online
    for (HRegionInfo hri : regionsThatShouldBeOnline) {
      assertTrue(onlineRegions.contains(hri));
    }

    // Everything that should be offline should not be online
    for (HRegionInfo hri : regionsThatShouldBeOffline) {
      if (onlineRegions.contains(hri)) {
       LOG.debug(hri); // log the offender before failing for easier triage
      }
      assertFalse(onlineRegions.contains(hri));
    }

    log("Done with verification, all passed, shutting down cluster");

    // Done, shutdown the cluster
    TEST_UTIL.shutdownMiniCluster();
  }
446 
447   /**
448    * Complex test of master failover that tests as many permutations of the
449    * different possible states that regions in transition could be in within ZK
450    * pointing to an RS that has died while no master is around to process it.
451    * <p>
452    * This tests the proper handling of these states by the failed-over master
453    * and includes a thorough testing of the timeout code as well.
454    * <p>
455    * Starts with a single master and two regionservers.
456    * <p>
457    * Creates two tables, enabledTable and disabledTable, each containing 5
458    * regions.  The disabledTable is then disabled.
459    * <p>
460    * After reaching steady-state, the master is killed.  We then mock several
461    * states in ZK.  And one of the RS will be killed.
462    * <p>
463    * After mocking them and killing an RS, we will startup a new master which
464    * should become the active master and also detect that it is a failover.  The
465    * primary test passing condition will be that all regions of the enabled
466    * table are assigned and all the regions of the disabled table are not
467    * assigned.
468    * <p>
469    * The different scenarios to be tested are below:
470    * <p>
471    * <b>ZK State:  CLOSING</b>
472    * <p>A node can get into CLOSING state if</p>
473    * <ul>
474    * <li>An RS has begun to close a region
475    * </ul>
476    * <p>We will mock the scenarios</p>
477    * <ul>
478    * <li>Region was being closed but the RS died before finishing the close
479    * </ul>
480    * <b>ZK State:  OPENED</b>
481    * <p>A node can get into OPENED state if</p>
482    * <ul>
483    * <li>An RS has finished opening a region but not acknowledged by master yet
484    * </ul>
485    * <p>We will mock the scenarios</p>
486    * <ul>
487    * <li>Region of a table that should be enabled was opened by a now-dead RS
488    * <li>Region of a table that should be disabled was opened by a now-dead RS
489    * </ul>
490    * <p>
491    * <b>ZK State:  NONE</b>
492    * <p>A region could not have a transition node if</p>
493    * <ul>
494    * <li>The server hosting the region died and no master processed it
495    * </ul>
496    * <p>We will mock the scenarios</p>
497    * <ul>
498    * <li>Region of enabled table was on a dead RS that was not yet processed
499    * <li>Region of disabled table was on a dead RS that was not yet processed
500    * </ul>
501    * @throws Exception
502    */
503   @Test (timeout=180000)
504   public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
505 
506     final int NUM_MASTERS = 1;
507     final int NUM_RS = 2;
508 
509     // Create and start the cluster
510     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
511     Configuration conf = TEST_UTIL.getConfiguration();
512     conf.setBoolean("hbase.assignment.usezk", true);
513 
514     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
515     conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MAXTOSTART, 2);
516     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
517     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
518     log("Cluster started");
519 
520     // Create a ZKW to use in the test
521     ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
522         "unittest", new Abortable() {
523 
524           @Override
525           public void abort(String why, Throwable e) {
526             LOG.error("Fatal ZK Error: " + why, e);
527             org.junit.Assert.assertFalse("Fatal ZK error", true);
528           }
529 
530           @Override
531           public boolean isAborted() {
532             return false;
533           }
534 
535     });
536 
537     // get all the master threads
538     List<MasterThread> masterThreads = cluster.getMasterThreads();
539     assertEquals(1, masterThreads.size());
540 
541     // only one master thread, let's wait for it to be initialized
542     assertTrue(cluster.waitForActiveAndReadyMaster());
543     HMaster master = masterThreads.get(0).getMaster();
544     assertTrue(master.isActiveMaster());
545     assertTrue(master.isInitialized());
546 
547     // disable load balancing on this master
548     master.balanceSwitch(false);
549 
550     // create two tables in META, each with 30 regions
551     byte [] FAMILY = Bytes.toBytes("family");
552     byte[][] SPLIT_KEYS =
553         TEST_UTIL.getRegionSplitStartKeys(Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 30);
554 
555     byte [] enabledTable = Bytes.toBytes("enabledTable");
556     HTableDescriptor htdEnabled = new HTableDescriptor(TableName.valueOf(enabledTable));
557     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
558     FileSystem filesystem = FileSystem.get(conf);
559     Path rootdir = FSUtils.getRootDir(conf);
560     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
561     // Write the .tableinfo
562     fstd.createTableDescriptor(htdEnabled);
563     HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getTableName(),
564         null, null);
565     createRegion(hriEnabled, rootdir, conf, htdEnabled);
566 
567     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
568         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
569 
570     TableName disabledTable =
571         TableName.valueOf("disabledTable");
572     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
573     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
574     // Write the .tableinfo
575     fstd.createTableDescriptor(htdDisabled);
576     HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getTableName(), null, null);
577     createRegion(hriDisabled, rootdir, conf, htdDisabled);
578 
579     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
580         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
581 
582     log("Regions in hbase:meta and Namespace have been created");
583 
584     // at this point we only expect 2 regions to be assigned out (catalogs and namespace  )
585     assertEquals(2, cluster.countServedRegions());
586 
587     // The first RS will stay online
588     List<RegionServerThread> regionservers =
589       cluster.getRegionServerThreads();
590     HRegionServer hrs = regionservers.get(0).getRegionServer();
591 
592     // The second RS is going to be hard-killed
593     RegionServerThread hrsDeadThread = regionservers.get(1);
594     HRegionServer hrsDead = hrsDeadThread.getRegionServer();
595     ServerName deadServerName = hrsDead.getServerName();
596 
597     // we'll need some regions to already be assigned out properly on live RS
598     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
599     enabledAndAssignedRegions.addAll(enabledRegions.subList(0, 6));
600     enabledRegions.removeAll(enabledAndAssignedRegions);
601     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
602     disabledAndAssignedRegions.addAll(disabledRegions.subList(0, 6));
603     disabledRegions.removeAll(disabledAndAssignedRegions);
604 
605     // now actually assign them
606     for (HRegionInfo hri : enabledAndAssignedRegions) {
607       master.assignmentManager.addPlan(hri.getEncodedName(),
608           new RegionPlan(hri, null, hrs.getServerName()));
609       master.assignmentManager.assign(hri, true);
610     }
611     for (HRegionInfo hri : disabledAndAssignedRegions) {
612       master.assignmentManager.addPlan(hri.getEncodedName(),
613           new RegionPlan(hri, null, hrs.getServerName()));
614       master.assignmentManager.assign(hri, true);
615     }
616 
617     log("Waiting for assignment to finish");
618     ZKAssign.blockUntilNoRIT(zkw);
619     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
620     log("Assignment completed");
621 
622     assertTrue(" Table must be enabled.", master.getAssignmentManager()
623         .getTableStateManager().isTableState(TableName.valueOf("enabledTable"),
624         ZooKeeperProtos.Table.State.ENABLED));
625     // we also need regions assigned out on the dead server
626     List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
627     enabledAndOnDeadRegions.addAll(enabledRegions.subList(0, 6));
628     enabledRegions.removeAll(enabledAndOnDeadRegions);
629     List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
630     disabledAndOnDeadRegions.addAll(disabledRegions.subList(0, 6));
631     disabledRegions.removeAll(disabledAndOnDeadRegions);
632 
633     // set region plan to server to be killed and trigger assign
634     for (HRegionInfo hri : enabledAndOnDeadRegions) {
635       master.assignmentManager.addPlan(hri.getEncodedName(),
636           new RegionPlan(hri, null, deadServerName));
637       master.assignmentManager.assign(hri, true);
638     }
639     for (HRegionInfo hri : disabledAndOnDeadRegions) {
640       master.assignmentManager.addPlan(hri.getEncodedName(),
641           new RegionPlan(hri, null, deadServerName));
642       master.assignmentManager.assign(hri, true);
643     }
644 
645     // wait for no more RIT
646     log("Waiting for assignment to finish");
647     ZKAssign.blockUntilNoRIT(zkw);
648     master.assignmentManager.waitUntilNoRegionsInTransition(60000);
649     log("Assignment completed");
650 
651     // Due to master.assignment.assign(hri) could fail to assign a region to a specified RS
652     // therefore, we need make sure that regions are in the expected RS
653     verifyRegionLocation(hrs, enabledAndAssignedRegions);
654     verifyRegionLocation(hrs, disabledAndAssignedRegions);
655     verifyRegionLocation(hrsDead, enabledAndOnDeadRegions);
656     verifyRegionLocation(hrsDead, disabledAndOnDeadRegions);
657 
658     assertTrue(" Didn't get enough regions of enabledTalbe on live rs.",
659       enabledAndAssignedRegions.size() >= 2);
660     assertTrue(" Didn't get enough regions of disalbedTable on live rs.",
661       disabledAndAssignedRegions.size() >= 2);
662     assertTrue(" Didn't get enough regions of enabledTalbe on dead rs.",
663       enabledAndOnDeadRegions.size() >= 2);
664     assertTrue(" Didn't get enough regions of disalbedTable on dead rs.",
665       disabledAndOnDeadRegions.size() >= 2);
666 
667     // Stop the master
668     log("Aborting master");
669     cluster.abortMaster(0);
670     cluster.waitOnMaster(0);
671     log("Master has aborted");
672 
673     /*
674      * Now, let's start mocking up some weird states as described in the method
675      * javadoc.
676      */
677 
678     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
679     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
680 
681     log("Beginning to mock scenarios");
682 
683     // Disable the disabledTable in ZK
684     TableStateManager zktable = new ZKTableStateManager(zkw);
685     zktable.setTableState(disabledTable, ZooKeeperProtos.Table.State.DISABLED);
686 
687     assertTrue(" The enabled table should be identified on master fail over.",
688         zktable.isTableState(TableName.valueOf("enabledTable"),
689           ZooKeeperProtos.Table.State.ENABLED));
690 
691     /*
692      * ZK = CLOSING
693      */
694 
695     // Region of enabled table being closed on dead RS but not finished
696     HRegionInfo region = enabledAndOnDeadRegions.remove(0);
697     regionsThatShouldBeOnline.add(region);
698     ZKAssign.createNodeClosing(zkw, region, deadServerName);
699     LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
700         region + "\n\n");
701 
702     // Region of disabled table being closed on dead RS but not finished
703     region = disabledAndOnDeadRegions.remove(0);
704     regionsThatShouldBeOffline.add(region);
705     ZKAssign.createNodeClosing(zkw, region, deadServerName);
706     LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
707         region + "\n\n");
708 
709     /*
710      * ZK = CLOSED
711      */
712 
713     // Region of enabled on dead server gets closed but not ack'd by master
714     region = enabledAndOnDeadRegions.remove(0);
715     regionsThatShouldBeOnline.add(region);
716     int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
717     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
718     LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
719         region + "\n\n");
720 
721     // Region of disabled on dead server gets closed but not ack'd by master
722     region = disabledAndOnDeadRegions.remove(0);
723     regionsThatShouldBeOffline.add(region);
724     version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
725     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
726     LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
727         region + "\n\n");
728 
729     /*
730      * ZK = OPENING
731      */
732 
733     // RS was opening a region of enabled table then died
734     region = enabledRegions.remove(0);
735     regionsThatShouldBeOnline.add(region);
736     ZKAssign.createNodeOffline(zkw, region, deadServerName);
737     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
738     LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
739         region + "\n\n");
740 
741     // RS was opening a region of disabled table then died
742     region = disabledRegions.remove(0);
743     regionsThatShouldBeOffline.add(region);
744     ZKAssign.createNodeOffline(zkw, region, deadServerName);
745     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
746     LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
747         region + "\n\n");
748 
749     /*
750      * ZK = OPENED
751      */
752 
753     // Region of enabled table was opened on dead RS
754     region = enabledRegions.remove(0);
755     regionsThatShouldBeOnline.add(region);
756     ZKAssign.createNodeOffline(zkw, region, deadServerName);
757     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
758       hrsDead.getServerName(), region);
759     while (true) {
760       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
761       RegionTransition rt = RegionTransition.parseFrom(bytes);
762       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
763         break;
764       }
765       Thread.sleep(100);
766     }
767     LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" + region + "\n\n");
768 
769     // Region of disabled table was opened on dead RS
770     region = disabledRegions.remove(0);
771     regionsThatShouldBeOffline.add(region);
772     ZKAssign.createNodeOffline(zkw, region, deadServerName);
773     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
774       hrsDead.getServerName(), region);
775     while (true) {
776       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
777       RegionTransition rt = RegionTransition.parseFrom(bytes);
778       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
779         break;
780       }
781       Thread.sleep(100);
782     }
783     LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" + region + "\n\n");
784 
785     /*
786      * ZK = NONE
787      */
788 
789     // Region of enabled table was open at steady-state on dead RS
790     region = enabledRegions.remove(0);
791     regionsThatShouldBeOnline.add(region);
792     ZKAssign.createNodeOffline(zkw, region, deadServerName);
793     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
794       hrsDead.getServerName(), region);
795     while (true) {
796       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
797       RegionTransition rt = RegionTransition.parseFrom(bytes);
798       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
799         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
800         LOG.debug("DELETED " + rt);
801         break;
802       }
803       Thread.sleep(100);
804     }
805     LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
806         + "\n" + region + "\n\n");
807 
808     // Region of disabled table was open at steady-state on dead RS
809     region = disabledRegions.remove(0);
810     regionsThatShouldBeOffline.add(region);
811     ZKAssign.createNodeOffline(zkw, region, deadServerName);
812     ProtobufUtil.openRegion(null, hrsDead.getRSRpcServices(),
813       hrsDead.getServerName(), region);
814     while (true) {
815       byte [] bytes = ZKAssign.getData(zkw, region.getEncodedName());
816       RegionTransition rt = RegionTransition.parseFrom(bytes);
817       if (rt != null && rt.getEventType().equals(EventType.RS_ZK_REGION_OPENED)) {
818         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName(), rt.getServerName());
819         break;
820       }
821       Thread.sleep(100);
822     }
823     LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
824       + "\n" + region + "\n\n");
825 
826     /*
827      * DONE MOCKING
828      */
829 
830     log("Done mocking data up in ZK");
831 
832     // Kill the RS that had a hard death
833     log("Killing RS " + deadServerName);
834     hrsDead.abort("Killing for unit test");
835     log("RS " + deadServerName + " killed");
836 
837     // Start up a new master.  Wait until regionserver is completely down
838     // before starting new master because of hbase-4511.
839     while (hrsDeadThread.isAlive()) {
840       Threads.sleep(10);
841     }
842     log("Starting up a new master");
843     master = cluster.startMaster().getMaster();
844     log("Waiting for master to be ready");
845     assertTrue(cluster.waitForActiveAndReadyMaster());
846     log("Master is ready");
847 
848     // Wait until SSH processing completed for dead server.
849     while (master.getServerManager().areDeadServersInProgress()) {
850       Thread.sleep(10);
851     }
852 
853     // Failover should be completed, now wait for no RIT
854     log("Waiting for no more RIT");
855     ZKAssign.blockUntilNoRIT(zkw);
856     log("No more RIT in ZK");
857     long now = System.currentTimeMillis();
858     long maxTime = 120000;
859     boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
860     if (!done) {
861       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
862       LOG.info("rit=" + regionStates.getRegionsInTransition());
863     }
864     long elapsed = System.currentTimeMillis() - now;
865     assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
866       elapsed < maxTime);
867     log("No more RIT in RIT map, doing final test verification");
868 
869     // Grab all the regions that are online across RSs
870     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
871     now = System.currentTimeMillis();
872     maxTime = 30000;
873     for (JVMClusterUtil.RegionServerThread rst :
874         cluster.getRegionServerThreads()) {
875       try {
876         HRegionServer rs = rst.getRegionServer();
877         while (!rs.getRegionsInTransitionInRS().isEmpty()) {
878           elapsed = System.currentTimeMillis() - now;
879           assertTrue("Test timed out in getting online regions", elapsed < maxTime);
880           if (rs.isAborted() || rs.isStopped()) {
881             // This region server is stopped, skip it.
882             break;
883           }
884           Thread.sleep(100);
885         }
886         onlineRegions.addAll(ProtobufUtil.getOnlineRegions(rs.getRSRpcServices()));
887       } catch (RegionServerStoppedException e) {
888         LOG.info("Got RegionServerStoppedException", e);
889       }
890     }
891 
892     // Now, everything that should be online should be online
893     for (HRegionInfo hri : regionsThatShouldBeOnline) {
894       assertTrue("region=" + hri.getRegionNameAsString() + ", " + onlineRegions.toString(),
895         onlineRegions.contains(hri));
896     }
897 
898     // Everything that should be offline should not be online
899     for (HRegionInfo hri : regionsThatShouldBeOffline) {
900       assertFalse(onlineRegions.contains(hri));
901     }
902 
903     log("Done with verification, all passed, shutting down cluster");
904 
905     // Done, shutdown the cluster
906     TEST_UTIL.shutdownMiniCluster();
907   }
908 
909   /**
910    * Verify regions are on the expected region server
911    */
912   private void verifyRegionLocation(HRegionServer hrs, List<HRegionInfo> regions)
913       throws IOException {
914     List<HRegionInfo> tmpOnlineRegions =
915       ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
916     Iterator<HRegionInfo> itr = regions.iterator();
917     while (itr.hasNext()) {
918       HRegionInfo tmp = itr.next();
919       if (!tmpOnlineRegions.contains(tmp)) {
920         itr.remove();
921       }
922     }
923   }
924 
925   HRegion createRegion(final HRegionInfo  hri, final Path rootdir, final Configuration c,
926       final HTableDescriptor htd)
927   throws IOException {
928     HRegion r = HRegion.createHRegion(hri, rootdir, c, htd);
929     // The above call to create a region will create an wal file.  Each
930     // log file create will also create a running thread to do syncing.  We need
931     // to close out this log else we will have a running thread trying to sync
932     // the file system continuously which is ugly when dfs is taken away at the
933     // end of the test.
934     HRegion.closeHRegion(r);
935     return r;
936   }
937 
  // TODO: Add a test covering the permutations where the region in transition,
  //       or the killed RS, is hosting the ROOT or hbase:meta regions.
940 
941   private void log(String string) {
942     LOG.info("\n\n" + string + " \n\n");
943   }
944 
  /**
   * Verify that a master starting while the hbase:meta znode is stuck in the
   * OPENED state (pointing at a dead server) still completes failover and
   * clears all regions in transition.
   */
  @Test (timeout=180000)
  public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
      throws Exception {
    LOG.info("Starting testShouldCheckMasterFailOverWhenMETAIsInOpenedState");
    final int NUM_MASTERS = 1;
    final int NUM_RS = 2;

    // Start the cluster
    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
    Configuration conf = TEST_UTIL.getConfiguration();
    // Disable the master info web UI; the test does not need it.
    conf.setInt("hbase.master.info.port", -1);
    // This scenario exercises the ZK-based assignment code path.
    conf.setBoolean("hbase.assignment.usezk", true);

    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();

    // Find regionserver carrying meta.
    List<RegionServerThread> regionServerThreads =
      cluster.getRegionServerThreads();
    Region metaRegion = null;
    HRegionServer metaRegionServer = null;
    for (RegionServerThread regionServerThread : regionServerThreads) {
      HRegionServer regionServer = regionServerThread.getRegionServer();
      metaRegion = regionServer.getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
      // NOTE(review): every RS visited is aborted, not only the one carrying
      // meta — presumably intentional since the whole mini cluster is shut
      // down just below, but confirm.
      regionServer.abort("");
      if (null != metaRegion) {
        metaRegionServer = regionServer;
        break;
      }
    }

    assertNotNull(metaRegion);
    assertNotNull(metaRegionServer);

    TEST_UTIL.shutdownMiniHBaseCluster();

    // Create a ZKW to use in the test; this helper also forces the meta znode
    // into the OPENED state pointing at the (now dead) meta region server.
    ZooKeeperWatcher zkw =
      HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
          metaRegion, metaRegionServer.getServerName());

    LOG.info("Staring cluster for second time");
    TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);

    // Wait for the new master to finish initialization, which includes
    // processing the stale OPENED state left behind in ZK.
    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
    while (!master.isInitialized()) {
      Thread.sleep(100);
    }
    // Failover should be completed, now wait for no RIT
    log("Waiting for no more RIT");
    ZKAssign.blockUntilNoRIT(zkw);

    zkw.close();
    // Stop the cluster
    TEST_UTIL.shutdownMiniCluster();
  }
1001 
1002   /**
1003    * This tests a RIT in offline state will get re-assigned after a master restart
1004    */
1005   @Test(timeout=240000)
1006   public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
1007     final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
1008     final int NUM_MASTERS = 1;
1009     final int NUM_RS = 2;
1010 
1011     // Create config to use for this cluster
1012     Configuration conf = HBaseConfiguration.create();
1013     conf.setBoolean("hbase.assignment.usezk", true);
1014 
1015     // Start the cluster
1016     final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1017     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1018     log("Cluster started");
1019 
1020     TEST_UTIL.createTable(table, Bytes.toBytes("family"));
1021     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1022     RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1023     HRegionInfo hri = regionStates.getRegionsOfTable(table).get(0);
1024     ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1025     TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
1026 
1027     ServerName dstName = null;
1028     for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
1029       if (!tmpServer.equals(serverName)) {
1030         dstName = tmpServer;
1031         break;
1032       }
1033     }
1034     // find a different server
1035     assertTrue(dstName != null);
1036     // shutdown HBase cluster
1037     TEST_UTIL.shutdownMiniHBaseCluster();
1038     // create a RIT node in offline state
1039     ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
1040     ZKAssign.createNodeOffline(zkw, hri, dstName);
1041     Stat stat = new Stat();
1042     byte[] data =
1043         ZKAssign.getDataNoWatch(zkw, hri.getEncodedName(), stat);
1044     assertTrue(data != null);
1045     RegionTransition rt = RegionTransition.parseFrom(data);
1046     assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
1047 
1048     LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
1049         + " and dst server=" + dstName);
1050 
1051     // start HBase cluster
1052     TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, NUM_RS);
1053 
1054     while (true) {
1055       master = TEST_UTIL.getHBaseCluster().getMaster();
1056       if (master != null && master.isInitialized()) {
1057         ServerManager serverManager = master.getServerManager();
1058         if (!serverManager.areDeadServersInProgress()) {
1059           break;
1060         }
1061       }
1062       Thread.sleep(200);
1063     }
1064 
1065     // verify the region is assigned
1066     master = TEST_UTIL.getHBaseCluster().getMaster();
1067     master.getAssignmentManager().waitForAssignment(hri);
1068     regionStates = master.getAssignmentManager().getRegionStates();
1069     RegionState newState = regionStates.getRegionState(hri);
1070     assertTrue(newState.isOpened());
1071   }
1072 
1073  /**
1074    * Simple test of master failover.
1075    * <p>
1076    * Starts with three masters.  Kills a backup master.  Then kills the active
1077    * master.  Ensures the final master becomes active and we can still contact
1078    * the cluster.
1079    * @throws Exception
1080    */
1081   @Test (timeout=240000)
1082   public void testSimpleMasterFailover() throws Exception {
1083 
1084     final int NUM_MASTERS = 3;
1085     final int NUM_RS = 3;
1086 
1087     // Start the cluster
1088     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
1089 
1090     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1091     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1092 
1093     // get all the master threads
1094     List<MasterThread> masterThreads = cluster.getMasterThreads();
1095 
1096     // wait for each to come online
1097     for (MasterThread mt : masterThreads) {
1098       assertTrue(mt.isAlive());
1099     }
1100 
1101     // verify only one is the active master and we have right number
1102     int numActive = 0;
1103     int activeIndex = -1;
1104     ServerName activeName = null;
1105     HMaster active = null;
1106     for (int i = 0; i < masterThreads.size(); i++) {
1107       if (masterThreads.get(i).getMaster().isActiveMaster()) {
1108         numActive++;
1109         activeIndex = i;
1110         active = masterThreads.get(activeIndex).getMaster();
1111         activeName = active.getServerName();
1112       }
1113     }
1114     assertEquals(1, numActive);
1115     assertEquals(NUM_MASTERS, masterThreads.size());
1116     LOG.info("Active master " + activeName);
1117 
1118     // Check that ClusterStatus reports the correct active and backup masters
1119     assertNotNull(active);
1120     ClusterStatus status = active.getClusterStatus();
1121     assertEquals(status.getMaster(), activeName);
1122     assertEquals(2, status.getBackupMastersSize());
1123     assertEquals(2, status.getBackupMasters().size());
1124 
1125     // attempt to stop one of the inactive masters
1126     int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
1127     HMaster master = cluster.getMaster(backupIndex);
1128     LOG.debug("\n\nStopping a backup master: " + master.getServerName() + "\n");
1129     cluster.stopMaster(backupIndex, false);
1130     cluster.waitOnMaster(backupIndex);
1131 
1132     // Verify still one active master and it's the same
1133     for (int i = 0; i < masterThreads.size(); i++) {
1134       if (masterThreads.get(i).getMaster().isActiveMaster()) {
1135         assertEquals(activeName, masterThreads.get(i).getMaster().getServerName());
1136         activeIndex = i;
1137         active = masterThreads.get(activeIndex).getMaster();
1138       }
1139     }
1140     assertEquals(1, numActive);
1141     assertEquals(2, masterThreads.size());
1142     int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
1143     LOG.info("Active master " + active.getServerName() + " managing " + rsCount +  " regions servers");
1144     assertEquals(3, rsCount);
1145 
1146     // wait for the active master to acknowledge loss of the backup from ZK
1147     final HMaster activeFinal = active;
1148     TEST_UTIL.waitFor(TimeUnit.SECONDS.toMillis(30), new Waiter.Predicate<Exception>() {
1149       @Override public boolean evaluate() {
1150         return activeFinal.getBackupMasters().size() == 1;
1151       }
1152     });
1153 
1154     // Check that ClusterStatus reports the correct active and backup masters
1155     assertNotNull(active);
1156     final HMaster finalActive = active;
1157     TEST_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
1158       @Override
1159       public boolean evaluate() throws Exception {
1160         ClusterStatus status = finalActive.getClusterStatus();
1161         return status.getBackupMastersSize() == 1 && status.getBackupMasters().size() == 1;
1162       }
1163     });
1164     status = active.getClusterStatus();
1165     assertEquals(activeName, status.getMaster());
1166 
1167     // kill the active master
1168     LOG.debug("\n\nStopping the active master " + active.getServerName() + "\n");
1169     cluster.stopMaster(activeIndex, false);
1170     cluster.waitOnMaster(activeIndex);
1171 
1172     // wait for an active master to show up and be ready
1173     assertTrue(cluster.waitForActiveAndReadyMaster());
1174 
1175     LOG.debug("\n\nVerifying backup master is now active\n");
1176     // should only have one master now
1177     assertEquals(1, masterThreads.size());
1178 
1179     // and he should be active
1180     active = masterThreads.get(0).getMaster();
1181     assertNotNull(active);
1182     status = active.getClusterStatus();
1183     ServerName masterName = status.getMaster();
1184     assertNotNull(masterName);
1185     assertEquals(active.getServerName(), masterName);
1186     assertTrue(active.isActiveMaster());
1187     assertEquals(0, status.getBackupMastersSize());
1188     assertEquals(0, status.getBackupMasters().size());
1189     int rss = status.getServersSize();
1190     LOG.info("Active master " + masterName.getServerName() + " managing " +
1191       rss +  " region servers");
1192     assertEquals(3, rss);
1193 
1194     // Stop the cluster
1195     TEST_UTIL.shutdownMiniCluster();
1196   }
1197 
1198   /**
1199    * Test region in pending_open/close and failed_open/close when master failover
1200    */
1201   @Test (timeout=180000)
1202   @SuppressWarnings("deprecation")
1203   public void testPendingOpenOrCloseWhenMasterFailover() throws Exception {
1204     final int NUM_MASTERS = 1;
1205     final int NUM_RS = 1;
1206 
1207     // Create config to use for this cluster
1208     Configuration conf = HBaseConfiguration.create();
1209     conf.setBoolean("hbase.assignment.usezk", false);
1210 
1211     // Start the cluster
1212     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1213     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1214     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1215     log("Cluster started");
1216 
1217     // get all the master threads
1218     List<MasterThread> masterThreads = cluster.getMasterThreads();
1219     assertEquals(1, masterThreads.size());
1220 
1221     // only one master thread, let's wait for it to be initialized
1222     assertTrue(cluster.waitForActiveAndReadyMaster());
1223     HMaster master = masterThreads.get(0).getMaster();
1224     assertTrue(master.isActiveMaster());
1225     assertTrue(master.isInitialized());
1226 
1227     // Create a table with a region online
1228     Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
1229     onlineTable.close();
1230     // Create a table in META, so it has a region offline
1231     HTableDescriptor offlineTable = new HTableDescriptor(
1232       TableName.valueOf(Bytes.toBytes("offlineTable")));
1233     offlineTable.addFamily(new HColumnDescriptor(Bytes.toBytes("family")));
1234 
1235     FileSystem filesystem = FileSystem.get(conf);
1236     Path rootdir = FSUtils.getRootDir(conf);
1237     FSTableDescriptors fstd = new FSTableDescriptors(conf, filesystem, rootdir);
1238     fstd.createTableDescriptor(offlineTable);
1239 
1240     HRegionInfo hriOffline = new HRegionInfo(offlineTable.getTableName(), null, null);
1241     createRegion(hriOffline, rootdir, conf, offlineTable);
1242     MetaTableAccessor.addRegionToMeta(master.getConnection(), hriOffline);
1243 
1244     log("Regions in hbase:meta and namespace have been created");
1245 
1246     // at this point we only expect 3 regions to be assigned out
1247     // (catalogs and namespace, + 1 online region)
1248     assertEquals(3, cluster.countServedRegions());
1249     HRegionInfo hriOnline = null;
1250     try (RegionLocator locator =
1251         TEST_UTIL.getConnection().getRegionLocator(TableName.valueOf("onlineTable"))) {
1252       hriOnline = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
1253     }
1254     RegionStates regionStates = master.getAssignmentManager().getRegionStates();
1255     RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
1256 
1257     // Put the online region in pending_close. It is actually already opened.
1258     // This is to simulate that the region close RPC is not sent out before failover
1259     RegionState oldState = regionStates.getRegionState(hriOnline);
1260     RegionState newState = new RegionState(
1261       hriOnline, State.PENDING_CLOSE, oldState.getServerName());
1262     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1263 
1264     // Put the offline region in pending_open. It is actually not opened yet.
1265     // This is to simulate that the region open RPC is not sent out before failover
1266     oldState = new RegionState(hriOffline, State.OFFLINE);
1267     newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
1268     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1269 
1270     HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
1271     createRegion(failedClose, rootdir, conf, offlineTable);
1272     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
1273 
1274     oldState = new RegionState(failedClose, State.PENDING_CLOSE);
1275     newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
1276     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1277 
1278 
1279     HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
1280     createRegion(failedOpen, rootdir, conf, offlineTable);
1281     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
1282 
1283     // Simulate a region transitioning to failed open when the region server reports the
1284     // transition as FAILED_OPEN
1285     oldState = new RegionState(failedOpen, State.PENDING_OPEN);
1286     newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
1287     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1288 
1289     HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
1290     createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
1291     MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
1292 
1293     // Simulate a region transitioning to failed open when the master couldn't find a plan for
1294     // the region
1295     oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
1296     newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
1297     stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
1298 
1299 
1300 
1301     // Stop the master
1302     log("Aborting master");
1303     cluster.abortMaster(0);
1304     cluster.waitOnMaster(0);
1305     log("Master has aborted");
1306 
1307     // Start up a new master
1308     log("Starting up a new master");
1309     master = cluster.startMaster().getMaster();
1310     log("Waiting for master to be ready");
1311     cluster.waitForActiveAndReadyMaster();
1312     log("Master is ready");
1313 
1314     // Wait till no region in transition any more
1315     master.getAssignmentManager().waitUntilNoRegionsInTransition(60000);
1316 
1317     // Get new region states since master restarted
1318     regionStates = master.getAssignmentManager().getRegionStates();
1319 
1320     // Both pending_open (RPC sent/not yet) regions should be online
1321     assertTrue(regionStates.isRegionOnline(hriOffline));
1322     assertTrue(regionStates.isRegionOnline(hriOnline));
1323     assertTrue(regionStates.isRegionOnline(failedClose));
1324     assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
1325     assertTrue(regionStates.isRegionOnline(failedOpen));
1326 
1327     log("Done with verification, shutting down cluster");
1328 
1329     // Done, shutdown the cluster
1330     TEST_UTIL.shutdownMiniCluster();
1331   }
1332 
1333   /**
1334    * Test meta in transition when master failover
1335    */
1336   @Test(timeout = 180000)
1337   public void testMetaInTransitionWhenMasterFailover() throws Exception {
1338     final int NUM_MASTERS = 1;
1339     final int NUM_RS = 1;
1340 
1341     // Start the cluster
1342     Configuration conf = HBaseConfiguration.create();
1343     conf.setBoolean("hbase.assignment.usezk", false);
1344     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
1345     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
1346     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1347     log("Cluster started");
1348 
1349     log("Moving meta off the master");
1350     HMaster activeMaster = cluster.getMaster();
1351     HRegionServer rs = cluster.getRegionServer(0);
1352     ServerName metaServerName = cluster.getLiveRegionServerThreads()
1353       .get(0).getRegionServer().getServerName();
1354     activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
1355       Bytes.toBytes(metaServerName.getServerName()));
1356     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1357     assertEquals("Meta should be assigned on expected regionserver",
1358       metaServerName, activeMaster.getMetaTableLocator()
1359         .getMetaRegionLocation(activeMaster.getZooKeeper()));
1360 
1361     // Now kill master, meta should remain on rs, where we placed it before.
1362     log("Aborting master");
1363     activeMaster.abort("test-kill");
1364     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1365     log("Master has aborted");
1366 
1367     // meta should remain where it was
1368     RegionState metaState =
1369       MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
1370     assertEquals("hbase:meta should be onlined on RS",
1371       metaState.getServerName(), rs.getServerName());
1372     assertEquals("hbase:meta should be onlined on RS",
1373       metaState.getState(), State.OPEN);
1374 
1375     // Start up a new master
1376     log("Starting up a new master");
1377     activeMaster = cluster.startMaster().getMaster();
1378     log("Waiting for master to be ready");
1379     cluster.waitForActiveAndReadyMaster();
1380     log("Master is ready");
1381 
1382     // ensure meta is still deployed on RS
1383     metaState =
1384       MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1385     assertEquals("hbase:meta should be onlined on RS",
1386       metaState.getServerName(), rs.getServerName());
1387     assertEquals("hbase:meta should be onlined on RS",
1388       metaState.getState(), State.OPEN);
1389 
1390     // Update meta state as PENDING_OPEN, then kill master
1391     // that simulates, that RS successfully deployed, but
1392     // RPC was lost right before failure.
1393     // region server should expire (how it can be verified?)
1394     MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1395       rs.getServerName(), State.PENDING_OPEN);
1396     Region meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1397     rs.removeFromOnlineRegions(meta, null);
1398     ((HRegion)meta).close();
1399 
1400     log("Aborting master");
1401     activeMaster.abort("test-kill");
1402     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1403     log("Master has aborted");
1404 
1405     // Start up a new master
1406     log("Starting up a new master");
1407     activeMaster = cluster.startMaster().getMaster();
1408     log("Waiting for master to be ready");
1409     cluster.waitForActiveAndReadyMaster();
1410     log("Master is ready");
1411 
1412     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1413     log("Meta was assigned");
1414 
1415     metaState =
1416       MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
1417     assertEquals("hbase:meta should be onlined on RS",
1418       metaState.getServerName(), rs.getServerName());
1419     assertEquals("hbase:meta should be onlined on RS",
1420       metaState.getState(), State.OPEN);
1421 
1422     // Update meta state as PENDING_CLOSE, then kill master
1423     // that simulates, that RS successfully deployed, but
1424     // RPC was lost right before failure.
1425     // region server should expire (how it can be verified?)
1426     MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1427       rs.getServerName(), State.PENDING_CLOSE);
1428 
1429     log("Aborting master");
1430     activeMaster.abort("test-kill");
1431     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1432     log("Master has aborted");
1433 
1434     rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
1435       rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1436 
1437     // Start up a new master
1438     log("Starting up a new master");
1439     activeMaster = cluster.startMaster().getMaster();
1440     log("Waiting for master to be ready");
1441     cluster.waitForActiveAndReadyMaster();
1442     log("Master is ready");
1443 
1444     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1445     log("Meta was assigned");
1446 
1447     rs.getRSRpcServices().closeRegion(
1448       null,
1449       RequestConverter.buildCloseRegionRequest(rs.getServerName(),
1450         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), false));
1451 
1452     // Set a dummy server to check if master reassigns meta on restart
1453     MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
1454       ServerName.valueOf("dummyserver.example.org", 1234, -1L), State.OPEN);
1455 
1456     log("Aborting master");
1457     activeMaster.stop("test-kill");
1458 
1459     cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
1460     log("Master has aborted");
1461 
1462     // Start up a new master
1463     log("Starting up a new master");
1464     activeMaster = cluster.startMaster().getMaster();
1465     log("Waiting for master to be ready");
1466     cluster.waitForActiveAndReadyMaster();
1467     log("Master is ready");
1468 
1469     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
1470     log("Meta was assigned");
1471 
1472     // Done, shutdown the cluster
1473     TEST_UTIL.shutdownMiniCluster();
1474   }
1475 }
1476