View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotEquals;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertNull;
25  import static org.junit.Assert.assertTrue;
26  import static org.junit.Assert.fail;
27  
28  import java.io.IOException;
29  import java.util.ArrayList;
30  import java.util.Arrays;
31  import java.util.HashMap;
32  import java.util.List;
33  import java.util.Map;
34  import java.util.Set;
35  import java.util.concurrent.ScheduledThreadPoolExecutor;
36  import java.util.concurrent.TimeUnit;
37  import java.util.concurrent.atomic.AtomicBoolean;
38  import java.util.concurrent.atomic.AtomicInteger;
39  
40  import org.apache.hadoop.conf.Configuration;
41  import org.apache.hadoop.fs.FileSystem;
42  import org.apache.hadoop.fs.Path;
43  import org.apache.hadoop.hbase.CoordinatedStateManager;
44  import org.apache.hadoop.hbase.HBaseTestingUtility;
45  import org.apache.hadoop.hbase.HColumnDescriptor;
46  import org.apache.hadoop.hbase.HConstants;
47  import org.apache.hadoop.hbase.HRegionInfo;
48  import org.apache.hadoop.hbase.HTableDescriptor;
49  import org.apache.hadoop.hbase.MetaTableAccessor;
50  import org.apache.hadoop.hbase.MiniHBaseCluster;
51  import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
52  import org.apache.hadoop.hbase.ServerLoad;
53  import org.apache.hadoop.hbase.ServerName;
54  import org.apache.hadoop.hbase.TableName;
55  import org.apache.hadoop.hbase.UnknownRegionException;
56  import org.apache.hadoop.hbase.Waiter;
57  import org.apache.hadoop.hbase.client.Admin;
58  import org.apache.hadoop.hbase.client.HBaseAdmin;
59  import org.apache.hadoop.hbase.client.HTable;
60  import org.apache.hadoop.hbase.client.Result;
61  import org.apache.hadoop.hbase.client.Table;
62  import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager;
63  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
64  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
65  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
66  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
67  import org.apache.hadoop.hbase.coprocessor.RegionObserver;
68  import org.apache.hadoop.hbase.executor.EventType;
69  import org.apache.hadoop.hbase.master.RegionState.State;
70  import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
71  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
72  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
73  import org.apache.hadoop.hbase.regionserver.HRegionServer;
74  import org.apache.hadoop.hbase.testclassification.MediumTests;
75  import org.apache.hadoop.hbase.util.Bytes;
76  import org.apache.hadoop.hbase.util.ConfigUtil;
77  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
78  import org.apache.hadoop.hbase.util.FSUtils;
79  import org.apache.hadoop.hbase.util.JVMClusterUtil;
80  import org.apache.hadoop.hbase.util.TestTableName;
81  import org.apache.hadoop.hbase.util.Threads;
82  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
83  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
84  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
85  import org.apache.zookeeper.KeeperException;
86  import org.junit.AfterClass;
87  import org.junit.BeforeClass;
88  import org.junit.Rule;
89  import org.junit.Test;
90  import org.junit.experimental.categories.Category;
91  
92  import com.google.common.collect.Lists;
93  import com.google.common.collect.Maps;
94  
95  
96  /**
97   * This tests AssignmentManager with a testing cluster.
98   */
99  @Category(MediumTests.class)
100 @SuppressWarnings("deprecation")
101 public class TestAssignmentManagerOnCluster {
102   private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
103   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
104   final static Configuration conf = TEST_UTIL.getConfiguration();
105   private static HBaseAdmin admin;
106   @Rule
107   public TestTableName testTableName = new TestTableName();
108 
109   static void setupOnce() throws Exception {
110     // Using the our load balancer to control region plans
111     conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
112       MyLoadBalancer.class, LoadBalancer.class);
113     conf.setClass(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
114       MyRegionObserver.class, RegionObserver.class);
115     // Reduce the maximum attempts to speed up the test
116     conf.setInt("hbase.assignment.maximum.attempts", 3);
117     // Put meta on master to avoid meta server shutdown handling
118     conf.set("hbase.balancer.tablesOnMaster", "hbase:meta");
119     conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
120     conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1);
121 
122     TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, MyRegionServer.class);
123     admin = TEST_UTIL.getHBaseAdmin();
124   }
125 
126   @BeforeClass
127   public static void setUpBeforeClass() throws Exception {
128     // Use ZK for region assignment
129     conf.setBoolean("hbase.assignment.usezk", true);
130     setupOnce();
131   }
132 
133   @AfterClass
134   public static void tearDownAfterClass() throws Exception {
135     TEST_UTIL.shutdownMiniCluster();
136   }
137 
138   /**
139    * This tests restarting meta regionserver
140    */
141   @Test (timeout=180000)
142   public void testRestartMetaRegionServer() throws Exception {
143     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
144     boolean stoppedARegionServer = false;
145     try {
146       HMaster master = cluster.getMaster();
147       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
148       ServerName metaServerName = regionStates.getRegionServerOfRegion(
149         HRegionInfo.FIRST_META_REGIONINFO);
150       if (master.getServerName().equals(metaServerName) || metaServerName == null
151           || !metaServerName.equals(cluster.getServerHoldingMeta())) {
152         // Move meta off master
153         metaServerName = cluster.getLiveRegionServerThreads()
154           .get(0).getRegionServer().getServerName();
155         master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
156           Bytes.toBytes(metaServerName.getServerName()));
157         master.assignmentManager.waitUntilNoRegionsInTransition(60000);
158       }
159       RegionState metaState =
160           MetaTableLocator.getMetaRegionState(master.getZooKeeper());
161         assertEquals("Meta should be not in transition",
162             metaState.getState(), RegionState.State.OPEN);
163       assertNotEquals("Meta should be moved off master",
164         metaServerName, master.getServerName());
165       cluster.killRegionServer(metaServerName);
166       stoppedARegionServer = true;
167       cluster.waitForRegionServerToStop(metaServerName, 60000);
168       // Wait for SSH to finish
169       final ServerManager serverManager = master.getServerManager();
170       TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
171         @Override
172         public boolean evaluate() throws Exception {
173           return !serverManager.areDeadServersInProgress();
174         }
175       });
176 
177       // Now, make sure meta is assigned
178       assertTrue("Meta should be assigned",
179         regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
180       // Now, make sure meta is registered in zk
181       metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
182       assertEquals("Meta should be not in transition",
183           metaState.getState(), RegionState.State.OPEN);
184       assertEquals("Meta should be assigned", metaState.getServerName(),
185         regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO));
186       assertNotEquals("Meta should be assigned on a different server",
187         metaState.getServerName(), metaServerName);
188     } finally {
189       if (stoppedARegionServer) {
190         cluster.startRegionServer();
191       }
192     }
193   }
194 
195   /**
196    * This tests region assignment
197    */
198   @Test (timeout=60000)
199   public void testAssignRegion() throws Exception {
200     String table = "testAssignRegion";
201     try {
202       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
203       desc.addFamily(new HColumnDescriptor(FAMILY));
204       admin.createTable(desc);
205 
206       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
207       HRegionInfo hri = new HRegionInfo(
208         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
209       MetaTableAccessor.addRegionToMeta(meta, hri);
210 
211       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
212       AssignmentManager am = master.getAssignmentManager();
213       TEST_UTIL.assignRegion(hri);
214 
215       RegionStates regionStates = am.getRegionStates();
216       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
217       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
218 
219       // Region is assigned now. Let's assign it again.
220       // Master should not abort, and region should be assigned.
221       RegionState oldState = regionStates.getRegionState(hri);
222       TEST_UTIL.getHBaseAdmin().assign(hri.getRegionName());
223       master.getAssignmentManager().waitForAssignment(hri);
224       RegionState newState = regionStates.getRegionState(hri);
225       assertTrue(newState.isOpened()
226         && newState.getStamp() != oldState.getStamp());
227     } finally {
228       TEST_UTIL.deleteTable(Bytes.toBytes(table));
229     }
230   }
231 
232   // Simulate a scenario where the AssignCallable and SSH are trying to assign a region
233   @Test (timeout=60000)
234   public void testAssignRegionBySSH() throws Exception {
235     if (!conf.getBoolean("hbase.assignment.usezk", true)) {
236       return;
237     }
238     String table = "testAssignRegionBySSH";
239     MyMaster master = (MyMaster) TEST_UTIL.getHBaseCluster().getMaster();
240     try {
241       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
242       desc.addFamily(new HColumnDescriptor(FAMILY));
243       admin.createTable(desc);
244 
245       HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
246       HRegionInfo hri = new HRegionInfo(
247         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
248       MetaTableAccessor.addRegionToMeta(meta, hri);
249       // Add some dummy server for the region entry
250       MetaTableAccessor.updateRegionLocation(TEST_UTIL.getHBaseCluster().getMaster().getConnection(), hri,
251         ServerName.valueOf("example.org", 1234, System.currentTimeMillis()), 0, -1);
252       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
253       int i = TEST_UTIL.getHBaseCluster().getServerWithMeta();
254       HRegionServer rs = TEST_UTIL.getHBaseCluster().getRegionServer(i == 0 ? 1 : 0);
255       // Choose a server other than meta to kill
256       ServerName controlledServer = rs.getServerName();
257       master.enableSSH(false);
258       TEST_UTIL.getHBaseCluster().killRegionServer(controlledServer);
259       TEST_UTIL.getHBaseCluster().waitForRegionServerToStop(controlledServer, -1);
260       AssignmentManager am = master.getAssignmentManager();
261 
262       // Simulate the AssignCallable trying to assign the region. Have the region in OFFLINE state,
263       // but not in transition and the server is the dead 'controlledServer'
264       regionStates.createRegionState(hri, State.OFFLINE, controlledServer, null);
265       am.assign(hri, true, true);
266       // Region should remain OFFLINE and go to transition
267       assertEquals(State.OFFLINE, regionStates.getRegionState(hri).getState());
268       assertTrue (regionStates.isRegionInTransition(hri));
269 
270       master.enableSSH(true);
271       am.waitForAssignment(hri);
272       assertTrue (regionStates.getRegionState(hri).isOpened());
273       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
274       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
275     } finally {
276       if (master != null) {
277         master.enableSSH(true);
278       }
279       TEST_UTIL.deleteTable(Bytes.toBytes(table));
280       TEST_UTIL.getHBaseCluster().startRegionServer();
281     }
282   }
283 
284   /**
285    * This tests region assignment on a simulated restarted server
286    */
287   @Test (timeout=120000)
288   public void testAssignRegionOnRestartedServer() throws Exception {
289     String table = "testAssignRegionOnRestartedServer";
290     TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
291     TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
292     TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect
293 
294     ServerName deadServer = null;
295     HMaster master = null;
296     try {
297       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
298       desc.addFamily(new HColumnDescriptor(FAMILY));
299       admin.createTable(desc);
300 
301       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
302       final HRegionInfo hri = new HRegionInfo(
303         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
304       MetaTableAccessor.addRegionToMeta(meta, hri);
305 
306       master = TEST_UTIL.getHBaseCluster().getMaster();
307       Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
308       assertFalse("There should be some servers online", onlineServers.isEmpty());
309 
310       // Use the first server as the destination server
311       ServerName destServer = onlineServers.iterator().next();
312 
313       // Created faked dead server
314       deadServer = ServerName.valueOf(destServer.getHostname(),
315           destServer.getPort(), destServer.getStartcode() - 100L);
316       master.serverManager.recordNewServerWithLock(deadServer, ServerLoad.EMPTY_SERVERLOAD);
317 
318       final AssignmentManager am = master.getAssignmentManager();
319       RegionPlan plan = new RegionPlan(hri, null, deadServer);
320       am.addPlan(hri.getEncodedName(), plan);
321       TEST_UTIL.assignRegion(hri);
322 
323       int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
324         destServer, EventType.M_ZK_REGION_OFFLINE,
325         EventType.RS_ZK_REGION_OPENING, 0);
326       assertEquals("TansitionNode should fail", -1, version);
327 
328       TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
329         @Override
330         public boolean evaluate() throws Exception {
331           return ! am.getRegionStates().isRegionInTransition(hri);
332         }
333       });
334 
335     assertFalse("Region should be assigned", am.getRegionStates().isRegionInTransition(hri));
336     } finally {
337       if (deadServer != null) {
338         master.serverManager.expireServer(deadServer);
339       }
340 
341       TEST_UTIL.deleteTable(Bytes.toBytes(table));
342 
343       // reset the value for other tests
344       TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 3);
345       ServerName masterServerName = TEST_UTIL.getMiniHBaseCluster().getMaster().getServerName();
346       TEST_UTIL.getMiniHBaseCluster().stopMaster(masterServerName);
347       TEST_UTIL.getMiniHBaseCluster().startMaster();
348       // Wait till master is active and is initialized
349       while (TEST_UTIL.getMiniHBaseCluster().getMaster() == null ||
350           !TEST_UTIL.getMiniHBaseCluster().getMaster().isInitialized()) {
351         Threads.sleep(1);
352       }
353     }
354   }
355 
356   /**
357    * This tests offlining a region
358    */
359   @Test (timeout=60000)
360   public void testOfflineRegion() throws Exception {
361     TableName table =
362         TableName.valueOf("testOfflineRegion");
363     try {
364       HRegionInfo hri = createTableAndGetOneRegion(table);
365 
366       RegionStates regionStates = TEST_UTIL.getHBaseCluster().
367         getMaster().getAssignmentManager().getRegionStates();
368       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
369       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
370       admin.offline(hri.getRegionName());
371 
372       long timeoutTime = System.currentTimeMillis() + 800;
373       while (true) {
374         if (regionStates.getRegionByStateOfTable(table)
375             .get(RegionState.State.OFFLINE).contains(hri))
376           break;
377         long now = System.currentTimeMillis();
378         if (now > timeoutTime) {
379           fail("Failed to offline the region in time");
380           break;
381         }
382         Thread.sleep(10);
383       }
384       RegionState regionState = regionStates.getRegionState(hri);
385       assertTrue(regionState.isOffline());
386     } finally {
387       TEST_UTIL.deleteTable(table);
388     }
389   }
390 
391   /**
392    * This tests moving a region
393    */
394   @Test (timeout=50000)
395   public void testMoveRegion() throws Exception {
396     TableName table =
397         TableName.valueOf("testMoveRegion");
398     try {
399       HRegionInfo hri = createTableAndGetOneRegion(table);
400 
401       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
402       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
403       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
404       ServerManager serverManager = master.getServerManager();
405       ServerName destServerName = null;
406       List<JVMClusterUtil.RegionServerThread> regionServers =
407         TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads();
408       for (JVMClusterUtil.RegionServerThread regionServer: regionServers) {
409         HRegionServer destServer = regionServer.getRegionServer();
410         destServerName = destServer.getServerName();
411         if (!destServerName.equals(serverName)
412             && serverManager.isServerOnline(destServerName)) {
413           break;
414         }
415       }
416       assertTrue(destServerName != null
417         && !destServerName.equals(serverName));
418       TEST_UTIL.getHBaseAdmin().move(hri.getEncodedNameAsBytes(),
419         Bytes.toBytes(destServerName.getServerName()));
420 
421       long timeoutTime = System.currentTimeMillis() + 30000;
422       while (true) {
423         ServerName sn = regionStates.getRegionServerOfRegion(hri);
424         if (sn != null && sn.equals(destServerName)) {
425           TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
426           break;
427         }
428         long now = System.currentTimeMillis();
429         if (now > timeoutTime) {
430           fail("Failed to move the region in time: "
431             + regionStates.getRegionState(hri));
432         }
433         regionStates.waitForUpdate(50);
434       }
435 
436     } finally {
437       TEST_UTIL.deleteTable(table);
438     }
439   }
440 
441   /**
442    * If a table is deleted, we should not be able to move it anymore.
443    * Otherwise, the region will be brought back.
444    * @throws Exception
445    */
446   @Test (timeout=50000)
447   public void testMoveRegionOfDeletedTable() throws Exception {
448     TableName table =
449         TableName.valueOf("testMoveRegionOfDeletedTable");
450     Admin admin = TEST_UTIL.getHBaseAdmin();
451     try {
452       HRegionInfo hri = createTableAndGetOneRegion(table);
453 
454       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
455       AssignmentManager am = master.getAssignmentManager();
456       RegionStates regionStates = am.getRegionStates();
457       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
458       ServerName destServerName = null;
459       for (int i = 0; i < 3; i++) {
460         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
461         if (!destServer.getServerName().equals(serverName)) {
462           destServerName = destServer.getServerName();
463           break;
464         }
465       }
466       assertTrue(destServerName != null
467         && !destServerName.equals(serverName));
468 
469       TEST_UTIL.deleteTable(table);
470 
471       try {
472         admin.move(hri.getEncodedNameAsBytes(),
473           Bytes.toBytes(destServerName.getServerName()));
474         fail("We should not find the region");
475       } catch (IOException ioe) {
476         assertTrue(ioe instanceof UnknownRegionException);
477       }
478 
479       am.balance(new RegionPlan(hri, serverName, destServerName));
480       assertFalse("The region should not be in transition",
481         regionStates.isRegionInTransition(hri));
482     } finally {
483       if (admin.tableExists(table)) {
484         TEST_UTIL.deleteTable(table);
485       }
486     }
487   }
488 
489   HRegionInfo createTableAndGetOneRegion(
490       final TableName tableName) throws IOException, InterruptedException {
491     HTableDescriptor desc = new HTableDescriptor(tableName);
492     desc.addFamily(new HColumnDescriptor(FAMILY));
493     admin.createTable(desc, Bytes.toBytes("A"), Bytes.toBytes("Z"), 5);
494 
495     // wait till the table is assigned
496     HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
497     long timeoutTime = System.currentTimeMillis() + 1000;
498     while (true) {
499       List<HRegionInfo> regions = master.getAssignmentManager().
500         getRegionStates().getRegionsOfTable(tableName);
501       if (regions.size() > 3) {
502         return regions.get(2);
503       }
504       long now = System.currentTimeMillis();
505       if (now > timeoutTime) {
506         fail("Could not find an online region");
507       }
508       Thread.sleep(10);
509     }
510   }
511 
512   /**
513    * This test should not be flaky. If it is flaky, it means something
514    * wrong with AssignmentManager which should be reported and fixed
515    *
516    * This tests forcefully assign a region while it's closing and re-assigned.
517    */
518   @Test (timeout=60000)
519   public void testForceAssignWhileClosing() throws Exception {
520     String table = "testForceAssignWhileClosing";
521     try {
522       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
523       desc.addFamily(new HColumnDescriptor(FAMILY));
524       admin.createTable(desc);
525 
526       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
527       HRegionInfo hri = new HRegionInfo(
528         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
529       MetaTableAccessor.addRegionToMeta(meta, hri);
530 
531       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
532       AssignmentManager am = master.getAssignmentManager();
533       assertTrue(TEST_UTIL.assignRegion(hri));
534 
535       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
536       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
537       MyRegionObserver.preCloseEnabled.set(true);
538       am.unassign(hri);
539       RegionState state = am.getRegionStates().getRegionState(hri);
540       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
541 
542       MyRegionObserver.preCloseEnabled.set(false);
543       am.unassign(hri, true);
544 
545       // region is closing now, will be re-assigned automatically.
546       // now, let's forcefully assign it again. it should be
547       // assigned properly and no double-assignment
548       am.assign(hri, true, true);
549 
550       // let's check if it's assigned after it's out of transition
551       am.waitOnRegionToClearRegionsInTransition(hri);
552       assertTrue(am.waitForAssignment(hri));
553 
554       ServerName serverName = master.getAssignmentManager().
555         getRegionStates().getRegionServerOfRegion(hri);
556       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
557     } finally {
558       MyRegionObserver.preCloseEnabled.set(false);
559       TEST_UTIL.deleteTable(Bytes.toBytes(table));
560     }
561   }
562 
563   /**
564    * This tests region close failed
565    */
566   @Test (timeout=60000)
567   public void testCloseFailed() throws Exception {
568     String table = "testCloseFailed";
569     try {
570       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
571       desc.addFamily(new HColumnDescriptor(FAMILY));
572       admin.createTable(desc);
573 
574       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
575       HRegionInfo hri = new HRegionInfo(
576         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
577       MetaTableAccessor.addRegionToMeta(meta, hri);
578 
579       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
580       AssignmentManager am = master.getAssignmentManager();
581       assertTrue(TEST_UTIL.assignRegion(hri));
582       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
583       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
584 
585       MyRegionObserver.preCloseEnabled.set(true);
586       am.unassign(hri);
587       RegionState state = am.getRegionStates().getRegionState(hri);
588       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
589 
590       MyRegionObserver.preCloseEnabled.set(false);
591       am.unassign(hri, true);
592 
593       // region may still be assigned now since it's closing,
594       // let's check if it's assigned after it's out of transition
595       am.waitOnRegionToClearRegionsInTransition(hri);
596 
597       // region should be closed and re-assigned
598       assertTrue(am.waitForAssignment(hri));
599       ServerName serverName = master.getAssignmentManager().
600         getRegionStates().getRegionServerOfRegion(hri);
601       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
602     } finally {
603       MyRegionObserver.preCloseEnabled.set(false);
604       TEST_UTIL.deleteTable(Bytes.toBytes(table));
605     }
606   }
607 
608   /**
609    * This tests region close with exponential backoff
610    */
611   @Test(timeout = 60000)
612   public void testCloseRegionWithExponentialBackOff() throws Exception {
613     TableName tableName = testTableName.getTableName();
614     // Set the backoff time between each retry for failed close
615     TEST_UTIL.getMiniHBaseCluster().getConf().setLong("hbase.assignment.retry.sleep.initial", 1000);
616     HMaster activeMaster = TEST_UTIL.getHBaseCluster().getMaster();
617     TEST_UTIL.getMiniHBaseCluster().stopMaster(activeMaster.getServerName());
618     TEST_UTIL.getMiniHBaseCluster().startMaster(); // restart the master for conf take into affect
619 
620     try {
621       ScheduledThreadPoolExecutor scheduledThreadPoolExecutor =
622           new ScheduledThreadPoolExecutor(1, Threads.newDaemonThreadFactory("ExponentialBackOff"));
623 
624       HTableDescriptor desc = new HTableDescriptor(tableName);
625       desc.addFamily(new HColumnDescriptor(FAMILY));
626       admin.createTable(desc);
627 
628       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
629       HRegionInfo hri =
630           new HRegionInfo(desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
631       MetaTableAccessor.addRegionToMeta(meta, hri);
632 
633       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
634       AssignmentManager am = master.getAssignmentManager();
635       assertTrue(TEST_UTIL.assignRegion(hri));
636       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
637       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
638 
639       MyRegionObserver.preCloseEnabled.set(true);
640       // Unset the precloseEnabled flag after 1 second for next retry to succeed
641       scheduledThreadPoolExecutor.schedule(new Runnable() {
642         @Override
643         public void run() {
644           MyRegionObserver.preCloseEnabled.set(false);
645         }
646       }, 1000, TimeUnit.MILLISECONDS);
647       am.unassign(hri);
648 
649       // region may still be assigned now since it's closing,
650       // let's check if it's assigned after it's out of transition
651       am.waitOnRegionToClearRegionsInTransition(hri);
652 
653       // region should be closed and re-assigned
654       assertTrue(am.waitForAssignment(hri));
655       ServerName serverName =
656           master.getAssignmentManager().getRegionStates().getRegionServerOfRegion(hri);
657       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
658     } finally {
659       MyRegionObserver.preCloseEnabled.set(false);
660       TEST_UTIL.deleteTable(tableName);
661 
662       // reset the backoff time to default
663       TEST_UTIL.getMiniHBaseCluster().getConf().unset("hbase.assignment.retry.sleep.initial");
664       activeMaster = TEST_UTIL.getMiniHBaseCluster().getMaster();
665       TEST_UTIL.getMiniHBaseCluster().stopMaster(activeMaster.getServerName());
666       TEST_UTIL.getMiniHBaseCluster().startMaster();
667     }
668   }
669 
670   /**
671    * This tests region open failed
672    */
673   @Test (timeout=60000)
674   public void testOpenFailed() throws Exception {
675     String table = "testOpenFailed";
676     try {
677       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
678       desc.addFamily(new HColumnDescriptor(FAMILY));
679       admin.createTable(desc);
680 
681       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
682       HRegionInfo hri = new HRegionInfo(
683         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
684       MetaTableAccessor.addRegionToMeta(meta, hri);
685 
686       MyLoadBalancer.controledRegion = hri;
687 
688       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
689       AssignmentManager am = master.getAssignmentManager();
690       assertFalse(TEST_UTIL.assignRegion(hri));
691 
692       RegionState state = am.getRegionStates().getRegionState(hri);
693       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
694       // Failed to open since no plan, so it's on no server
695       assertNull(state.getServerName());
696 
697       MyLoadBalancer.controledRegion = null;
698       assertTrue(TEST_UTIL.assignRegion(hri));
699 
700       ServerName serverName = master.getAssignmentManager().
701         getRegionStates().getRegionServerOfRegion(hri);
702       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
703     } finally {
704       MyLoadBalancer.controledRegion = null;
705       TEST_UTIL.deleteTable(Bytes.toBytes(table));
706     }
707   }
708 
709   /**
710    * This tests round-robin assignment failed due to no bulkplan
711    */
712   @Test (timeout=60000)
713   public void testRoundRobinAssignmentFailed() throws Exception {
714     TableName tableName = TableName.valueOf("testRoundRobinAssignmentFailed");
715     try {
716       HTableDescriptor desc = new HTableDescriptor(tableName);
717       desc.addFamily(new HColumnDescriptor(FAMILY));
718       admin.createTable(desc);
719 
720       Table meta = admin.getConnection().getTable(TableName.META_TABLE_NAME);
721       HRegionInfo hri = new HRegionInfo(
722         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
723       MetaTableAccessor.addRegionToMeta(meta, hri);
724 
725       MyLoadBalancer.controledRegion = hri;
726 
727       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
728       AssignmentManager am = master.getAssignmentManager();
729       // round-robin assignment but balancer cannot find a plan
730       // assignment should fail
731       am.assign(Arrays.asList(hri));
732 
733       // if bulk assignment cannot update region state to online
734       // or failed_open this waits until timeout
735       assertFalse(am.waitForAssignment(hri));
736       RegionState state = am.getRegionStates().getRegionState(hri);
737       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
738       // Failed to open since no plan, so it's on no server
739       assertNull(state.getServerName());
740 
741       // try again with valid plan
742       MyLoadBalancer.controledRegion = null;
743       am.assign(Arrays.asList(hri));
744       assertTrue(am.waitForAssignment(hri));
745 
746       ServerName serverName = master.getAssignmentManager().
747         getRegionStates().getRegionServerOfRegion(hri);
748       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
749     } finally {
750       MyLoadBalancer.controledRegion = null;
751       TEST_UTIL.deleteTable(tableName);
752     }
753   }
754 
755   /**
756    * This tests retain assignment failed due to no bulkplan
757    */
758   @Test (timeout=60000)
759   public void testRetainAssignmentFailed() throws Exception {
760     TableName tableName = TableName.valueOf("testRetainAssignmentFailed");
761     try {
762       HTableDescriptor desc = new HTableDescriptor(tableName);
763       desc.addFamily(new HColumnDescriptor(FAMILY));
764       admin.createTable(desc);
765 
766       Table meta = TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME);
767       HRegionInfo hri = new HRegionInfo(
768         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
769       MetaTableAccessor.addRegionToMeta(meta, hri);
770 
771       MyLoadBalancer.controledRegion = hri;
772 
773       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
774       AssignmentManager am = master.getAssignmentManager();
775 
776       Map<HRegionInfo, ServerName> regions = new HashMap<HRegionInfo, ServerName>();
777       ServerName dest = TEST_UTIL.getHBaseCluster().getRegionServer(0).getServerName();
778       regions.put(hri, dest);
779       // retainAssignment but balancer cannot find a plan
780       // assignment should fail
781       am.assign(regions);
782 
783       // if retain assignment cannot update region state to online
784       // or failed_open this waits until timeout
785       assertFalse(am.waitForAssignment(hri));
786       RegionState state = am.getRegionStates().getRegionState(hri);
787       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
788       // Failed to open since no plan, so it's on no server
789       assertNull(state.getServerName());
790 
791       // try retainAssigment again with valid plan
792       MyLoadBalancer.controledRegion = null;
793       am.assign(regions);
794       assertTrue(am.waitForAssignment(hri));
795 
796       ServerName serverName = master.getAssignmentManager().
797         getRegionStates().getRegionServerOfRegion(hri);
798       TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
799 
800       // it retains on same server as specified
801       assertEquals(serverName, dest);
802     } finally {
803       MyLoadBalancer.controledRegion = null;
804       TEST_UTIL.deleteTable(tableName);
805     }
806   }
807 
808   /**
809    * This tests region open failure which is not recoverable
810    */
811   @Test (timeout=60000)
812   public void testOpenFailedUnrecoverable() throws Exception {
813     TableName table =
814         TableName.valueOf("testOpenFailedUnrecoverable");
815     try {
816       HTableDescriptor desc = new HTableDescriptor(table);
817       desc.addFamily(new HColumnDescriptor(FAMILY));
818       admin.createTable(desc);
819 
820       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
821       HRegionInfo hri = new HRegionInfo(
822         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
823       MetaTableAccessor.addRegionToMeta(meta, hri);
824 
825       FileSystem fs = FileSystem.get(conf);
826       Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
827       Path regionDir = new Path(tableDir, hri.getEncodedName());
828       // create a file named the same as the region dir to
829       // mess up with region opening
830       fs.create(regionDir, true);
831 
832       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
833       AssignmentManager am = master.getAssignmentManager();
834       assertFalse(TEST_UTIL.assignRegion(hri));
835 
836       RegionState state = am.getRegionStates().getRegionState(hri);
837       assertEquals(RegionState.State.FAILED_OPEN, state.getState());
838       // Failed to open due to file system issue. Region state should
839       // carry the opening region server so that we can force close it
840       // later on before opening it again. See HBASE-9092.
841       assertNotNull(state.getServerName());
842 
843       // remove the blocking file, so that region can be opened
844       fs.delete(regionDir, true);
845       assertTrue(TEST_UTIL.assignRegion(hri));
846 
847       ServerName serverName = master.getAssignmentManager().
848         getRegionStates().getRegionServerOfRegion(hri);
849       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
850     } finally {
851       TEST_UTIL.deleteTable(table);
852     }
853   }
854 
855   @Test (timeout=60000)
856   public void testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState() throws Exception {
857     final TableName table =
858         TableName.valueOf
859             ("testSSHWhenDisablingTableRegionsInOpeningOrPendingOpenState");
860     AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
861     HRegionInfo hri = null;
862     ServerName serverName = null;
863     try {
864       hri = createTableAndGetOneRegion(table);
865       serverName = am.getRegionStates().getRegionServerOfRegion(hri);
866       ServerName destServerName = null;
867       HRegionServer destServer = null;
868       for (int i = 0; i < 3; i++) {
869         destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
870         if (!destServer.getServerName().equals(serverName)) {
871           destServerName = destServer.getServerName();
872           break;
873         }
874       }
875       am.regionOffline(hri);
876       ZooKeeperWatcher zkw = TEST_UTIL.getHBaseCluster().getMaster().getZooKeeper();
877       am.getRegionStates().updateRegionState(hri, State.PENDING_OPEN, destServerName);
878       if (ConfigUtil.useZKForAssignment(conf)) {
879         ZKAssign.createNodeOffline(zkw, hri, destServerName);
880         ZKAssign.transitionNodeOpening(zkw, hri, destServerName);
881 
882         // Wait till the event is processed and the region is in transition
883         long timeoutTime = System.currentTimeMillis() + 20000;
884         while (!am.getRegionStates().isRegionInTransition(hri)) {
885           assertTrue("Failed to process ZK opening event in time",
886             System.currentTimeMillis() < timeoutTime);
887           Thread.sleep(100);
888         }
889       }
890 
891       am.getTableStateManager().setTableState(table, ZooKeeperProtos.Table.State.DISABLING);
892       List<HRegionInfo> toAssignRegions = am.cleanOutCrashedServerReferences(destServerName);
893       assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty());
894       assertTrue("Regions to be assigned should be empty.", am.getRegionStates()
895           .getRegionState(hri).isOffline());
896     } finally {
897       if (hri != null && serverName != null) {
898         am.regionOnline(hri, serverName);
899       }
900       am.getTableStateManager().setTableState(table, ZooKeeperProtos.Table.State.DISABLED);
901       TEST_UTIL.deleteTable(table);
902     }
903   }
904 
905   /**
906    * This tests region close hanging
907    */
908   @Test (timeout=60000)
909   public void testCloseHang() throws Exception {
910     String table = "testCloseHang";
911     try {
912       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
913       desc.addFamily(new HColumnDescriptor(FAMILY));
914       admin.createTable(desc);
915 
916       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
917       HRegionInfo hri = new HRegionInfo(
918         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
919       MetaTableAccessor.addRegionToMeta(meta, hri);
920 
921       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
922       AssignmentManager am = master.getAssignmentManager();
923       assertTrue(TEST_UTIL.assignRegion(hri));
924       ServerName sn = am.getRegionStates().getRegionServerOfRegion(hri);
925       TEST_UTIL.assertRegionOnServer(hri, sn, 6000);
926 
927       MyRegionObserver.postCloseEnabled.set(true);
928       am.unassign(hri);
929       // Now region should pending_close or closing
930       // Unassign it again forcefully so that we can trigger already
931       // in transition exception. This test is to make sure this scenario
932       // is handled properly.
933       am.server.getConfiguration().setLong(
934         AssignmentManager.ALREADY_IN_TRANSITION_WAITTIME, 1000);
935       am.unassign(hri, true);
936       RegionState state = am.getRegionStates().getRegionState(hri);
937       assertEquals(RegionState.State.FAILED_CLOSE, state.getState());
938 
939       // Let region closing move ahead. The region should be closed
940       // properly and re-assigned automatically
941       MyRegionObserver.postCloseEnabled.set(false);
942 
943       // region may still be assigned now since it's closing,
944       // let's check if it's assigned after it's out of transition
945       am.waitOnRegionToClearRegionsInTransition(hri);
946 
947       // region should be closed and re-assigned
948       assertTrue(am.waitForAssignment(hri));
949       ServerName serverName = master.getAssignmentManager().
950         getRegionStates().getRegionServerOfRegion(hri);
951       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
952     } finally {
953       MyRegionObserver.postCloseEnabled.set(false);
954       TEST_UTIL.deleteTable(Bytes.toBytes(table));
955     }
956   }
957 
958   /**
959    * This tests region close racing with open
960    */
961   @Test(timeout = 60000)
962   public void testOpenCloseRacing() throws Exception {
963     String table = "testOpenCloseRacing";
964     try {
965       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
966       desc.addFamily(new HColumnDescriptor(FAMILY));
967       admin.createTable(desc);
968 
969       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
970       HRegionInfo hri = new HRegionInfo(
971         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
972       MetaTableAccessor.addRegionToMeta(meta, hri);
973       meta.close();
974 
975       MyRegionObserver.postOpenEnabled.set(true);
976       MyRegionObserver.postOpenCalled = false;
977       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
978       AssignmentManager am = master.getAssignmentManager();
979       // Region will be opened, but it won't complete
980       am.assign(hri, true);
981       long end = EnvironmentEdgeManager.currentTime() + 20000;
982       // Wait till postOpen is called
983       while (!MyRegionObserver.postOpenCalled ) {
984         assertFalse("Timed out waiting for postOpen to be called",
985           EnvironmentEdgeManager.currentTime() > end);
986         Thread.sleep(300);
987       }
988 
989       // Now let's unassign it, it should do nothing
990       am.unassign(hri);
991       RegionState state = am.getRegionStates().getRegionState(hri);
992       ServerName oldServerName = state.getServerName();
993       assertTrue(state.isPendingOpenOrOpening() && oldServerName != null);
994 
995       // Now the region is stuck in opening
996       // Let's forcefully re-assign it to trigger closing/opening
997       // racing. This test is to make sure this scenario
998       // is handled properly.
999       ServerName destServerName = null;
1000       int numRS = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().size();
1001       for (int i = 0; i < numRS; i++) {
1002         HRegionServer destServer = TEST_UTIL.getHBaseCluster().getRegionServer(i);
1003         if (!destServer.getServerName().equals(oldServerName)) {
1004           destServerName = destServer.getServerName();
1005           break;
1006         }
1007       }
1008       assertNotNull(destServerName);
1009       assertFalse("Region should be assigned on a new region server",
1010         oldServerName.equals(destServerName));
1011       List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
1012       regions.add(hri);
1013       am.assign(destServerName, regions);
1014 
1015       // let region open continue
1016       MyRegionObserver.postOpenEnabled.set(false);
1017 
1018       // let's check if it's assigned after it's out of transition
1019       am.waitOnRegionToClearRegionsInTransition(hri);
1020       assertTrue(am.waitForAssignment(hri));
1021 
1022       ServerName serverName = master.getAssignmentManager().
1023         getRegionStates().getRegionServerOfRegion(hri);
1024       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
1025     } finally {
1026       MyRegionObserver.postOpenEnabled.set(false);
1027       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1028     }
1029   }
1030 
1031   /**
1032    * Test force unassign/assign a region hosted on a dead server
1033    */
1034   @Test (timeout=60000)
1035   public void testAssignRacingWithSSH() throws Exception {
1036     String table = "testAssignRacingWithSSH";
1037     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1038     MyMaster master = null;
1039     try {
1040       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
1041       desc.addFamily(new HColumnDescriptor(FAMILY));
1042       admin.createTable(desc);
1043 
1044       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
1045       HRegionInfo hri = new HRegionInfo(
1046         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
1047       MetaTableAccessor.addRegionToMeta(meta, hri);
1048 
1049       // Assign the region
1050       master = (MyMaster)cluster.getMaster();
1051       AssignmentManager am = master.getAssignmentManager();
1052 
1053       am.assign(hri, true);
1054 
1055       // Hold SSH before killing the hosting server
1056       master.enableSSH(false);
1057 
1058 
1059       RegionStates regionStates = am.getRegionStates();
1060       ServerName metaServer = regionStates.getRegionServerOfRegion(
1061         HRegionInfo.FIRST_META_REGIONINFO);
1062       while (true) {
1063         assertTrue(am.waitForAssignment(hri));
1064         RegionState state = regionStates.getRegionState(hri);
1065         ServerName oldServerName = state.getServerName();
1066         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
1067           // Kill the hosting server, which doesn't have meta on it.
1068           cluster.killRegionServer(oldServerName);
1069           cluster.waitForRegionServerToStop(oldServerName, -1);
1070           break;
1071         }
1072         int i = cluster.getServerWithMeta();
1073         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
1074         oldServerName = rs.getServerName();
1075         master.move(hri.getEncodedNameAsBytes(),
1076           Bytes.toBytes(oldServerName.getServerName()));
1077       }
1078 
1079       // You can't assign a dead region before SSH
1080       am.assign(hri, true, true);
1081       RegionState state = regionStates.getRegionState(hri);
1082       assertTrue(state.isFailedClose());
1083 
1084       // You can't unassign a dead region before SSH either
1085       am.unassign(hri, true);
1086       assertTrue(state.isFailedClose());
1087 
1088       // Enable SSH so that log can be split
1089       master.enableSSH(true);
1090 
1091       // let's check if it's assigned after it's out of transition.
1092       // no need to assign it manually, SSH should do it
1093       am.waitOnRegionToClearRegionsInTransition(hri);
1094       assertTrue(am.waitForAssignment(hri));
1095 
1096       ServerName serverName = master.getAssignmentManager().
1097         getRegionStates().getRegionServerOfRegion(hri);
1098       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 6000);
1099     } finally {
1100       if (master != null) {
1101         master.enableSSH(true);
1102       }
1103       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1104       cluster.startRegionServer();
1105     }
1106   }
1107 
1108   /**
1109    * Test force unassign/assign a region of a disabled table
1110    */
1111   @Test (timeout=60000)
1112   public void testAssignDisabledRegion() throws Exception {
1113     TableName table = TableName.valueOf("testAssignDisabledRegion");
1114     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1115     MyMaster master = null;
1116     try {
1117       HTableDescriptor desc = new HTableDescriptor(table);
1118       desc.addFamily(new HColumnDescriptor(FAMILY));
1119       admin.createTable(desc);
1120 
1121       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
1122       HRegionInfo hri = new HRegionInfo(
1123         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
1124       MetaTableAccessor.addRegionToMeta(meta, hri);
1125 
1126       // Assign the region
1127       master = (MyMaster)cluster.getMaster();
1128       AssignmentManager am = master.getAssignmentManager();
1129       RegionStates regionStates = am.getRegionStates();
1130       assertTrue(TEST_UTIL.assignRegion(hri));
1131 
1132       // Disable the table
1133       admin.disableTable(table);
1134       assertTrue(regionStates.isRegionOffline(hri));
1135 
1136       // You can't assign a disabled region
1137       am.assign(hri, true, true);
1138       assertTrue(regionStates.isRegionOffline(hri));
1139 
1140       // You can't unassign a disabled region either
1141       am.unassign(hri, true);
1142       assertTrue(regionStates.isRegionOffline(hri));
1143     } finally {
1144       TEST_UTIL.deleteTable(table);
1145     }
1146   }
1147 
1148   /**
1149    * Test offlined region is assigned by SSH
1150    */
1151   @Test (timeout=60000)
1152   public void testAssignOfflinedRegionBySSH() throws Exception {
1153     String table = "testAssignOfflinedRegionBySSH";
1154     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1155     MyMaster master = null;
1156     try {
1157       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
1158       desc.addFamily(new HColumnDescriptor(FAMILY));
1159       admin.createTable(desc);
1160 
1161       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
1162       HRegionInfo hri = new HRegionInfo(
1163         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
1164       MetaTableAccessor.addRegionToMeta(meta, hri);
1165 
1166       // Assign the region
1167       master = (MyMaster)cluster.getMaster();
1168       AssignmentManager am = master.getAssignmentManager();
1169       am.assign(hri, true);
1170 
1171       RegionStates regionStates = am.getRegionStates();
1172       ServerName metaServer = regionStates.getRegionServerOfRegion(
1173         HRegionInfo.FIRST_META_REGIONINFO);
1174       ServerName oldServerName = null;
1175       while (true) {
1176         assertTrue(am.waitForAssignment(hri));
1177         RegionState state = regionStates.getRegionState(hri);
1178         oldServerName = state.getServerName();
1179         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
1180           // Mark the hosting server aborted, but don't actually kill it.
1181           // It doesn't have meta on it.
1182           MyRegionServer.abortedServer = oldServerName;
1183           break;
1184         }
1185         int i = cluster.getServerWithMeta();
1186         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
1187         oldServerName = rs.getServerName();
1188         master.move(hri.getEncodedNameAsBytes(),
1189           Bytes.toBytes(oldServerName.getServerName()));
1190       }
1191 
1192       // Make sure the region is assigned on the dead server
1193       assertTrue(regionStates.isRegionOnline(hri));
1194       assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
1195 
1196       // Kill the hosting server, which doesn't have meta on it.
1197       cluster.killRegionServer(oldServerName);
1198       cluster.waitForRegionServerToStop(oldServerName, -1);
1199 
1200       ServerManager serverManager = master.getServerManager();
1201       while (!serverManager.isServerDead(oldServerName)
1202           || serverManager.getDeadServers().areDeadServersInProgress()) {
1203         Thread.sleep(100);
1204       }
1205 
1206       // Let's check if it's assigned after it's out of transition.
1207       // no need to assign it manually, SSH should do it
1208       am.waitOnRegionToClearRegionsInTransition(hri);
1209       assertTrue(am.waitForAssignment(hri));
1210 
1211       ServerName serverName = master.getAssignmentManager().
1212         getRegionStates().getRegionServerOfRegion(hri);
1213       TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
1214     } finally {
1215       MyRegionServer.abortedServer = null;
1216       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1217       cluster.startRegionServer();
1218     }
1219   }
1220 
1221   /**
1222    * Test SSH waiting for extra region server for assignment
1223    */
1224   @Test (timeout=300000)
1225   public void testSSHWaitForServerToAssignRegion() throws Exception {
1226     TableName table = TableName.valueOf("testSSHWaitForServerToAssignRegion");
1227     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1228     boolean startAServer = false;
1229     try {
1230       HTableDescriptor desc = new HTableDescriptor(table);
1231       desc.addFamily(new HColumnDescriptor(FAMILY));
1232       admin.createTable(desc);
1233 
1234       HMaster master = cluster.getMaster();
1235       final ServerManager serverManager = master.getServerManager();
1236       MyLoadBalancer.countRegionServers = Integer.valueOf(
1237         serverManager.countOfRegionServers());
1238       HRegionServer rs = TEST_UTIL.getRSForFirstRegionInTable(table);
1239       assertNotNull("First region should be assigned", rs);
1240       final ServerName serverName = rs.getServerName();
1241       // Wait till SSH tried to assign regions a several times
1242       int counter = MyLoadBalancer.counter.get() + 5;
1243       cluster.killRegionServer(serverName);
1244       startAServer = true;
1245       cluster.waitForRegionServerToStop(serverName, -1);
1246       while (counter > MyLoadBalancer.counter.get()) {
1247         Thread.sleep(1000);
1248       }
1249       cluster.startRegionServer();
1250       startAServer = false;
1251       // Wait till the dead server is processed by SSH
1252       TEST_UTIL.waitFor(120000, 1000, new Waiter.Predicate<Exception>() {
1253         @Override
1254         public boolean evaluate() throws Exception {
1255           return serverManager.isServerDead(serverName)
1256             && !serverManager.areDeadServersInProgress();
1257         }
1258       });
1259       TEST_UTIL.waitUntilAllRegionsAssigned(table, 300000);
1260 
1261       rs = TEST_UTIL.getRSForFirstRegionInTable(table);
1262       assertTrue("First region should be re-assigned to a different server",
1263         rs != null && !serverName.equals(rs.getServerName()));
1264     } finally {
1265       MyLoadBalancer.countRegionServers = null;
1266       TEST_UTIL.deleteTable(table);
1267       if (startAServer) {
1268         cluster.startRegionServer();
1269       }
1270     }
1271   }
1272 
1273   /**
1274    * Test disabled region is ignored by SSH
1275    */
1276   @Test (timeout=60000)
1277   public void testAssignDisabledRegionBySSH() throws Exception {
1278     String table = "testAssignDisabledRegionBySSH";
1279     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1280     MyMaster master = null;
1281     try {
1282       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
1283       desc.addFamily(new HColumnDescriptor(FAMILY));
1284       admin.createTable(desc);
1285 
1286       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
1287       HRegionInfo hri = new HRegionInfo(
1288         desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
1289       MetaTableAccessor.addRegionToMeta(meta, hri);
1290 
1291       // Assign the region
1292       master = (MyMaster)cluster.getMaster();
1293       AssignmentManager am = master.getAssignmentManager();
1294       am.assign(hri, true);
1295 
1296       RegionStates regionStates = am.getRegionStates();
1297       ServerName metaServer = regionStates.getRegionServerOfRegion(
1298         HRegionInfo.FIRST_META_REGIONINFO);
1299       ServerName oldServerName = null;
1300       while (true) {
1301         assertTrue(am.waitForAssignment(hri));
1302         RegionState state = regionStates.getRegionState(hri);
1303         oldServerName = state.getServerName();
1304         if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
1305           // Mark the hosting server aborted, but don't actually kill it.
1306           // It doesn't have meta on it.
1307           MyRegionServer.abortedServer = oldServerName;
1308           break;
1309         }
1310         int i = cluster.getServerWithMeta();
1311         HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
1312         oldServerName = rs.getServerName();
1313         master.move(hri.getEncodedNameAsBytes(),
1314           Bytes.toBytes(oldServerName.getServerName()));
1315       }
1316 
1317       // Make sure the region is assigned on the dead server
1318       assertTrue(regionStates.isRegionOnline(hri));
1319       assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));
1320 
1321       // Disable the table now.
1322       master.disableTable(hri.getTable(), HConstants.NO_NONCE, HConstants.NO_NONCE);
1323 
1324       // Kill the hosting server, which doesn't have meta on it.
1325       cluster.killRegionServer(oldServerName);
1326       cluster.waitForRegionServerToStop(oldServerName, -1);
1327 
1328       ServerManager serverManager = master.getServerManager();
1329       while (!serverManager.isServerDead(oldServerName)
1330           || serverManager.getDeadServers().areDeadServersInProgress()) {
1331         Thread.sleep(100);
1332       }
1333 
1334       // Wait till no more RIT, the region should be offline.
1335       am.waitUntilNoRegionsInTransition(60000);
1336       assertTrue(regionStates.isRegionOffline(hri));
1337     } finally {
1338       MyRegionServer.abortedServer = null;
1339       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1340       cluster.startRegionServer();
1341     }
1342   }
1343 
1344   /**
1345    * Test that region state transition call is idempotent
1346    */
1347   @Test(timeout = 60000)
1348   public void testReportRegionStateTransition() throws Exception {
1349     String table = "testReportRegionStateTransition";
1350     try {
1351       MyRegionServer.simulateRetry = true;
1352       HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
1353       desc.addFamily(new HColumnDescriptor(FAMILY));
1354       admin.createTable(desc);
1355       Table meta = new HTable(conf, TableName.META_TABLE_NAME);
1356       HRegionInfo hri =
1357           new HRegionInfo(desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
1358       MetaTableAccessor.addRegionToMeta(meta, hri);
1359       HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
1360       AssignmentManager am = master.getAssignmentManager();
1361       TEST_UTIL.assignRegion(hri);
1362       RegionStates regionStates = am.getRegionStates();
1363       ServerName serverName = regionStates.getRegionServerOfRegion(hri);
1364       // Assert the the region is actually open on the server
1365       TEST_UTIL.assertRegionOnServer(hri, serverName, 6000);
1366       // Closing region should just work fine
1367       admin.disableTable(TableName.valueOf(table));
1368       assertTrue(regionStates.isRegionOffline(hri));
1369       List<HRegionInfo> regions = TEST_UTIL.getHBaseAdmin().getOnlineRegions(serverName);
1370       assertTrue(!regions.contains(hri));
1371     } finally {
1372       MyRegionServer.simulateRetry = false;
1373       TEST_UTIL.deleteTable(Bytes.toBytes(table));
1374     }
1375   }
1376 
1377   /**
1378    * Test concurrent updates to meta when meta is not on master
1379    * @throws Exception
1380    */
1381   @Test(timeout = 30000)
1382   public void testUpdatesRemoteMeta() throws Exception {
1383     // Not for zk less assignment
1384     if (conf.getBoolean("hbase.assignment.usezk", true)) {
1385       return;
1386     }
1387     conf.setInt("hbase.regionstatestore.meta.connection", 3);
1388     final RegionStateStore rss =
1389         new RegionStateStore(new MyRegionServer(conf, new ZkCoordinatedStateManager()));
1390     rss.start();
1391     // Create 10 threads and make each do 10 puts related to region state update
1392     Thread[] th = new Thread[10];
1393     List<String> nameList = new ArrayList<String>();
1394     List<TableName> tableNameList = new ArrayList<TableName>();
1395     for (int i = 0; i < th.length; i++) {
1396       th[i] = new Thread() {
1397         @Override
1398         public void run() {
1399           HRegionInfo[] hri = new HRegionInfo[10];
1400           ServerName serverName = ServerName.valueOf("dummyhost", 1000, 1234);
1401           for (int i = 0; i < 10; i++) {
1402             hri[i] = new HRegionInfo(TableName.valueOf(Thread.currentThread().getName() + "_" + i));
1403             RegionState newState = new RegionState(hri[i], RegionState.State.OPEN, serverName);
1404             RegionState oldState =
1405                 new RegionState(hri[i], RegionState.State.PENDING_OPEN, serverName);
1406             rss.updateRegionState(1, newState, oldState);
1407           }
1408         }
1409       };
1410       th[i].start();
1411       nameList.add(th[i].getName());
1412     }
1413     for (int i = 0; i < th.length; i++) {
1414       th[i].join();
1415     }
1416     // Add all the expected table names in meta to tableNameList
1417     for (String name : nameList) {
1418       for (int i = 0; i < 10; i++) {
1419         tableNameList.add(TableName.valueOf(name + "_" + i));
1420       }
1421     }
1422     List<Result> metaRows = MetaTableAccessor.fullScanOfMeta(admin.getConnection());
1423     int count = 0;
1424     // Check all 100 rows are in meta
1425     for (Result result : metaRows) {
1426       if (tableNameList.contains(HRegionInfo.getTable(result.getRow()))) {
1427         count++;
1428         if (count == 100) {
1429           break;
1430         }
1431       }
1432     }
1433     assertTrue(count == 100);
1434     rss.stop();
1435   }
1436 
1437   static class MyLoadBalancer extends StochasticLoadBalancer {
1438     // For this region, if specified, always assign to nowhere
1439     static volatile HRegionInfo controledRegion = null;
1440 
1441     static volatile Integer countRegionServers = null;
1442     static AtomicInteger counter = new AtomicInteger(0);
1443 
1444     @Override
1445     public ServerName randomAssignment(HRegionInfo regionInfo,
1446         List<ServerName> servers) {
1447       if (regionInfo.equals(controledRegion)) {
1448         return null;
1449       }
1450       return super.randomAssignment(regionInfo, servers);
1451     }
1452 
1453     @Override
1454     public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(
1455         List<HRegionInfo> regions, List<ServerName> servers) {
1456       if (countRegionServers != null && services != null) {
1457         int regionServers = services.getServerManager().countOfRegionServers();
1458         if (regionServers < countRegionServers.intValue()) {
1459           // Let's wait till more region servers join in.
1460           // Before that, fail region assignments.
1461           counter.incrementAndGet();
1462           return null;
1463         }
1464       }
1465       if (regions.get(0).equals(controledRegion)) {
1466         Map<ServerName, List<HRegionInfo>> m = Maps.newHashMap();
1467         m.put(LoadBalancer.BOGUS_SERVER_NAME, regions);
1468         return m;
1469       }
1470       return super.roundRobinAssignment(regions, servers);
1471     }
1472 
1473     @Override
1474     public Map<ServerName, List<HRegionInfo>> retainAssignment(
1475         Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
1476       for (HRegionInfo hri : regions.keySet()) {
1477         if (hri.equals(controledRegion)) {
1478           Map<ServerName, List<HRegionInfo>> m = Maps.newHashMap();
1479           m.put(LoadBalancer.BOGUS_SERVER_NAME, Lists.newArrayList(regions.keySet()));
1480           return m;
1481         }
1482       }
1483       return super.retainAssignment(regions, servers);
1484     }
1485   }
1486 
1487   public static class MyMaster extends HMaster {
1488     AtomicBoolean enabled = new AtomicBoolean(true);
1489 
1490     public MyMaster(Configuration conf, CoordinatedStateManager cp)
1491       throws IOException, KeeperException,
1492         InterruptedException {
1493       super(conf, cp);
1494     }
1495 
1496     @Override
1497     public boolean isServerCrashProcessingEnabled() {
1498       return enabled.get() && super.isServerCrashProcessingEnabled();
1499     }
1500 
1501     public void enableSSH(boolean enabled) {
1502       this.enabled.set(enabled);
1503       if (enabled) {
1504         serverManager.processQueuedDeadServers();
1505       }
1506     }
1507   }
1508 
1509   public static class MyRegionServer extends MiniHBaseClusterRegionServer {
1510     static volatile ServerName abortedServer = null;
1511     static volatile boolean simulateRetry = false;
1512 
1513     public MyRegionServer(Configuration conf, CoordinatedStateManager cp)
1514       throws IOException, KeeperException,
1515         InterruptedException {
1516       super(conf, cp);
1517     }
1518 
1519     @Override
1520     public boolean reportRegionStateTransition(TransitionCode code, long openSeqNum,
1521         HRegionInfo... hris) {
1522       if (simulateRetry) {
1523         // Simulate retry by calling the method twice
1524         super.reportRegionStateTransition(code, openSeqNum, hris);
1525         return super.reportRegionStateTransition(code, openSeqNum, hris);
1526       }
1527       return super.reportRegionStateTransition(code, openSeqNum, hris);
1528     }
1529 
1530     @Override
1531     public boolean isAborted() {
1532       return getServerName().equals(abortedServer) || super.isAborted();
1533     }
1534   }
1535 
1536   public static class MyRegionObserver extends BaseRegionObserver {
1537     // If enabled, fail all preClose calls
1538     static AtomicBoolean preCloseEnabled = new AtomicBoolean(false);
1539 
1540     // If enabled, stall postClose calls
1541     static AtomicBoolean postCloseEnabled = new AtomicBoolean(false);
1542 
1543     // If enabled, stall postOpen calls
1544     static AtomicBoolean postOpenEnabled = new AtomicBoolean(false);
1545 
1546     // A flag to track if postOpen is called
1547     static volatile boolean postOpenCalled = false;
1548 
1549     @Override
1550     public void preClose(ObserverContext<RegionCoprocessorEnvironment> c,
1551         boolean abortRequested) throws IOException {
1552       if (preCloseEnabled.get()) throw new IOException("fail preClose from coprocessor");
1553     }
1554 
1555     @Override
1556     public void postClose(ObserverContext<RegionCoprocessorEnvironment> c,
1557         boolean abortRequested) {
1558       stallOnFlag(postCloseEnabled);
1559     }
1560 
1561     @Override
1562     public void postOpen(ObserverContext<RegionCoprocessorEnvironment> c) {
1563       postOpenCalled = true;
1564       stallOnFlag(postOpenEnabled);
1565     }
1566 
1567     private void stallOnFlag(final AtomicBoolean flag) {
1568       try {
1569         // If enabled, stall
1570         while (flag.get()) {
1571           Thread.sleep(1000);
1572         }
1573       } catch (InterruptedException ie) {
1574         Thread.currentThread().interrupt();
1575       }
1576     }
1577   }
1578 }