View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotEquals;
24  import static org.junit.Assert.assertNotNull;
25  import static org.junit.Assert.assertNotSame;
26  import static org.junit.Assert.assertNull;
27  import static org.junit.Assert.assertTrue;
28  import static org.junit.Assert.fail;
29  
30  import java.io.IOException;
31  import java.lang.reflect.Field;
32  import java.util.ArrayList;
33  import java.util.Collection;
34  import java.util.List;
35  import java.util.Map;
36  import java.util.Set;
37  import java.util.concurrent.CountDownLatch;
38  import java.util.concurrent.TimeUnit;
39  
40  import org.apache.commons.logging.Log;
41  import org.apache.commons.logging.LogFactory;
42  import org.apache.hadoop.conf.Configuration;
43  import org.apache.hadoop.fs.FileSystem;
44  import org.apache.hadoop.fs.Path;
45  import org.apache.hadoop.hbase.Abortable;
46  import org.apache.hadoop.hbase.CoordinatedStateManager;
47  import org.apache.hadoop.hbase.Coprocessor;
48  import org.apache.hadoop.hbase.CoprocessorEnvironment;
49  import org.apache.hadoop.hbase.HBaseTestingUtility;
50  import org.apache.hadoop.hbase.HColumnDescriptor;
51  import org.apache.hadoop.hbase.HConstants;
52  import org.apache.hadoop.hbase.HRegionInfo;
53  import org.apache.hadoop.hbase.HTableDescriptor;
54  import org.apache.hadoop.hbase.MasterNotRunningException;
55  import org.apache.hadoop.hbase.MetaTableAccessor;
56  import org.apache.hadoop.hbase.MiniHBaseCluster;
57  import org.apache.hadoop.hbase.RegionTransition;
58  import org.apache.hadoop.hbase.Server;
59  import org.apache.hadoop.hbase.ServerName;
60  import org.apache.hadoop.hbase.TableName;
61  import org.apache.hadoop.hbase.UnknownRegionException;
62  import org.apache.hadoop.hbase.Waiter;
63  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
64  import org.apache.hadoop.hbase.client.Admin;
65  import org.apache.hadoop.hbase.client.Connection;
66  import org.apache.hadoop.hbase.client.ConnectionFactory;
67  import org.apache.hadoop.hbase.client.Consistency;
68  import org.apache.hadoop.hbase.client.Delete;
69  import org.apache.hadoop.hbase.client.Get;
70  import org.apache.hadoop.hbase.client.HBaseAdmin;
71  import org.apache.hadoop.hbase.client.HTable;
72  import org.apache.hadoop.hbase.client.Mutation;
73  import org.apache.hadoop.hbase.client.Put;
74  import org.apache.hadoop.hbase.client.Result;
75  import org.apache.hadoop.hbase.client.ResultScanner;
76  import org.apache.hadoop.hbase.client.Scan;
77  import org.apache.hadoop.hbase.client.Table;
78  import org.apache.hadoop.hbase.client.TestReplicasClient.SlowMeCopro;
79  import org.apache.hadoop.hbase.coordination.ZKSplitTransactionCoordination;
80  import org.apache.hadoop.hbase.coordination.ZkCloseRegionCoordination;
81  import org.apache.hadoop.hbase.coordination.ZkCoordinatedStateManager;
82  import org.apache.hadoop.hbase.coordination.ZkOpenRegionCoordination;
83  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
84  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
85  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
86  import org.apache.hadoop.hbase.exceptions.DeserializationException;
87  import org.apache.hadoop.hbase.executor.EventType;
88  import org.apache.hadoop.hbase.master.AssignmentManager;
89  import org.apache.hadoop.hbase.io.Reference;
90  import org.apache.hadoop.hbase.master.HMaster;
91  import org.apache.hadoop.hbase.master.RegionState;
92  import org.apache.hadoop.hbase.master.RegionState.State;
93  import org.apache.hadoop.hbase.master.RegionStates;
94  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
95  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
96  import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
97  import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
98  import org.apache.hadoop.hbase.testclassification.LargeTests;
99  import org.apache.hadoop.hbase.util.Bytes;
100 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
101 import org.apache.hadoop.hbase.util.FSUtils;
102 import org.apache.hadoop.hbase.util.HBaseFsck;
103 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
104 import org.apache.hadoop.hbase.util.PairOfSameType;
105 import org.apache.hadoop.hbase.util.RetryCounter;
106 import org.apache.hadoop.hbase.util.Threads;
107 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
108 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
109 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
110 import org.apache.zookeeper.KeeperException;
111 import org.apache.zookeeper.KeeperException.NodeExistsException;
112 import org.apache.zookeeper.data.Stat;
113 import org.junit.After;
114 import org.junit.AfterClass;
115 import org.junit.Assert;
116 import org.junit.Before;
117 import org.junit.BeforeClass;
118 import org.junit.Test;
119 import org.junit.experimental.categories.Category;
120 import org.mockito.Mockito;
121 
122 import com.google.protobuf.ServiceException;
123 
/**
 * Like TestSplitTransaction in that we're testing {@link SplitTransactionImpl},
 * except the tests below run against a running cluster, whereas TestSplitTransaction
 * tests against a bare {@link HRegion}.
 */
129 @Category(LargeTests.class)
130 @SuppressWarnings("deprecation")
131 public class TestSplitTransactionOnCluster {
  private static final Log LOG =
    LogFactory.getLog(TestSplitTransactionOnCluster.class);
  // Per-test admin handle; opened in setup(), closed in tearDown().
  private HBaseAdmin admin = null;
  private MiniHBaseCluster cluster = null;
  // Shared configuration of the mini cluster; initialized in before().
  private static Configuration conf;
  private static final int NB_SERVERS = 3;
  private static CountDownLatch latch = new CountDownLatch(1);
  // Cross-thread handshake flags used by the mocked split transactions
  // (written from the region-server side, read/reset by the tests).
  private static volatile boolean secondSplit = false;
  private static volatile boolean callRollBack = false;
  private static volatile boolean firstSplitCompleted = false;
  // Whether "hbase.assignment.usezk" is in effect; set once in setupOnce().
  private static boolean useZKForAssignment;

  static final HBaseTestingUtility TESTING_UTIL =
    new HBaseTestingUtility();
146 
147   static void setupOnce() throws Exception {
148     TESTING_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, 60000);
149     useZKForAssignment = TESTING_UTIL.getConfiguration().getBoolean(
150       "hbase.assignment.usezk", true);
151     TESTING_UTIL.startMiniCluster(NB_SERVERS);
152   }
153 
154   @BeforeClass
155   public static void before() throws Exception {
156     conf = TESTING_UTIL.getConfiguration();
157     // Use ZK for region assignment
158     conf.setBoolean("hbase.assignment.usezk", true);
159     setupOnce();
160   }
161 
162   @AfterClass public static void after() throws Exception {
163     TESTING_UTIL.shutdownMiniCluster();
164   }
165 
166   @Before public void setup() throws IOException {
167     TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
168     this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
169     this.cluster = TESTING_UTIL.getMiniHBaseCluster();
170   }
171 
172   @After
173   public void tearDown() throws Exception {
174     this.admin.close();
175   }
176 
177   private HRegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions)
178       throws IOException, InterruptedException {
179     assertEquals(1, regions.size());
180     HRegionInfo hri = regions.get(0).getRegionInfo();
181     TESTING_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager()
182       .waitOnRegionToClearRegionsInTransition(hri, 600000);
183     return hri;
184   }
185 
186   @Test(timeout = 60000)
187   public void testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack() throws Exception {
188     final TableName tableName =
189         TableName.valueOf("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack");
190 
191     if (!useZKForAssignment) {
192       // This test doesn't apply if not using ZK for assignment
193       return;
194     }
195 
196     try {
197       // Create table then get the single region for our new table.
198       HTable t = createTableAndWait(tableName, Bytes.toBytes("cf"));
199       final List<HRegion> regions = cluster.getRegions(tableName);
200       HRegionInfo hri = getAndCheckSingleTableRegion(regions);
201       int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
202         .getRegionName());
203       final HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
204       insertData(tableName, admin, t);
205       t.close();
206 
207       // Turn off balancer so it doesn't cut in and mess up our placements.
208       this.admin.setBalancerRunning(false, true);
209       // Turn off the meta scanner so it don't remove parent on us.
210       cluster.getMaster().setCatalogJanitorEnabled(false);
211 
212       // find a splittable region
213       final HRegion region = findSplittableRegion(regions);
214       assertTrue("not able to find a splittable region", region != null);
215       MockedCoordinatedStateManager cp = new MockedCoordinatedStateManager();
216       cp.initialize(regionServer, region);
217       cp.start();
218       regionServer.csm = cp;
219 
220       new Thread() {
221         @Override
222         public void run() {
223           SplitTransaction st = null;
224           st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
225           try {
226             st.prepare();
227             st.execute(regionServer, regionServer);
228           } catch (IOException e) {
229 
230           }
231         }
232       }.start();
233       for (int i = 0; !callRollBack && i < 100; i++) {
234         Thread.sleep(100);
235       }
236       assertTrue("Waited too long for rollback", callRollBack);
237       SplitTransaction st = new MockedSplitTransaction(region, Bytes.toBytes("row3"));
238       try {
239         secondSplit = true;
240         // make region splittable
241         region.initialize();
242         st.prepare();
243         st.execute(regionServer, regionServer);
244       } catch (IOException e) {
245         LOG.debug("Rollback started :"+ e.getMessage());
246         st.rollback(regionServer, regionServer);
247       }
248       for (int i=0; !firstSplitCompleted && i<100; i++) {
249         Thread.sleep(100);
250       }
251       assertTrue("fist split did not complete", firstSplitCompleted);
252 
253       RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
254       for (int i = 0; regionStates.isRegionInTransition(hri) && i < 100; i++) {
255         Thread.sleep(100);
256       }
257       assertFalse("region still in transition", regionStates.isRegionInTransition(hri));
258 
259       List<Region> onlineRegions = regionServer.getOnlineRegions(tableName);
260       // Region server side split is successful.
261       assertEquals("The parent region should be splitted", 2, onlineRegions.size());
262       //Should be present in RIT
263       List<HRegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager()
264           .getRegionStates().getRegionsOfTable(tableName);
265       // Master side should also reflect the same
266       assertEquals("No of regions in master", 2, regionsOfTable.size());
267     } finally {
268       admin.setBalancerRunning(true, false);
269       secondSplit = false;
270       firstSplitCompleted = false;
271       callRollBack = false;
272       cluster.getMaster().setCatalogJanitorEnabled(true);
273       TESTING_UTIL.deleteTable(tableName);
274     }
275   }
276 
277   @Test(timeout = 60000)
278   public void testSplitCompactWithPriority() throws Exception {
279     final TableName tableName = TableName.valueOf("testSplitCompactWithPriority");
280     // Create table then get the single region for our new table.
281     byte[] cf = Bytes.toBytes("cf");
282     HTable hTable = createTableAndWait(tableName, cf);
283 
284     assertNotEquals("Unable to retrieve regions of the table", -1,
285       TESTING_UTIL.waitFor(10000, new Waiter.Predicate<Exception>() {
286         @Override
287         public boolean evaluate() throws Exception {
288           return cluster.getRegions(tableName).size() == 1;
289         }
290       }));
291 
292     HRegion region = cluster.getRegions(tableName).get(0);
293     Store store = region.getStore(cf);
294     int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
295     HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
296 
297     Table table = TESTING_UTIL.getConnection().getTable(tableName);
298     // insert data
299     insertData(tableName, admin, table);
300     insertData(tableName, admin, table, 20);
301     insertData(tableName, admin, table, 40);
302 
303     // Compaction Request
304     store.triggerMajorCompaction();
305     CompactionContext compactionContext = store.requestCompaction();
306     assertNotNull(compactionContext);
307     assertFalse(compactionContext.getRequest().isAfterSplit());
308     assertEquals(compactionContext.getRequest().getPriority(), 7);
309 
310     // Split
311     this.admin.split(region.getRegionInfo().getRegionName(), Bytes.toBytes("row4"));
312 
313     Thread.sleep(5000);
314     assertEquals(2, cluster.getRegions(tableName).size());
315     // we have 2 daughter regions
316     HRegion hRegion1 = cluster.getRegions(tableName).get(0);
317     HRegion hRegion2 = cluster.getRegions(tableName).get(1);
318     Store store1 = hRegion1.getStore(cf);
319     Store store2 = hRegion2.getStore(cf);
320 
321     // For hStore1 && hStore2, set mock reference to one of the storeFiles
322     StoreFileInfo storeFileInfo1 = new ArrayList<>(store1.getStorefiles()).get(0).getFileInfo();
323     StoreFileInfo storeFileInfo2 = new ArrayList<>(store2.getStorefiles()).get(0).getFileInfo();
324     Field field = StoreFileInfo.class.getDeclaredField("reference");
325     field.setAccessible(true);
326     field.set(storeFileInfo1, Mockito.mock(Reference.class));
327     field.set(storeFileInfo2, Mockito.mock(Reference.class));
328     store1.triggerMajorCompaction();
329     store2.triggerMajorCompaction();
330 
331     compactionContext = store1.requestCompaction();
332     assertNotNull(compactionContext);
333     // since we set mock reference to one of the storeFiles, we will get isAfterSplit=true &&
334     // highest priority for hStore1's compactionContext
335     assertTrue(compactionContext.getRequest().isAfterSplit());
336     assertEquals(compactionContext.getRequest().getPriority(), Integer.MIN_VALUE + 1000);
337 
338     compactionContext =
339       store2.requestCompaction(Integer.MIN_VALUE + 10, null, null);
340     assertNotNull(compactionContext);
341     // compaction request contains higher priority than default priority of daughter region
342     // compaction (Integer.MIN_VALUE + 1000), hence we are expecting request priority to
343     // be accepted.
344     assertTrue(compactionContext.getRequest().isAfterSplit());
345     assertEquals(compactionContext.getRequest().getPriority(), Integer.MIN_VALUE + 10);
346   }
347 
348   @Test(timeout = 60000)
349   public void testRITStateForRollback() throws Exception {
350     final TableName tableName =
351         TableName.valueOf("testRITStateForRollback");
352     try {
353       // Create table then get the single region for our new table.
354       Table t = createTableAndWait(tableName, Bytes.toBytes("cf"));
355       final List<HRegion> regions = cluster.getRegions(tableName);
356       final HRegionInfo hri = getAndCheckSingleTableRegion(regions);
357       insertData(tableName, admin, t);
358       t.close();
359 
360       // Turn off balancer so it doesn't cut in and mess up our placements.
361       this.admin.setBalancerRunning(false, true);
362       // Turn off the meta scanner so it don't remove parent on us.
363       cluster.getMaster().setCatalogJanitorEnabled(false);
364 
365       // find a splittable region
366       final HRegion region = findSplittableRegion(regions);
367       assertTrue("not able to find a splittable region", region != null);
368 
369       // install region co-processor to fail splits
370       region.getCoprocessorHost().load(FailingSplitRegionObserver.class,
371         Coprocessor.PRIORITY_USER, region.getBaseConf());
372 
373       // split async
374       this.admin.split(region.getRegionInfo().getRegionName(), new byte[] {42});
375 
376       // we have to wait until the SPLITTING state is seen by the master
377       FailingSplitRegionObserver observer = (FailingSplitRegionObserver) region
378           .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
379       assertNotNull(observer);
380       observer.latch.await();
381 
382       LOG.info("Waiting for region to come out of RIT");
383       TESTING_UTIL.waitUntilNoRegionsInTransition(60000);
384     } finally {
385       admin.setBalancerRunning(true, false);
386       cluster.getMaster().setCatalogJanitorEnabled(true);
387       TESTING_UTIL.deleteTable(tableName);
388     }
389   }
390   @Test(timeout = 60000)
391   public void testSplitFailedCompactionAndSplit() throws Exception {
392     final TableName tableName = TableName.valueOf("testSplitFailedCompactionAndSplit");
393     Configuration conf = TESTING_UTIL.getConfiguration();
394     try {
395       HBaseAdmin admin = new HBaseAdmin(conf);
396       // Create table then get the single region for our new table.
397       HTableDescriptor htd = new HTableDescriptor(tableName);
398       byte[] cf = Bytes.toBytes("cf");
399       htd.addFamily(new HColumnDescriptor(cf));
400       admin.createTable(htd);
401 
402       for (int i = 0; cluster.getRegions(tableName).size() == 0 && i < 100; i++) {
403         Thread.sleep(100);
404       }
405       assertEquals(1, cluster.getRegions(tableName).size());
406 
407       HRegion region = cluster.getRegions(tableName).get(0);
408       Store store = region.getStore(cf);
409       int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
410       HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
411 
412       Table t = new HTable(conf, tableName);
413       // insert data
414       insertData(tableName, admin, t);
415       insertData(tableName, admin, t);
416 
417       int fileNum = store.getStorefiles().size();
418       // 0, Compaction Request
419       store.triggerMajorCompaction();
420       CompactionContext cc = store.requestCompaction();
421       assertNotNull(cc);
422       // 1, A timeout split
423       // 1.1 close region
424       assertEquals(2, region.close(false).get(cf).size());
425       // 1.2 rollback and Region initialize again
426       region.initialize();
427 
428       // 2, Run Compaction cc
429       assertFalse(region.compact(cc, store, NoLimitThroughputController.INSTANCE));
430       assertTrue(fileNum > store.getStorefiles().size());
431 
432       // 3, Split
433       SplitTransaction st = new SplitTransactionImpl(region, Bytes.toBytes("row3"));
434       assertTrue(st.prepare());
435       st.execute(regionServer, regionServer);
436       LOG.info("Waiting for region to come out of RIT");
437       TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
438         @Override
439         public boolean evaluate() throws Exception {
440           RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
441           return !regionStates.isRegionsInTransition();
442         }
443       });
444       assertEquals(2, cluster.getRegions(tableName).size());
445     } finally {
446       TESTING_UTIL.deleteTable(tableName);
447     }
448   }
449 
  /**
   * Region observer that forces every split to roll back by throwing from
   * preSplitBeforePONR. {@link #latch} lets a test wait until a split has
   * actually reached the coprocessor; {@link #postSplit} counts down only if a
   * split completes end-to-end.
   */
  public static class FailingSplitRegionObserver extends BaseRegionObserver {
    // Released once a split reaches preSplitBeforePONR on the region server.
    volatile CountDownLatch latch;
    // Released only if a split completes despite the injected failure.
    volatile CountDownLatch postSplit;
    @Override
    public void start(CoprocessorEnvironment e) throws IOException {
      latch = new CountDownLatch(1);
      postSplit = new CountDownLatch(1);
    }
    @Override
    public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
        byte[] splitKey, List<Mutation> metaEntries) throws IOException {
      // Signal the test first, then abort the split (still before the PONR,
      // so the split will be rolled back rather than fixed up).
      latch.countDown();
      LOG.info("Causing rollback of region split");
      throw new IOException("Causing rollback of region split");
    }
    @Override
    public void postCompleteSplit(ObserverContext<RegionCoprocessorEnvironment> ctx)
        throws IOException {
      postSplit.countDown();
      LOG.info("postCompleteSplit called");
    }
  }
472 
  /*
   * A test that intentionally has master fail the processing of the split message.
   * Tests that the regionserver split ephemeral node gets cleaned up if it
   * crashes and that after we process server shutdown, the parent region is online and
   * daughters are cleaned up.
   */
  @Test (timeout = 60000)
  public void testSplitIsRolledBackOnSplitFailure() throws Exception {
    final TableName tableName = TableName.valueOf("testSplitIsRolledBackOnSplitFailure");

    // Create table then get the single region for our new table.
    HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
    List<HRegion> regions = cluster.getRegions(tableName);
    final HRegionInfo hri = getAndCheckSingleTableRegion(regions);

    // Turn off balancer so it doesn't cut in and mess up our placements.
    this.admin.setBalancerRunning(false, true);
    // Turn off the meta scanner so it don't remove parent on us.
    cluster.getMaster().setCatalogJanitorEnabled(false);

    int serverIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);

    try {
      // Add a bit of load up into the table so splittable.
      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
      // Get region pre-split.
      HRegionServer server = cluster.getRegionServer(serverIndex);
      printOutRegions(server, "Initial regions: ");
      int regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
      // Now, before we split, set special flag in master, a flag that has
      // it FAIL the processing of split.
      AssignmentManager.setTestSkipSplitHandling(true);
      // Now try splitting and it should work.
      split(hri, server, regionCount);

      ZooKeeperWatcher zkw = TESTING_UTIL.getZooKeeperWatcher();
      String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
      RegionTransition rt = null;
      Stat stats = null;

      // Wait till the znode moved to SPLIT.
      // NOTE(review): if the znode is never readable, parseFrom may be handed
      // null data and rt could stay null, turning the assert below into an
      // NPE — confirm whether that failure mode is acceptable here.
      for (int i = 0; i < 100; i++) {
        stats = zkw.getRecoverableZooKeeper().exists(path, false);
        rt = RegionTransition.parseFrom(ZKAssign.getData(zkw, hri.getEncodedName()));
        if (rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)) {
          break;
        }
        Thread.sleep(100);
      }
      LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats=" + stats);
      assertTrue(rt.getEventType().equals(EventType.RS_ZK_REGION_SPLIT));
      // Now crash the server, for ZK-less assignment, the server is auto aborted
      abortServerAndWaitForProcessingToComplete(serverIndex);
      waitUntilRegionServerDead();

      TESTING_UTIL.waitUntilNoRegionsInTransition();

      // Lets wait until parent region is online.
      TESTING_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
        @Override
        public boolean evaluate() {
          for (HRegion region : cluster.getRegions(tableName)) {
            if (Bytes.equals(region.getRegionInfo().getRegionName(), hri.getRegionName())) {
              return true;
            } else {
              LOG.debug("Wait for some more time, online region: " + region);
            }
          }
          return false;
        }
      });

      RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
      assertTrue("Parent region should be online", regionStates.isRegionOnline(hri));
      // Check if daughter regions are cleaned up: meta should list only the
      // parent, and the filesystem should hold only the parent's region dir.
      List<HRegionInfo> tableRegions = MetaTableAccessor.getTableRegions(zkw,
          cluster.getMaster().getConnection(), tableName);
      assertEquals("Only parent region should be present, but we have: " + tableRegions,
          1, tableRegions.size());
      Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), tableName);
      List<Path> regionDirs =
          FSUtils.getRegionDirs(cluster.getMaster().getFileSystem(), tableDir);
      assertEquals("Only one region dir should be present, we have, dirs: " + regionDirs,
          1, regionDirs.size());
      assertTrue("Region dir doesn't belong to region: " + hri + " dir: " + regionDirs,
          regionDirs.get(0).getName().endsWith(hri.getEncodedName()));
    } finally {
      // Set this flag back.
      AssignmentManager.setTestSkipSplitHandling(false);
      admin.setBalancerRunning(true, false);
      cluster.getMaster().setCatalogJanitorEnabled(true);
      // Replace the server we crashed so later tests have NB_SERVERS again.
      cluster.startRegionServer();
      t.close();
      TESTING_UTIL.deleteTable(tableName);
    }
  }
569 
570   private void abortServerAndWaitForProcessingToComplete(int serverIndex) throws Exception {
571 
572     final HMaster master = TESTING_UTIL.getMiniHBaseCluster().getMaster();
573     cluster.abortRegionServer(serverIndex);
574     TESTING_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
575       @Override
576       public boolean evaluate() throws Exception {
577         return master.getServerManager().areDeadServersInProgress();
578       }
579     });
580   }
581 
  /**
   * Plants a blocking CLOSING znode (or CLOSING state, when not using ZK) for
   * the region, verifies that repeated split requests roll back without
   * changing the region count, then clears the blocker and verifies the split
   * goes through.
   */
  @Test (timeout = 300000) public void testExistingZnodeBlocksSplitAndWeRollback()
  throws IOException, InterruptedException, NodeExistsException, KeeperException, ServiceException {
    final TableName tableName =
        TableName.valueOf("testExistingZnodeBlocksSplitAndWeRollback");

    // Create table then get the single region for our new table.
    HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
    List<HRegion> regions = cluster.getRegions(tableName);
    HRegionInfo hri = getAndCheckSingleTableRegion(regions);

    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);

    RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();

    // Turn off balancer so it doesn't cut in and mess up our placements.
    this.admin.setBalancerRunning(false, true);
    // Turn off the meta scanner so it don't remove parent on us.
    cluster.getMaster().setCatalogJanitorEnabled(false);
    try {
      // Add a bit of load up into the table so splittable.
      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
      // Get region pre-split.
      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
      printOutRegions(server, "Initial regions: ");
      int regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
      // Insert into zk a blocking znode, a znode of same name as region
      // so it gets in way of our splitting.
      ServerName fakedServer = ServerName.valueOf("any.old.server", 1234, -1);
      if (useZKForAssignment) {
        ZKAssign.createNodeClosing(TESTING_UTIL.getZooKeeperWatcher(),
          hri, fakedServer);
      } else {
        // ZK-less assignment: mark the region CLOSING in the master's state
        // instead of planting a znode.
        regionStates.updateRegionState(hri, RegionState.State.CLOSING);
      }
      // Now try splitting.... should fail.  And each should successfully
      // rollback.
      this.admin.split(hri.getRegionNameAsString());
      this.admin.split(hri.getRegionNameAsString());
      this.admin.split(hri.getRegionNameAsString());
      // Wait around a while and assert count of regions remains constant.
      for (int i = 0; i < 10; i++) {
        Thread.sleep(100);
        assertEquals(regionCount, ProtobufUtil.getOnlineRegions(
          server.getRSRpcServices()).size());
      }
      if (useZKForAssignment) {
        // Now clear the zknode
        ZKAssign.deleteClosingNode(TESTING_UTIL.getZooKeeperWatcher(),
          hri, fakedServer);
      } else {
        regionStates.regionOnline(hri, server.getServerName());
      }
      // Now try splitting and it should work.
      split(hri, server, regionCount);
      // Get daughters
      checkAndGetDaughters(tableName);
      // OK, so split happened after we cleared the blocking node.
    } finally {
      // NOTE(review): unlike sibling tests, the table is not deleted here —
      // confirm whether a later test depends on it or this is an oversight.
      admin.setBalancerRunning(true, false);
      cluster.getMaster().setCatalogJanitorEnabled(true);
      t.close();
    }
  }
645 
646   /**
647    * Test that if daughter split on us, we won't do the shutdown handler fixup
648    * just because we can't find the immediate daughter of an offlined parent.
649    * @throws IOException
650    * @throws InterruptedException
651    */
652   @Test (timeout=300000) public void testShutdownFixupWhenDaughterHasSplit()
653   throws IOException, InterruptedException, ServiceException {
654     final TableName tableName =
655         TableName.valueOf("testShutdownFixupWhenDaughterHasSplit");
656 
657     // Create table then get the single region for our new table.
658     HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
659     List<HRegion> regions = cluster.getRegions(tableName);
660     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
661 
662     int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
663 
664     // Turn off balancer so it doesn't cut in and mess up our placements.
665     this.admin.setBalancerRunning(false, true);
666     // Turn off the meta scanner so it don't remove parent on us.
667     cluster.getMaster().setCatalogJanitorEnabled(false);
668     try {
669       // Add a bit of load up into the table so splittable.
670       TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
671       // Get region pre-split.
672       HRegionServer server = cluster.getRegionServer(tableRegionIndex);
673       printOutRegions(server, "Initial regions: ");
674       int regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
675       // Now split.
676       split(hri, server, regionCount);
677       // Get daughters
678       List<HRegion> daughters = checkAndGetDaughters(tableName);
679       // Now split one of the daughters.
680       regionCount = ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size();
681       HRegionInfo daughter = daughters.get(0).getRegionInfo();
682       LOG.info("Daughter we are going to split: " + daughter);
683       // Compact first to ensure we have cleaned up references -- else the split
684       // will fail.
685       this.admin.compact(daughter.getRegionName());
686       RetryCounter retrier = new RetryCounter(30, 1, TimeUnit.SECONDS);
687       while (CompactionState.NONE != admin.getCompactionStateForRegion(daughter.getRegionName())
688           && retrier.shouldRetry()) {
689         retrier.sleepUntilNextRetry();
690       }
691 
692       daughters = cluster.getRegions(tableName);
693       HRegion daughterRegion = null;
694       for (HRegion r : daughters) {
695         if (r.getRegionInfo().equals(daughter)) {
696           daughterRegion = r;
697           // Archiving the compacted references file
698           r.getStores().get(0).closeAndArchiveCompactedFiles();
699 
700           LOG.info("Found matching HRI: " + daughterRegion);
701           break;
702         }
703       }
704       assertTrue(daughterRegion != null);
705       for (int i=0; i<100; i++) {
706         if (!daughterRegion.hasReferences()) break;
707         Threads.sleep(100);
708       }
709       assertFalse("Waiting for reference to be compacted", daughterRegion.hasReferences());
710       LOG.info("Daughter hri before split (has been compacted): " + daughter);
711       split(daughter, server, regionCount);
712       // Get list of daughters
713       daughters = cluster.getRegions(tableName);
714       for (HRegion d: daughters) {
715         LOG.info("Regions before crash: " + d);
716       }
717       // Now crash the server
718       cluster.abortRegionServer(tableRegionIndex);
719       waitUntilRegionServerDead();
720       awaitDaughters(tableName, daughters.size());
721       // Assert daughters are online and ONLY the original daughters -- that
722       // fixup didn't insert one during server shutdown recover.
723       regions = cluster.getRegions(tableName);
724       for (HRegion d: daughters) {
725         LOG.info("Regions after crash: " + d);
726       }
727       assertEquals(daughters.size(), regions.size());
728       for (HRegion r: regions) {
729         LOG.info("Regions post crash " + r);
730         assertTrue("Missing region post crash " + r, daughters.contains(r));
731       }
732     } finally {
733       admin.setBalancerRunning(true, false);
734       cluster.getMaster().setCatalogJanitorEnabled(true);
735       t.close();
736     }
737   }
738 
739   @Test(timeout = 180000)
740   public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
741     TableName userTableName =
742         TableName.valueOf("testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles");
743     HTableDescriptor htd = new HTableDescriptor(userTableName);
744     HColumnDescriptor hcd = new HColumnDescriptor("col");
745     htd.addFamily(hcd);
746     admin.createTable(htd);
747     Table table = new HTable(TESTING_UTIL.getConfiguration(), userTableName);
748     try {
749       for (int i = 0; i <= 5; i++) {
750         String row = "row" + i;
751         Put p = new Put(row.getBytes());
752         String val = "Val" + i;
753         p.add("col".getBytes(), "ql".getBytes(), val.getBytes());
754         table.put(p);
755         admin.flush(userTableName.getName());
756         Delete d = new Delete(row.getBytes());
757         // Do a normal delete
758         table.delete(d);
759         admin.flush(userTableName.getName());
760       }
761       admin.majorCompact(userTableName.getName());
762       List<HRegionInfo> regionsOfTable = TESTING_UTIL.getMiniHBaseCluster()
763           .getMaster().getAssignmentManager().getRegionStates()
764           .getRegionsOfTable(userTableName);
765       HRegionInfo hRegionInfo = regionsOfTable.get(0);
766       Put p = new Put("row6".getBytes());
767       p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
768       table.put(p);
769       p = new Put("row7".getBytes());
770       p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
771       table.put(p);
772       p = new Put("row8".getBytes());
773       p.add("col".getBytes(), "ql".getBytes(), "val".getBytes());
774       table.put(p);
775       admin.flush(userTableName.getName());
776       admin.split(hRegionInfo.getRegionName(), "row7".getBytes());
777       regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
778           .getAssignmentManager().getRegionStates()
779           .getRegionsOfTable(userTableName);
780 
781       while (regionsOfTable.size() != 2) {
782         Thread.sleep(2000);
783         regionsOfTable = TESTING_UTIL.getMiniHBaseCluster().getMaster()
784             .getAssignmentManager().getRegionStates()
785             .getRegionsOfTable(userTableName);
786       }
787       Assert.assertEquals(2, regionsOfTable.size());
788       Scan s = new Scan();
789       ResultScanner scanner = table.getScanner(s);
790       int mainTableCount = 0;
791       for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
792         mainTableCount++;
793       }
794       Assert.assertEquals(3, mainTableCount);
795     } finally {
796       table.close();
797     }
798   }
799 
800   /**
801    * Noop Abortable implementation used below in tests.
802    */
803   static class UselessTestAbortable implements Abortable {
804     boolean aborted = false;
805     @Override
806     public void abort(String why, Throwable e) {
807       LOG.warn("ABORTED (But nothing to abort): why=" + why, e);
808       aborted = true;
809     }
810 
811     @Override
812     public boolean isAborted() {
813       return this.aborted;
814     }
815   }
816 
  /**
   * Verifies HBASE-5806.  When splitting is partially done and the master goes down
   * when the SPLIT node is in either SPLIT or SPLITTING state.
   *
   * The split is deliberately stalled via the master-side test flag, the master
   * is aborted and restarted, and we then assert the rebuilt region states show
   * the parent as SPLIT with no hosting server.
   *
   * @throws IOException
   * @throws InterruptedException
   * @throws NodeExistsException
   * @throws KeeperException
   * @throws DeserializationException
   */
  @Test(timeout = 400000)
  public void testMasterRestartWhenSplittingIsPartial()
      throws IOException, InterruptedException, NodeExistsException,
      KeeperException, DeserializationException, ServiceException {
    final TableName tableName = TableName.valueOf("testMasterRestartWhenSplittingIsPartial");

    if (!useZKForAssignment) {
      // This test doesn't apply if not using ZK for assignment
      return;
    }

    // Create table then get the single region for our new table.
    HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
    List<HRegion> regions = cluster.getRegions(tableName);
    HRegionInfo hri = getAndCheckSingleTableRegion(regions);

    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);

    // Turn off balancer so it doesn't cut in and mess up our placements.
    this.admin.setBalancerRunning(false, true);
    // Turn off the meta scanner so it don't remove parent on us.
    cluster.getMaster().setCatalogJanitorEnabled(false);
    // Dedicated watcher with a noop Abortable so a ZK hiccup doesn't kill the test JVM.
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
      "testMasterRestartWhenSplittingIsPartial", new UselessTestAbortable());
    try {
      // Add a bit of load up into the table so splittable.
      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
      // Get region pre-split.
      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
      printOutRegions(server, "Initial regions: ");
      // Now, before we split, set special flag in master, a flag that has
      // it FAIL the processing of split.
      AssignmentManager.setTestSkipSplitHandling(true);
      // Now try splitting and it should work.

      this.admin.split(hri.getRegionNameAsString());
      checkAndGetDaughters(tableName);
      // Assert the ephemeral node is up in zk.
      String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
      Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
      LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
          + stats);
      byte[] bytes = ZKAssign.getData(zkw, hri.getEncodedName());
      RegionTransition rtd = RegionTransition.parseFrom(bytes);
      // State could be SPLIT or SPLITTING.
      assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)
          || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));

      // abort and wait for new master.
      MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();

      // The old admin's connection died with the old master; open a fresh one.
      this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());

      // Update the region to be offline and split, so that HRegionInfo#equals
      // returns true in checking rebuilt region states map.
      hri.setOffline(true);
      hri.setSplit(true);
      ServerName regionServerOfRegion = master.getAssignmentManager()
        .getRegionStates().getRegionServerOfRegion(hri);
      assertTrue(regionServerOfRegion != null);

      // Remove the block so that split can move ahead.
      AssignmentManager.setTestSkipSplitHandling(false);
      String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
      Stat stat = new Stat();
      byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
      // Poll up to 60s for the master to finish the split and delete the znode.
      for (int i=0; data != null && i<60; i++) {
        Thread.sleep(1000);
        data = ZKUtil.getDataNoWatch(zkw, node, stat);
      }
      assertNull("Waited too long for ZK node to be removed: "+node, data);
      RegionStates regionStates = master.getAssignmentManager().getRegionStates();
      assertTrue("Split parent should be in SPLIT state",
        regionStates.isRegionInState(hri, State.SPLIT));
      regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
      // A SPLIT parent must no longer be hosted anywhere.
      assertTrue(regionServerOfRegion == null);
    } finally {
      // Set this flag back.
      AssignmentManager.setTestSkipSplitHandling(false);
      admin.setBalancerRunning(true, false);
      cluster.getMaster().setCatalogJanitorEnabled(true);
      t.close();
      zkw.close();
    }
  }
913 
  /**
   * Verifies HBASE-5806.  Here the case is that splitting is completed but before the
   * CJ could remove the parent region the master is killed and restarted.
   *
   * After the restart, the rebuilt region states must still show the parent in
   * SPLIT state with no hosting server.
   *
   * @throws IOException
   * @throws InterruptedException
   * @throws NodeExistsException
   * @throws KeeperException
   */
  @Test (timeout = 300000)
  public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
      throws IOException, InterruptedException, NodeExistsException,
      KeeperException, ServiceException {
    final TableName tableName = TableName
        .valueOf("testMasterRestartAtRegionSplitPendingCatalogJanitor");

    // Create table then get the single region for our new table.
    HTable t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
    List<HRegion> regions = cluster.getRegions(tableName);
    HRegionInfo hri = getAndCheckSingleTableRegion(regions);

    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);

    // Turn off balancer so it doesn't cut in and mess up our placements.
    this.admin.setBalancerRunning(false, true);
    // Turn off the meta scanner so it don't remove parent on us.
    cluster.getMaster().setCatalogJanitorEnabled(false);
    // Dedicated watcher with a noop Abortable so a ZK hiccup doesn't kill the test JVM.
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(t.getConfiguration(),
      "testMasterRestartAtRegionSplitPendingCatalogJanitor", new UselessTestAbortable());
    try {
      // Add a bit of load up into the table so splittable.
      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
      // Get region pre-split.
      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
      printOutRegions(server, "Initial regions: ");

      this.admin.split(hri.getRegionNameAsString());
      checkAndGetDaughters(tableName);
      // Assert the ephemeral node is up in zk.
      String path = ZKAssign.getNodeName(zkw, hri.getEncodedName());
      Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
      LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
          + stats);
      String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
      Stat stat = new Stat();
      byte[] data = ZKUtil.getDataNoWatch(zkw, node, stat);
      // Poll up to 60s for the split to complete and the znode to be deleted.
      for (int i=0; data != null && i<60; i++) {
        Thread.sleep(1000);
        data = ZKUtil.getDataNoWatch(zkw, node, stat);
      }
      assertNull("Waited too long for ZK node to be removed: "+node, data);

      // Kill the master before the CatalogJanitor could clean up the parent.
      MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();

      // The old admin's connection died with the old master; open a fresh one.
      this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());

      // Update the region to be offline and split, so that HRegionInfo#equals
      // returns true in checking rebuilt region states map.
      hri.setOffline(true);
      hri.setSplit(true);
      RegionStates regionStates = master.getAssignmentManager().getRegionStates();
      assertTrue("Split parent should be in SPLIT state",
        regionStates.isRegionInState(hri, State.SPLIT));
      ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
      // A SPLIT parent must no longer be hosted anywhere.
      assertTrue(regionServerOfRegion == null);
    } finally {
      this.admin.setBalancerRunning(true, false);
      cluster.getMaster().setCatalogJanitorEnabled(true);
      t.close();
      zkw.close();
    }
  }
986 
987   /**
988    *
989    * While transitioning node from RS_ZK_REGION_SPLITTING to
990    * RS_ZK_REGION_SPLITTING during region split,if zookeper went down split always
991    * fails for the region. HBASE-6088 fixes this scenario.
992    * This test case is to test the znode is deleted(if created) or not in roll back.
993    *
994    * @throws IOException
995    * @throws InterruptedException
996    * @throws KeeperException
997    */
998   @Test(timeout = 60000)
999   public void testSplitBeforeSettingSplittingInZK() throws Exception,
1000       InterruptedException, KeeperException {
1001     testSplitBeforeSettingSplittingInZKInternals();
1002   }
1003 
1004   @Test(timeout = 60000)
1005   public void testTableExistsIfTheSpecifiedTableRegionIsSplitParent() throws Exception {
1006     ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
1007     final TableName tableName =
1008         TableName.valueOf("testTableExistsIfTheSpecifiedTableRegionIsSplitParent");
1009     // Create table then get the single region for our new table.
1010     Table t = createTableAndWait(tableName, Bytes.toBytes("cf"));
1011     List<HRegion> regions = null;
1012     try {
1013       regions = cluster.getRegions(tableName);
1014       int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
1015         .getRegionName());
1016       HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
1017       insertData(tableName, admin, t);
1018       // Turn off balancer so it doesn't cut in and mess up our placements.
1019       admin.setBalancerRunning(false, true);
1020       // Turn off the meta scanner so it don't remove parent on us.
1021       cluster.getMaster().setCatalogJanitorEnabled(false);
1022       boolean tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
1023         tableName);
1024       assertEquals("The specified table should present.", true, tableExists);
1025       final HRegion region = findSplittableRegion(regions);
1026       assertTrue("not able to find a splittable region", region != null);
1027       SplitTransactionImpl st = new SplitTransactionImpl(region, Bytes.toBytes("row2"));
1028       try {
1029         st.prepare();
1030         st.createDaughters(regionServer, regionServer, null);
1031       } catch (IOException e) {
1032 
1033       }
1034       tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
1035         tableName);
1036       assertEquals("The specified table should present.", true, tableExists);
1037       Set<RegionState> rit = cluster.getMaster().getAssignmentManager().getRegionStates()
1038           .getRegionsInTransition();
1039       assertTrue(rit.size() == 3);
1040       cluster.getMaster().getAssignmentManager().regionOffline(st.getFirstDaughter());
1041       cluster.getMaster().getAssignmentManager().regionOffline(st.getSecondDaughter());
1042       cluster.getMaster().getAssignmentManager().regionOffline(region.getRegionInfo());
1043       rit = cluster.getMaster().getAssignmentManager().getRegionStates().getRegionsInTransition();
1044       assertTrue(rit.size() == 0);
1045     }
1046     finally {
1047       admin.setBalancerRunning(true, false);
1048       cluster.getMaster().setCatalogJanitorEnabled(true);
1049       t.close();
1050       TESTING_UTIL.deleteTable(tableName);
1051     }
1052   }
1053 
1054   @Test
1055   public void testSplitWithRegionReplicas() throws Exception {
1056     ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TESTING_UTIL);
1057     final TableName tableName =
1058         TableName.valueOf("foobar");
1059     HTableDescriptor htd = TESTING_UTIL.createTableDescriptor("foobar");
1060     htd.setRegionReplication(2);
1061     htd.addCoprocessor(SlowMeCopro.class.getName());
1062     // Create table then get the single region for our new table.
1063     HTable t = TESTING_UTIL.createTable(htd, new byte[][]{Bytes.toBytes("cf")},
1064         TESTING_UTIL.getConfiguration());
1065     int count;
1066     List<HRegion> oldRegions;
1067     do {
1068       oldRegions = cluster.getRegions(tableName);
1069       Thread.sleep(10);
1070     } while (oldRegions.size() != 2);
1071     for (HRegion h : oldRegions) LOG.debug("OLDREGION " + h.getRegionInfo());
1072     try {
1073       int regionServerIndex = cluster.getServerWith(oldRegions.get(0).getRegionInfo()
1074         .getRegionName());
1075       HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
1076       insertData(tableName, admin, t);
1077       // Turn off balancer so it doesn't cut in and mess up our placements.
1078       admin.setBalancerRunning(false, true);
1079       // Turn off the meta scanner so it don't remove parent on us.
1080       cluster.getMaster().setCatalogJanitorEnabled(false);
1081       boolean tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
1082           tableName);
1083       assertEquals("The specified table should be present.", true, tableExists);
1084       final HRegion region = findSplittableRegion(oldRegions);
1085       regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
1086       regionServer = cluster.getRegionServer(regionServerIndex);
1087       assertTrue("not able to find a splittable region", region != null);
1088       String node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1089           region.getRegionInfo().getEncodedName());
1090       regionServer.getZooKeeper().syncOrTimeout(node);
1091       SplitTransactionImpl st = new SplitTransactionImpl(region, Bytes.toBytes("row2"));
1092       try {
1093         st.prepare();
1094         st.execute(regionServer, regionServer);
1095       } catch (IOException e) {
1096         e.printStackTrace();
1097         fail("Split execution should have succeeded with no exceptions thrown " + e);
1098       }
1099       //TESTING_UTIL.waitUntilAllRegionsAssigned(tableName);
1100       List<HRegion> newRegions;
1101       do {
1102         newRegions = cluster.getRegions(tableName);
1103         for (HRegion h : newRegions) LOG.debug("NEWREGION " + h.getRegionInfo());
1104         Thread.sleep(1000);
1105       } while ((newRegions.contains(oldRegions.get(0)) || newRegions.contains(oldRegions.get(1)))
1106           || newRegions.size() != 4);
1107       tableExists = MetaTableAccessor.tableExists(regionServer.getConnection(),
1108           tableName);
1109       assertEquals("The specified table should be present.", true, tableExists);
1110       // exists works on stale and we see the put after the flush
1111       byte[] b1 = "row1".getBytes();
1112       Get g = new Get(b1);
1113       g.setConsistency(Consistency.STRONG);
1114       // The following GET will make a trip to the meta to get the new location of the 1st daughter
1115       // In the process it will also get the location of the replica of the daughter (initially
1116       // pointing to the parent's replica)
1117       Result r = t.get(g);
1118       Assert.assertFalse(r.isStale());
1119       LOG.info("exists stale after flush done");
1120 
1121       SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1));
1122       g = new Get(b1);
1123       g.setConsistency(Consistency.TIMELINE);
1124       // This will succeed because in the previous GET we get the location of the replica
1125       r = t.get(g);
1126       Assert.assertTrue(r.isStale());
1127       SlowMeCopro.getPrimaryCdl().get().countDown();
1128     } finally {
1129       SlowMeCopro.getPrimaryCdl().get().countDown();
1130       admin.setBalancerRunning(true, false);
1131       cluster.getMaster().setCatalogJanitorEnabled(true);
1132       t.close();
1133     }
1134   }
1135 
1136   private void insertData(final TableName tableName, HBaseAdmin admin, Table t)
1137       throws IOException {
1138     insertData(tableName, admin, t, 1);
1139   }
1140 
1141   private void insertData(TableName tableName, Admin admin, Table t, int i) throws IOException {
1142     Put p = new Put(Bytes.toBytes("row" + i));
1143     p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
1144     t.put(p);
1145     p = new Put(Bytes.toBytes("row" + (i + 1)));
1146     p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
1147     t.put(p);
1148     p = new Put(Bytes.toBytes("row" + (i + 2)));
1149     p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
1150     t.put(p);
1151     p = new Put(Bytes.toBytes("row" + (i + 3)));
1152     p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
1153     t.put(p);
1154     admin.flush(tableName);
1155   }
1156 
1157   /**
1158    * If a table has regions that have no store files in a region, they should split successfully
1159    * into two regions with no store files.
1160    */
1161   @Test(timeout = 60000)
1162   public void testSplitRegionWithNoStoreFiles()
1163       throws Exception {
1164     final TableName tableName =
1165         TableName.valueOf("testSplitRegionWithNoStoreFiles");
1166     // Create table then get the single region for our new table.
1167     createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
1168     List<HRegion> regions = cluster.getRegions(tableName);
1169     HRegionInfo hri = getAndCheckSingleTableRegion(regions);
1170     ensureTableRegionNotOnSameServerAsMeta(admin, hri);
1171     int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
1172       .getRegionName());
1173     HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
1174     // Turn off balancer so it doesn't cut in and mess up our placements.
1175     this.admin.setBalancerRunning(false, true);
1176     // Turn off the meta scanner so it don't remove parent on us.
1177     cluster.getMaster().setCatalogJanitorEnabled(false);
1178     try {
1179       // Precondition: we created a table with no data, no store files.
1180       printOutRegions(regionServer, "Initial regions: ");
1181       Configuration conf = cluster.getConfiguration();
1182       HBaseFsck.debugLsr(conf, new Path("/"));
1183       Path rootDir = FSUtils.getRootDir(conf);
1184       FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
1185       Map<String, Path> storefiles =
1186           FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
1187       assertEquals("Expected nothing but found " + storefiles.toString(), storefiles.size(), 0);
1188 
1189       // find a splittable region.  Refresh the regions list
1190       regions = cluster.getRegions(tableName);
1191       final HRegion region = findSplittableRegion(regions);
1192       assertTrue("not able to find a splittable region", region != null);
1193 
1194       // Now split.
1195       SplitTransactionImpl st = new MockedSplitTransaction(region, Bytes.toBytes("row2"));
1196       try {
1197         st.prepare();
1198         st.execute(regionServer, regionServer);
1199       } catch (IOException e) {
1200         fail("Split execution should have succeeded with no exceptions thrown");
1201       }
1202 
1203       // Postcondition: split the table with no store files into two regions, but still have not
1204       // store files
1205       List<HRegion> daughters = cluster.getRegions(tableName);
1206       assertTrue(daughters.size() == 2);
1207 
1208       // check dirs
1209       HBaseFsck.debugLsr(conf, new Path("/"));
1210       Map<String, Path> storefilesAfter =
1211           FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
1212       assertEquals("Expected nothing but found " + storefilesAfter.toString(),
1213           storefilesAfter.size(), 0);
1214 
1215       hri = region.getRegionInfo(); // split parent
1216       AssignmentManager am = cluster.getMaster().getAssignmentManager();
1217       RegionStates regionStates = am.getRegionStates();
1218       long start = EnvironmentEdgeManager.currentTime();
1219       while (!regionStates.isRegionInState(hri, State.SPLIT)) {
1220         assertFalse("Timed out in waiting split parent to be in state SPLIT",
1221           EnvironmentEdgeManager.currentTime() - start > 60000);
1222         Thread.sleep(500);
1223       }
1224 
1225       // We should not be able to assign it again
1226       am.assign(hri, true, true);
1227       assertFalse("Split region can't be assigned",
1228         regionStates.isRegionInTransition(hri));
1229       assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
1230 
1231       // We should not be able to unassign it either
1232       am.unassign(hri, true, null);
1233       assertFalse("Split region can't be unassigned",
1234         regionStates.isRegionInTransition(hri));
1235       assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
1236     } finally {
1237       admin.setBalancerRunning(true, false);
1238       cluster.getMaster().setCatalogJanitorEnabled(true);
1239     }
1240   }
1241 
1242   @Test(timeout = 180000)
1243   public void testSplitHooksBeforeAndAfterPONR() throws Exception {
1244     TableName firstTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_1");
1245     TableName secondTable = TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2");
1246     HColumnDescriptor hcd = new HColumnDescriptor("cf");
1247 
1248     HTableDescriptor desc = new HTableDescriptor(firstTable);
1249     desc.addCoprocessor(MockedRegionObserver.class.getName());
1250     desc.addFamily(hcd);
1251     admin.createTable(desc);
1252     TESTING_UTIL.waitUntilAllRegionsAssigned(firstTable);
1253 
1254     desc = new HTableDescriptor(secondTable);
1255     desc.addFamily(hcd);
1256     admin.createTable(desc);
1257     TESTING_UTIL.waitUntilAllRegionsAssigned(secondTable);
1258 
1259     List<HRegion> firstTableRegions = cluster.getRegions(firstTable);
1260     List<HRegion> secondTableRegions = cluster.getRegions(secondTable);
1261 
1262     // Check that both tables actually have regions.
1263     if (firstTableRegions.size() == 0 || secondTableRegions.size() == 0) {
1264       fail("Each table should have at least one region.");
1265     }
1266     ServerName serverName = cluster.getServerHoldingRegion(firstTable,
1267       firstTableRegions.get(0).getRegionInfo().getRegionName());
1268     admin.move(secondTableRegions.get(0).getRegionInfo().getEncodedNameAsBytes(),
1269       Bytes.toBytes(serverName.getServerName()));
1270     Table table1 = null;
1271     Table table2 = null;
1272     try {
1273       table1 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1274       table2 = new HTable(TESTING_UTIL.getConfiguration(), firstTable);
1275       insertData(firstTable, admin, table1);
1276       insertData(secondTable, admin, table2);
1277       admin.split(firstTable, "row2".getBytes());
1278       firstTableRegions = cluster.getRegions(firstTable);
1279       while (firstTableRegions.size() != 2) {
1280         Thread.sleep(1000);
1281         firstTableRegions = cluster.getRegions(firstTable);
1282       }
1283       assertEquals("Number of regions after split should be 2.", 2, firstTableRegions.size());
1284       secondTableRegions = cluster.getRegions(secondTable);
1285       assertEquals("Number of regions after split should be 2.", 2, secondTableRegions.size());
1286     } finally {
1287       if (table1 != null) {
1288         table1.close();
1289       }
1290       if (table2 != null) {
1291         table2.close();
1292       }
1293       TESTING_UTIL.deleteTable(firstTable);
1294       TESTING_UTIL.deleteTable(secondTable);
1295     }
1296   }
1297 
1298   private void testSplitBeforeSettingSplittingInZKInternals() throws Exception {
1299     final TableName tableName = TableName.valueOf("testSplitBeforeSettingSplittingInZK");
1300     try {
1301       // Create table then get the single region for our new table.
1302       createTableAndWait(tableName, Bytes.toBytes("cf"));
1303 
1304       List<HRegion> regions = awaitTableRegions(tableName);
1305       assertTrue("Table not online", cluster.getRegions(tableName).size() != 0);
1306 
1307       int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo()
1308         .getRegionName());
1309       HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
1310       final HRegion region = findSplittableRegion(regions);
1311       assertTrue("not able to find a splittable region", region != null);
1312       SplitTransactionImpl st = new MockedSplitTransaction(region, Bytes.toBytes("row2")) {
1313         @Override
1314         public PairOfSameType<Region> stepsBeforePONR(final Server server,
1315             final RegionServerServices services, boolean testing) throws IOException {
1316           throw new SplittingNodeCreationFailedException ();
1317         }
1318       };
1319       String node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1320           region.getRegionInfo().getEncodedName());
1321       regionServer.getZooKeeper().syncOrTimeout(node);
1322       for (int i = 0; i < 100; i++) {
1323         // We expect the znode to be deleted by this time. Here the
1324         // znode could be in OPENED state and the
1325         // master has not yet deleted the znode.
1326         if (ZKUtil.checkExists(regionServer.getZooKeeper(), node) != -1) {
1327           Thread.sleep(100);
1328         }
1329       }
1330       try {
1331         st.prepare();
1332         st.execute(regionServer, regionServer);
1333       } catch (IOException e) {
1334         // check for the specific instance in case the Split failed due to the
1335         // existence of the znode in OPENED state.
1336         // This will at least make the test to fail;
1337         assertTrue("Should be instance of CreateSplittingNodeFailedException",
1338             e instanceof SplittingNodeCreationFailedException );
1339         node = ZKAssign.getNodeName(regionServer.getZooKeeper(),
1340             region.getRegionInfo().getEncodedName());
1341         {
1342           assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1343         }
1344         assertTrue(st.rollback(regionServer, regionServer));
1345         assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
1346       }
1347     } finally {
1348       TESTING_UTIL.deleteTable(tableName);
1349     }
1350   }
1351 
1352   @Test
1353   public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck()
1354       throws Exception {
1355     final TableName tableName =
1356         TableName.valueOf("testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck");
1357     try {
1358       HTableDescriptor htd = new HTableDescriptor(tableName);
1359       htd.addFamily(new HColumnDescriptor("f"));
1360       htd.addFamily(new HColumnDescriptor("i_f"));
1361       htd.setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName());
1362       admin.createTable(htd);
1363       List<HRegion> regions = awaitTableRegions(tableName);
1364       HRegion region = regions.get(0);
1365       for(int i = 3;i<9;i++) {
1366         Put p = new Put(Bytes.toBytes("row"+i));
1367         p.add(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("value"+i));
1368         p.add(Bytes.toBytes("i_f"), Bytes.toBytes("q"), Bytes.toBytes("value"+i));
1369         region.put(p);
1370       }
1371       region.flush(true);
1372       Store store = region.getStore(Bytes.toBytes("f"));
1373       Collection<StoreFile> storefiles = store.getStorefiles();
1374       assertEquals(storefiles.size(), 1);
1375       assertFalse(region.hasReferences());
1376       Path referencePath =
1377           region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "f",
1378             storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
1379       assertNull(referencePath);
1380       referencePath =
1381           region.getRegionFileSystem().splitStoreFile(region.getRegionInfo(), "i_f",
1382             storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy());
1383       assertNotNull(referencePath);
1384     } finally {
1385       TESTING_UTIL.deleteTable(tableName);
1386     }
1387   }
1388 
1389   @Test(timeout = 120000)
1390   public void testFailedSplit() throws Exception {
1391     TableName tableName = TableName.valueOf("testFailedSplit");
1392     byte[] colFamily = Bytes.toBytes("info");
1393     TESTING_UTIL.createTable(tableName, colFamily);
1394     Connection connection = ConnectionFactory.createConnection(TESTING_UTIL.getConfiguration());
1395     HTable table = (HTable) connection.getTable(tableName);
1396     try {
1397       TESTING_UTIL.loadTable(table, colFamily);
1398       List<HRegionInfo> regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1399       assertTrue(regions.size() == 1);
1400       final HRegion actualRegion = cluster.getRegions(tableName).get(0);
1401       actualRegion.getCoprocessorHost().load(FailingSplitRegionObserver.class,
1402         Coprocessor.PRIORITY_USER, actualRegion.getBaseConf());
1403 
1404       // The following split would fail.
1405       admin.split(tableName);
1406       FailingSplitRegionObserver observer = (FailingSplitRegionObserver) actualRegion
1407           .getCoprocessorHost().findCoprocessor(FailingSplitRegionObserver.class.getName());
1408       assertNotNull(observer);
1409       observer.latch.await();
1410       observer.postSplit.await();
1411       LOG.info("Waiting for region to come out of RIT: " + actualRegion);
1412       TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
1413         @Override
1414         public boolean evaluate() throws Exception {
1415           RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
1416           return !regionStates.isRegionsInTransition();
1417         }
1418       });
1419       regions = TESTING_UTIL.getHBaseAdmin().getTableRegions(tableName);
1420       assertTrue(regions.size() == 1);
1421       RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
1422       Set<RegionState> rit = regionStates.getRegionsInTransition();
1423       assertTrue(rit.size() == 0);
1424     } finally {
1425       table.close();
1426       connection.close();
1427       TESTING_UTIL.deleteTable(tableName);
1428     }
1429   }
1430 
1431   @Test (timeout=300000)
1432   public void testSSHCleanupDaugtherRegionsOfAbortedSplit() throws Exception {
1433     TableName table = TableName.valueOf("testSSHCleanupDaugtherRegionsOfAbortedSplit");
1434     try {
1435       HTableDescriptor desc = new HTableDescriptor(table);
1436       desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
1437       admin.createTable(desc);
1438       HTable hTable = new HTable(cluster.getConfiguration(), desc.getTableName());
1439       for(int i = 1; i < 5; i++) {
1440         Put p1 = new Put(("r"+i).getBytes());
1441         p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
1442         hTable.put(p1);
1443       }
1444       admin.flush(desc.getTableName());
1445       List<HRegion> regions = cluster.getRegions(desc.getTableName());
1446       int serverWith = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
1447       HRegionServer regionServer = cluster.getRegionServer(serverWith);
1448       cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
1449       SplitTransactionImpl st = new SplitTransactionImpl(regions.get(0), Bytes.toBytes("r3"));
1450       st.prepare();
1451       st.stepsBeforePONR(regionServer, regionServer, false);
1452       Path tableDir =
1453           FSUtils.getTableDir(cluster.getMaster().getMasterFileSystem().getRootDir(),
1454             desc.getTableName());
1455       tableDir.getFileSystem(cluster.getConfiguration());
1456       List<Path> regionDirs =
1457           FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1458       assertEquals(3,regionDirs.size());
1459       cluster.startRegionServer();
1460       regionServer.kill();
1461       cluster.getRegionServerThreads().get(serverWith).join();
1462       // Wait until finish processing of shutdown
1463       TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
1464         @Override
1465         public boolean evaluate() throws Exception {
1466           return !cluster.getMaster().getServerManager().areDeadServersInProgress();
1467         }
1468       });
1469       // Wait until there are no more regions in transition
1470       TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
1471         @Override
1472         public boolean evaluate() throws Exception {
1473           return !cluster.getMaster().getAssignmentManager().
1474               getRegionStates().isRegionsInTransition();
1475         }
1476       });
1477       regionDirs =
1478           FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
1479       assertEquals(1,regionDirs.size());
1480     } finally {
1481       TESTING_UTIL.deleteTable(table);
1482     }
1483   }
1484 
1485     public static class MockedCoordinatedStateManager extends ZkCoordinatedStateManager {
1486 
1487         public void initialize(Server server, HRegion region) {
1488           this.server = server;
1489           this.watcher = server.getZooKeeper();
1490           splitTransactionCoordination = new MockedSplitTransactionCoordination(this, watcher, region);
1491           closeRegionCoordination = new ZkCloseRegionCoordination(this, watcher);
1492           openRegionCoordination = new ZkOpenRegionCoordination(this, watcher);
1493         }
1494       }
1495 
1496       public static class MockedSplitTransaction extends SplitTransactionImpl {
1497 
1498         private HRegion currentRegion;
1499         public MockedSplitTransaction(HRegion region, byte[] splitrow) {
1500           super(region, splitrow);
1501           this.currentRegion = region;
1502         }
1503         @Override
1504         public boolean rollback(Server server, RegionServerServices services) throws IOException {
1505           if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1506               .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1507             if(secondSplit){
1508               super.rollback(server, services);
1509               latch.countDown();
1510               return true;
1511             }
1512           }
1513           return super.rollback(server, services);
1514         }
1515 
1516 
1517       }
1518 
1519   public static class MockedSplitTransactionCoordination extends ZKSplitTransactionCoordination {
1520 
1521     private HRegion currentRegion;
1522 
1523     public MockedSplitTransactionCoordination(CoordinatedStateManager coordinationProvider,
1524         ZooKeeperWatcher watcher, HRegion region) {
1525       super(coordinationProvider, watcher);
1526       currentRegion = region;
1527     }
1528 
1529     @Override
1530     public void completeSplitTransaction(RegionServerServices services, Region a, Region b,
1531         SplitTransactionDetails std, Region parent) throws IOException {
1532       if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1533           .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1534         try {
1535           if (!secondSplit){
1536             callRollBack = true;
1537             latch.await();
1538           }
1539         } catch (InterruptedException e) {
1540         }
1541 
1542       }
1543       super.completeSplitTransaction(services, a, b, std, parent);
1544       if (this.currentRegion.getRegionInfo().getTable().getNameAsString()
1545           .equals("testShouldFailSplitIfZNodeDoesNotExistDueToPrevRollBack")) {
1546         firstSplitCompleted = true;
1547       }
1548     }
1549   }
1550 
1551   private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
1552     for (int i = 0; i < 5; ++i) {
1553       for (HRegion r: regions) {
1554         if (r.isSplittable() && r.getRegionInfo().getReplicaId() == 0) {
1555           return(r);
1556         }
1557       }
1558       Thread.sleep(100);
1559     }
1560     return(null);
1561   }
1562 
1563   private List<HRegion> checkAndGetDaughters(TableName tableName)
1564       throws InterruptedException {
1565     List<HRegion> daughters = null;
1566     // try up to 10s
1567     for (int i=0; i<100; i++) {
1568       daughters = cluster.getRegions(tableName);
1569       if (daughters.size() >= 2) break;
1570       Thread.sleep(100);
1571     }
1572     assertTrue(daughters.size() >= 2);
1573     return daughters;
1574   }
1575 
1576   private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
1577   throws IOException, InterruptedException {
1578     cluster.abortMaster(0);
1579     cluster.waitOnMaster(0);
1580     cluster.getConfiguration().setClass(HConstants.MASTER_IMPL,
1581         MockMasterWithoutCatalogJanitor.class, HMaster.class);
1582     MockMasterWithoutCatalogJanitor master = null;
1583     master = (MockMasterWithoutCatalogJanitor) cluster.startMaster().getMaster();
1584     cluster.waitForActiveAndReadyMaster();
1585     return master;
1586   }
1587 
  /**
   * Ask the admin to split the given region and wait (up to ~30s) until the
   * server hosts more than {@code regionCount} regions, i.e. the split took.
   * A RegionServerStoppedException is tolerated when ZK is not used for
   * assignment.
   */
  private void split(final HRegionInfo hri, final HRegionServer server, final int regionCount)
      throws IOException, InterruptedException {
    this.admin.split(hri.getRegionNameAsString());
    try {
      // Poll every 100ms, up to 300 times, for the online-region count to grow.
      for (int i = 0; ProtobufUtil.getOnlineRegions(
          server.getRSRpcServices()).size() <= regionCount && i < 300; i++) {
        LOG.debug("Waiting on region to split");
        Thread.sleep(100);
      }

      assertFalse("Waited too long for split",
        ProtobufUtil.getOnlineRegions(server.getRSRpcServices()).size() <= regionCount);
    } catch (RegionServerStoppedException e) {
      if (useZKForAssignment) {
        // With ZK-based assignment the server should not stop mid-split, so
        // rethrow; when not using ZK the exception may be expected and is
        // deliberately swallowed.
        LOG.error(e);
        throw e;
      }
    }
  }
1608 
1609   /**
1610    * Ensure single table region is not on same server as the single hbase:meta table
1611    * region.
1612    * @param admin
1613    * @param hri
1614    * @return Index of the server hosting the single table region
1615    * @throws UnknownRegionException
1616    * @throws MasterNotRunningException
1617    * @throws org.apache.hadoop.hbase.ZooKeeperConnectionException
1618    * @throws InterruptedException
1619    */
1620   private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin,
1621       final HRegionInfo hri)
1622   throws IOException, MasterNotRunningException,
1623   ZooKeeperConnectionException, InterruptedException {
1624     // Now make sure that the table region is not on same server as that hosting
1625     // hbase:meta  We don't want hbase:meta replay polluting our test when we later crash
1626     // the table region serving server.
1627     int metaServerIndex = cluster.getServerWithMeta();
1628     assertTrue(metaServerIndex != -1);
1629     HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
1630     int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1631     assertTrue(tableRegionIndex != -1);
1632     HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
1633     if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
1634       HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
1635       assertNotNull(hrs);
1636       assertNotNull(hri);
1637       LOG.info("Moving " + hri.getRegionNameAsString() + " from " +
1638         metaRegionServer.getServerName() + " to " +
1639         hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
1640       admin.move(hri.getEncodedNameAsBytes(), Bytes.toBytes(hrs.getServerName().toString()));
1641     }
1642     // Wait till table region is up on the server that is NOT carrying hbase:meta.
1643     for (int i = 0; i < 20; i++) {
1644       tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1645       if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
1646       LOG.debug("Waiting on region move off the hbase:meta server; current index " +
1647         tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
1648       Thread.sleep(1000);
1649     }
1650     assertTrue("Region not moved off hbase:meta server", tableRegionIndex != -1
1651         && tableRegionIndex != metaServerIndex);
1652     // Verify for sure table region is not on same server as hbase:meta
1653     tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1654     assertTrue(tableRegionIndex != -1);
1655     assertNotSame(metaServerIndex, tableRegionIndex);
1656     return tableRegionIndex;
1657   }
1658 
1659   /**
1660    * Find regionserver other than the one passed.
1661    * Can't rely on indexes into list of regionservers since crashed servers
1662    * occupy an index.
1663    * @param cluster
1664    * @param notThisOne
1665    * @return A regionserver that is not <code>notThisOne</code> or null if none
1666    * found
1667    */
1668   private HRegionServer getOtherRegionServer(final MiniHBaseCluster cluster,
1669       final HRegionServer notThisOne) {
1670     for (RegionServerThread rst: cluster.getRegionServerThreads()) {
1671       HRegionServer hrs = rst.getRegionServer();
1672       if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
1673       if (hrs.isStopping() || hrs.isStopped()) continue;
1674       return hrs;
1675     }
1676     return null;
1677   }
1678 
1679   private void printOutRegions(final HRegionServer hrs, final String prefix)
1680       throws IOException {
1681     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1682     for (HRegionInfo region: regions) {
1683       LOG.info(prefix + region.getRegionNameAsString());
1684     }
1685   }
1686 
1687   private void waitUntilRegionServerDead() throws InterruptedException, IOException {
1688     // Wait until the master processes the RS shutdown
1689     for (int i=0; cluster.getMaster().getClusterStatus().
1690         getServers().size() > NB_SERVERS && i<100; i++) {
1691       LOG.info("Waiting on server to go down");
1692       Thread.sleep(100);
1693     }
1694     assertFalse("Waited too long for RS to die", cluster.getMaster().getClusterStatus().
1695         getServers().size() > NB_SERVERS);
1696   }
1697 
1698   private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException {
1699     // Wait till regions are back on line again.
1700     for (int i=0; cluster.getRegions(tableName).size() < numDaughters && i<60; i++) {
1701       LOG.info("Waiting for repair to happen");
1702       Thread.sleep(1000);
1703     }
1704     if (cluster.getRegions(tableName).size() < numDaughters) {
1705       fail("Waiting too long for daughter regions");
1706     }
1707   }
1708 
1709   private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException {
1710     List<HRegion> regions = null;
1711     for (int i = 0; i < 100; i++) {
1712       regions = cluster.getRegions(tableName);
1713       if (regions.size() > 0) break;
1714       Thread.sleep(100);
1715     }
1716     return regions;
1717   }
1718 
1719   private HTable createTableAndWait(TableName tableName, byte[] cf) throws IOException,
1720       InterruptedException {
1721     HTable t = TESTING_UTIL.createTable(tableName, cf);
1722     awaitTableRegions(tableName);
1723     assertTrue("Table not online: " + tableName,
1724       cluster.getRegions(tableName).size() != 0);
1725     return t;
1726   }
1727 
  /**
   * Master implementation installed by {@link #abortAndWaitForMaster()}.
   * NOTE(review): no catalog-janitor-disabling override is visible in this
   * class body — presumably the name documents behavior configured elsewhere;
   * confirm against the rest of the file. As written it only forwards
   * construction to {@link HMaster}.
   */
  public static class MockMasterWithoutCatalogJanitor extends HMaster {

    public MockMasterWithoutCatalogJanitor(Configuration conf, CoordinatedStateManager cp)
      throws IOException, KeeperException,
        InterruptedException {
      super(conf, cp);
    }
  }
1736 
1737   private static class SplittingNodeCreationFailedException  extends IOException {
1738     private static final long serialVersionUID = 1652404976265623004L;
1739 
1740     public SplittingNodeCreationFailedException () {
1741       super();
1742     }
1743   }
1744 
  /**
   * Region observer that performs the split's before- and after-PONR steps
   * itself from the coprocessor hooks, supplying the meta mutations via
   * {@code metaEntries} rather than letting the split transaction write them.
   */
  public static class MockedRegionObserver extends BaseRegionObserver {
    // Split transaction started in preSplitBeforePONR; reused in preSplitAfterPONR.
    private SplitTransactionImpl st = null;
    // Daughters produced by stepsBeforePONR; consumed by stepsAfterPONR.
    private PairOfSameType<Region> daughterRegions = null;

    @Override
    public void preSplitBeforePONR(ObserverContext<RegionCoprocessorEnvironment> ctx,
        byte[] splitKey, List<Mutation> metaEntries) throws IOException {
      RegionCoprocessorEnvironment environment = ctx.getEnvironment();
      HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
      // Locate the online region that contains the split key; fall back to the
      // first online region if none matches.
      List<Region> onlineRegions =
          rs.getOnlineRegions(TableName.valueOf("testSplitHooksBeforeAndAfterPONR_2"));
      Region region = onlineRegions.get(0);
      for (Region r : onlineRegions) {
        if (r.getRegionInfo().containsRow(splitKey)) {
          region = r;
          break;
        }
      }
      st = new SplitTransactionImpl((HRegion) region, splitKey);
      if (!st.prepare()) {
        LOG.error("Prepare for the table " + region.getTableDesc().getNameAsString()
            + " failed. So returning null. ");
        // Abort the whole split; bypass() skips the default split path too.
        ctx.bypass();
        return;
      }
      ((HRegion)region).forceSplit(splitKey);
      // Run every split step up to (but not including) the point of no return.
      daughterRegions = st.stepsBeforePONR(rs, rs, false);
      // Meta copy of the parent must be marked offline and split.
      HRegionInfo copyOfParent = new HRegionInfo(region.getRegionInfo());
      copyOfParent.setOffline(true);
      copyOfParent.setSplit(true);
      // Put for parent
      Put putParent = MetaTableAccessor.makePutFromRegionInfo(copyOfParent);
      MetaTableAccessor.addDaughtersToPut(putParent, daughterRegions.getFirst().getRegionInfo(),
        daughterRegions.getSecond().getRegionInfo());
      metaEntries.add(putParent);
      // Puts for daughters
      Put putA = MetaTableAccessor.makePutFromRegionInfo(
        daughterRegions.getFirst().getRegionInfo());
      Put putB = MetaTableAccessor.makePutFromRegionInfo(
        daughterRegions.getSecond().getRegionInfo());
      // Record this server as the location for both daughters (seqNum 1).
      st.addLocation(putA, rs.getServerName(), 1);
      st.addLocation(putB, rs.getServerName(), 1);
      metaEntries.add(putA);
      metaEntries.add(putB);
    }

    @Override
    public void preSplitAfterPONR(ObserverContext<RegionCoprocessorEnvironment> ctx)
        throws IOException {
      RegionCoprocessorEnvironment environment = ctx.getEnvironment();
      HRegionServer rs = (HRegionServer) environment.getRegionServerServices();
      // Finish the split started in preSplitBeforePONR.
      st.stepsAfterPONR(rs, rs, daughterRegions, null);
    }

  }
1800 
1801   static class CustomSplitPolicy extends RegionSplitPolicy {
1802 
1803     @Override
1804     protected boolean shouldSplit() {
1805       return true;
1806     }
1807 
1808     @Override
1809     public boolean skipStoreFileRangeCheck(String familyName) {
1810       if(familyName.startsWith("i_")) {
1811         return true;
1812       } else {
1813         return false;
1814       }
1815     }
1816   }
1817 }
1818