View Javadoc

1   /**
2    * Copyright The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements. See the NOTICE file distributed with this
6    * work for additional information regarding copyright ownership. The ASF
7    * licenses this file to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   *
11   * http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
15   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
16   * License for the specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNull;
24  import static org.junit.Assert.assertTrue;
25  import static org.junit.Assert.fail;
26  
27  import java.io.IOException;
28  import java.util.ArrayList;
29  import java.util.List;
30  
31  import org.apache.commons.lang.math.RandomUtils;
32  import org.apache.commons.logging.Log;
33  import org.apache.commons.logging.LogFactory;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FileSystem;
36  import org.apache.hadoop.fs.Path;
37  import org.apache.hadoop.hbase.HBaseTestingUtility;
38  import org.apache.hadoop.hbase.HColumnDescriptor;
39  import org.apache.hadoop.hbase.HConstants;
40  import org.apache.hadoop.hbase.HRegionInfo;
41  import org.apache.hadoop.hbase.HTableDescriptor;
42  import org.apache.hadoop.hbase.MiniHBaseCluster;
43  import org.apache.hadoop.hbase.RegionTransition;
44  import org.apache.hadoop.hbase.ServerName;
45  import org.apache.hadoop.hbase.TableName;
46  import org.apache.hadoop.hbase.UnknownRegionException;
47  import org.apache.hadoop.hbase.MetaTableAccessor;
48  import org.apache.hadoop.hbase.Waiter;
49  import org.apache.hadoop.hbase.client.Admin;
50  import org.apache.hadoop.hbase.client.Put;
51  import org.apache.hadoop.hbase.client.RegionReplicaUtil;
52  import org.apache.hadoop.hbase.client.Result;
53  import org.apache.hadoop.hbase.client.ResultScanner;
54  import org.apache.hadoop.hbase.client.Scan;
55  import org.apache.hadoop.hbase.client.Table;
56  import org.apache.hadoop.hbase.exceptions.MergeRegionException;
57  import org.apache.hadoop.hbase.executor.EventType;
58  import org.apache.hadoop.hbase.master.AssignmentManager;
59  import org.apache.hadoop.hbase.master.HMaster;
60  import org.apache.hadoop.hbase.master.RegionState.State;
61  import org.apache.hadoop.hbase.master.RegionStates;
62  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
63  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
64  import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration;
65  import org.apache.hadoop.hbase.testclassification.LargeTests;
66  import org.apache.hadoop.hbase.util.Bytes;
67  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
68  import org.apache.hadoop.hbase.util.FSUtils;
69  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
70  import org.apache.hadoop.hbase.util.Pair;
71  import org.apache.hadoop.hbase.util.PairOfSameType;
72  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
73  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
74  import org.apache.hadoop.util.StringUtils;
75  import org.apache.zookeeper.data.Stat;
76  import org.junit.AfterClass;
77  import org.junit.BeforeClass;
78  import org.junit.Test;
79  import org.junit.experimental.categories.Category;
80  
81  import com.google.common.base.Joiner;
82  
/**
 * Like {@link TestRegionMergeTransaction} in that we're testing
 * {@link RegionMergeTransactionImpl}, only the tests below run against a running
 * cluster whereas {@link TestRegionMergeTransaction} tests against a bare
 * {@link HRegion}.
 */
89  @Category(LargeTests.class)
90  public class TestRegionMergeTransactionOnCluster {
91    private static final Log LOG = LogFactory
92        .getLog(TestRegionMergeTransactionOnCluster.class);
93    private static final int NB_SERVERS = 3;
94  
95    private static final byte[] FAMILYNAME = Bytes.toBytes("fam");
96    private static final byte[] QUALIFIER = Bytes.toBytes("q");
97  
98    private static byte[] ROW = Bytes.toBytes("testRow");
99    private static final int INITIAL_REGION_NUM = 10;
100   private static final int ROWSIZE = 200;
101   private static byte[][] ROWS = makeN(ROW, ROWSIZE);
102 
103   private static int waitTime = 60 * 1000;
104 
105   static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
106 
107   private static HMaster master;
108   private static Admin admin;
109   static MiniHBaseCluster cluster;
110   static Configuration conf;
111 
112   static void setupOnce() throws Exception {
113     // Start a cluster
114     //Make sure discharger does not interfere with tests that control discharger
115     TEST_UTIL.getConfiguration().setInt(CompactionConfiguration.HBASE_HFILE_COMPACTION_DISCHARGER_INTERVAL,
116         Integer.MAX_VALUE);
117     TEST_UTIL.startMiniCluster(NB_SERVERS);
118     cluster = TEST_UTIL.getHBaseCluster();
119     master = cluster.getMaster();
120     master.balanceSwitch(false);
121     admin = TEST_UTIL.getHBaseAdmin();
122   }
123 
124   @BeforeClass
125   public static void beforeAllTests() throws Exception {
126     conf = TEST_UTIL.getConfiguration();
127     // Use ZK for region assignment
128     conf.setBoolean("hbase.assignment.usezk", true);
129     setupOnce();
130   }
131 
132   @AfterClass
133   public static void afterAllTests() throws Exception {
134     TEST_UTIL.shutdownMiniCluster();
135   }
136 
137   @Test
138   public void testWholesomeMerge() throws Exception {
139     LOG.info("Starting testWholesomeMerge");
140     final TableName tableName =
141         TableName.valueOf("testWholesomeMerge");
142 
143     // Create table and load data.
144     Table table = createTableAndLoadData(master, tableName);
145     // Merge 1st and 2nd region
146     mergeRegionsAndVerifyRegionNum(master, tableName, 0, 1,
147         INITIAL_REGION_NUM - 1);
148 
149     // Merge 2nd and 3th region
150     PairOfSameType<HRegionInfo> mergedRegions =
151       mergeRegionsAndVerifyRegionNum(master, tableName, 1, 2,
152         INITIAL_REGION_NUM - 2);
153 
154     verifyRowCount(table, ROWSIZE);
155 
156     // Randomly choose one of the two merged regions
157     HRegionInfo hri = RandomUtils.nextBoolean() ?
158       mergedRegions.getFirst() : mergedRegions.getSecond();
159     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
160     AssignmentManager am = cluster.getMaster().getAssignmentManager();
161     RegionStates regionStates = am.getRegionStates();
162     long start = EnvironmentEdgeManager.currentTime();
163     while (!regionStates.isRegionInState(hri, State.MERGED)) {
164       assertFalse("Timed out in waiting one merged region to be in state MERGED",
165         EnvironmentEdgeManager.currentTime() - start > 60000);
166       Thread.sleep(500);
167     }
168 
169     // We should not be able to assign it again
170     am.assign(hri, true, true);
171     assertFalse("Merged region can't be assigned",
172       regionStates.isRegionInTransition(hri));
173     assertTrue(regionStates.isRegionInState(hri, State.MERGED));
174 
175     // We should not be able to unassign it either
176     am.unassign(hri, true, null);
177     assertFalse("Merged region can't be unassigned",
178       regionStates.isRegionInTransition(hri));
179     assertTrue(regionStates.isRegionInState(hri, State.MERGED));
180 
181     table.close();
182   }
183 
184   @Test
185   public void testCleanMergeReference() throws Exception {
186     LOG.info("Starting testCleanMergeReference");
187     admin.enableCatalogJanitor(false);
188     try {
189       final TableName tableName =
190           TableName.valueOf("testCleanMergeReference");
191       // Create table and load data.
192       Table table = createTableAndLoadData(master, tableName);
193       // Merge 1st and 2nd region
194       mergeRegionsAndVerifyRegionNum(master, tableName, 0, 1,
195           INITIAL_REGION_NUM - 1);
196       verifyRowCount(table, ROWSIZE);
197       table.close();
198 
199       List<Pair<HRegionInfo, ServerName>> tableRegions = MetaTableAccessor
200           .getTableRegionsAndLocations(master.getZooKeeper(), master.getConnection(), tableName);
201       HRegionInfo mergedRegionInfo = tableRegions.get(0).getFirst();
202       HTableDescriptor tableDescriptor = master.getTableDescriptors().get(
203           tableName);
204       Result mergedRegionResult = MetaTableAccessor.getRegionResult(
205         master.getConnection(), mergedRegionInfo.getRegionName());
206 
207       // contains merge reference in META
208       assertTrue(mergedRegionResult.getValue(HConstants.CATALOG_FAMILY,
209           HConstants.MERGEA_QUALIFIER) != null);
210       assertTrue(mergedRegionResult.getValue(HConstants.CATALOG_FAMILY,
211           HConstants.MERGEB_QUALIFIER) != null);
212 
213       // merging regions' directory are in the file system all the same
214       HRegionInfo regionA = HRegionInfo.getHRegionInfo(mergedRegionResult,
215           HConstants.MERGEA_QUALIFIER);
216       HRegionInfo regionB = HRegionInfo.getHRegionInfo(mergedRegionResult,
217           HConstants.MERGEB_QUALIFIER);
218       FileSystem fs = master.getMasterFileSystem().getFileSystem();
219       Path rootDir = master.getMasterFileSystem().getRootDir();
220 
221       Path tabledir = FSUtils.getTableDir(rootDir, mergedRegionInfo.getTable());
222       Path regionAdir = new Path(tabledir, regionA.getEncodedName());
223       Path regionBdir = new Path(tabledir, regionB.getEncodedName());
224       assertTrue(fs.exists(regionAdir));
225       assertTrue(fs.exists(regionBdir));
226 
227       HColumnDescriptor[] columnFamilies = tableDescriptor.getColumnFamilies();
228       HRegionFileSystem hrfs = new HRegionFileSystem(
229         TEST_UTIL.getConfiguration(), fs, tabledir, mergedRegionInfo);
230       int count = 0;
231       for(HColumnDescriptor colFamily : columnFamilies) {
232         count += hrfs.getStoreFiles(colFamily.getName()).size();
233       }
234       admin.compactRegion(mergedRegionInfo.getRegionName());
235       // clean up the merged region store files
236       // wait until merged region have reference file
237       long timeout = System.currentTimeMillis() + waitTime;
238       int newcount = 0;
239       while (System.currentTimeMillis() < timeout) {
240         for(HColumnDescriptor colFamily : columnFamilies) {
241           newcount += hrfs.getStoreFiles(colFamily.getName()).size();
242         }
243         if(newcount > count &&
244             //compacted file added to directory, let's make sure compaction is actually done with the commit
245             admin.getCompactionState(tableName) == AdminProtos.GetRegionInfoResponse.CompactionState.NONE) {
246           break;
247         }
248         Thread.sleep(50);
249       }
250       assertTrue(newcount > count);
251       List<RegionServerThread> regionServerThreads = TEST_UTIL.getHBaseCluster()
252           .getRegionServerThreads();
253       for (RegionServerThread rs : regionServerThreads) {
254         CompactedHFilesDischarger cleaner = new CompactedHFilesDischarger(100, null,
255             rs.getRegionServer(), false);
256         cleaner.chore();
257         Thread.sleep(1000);
258       }
259       int newcount1 = 0;
260       while (System.currentTimeMillis() < timeout) {
261         for(HColumnDescriptor colFamily : columnFamilies) {
262           newcount1 += hrfs.getStoreFiles(colFamily.getName()).size();
263         }
264         if(newcount1 <= 1) {
265           break;
266         }
267         Thread.sleep(50);
268       }
269       // run CatalogJanitor to clean merge references in hbase:meta and archive the
270       // files of merging regions
271       int cleaned = admin.runCatalogScan();
272       assertTrue(cleaned > 0);
273       assertFalse(fs.exists(regionAdir));
274       assertFalse(fs.exists(regionBdir));
275 
276       mergedRegionResult = MetaTableAccessor.getRegionResult(
277         master.getConnection(), mergedRegionInfo.getRegionName());
278       assertFalse(mergedRegionResult.getValue(HConstants.CATALOG_FAMILY,
279           HConstants.MERGEA_QUALIFIER) != null);
280       assertFalse(mergedRegionResult.getValue(HConstants.CATALOG_FAMILY,
281           HConstants.MERGEB_QUALIFIER) != null);
282 
283     } finally {
284       admin.enableCatalogJanitor(true);
285     }
286   }
287 
288   /**
289    * This test tests 1, merging region not online;
290    * 2, merging same two regions; 3, merging unknown regions.
291    * They are in one test case so that we don't have to create
292    * many tables, and these tests are simple.
293    */
294   @Test
295   public void testMerge() throws Exception {
296     LOG.info("Starting testMerge");
297     final TableName tableName = TableName.valueOf("testMerge");
298 
299     try {
300       // Create table and load data.
301       Table table = createTableAndLoadData(master, tableName);
302       RegionStates regionStates = master.getAssignmentManager().getRegionStates();
303       List<HRegionInfo> regions = regionStates.getRegionsOfTable(tableName);
304       // Fake offline one region
305       HRegionInfo a = regions.get(0);
306       HRegionInfo b = regions.get(1);
307       regionStates.regionOffline(a);
308       try {
309         // Merge offline region. Region a is offline here
310         admin.mergeRegions(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false);
311         fail("Offline regions should not be able to merge");
312       } catch (IOException ie) {
313         System.out.println(ie);
314         assertTrue("Exception should mention regions not online",
315           StringUtils.stringifyException(ie).contains("regions not online")
316             && ie instanceof MergeRegionException);
317       }
318       try {
319         // Merge the same region: b and b.
320         admin.mergeRegions(b.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), true);
321         fail("A region should not be able to merge with itself, even forcifully");
322       } catch (IOException ie) {
323         assertTrue("Exception should mention regions not online",
324           StringUtils.stringifyException(ie).contains("region to itself")
325             && ie instanceof MergeRegionException);
326       }
327       try {
328         // Merge unknown regions
329         admin.mergeRegions(Bytes.toBytes("-f1"), Bytes.toBytes("-f2"), true);
330         fail("Unknown region could not be merged");
331       } catch (IOException ie) {
332         assertTrue("UnknownRegionException should be thrown",
333           ie instanceof UnknownRegionException);
334       }
335       table.close();
336     } finally {
337       TEST_UTIL.deleteTable(tableName);
338     }
339   }
340 
341   @Test
342   public void testMergeWithReplicas() throws Exception {
343     final TableName tableName = TableName.valueOf("testMergeWithReplicas");
344     // Create table and load data.
345     createTableAndLoadData(master, tableName, 5, 2);
346     List<Pair<HRegionInfo, ServerName>> initialRegionToServers =
347         MetaTableAccessor.getTableRegionsAndLocations(master.getZooKeeper(), master.getConnection(),
348            tableName);
349     // Merge 1st and 2nd region
350     PairOfSameType<HRegionInfo> mergedRegions = mergeRegionsAndVerifyRegionNum(master, tableName,
351         0, 2, 5 * 2 - 2);
352     List<Pair<HRegionInfo, ServerName>> currentRegionToServers =
353         MetaTableAccessor.getTableRegionsAndLocations(master.getZooKeeper(), master.getConnection(),
354            tableName);
355     List<HRegionInfo> initialRegions = new ArrayList<HRegionInfo>();
356     for (Pair<HRegionInfo, ServerName> p : initialRegionToServers) {
357       initialRegions.add(p.getFirst());
358     }
359     List<HRegionInfo> currentRegions = new ArrayList<HRegionInfo>();
360     for (Pair<HRegionInfo, ServerName> p : currentRegionToServers) {
361       currentRegions.add(p.getFirst());
362     }
363     assertTrue(initialRegions.contains(mergedRegions.getFirst())); //this is the first region
364     assertTrue(initialRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
365         mergedRegions.getFirst(), 1))); //this is the replica of the first region
366     assertTrue(initialRegions.contains(mergedRegions.getSecond())); //this is the second region
367     assertTrue(initialRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
368         mergedRegions.getSecond(), 1))); //this is the replica of the second region
369     assertTrue(!initialRegions.contains(currentRegions.get(0))); //this is the new region
370     assertTrue(!initialRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
371         currentRegions.get(0), 1))); //replica of the new region
372     assertTrue(currentRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
373         currentRegions.get(0), 1))); //replica of the new region
374     assertTrue(!currentRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
375         mergedRegions.getFirst(), 1))); //replica of the merged region
376     assertTrue(!currentRegions.contains(RegionReplicaUtil.getRegionInfoForReplica(
377         mergedRegions.getSecond(), 1))); //replica of the merged region
378   }
379 
380   private PairOfSameType<HRegionInfo> mergeRegionsAndVerifyRegionNum(
381       HMaster master, TableName tablename,
382       int regionAnum, int regionBnum, int expectedRegionNum) throws Exception {
383     PairOfSameType<HRegionInfo> mergedRegions =
384       requestMergeRegion(master, tablename, regionAnum, regionBnum);
385     waitAndVerifyRegionNum(master, tablename, expectedRegionNum);
386     return mergedRegions;
387   }
388 
389   private PairOfSameType<HRegionInfo> requestMergeRegion(
390       HMaster master, TableName tablename,
391       int regionAnum, int regionBnum) throws Exception {
392     List<Pair<HRegionInfo, ServerName>> tableRegions = MetaTableAccessor
393         .getTableRegionsAndLocations(master.getZooKeeper(),
394           master.getConnection(), tablename);
395     HRegionInfo regionA = tableRegions.get(regionAnum).getFirst();
396     HRegionInfo regionB = tableRegions.get(regionBnum).getFirst();
397     TEST_UTIL.getHBaseAdmin().mergeRegions(
398       regionA.getEncodedNameAsBytes(),
399       regionB.getEncodedNameAsBytes(), false);
400     return new PairOfSameType<HRegionInfo>(regionA, regionB);
401   }
402 
403   private void waitAndVerifyRegionNum(HMaster master, TableName tablename,
404       int expectedRegionNum) throws Exception {
405     List<Pair<HRegionInfo, ServerName>> tableRegionsInMeta;
406     List<HRegionInfo> tableRegionsInMaster;
407     long timeout = System.currentTimeMillis() + waitTime;
408     while (System.currentTimeMillis() < timeout) {
409       tableRegionsInMeta = MetaTableAccessor.getTableRegionsAndLocations(master.getZooKeeper(),
410         master.getConnection(), tablename);
411       tableRegionsInMaster = master.getAssignmentManager().getRegionStates()
412           .getRegionsOfTable(tablename);
413       if (tableRegionsInMeta.size() == expectedRegionNum
414           && tableRegionsInMaster.size() == expectedRegionNum) {
415         break;
416       }
417       Thread.sleep(250);
418     }
419 
420     tableRegionsInMeta = MetaTableAccessor.getTableRegionsAndLocations(master.getZooKeeper(),
421       master.getConnection(), tablename);
422     LOG.info("Regions after merge:" + Joiner.on(',').join(tableRegionsInMeta));
423     assertEquals(expectedRegionNum, tableRegionsInMeta.size());
424   }
425 
426   private Table createTableAndLoadData(HMaster master, TableName tablename)
427       throws Exception {
428     return createTableAndLoadData(master, tablename, INITIAL_REGION_NUM, 1);
429   }
430 
431   private Table createTableAndLoadData(HMaster master, TableName tablename,
432       int numRegions, int replication) throws Exception {
433     assertTrue("ROWSIZE must > numregions:" + numRegions, ROWSIZE > numRegions);
434     byte[][] splitRows = new byte[numRegions - 1][];
435     for (int i = 0; i < splitRows.length; i++) {
436       splitRows[i] = ROWS[(i + 1) * ROWSIZE / numRegions];
437     }
438 
439     Table table = TEST_UTIL.createTable(tablename, FAMILYNAME, splitRows);
440     if (replication > 1) {
441       HBaseTestingUtility.setReplicas(admin, tablename, replication);
442     }
443     loadData(table);
444     verifyRowCount(table, ROWSIZE);
445 
446     // sleep here is an ugly hack to allow region transitions to finish
447     long timeout = System.currentTimeMillis() + waitTime;
448     List<Pair<HRegionInfo, ServerName>> tableRegions;
449     while (System.currentTimeMillis() < timeout) {
450       tableRegions = MetaTableAccessor.getTableRegionsAndLocations(master.getZooKeeper(),
451         master.getConnection(), tablename);
452       if (tableRegions.size() == numRegions * replication)
453         break;
454       Thread.sleep(250);
455     }
456 
457     tableRegions = MetaTableAccessor.getTableRegionsAndLocations(
458       master.getZooKeeper(),
459       master.getConnection(), tablename);
460     LOG.info("Regions after load: " + Joiner.on(',').join(tableRegions));
461     assertEquals(numRegions * replication, tableRegions.size());
462     return table;
463   }
464 
465   private static byte[][] makeN(byte[] base, int n) {
466     byte[][] ret = new byte[n][];
467     for (int i = 0; i < n; i++) {
468       ret[i] = Bytes.add(base, Bytes.toBytes(String.format("%04d", i)));
469     }
470     return ret;
471   }
472 
473   private void loadData(Table table) throws IOException {
474     for (int i = 0; i < ROWSIZE; i++) {
475       Put put = new Put(ROWS[i]);
476       put.add(FAMILYNAME, QUALIFIER, Bytes.toBytes(i));
477       table.put(put);
478     }
479   }
480 
481   private void verifyRowCount(Table table, int expectedRegionNum)
482       throws IOException {
483     ResultScanner scanner = table.getScanner(new Scan());
484     int rowCount = 0;
485     while (scanner.next() != null) {
486       rowCount++;
487     }
488     assertEquals(expectedRegionNum, rowCount);
489     scanner.close();
490   }
491 
492   /**
493    * A test that intentionally has master fail the processing of the merge message.
494    * Tests that the regionserver merge ephemeral node gets cleaned up if it
495    * crashes and that after we process server shutdown, the parent regions are online and
496    * merged region is cleaned up.
497    */
498   @Test (timeout = 60000)
499   public void testMergeIsRolledBackOnMergeFailure() throws Exception {
500 
501     final RegionStates regionStates = master.getAssignmentManager().getRegionStates();
502     final ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
503 
504     final TableName tableName = TableName.valueOf("testMergeIsRolledBackOnMergeFailure");
505     // Create table with 2 regions as its easy for us to merge.
506     createTableAndLoadData(master, tableName, 2, 1);
507     List<HRegion> regions = cluster.getRegions(tableName);
508 
509     assertEquals("Table shudn't have more than 2 regions, " + regions, 2, regions.size());
510     final HRegionInfo regionA = regions.get(0).getRegionInfo();
511     final HRegionInfo regionB = regions.get(1).getRegionInfo();
512 
513     // Turn off balancer so it doesn't cut in and mess up our placements.
514     admin.setBalancerRunning(false, true);
515     // Turn off the meta scanner so it don't remove parent on us.
516     master.setCatalogJanitorEnabled(false);
517 
518     // Start a server and move both the regions to it. We kill this server later.
519     HRegionServer regionServer = cluster.startRegionServer().getRegionServer();
520     moveRegionToServer(regionA, regionServer);
521     moveRegionToServer(regionB, regionServer);
522 
523     int serverIndex = cluster.getServerWith(regionA.getRegionName());
524 
525     // This helps with server aborts later.
526     TEST_UTIL.compact(tableName, true);
527 
528     try {
529       printOutRegions(regionServer, "Initial regions: ");
530 
531       // Now, before we merge, set special flag in master, a flag that has
532       // it FAIL the processing of merge.
533       AssignmentManager.setTestSkipMergeHandling(true);
534       admin.mergeRegions(regionA.getRegionName(), regionB.getRegionName(), false);
535 
536       // Lets wait until we have a merge region.
537       TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
538         @Override
539         public boolean evaluate() throws Exception {
540           return regionStates.getRegionByStateOfTable(tableName).get(State.MERGING_NEW).size() > 0;
541         }
542       });
543 
544       List<HRegionInfo> mergedRegions =
545           regionStates.getRegionByStateOfTable(tableName).get(State.MERGING_NEW);
546       assertEquals("Only one region should be in MERGING_NEW state", 1, mergedRegions.size());
547       final HRegionInfo merge = mergedRegions.get(0);
548 
549       // Lets double check if we have the merge Znode with the appr. state.
550       final String path = ZKAssign.getNodeName(zkw, merge.getEncodedName());
551       // Wait till the znode moved to MERGED
552       TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
553         @Override public boolean evaluate() throws Exception {
554           Stat stats = zkw.getRecoverableZooKeeper().exists(path, false);
555           RegionTransition rt =
556               RegionTransition.parseFrom(ZKAssign.getData(zkw, merge.getEncodedName()));
557           return stats != null && rt.getEventType().equals(EventType.RS_ZK_REGION_MERGED);
558         }
559       });
560 
561       // Now crash the server
562       abortServerAndWaitForProcessingToComplete(serverIndex);
563       waitUntilRegionServerDead();
564 
565       TEST_UTIL.waitUntilNoRegionsInTransition();
566 
567       // Lets wait until merge parents are online.
568       TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
569         @Override
570         public boolean evaluate() {
571           return cluster.getRegions(tableName).size() == 2;
572         }
573       });
574 
575       // Check if merge regions is cleaned up.
576       List<HRegionInfo> tableRegions = MetaTableAccessor.getTableRegions(zkw,
577           cluster.getMaster().getConnection(), tableName);
578       assertEquals("Only parent regions should be present, but we have: " + tableRegions,
579           2, tableRegions.size());
580       assertTrue("Merge A not present? " + regionA, tableRegions.contains(regionA));
581       assertTrue("Merge B not present? " + regionB, tableRegions.contains(regionB));
582 
583       // Are both merge parents online?
584       assertTrue("region should be online, " + regionA, regionStates.isRegionOnline(regionA));
585       assertTrue("region should be online, " + regionB, regionStates.isRegionOnline(regionB));
586 
587       // Have HDFS dirs been cleaned up?
588       Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), tableName);
589       List<Path> regionDirs =
590           FSUtils.getRegionDirs(cluster.getMaster().getFileSystem(), tableDir);
591       assertEquals("Only two region dir should be present, we have, dirs: " + regionDirs,
592           2, regionDirs.size());
593 
594       assertTrue("Region dir doesn't belong to region: " + regionA + " dir: " + regionDirs,
595           regionDirs.get(0).getName().endsWith(regionA.getEncodedName())
596               || regionDirs.get(1).getName().endsWith(regionA.getEncodedName()));
597       assertTrue("Region dir doesn't belong to region: " + regionB + " dir: " + regionDirs,
598           regionDirs.get(0).getName().endsWith(regionB.getEncodedName())
599               || regionDirs.get(1).getName().endsWith(regionB.getEncodedName()));
600 
601       // The merged Znode should have been cleaned up.
602       Stat stat = zkw.getRecoverableZooKeeper().exists(path, false);
603       assertNull("Merged znode shouldn't exist, but we have stat: " + stat, stat);
604 
605     } finally {
606       // Set this flag back.
607       AssignmentManager.setTestSkipMergeHandling(false);
608       admin.setBalancerRunning(true, false);
609       master.setCatalogJanitorEnabled(true);
610       TEST_UTIL.deleteTable(tableName);
611     }
612   }
613 
614   private void moveRegionToServer(final HRegionInfo region, final HRegionServer rs)
615       throws Exception {
616     admin.move(region.getEncodedNameAsBytes(), rs.getServerName().toString().getBytes());
617     TEST_UTIL.waitUntilNoRegionsInTransition();
618     TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
619       @Override
620       public boolean evaluate() throws Exception {
621         return rs.getOnlineRegion(region.getRegionName()) != null;
622       }
623     });
624   }
625 
626   private void waitUntilRegionServerDead() throws InterruptedException, IOException {
627     // Wait until the master processes the RS shutdown
628     for (int i=0; cluster.getMaster().getClusterStatus().
629         getServers().size() > NB_SERVERS && i<100; i++) {
630       LOG.info("Waiting on server to go down");
631       Thread.sleep(100);
632     }
633     assertFalse("Waited too long for RS to die", cluster.getMaster().getClusterStatus().
634         getServers().size() > NB_SERVERS);
635   }
636 
637   private void printOutRegions(final HRegionServer hrs, final String prefix)
638       throws IOException {
639     List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
640     for (HRegionInfo region: regions) {
641       LOG.info(prefix + region.getRegionNameAsString());
642     }
643   }
644 
645   private void abortServerAndWaitForProcessingToComplete(int serverIndex) throws Exception {
646 
647     final HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
648     cluster.abortRegionServer(serverIndex);
649     TEST_UTIL.waitFor(60000, new Waiter.Predicate<Exception>() {
650       @Override
651       public boolean evaluate() throws Exception {
652         return master.getServerManager().areDeadServersInProgress();
653       }
654     });
655   }
656 }