1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.client;
20
21 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
23 import static org.junit.Assert.assertTrue;
24 import static org.junit.Assert.fail;
25
26 import java.io.IOException;
27 import java.util.Arrays;
28 import java.util.Collection;
29 import java.util.List;
30 import java.util.concurrent.ExecutorService;
31
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.apache.hadoop.conf.Configuration;
35 import org.apache.hadoop.hbase.Abortable;
36 import org.apache.hadoop.hbase.CategoryBasedTimeout;
37 import org.apache.hadoop.hbase.HBaseTestingUtility;
38 import org.apache.hadoop.hbase.HConstants;
39 import org.apache.hadoop.hbase.HRegionInfo;
40 import org.apache.hadoop.hbase.HRegionLocation;
41 import org.apache.hadoop.hbase.MetaTableAccessor;
42 import org.apache.hadoop.hbase.RegionLocations;
43 import org.apache.hadoop.hbase.ServerName;
44 import org.apache.hadoop.hbase.TableName;
45 import org.apache.hadoop.hbase.TableNotFoundException;
46 import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
47 import org.apache.hadoop.hbase.client.ConnectionManager.HConnectionImplementation;
48 import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
49 import org.apache.hadoop.hbase.testclassification.LargeTests;
50 import org.apache.hadoop.hbase.util.Bytes;
51 import org.apache.hadoop.hbase.util.HBaseFsck;
52 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
53 import org.apache.hadoop.hbase.util.HBaseFsckRepair;
54 import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
55 import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
56 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
57 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
58 import org.junit.After;
59 import org.junit.Before;
60 import org.junit.Rule;
61 import org.junit.Test;
62 import org.junit.experimental.categories.Category;
63 import org.junit.rules.TestRule;
64
65
66
67
68 @Category(LargeTests.class)
69 public class TestMetaWithReplicas {
70 @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
71 withTimeout(this.getClass()).
72 withLookingForStuckThread(true).
73 build();
74 private static final Log LOG = LogFactory.getLog(TestMetaWithReplicas.class);
75 private final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
76
77 @Before
78 public void setup() throws Exception {
79 TEST_UTIL.getConfiguration().setInt("zookeeper.session.timeout", 30000);
80 TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM, 3);
81 TEST_UTIL.getConfiguration().setInt(
82 StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, 1000);
83 TEST_UTIL.getConfiguration().setInt("hbase.master.wait.on.regionservers.mintostart", 3);
84 TEST_UTIL.startMiniCluster(4);
85
86 LoadBalancerTracker l = new LoadBalancerTracker(TEST_UTIL.getZooKeeperWatcher(),
87 new Abortable() {
88 boolean aborted = false;
89 @Override
90 public boolean isAborted() {
91 return aborted;
92 }
93 @Override
94 public void abort(String why, Throwable e) {
95 aborted = true;
96 }
97 });
98 l.setBalancerOn(false);
99 for (int replicaId = 1; replicaId < 3; replicaId ++) {
100 HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO,
101 replicaId);
102 TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(h);
103 }
104 LOG.debug("All meta replicas assigned");
105 }
106
107 @After
108 public void tearDown() throws Exception {
109 TEST_UTIL.shutdownMiniCluster();
110 }
111
112 @Test
113 public void testMetaHTDReplicaCount() throws Exception {
114 assertTrue(TEST_UTIL.getHBaseAdmin().getTableDescriptor(TableName.META_TABLE_NAME)
115 .getRegionReplication() == 3);
116 }
117
118 @Test
119 public void testZookeeperNodesForReplicas() throws Exception {
120
121 ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
122 Configuration conf = TEST_UTIL.getConfiguration();
123 String baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
124 HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
125 String primaryMetaZnode = ZKUtil.joinZNode(baseZNode,
126 conf.get("zookeeper.znode.metaserver", "meta-region-server"));
127
128 byte[] data = ZKUtil.getData(zkw, primaryMetaZnode);
129 ServerName.parseFrom(data);
130 for (int i = 1; i < 3; i++) {
131 String secZnode = ZKUtil.joinZNode(baseZNode,
132 conf.get("zookeeper.znode.metaserver", "meta-region-server") + "-" + i);
133 String str = zkw.getZNodeForReplica(i);
134 assertTrue(str.equals(secZnode));
135
136 data = ZKUtil.getData(zkw, secZnode);
137 ServerName.parseFrom(data);
138 }
139 }
140
141 @Test
142 public void testShutdownHandling() throws Exception {
143
144
145
146
147 shutdownMetaAndDoValidations(TEST_UTIL);
148 }
149
150 public static void shutdownMetaAndDoValidations(HBaseTestingUtility util) throws Exception {
151
152
153
154
155 ZooKeeperWatcher zkw = util.getZooKeeperWatcher();
156 Configuration conf = util.getConfiguration();
157 conf.setBoolean(HConstants.USE_META_REPLICAS, true);
158
159 String baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
160 HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
161 String primaryMetaZnode = ZKUtil.joinZNode(baseZNode,
162 conf.get("zookeeper.znode.metaserver", "meta-region-server"));
163 byte[] data = ZKUtil.getData(zkw, primaryMetaZnode);
164 ServerName primary = ServerName.parseFrom(data);
165
166 byte[] TABLE = Bytes.toBytes("testShutdownHandling");
167 byte[][] FAMILIES = new byte[][] { Bytes.toBytes("foo") };
168 if (util.getHBaseAdmin().tableExists(TABLE)) {
169 util.getHBaseAdmin().disableTable(TABLE);
170 util.getHBaseAdmin().deleteTable(TABLE);
171 }
172 ServerName master = null;
173 try (Connection c = ConnectionFactory.createConnection(util.getConfiguration());) {
174 try (Table htable = util.createTable(TABLE, FAMILIES, conf);) {
175 util.getHBaseAdmin().flush(TableName.META_TABLE_NAME);
176 Thread.sleep(conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD,
177 30000) * 6);
178 List<HRegionInfo> regions = MetaTableAccessor.getTableRegions(zkw, c,
179 TableName.valueOf(TABLE));
180 HRegionLocation hrl = MetaTableAccessor.getRegionLocation(c, regions.get(0));
181
182
183
184
185
186 if (hrl.getServerName().equals(primary)) {
187 util.getHBaseAdmin().move(hrl.getRegionInfo().getEncodedNameAsBytes(), null);
188
189 do {
190 Thread.sleep(10);
191 hrl = MetaTableAccessor.getRegionLocation(c, regions.get(0));
192 } while (primary.equals(hrl.getServerName()));
193 util.getHBaseAdmin().flush(TableName.META_TABLE_NAME);
194 Thread.sleep(conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD,
195 30000) * 3);
196 }
197 master = util.getHBaseClusterInterface().getClusterStatus().getMaster();
198
199
200 util.getHBaseClusterInterface().stopMaster(master);
201 util.getHBaseClusterInterface().waitForMasterToStop(master, 60000);
202 if (!master.equals(primary)) {
203 util.getHBaseClusterInterface().killRegionServer(primary);
204 util.getHBaseClusterInterface().waitForRegionServerToStop(primary, 60000);
205 }
206 ((ClusterConnection)c).clearRegionCache();
207 }
208 Get get = null;
209 Result r = null;
210 byte[] row = "test".getBytes();
211 try (Table htable = c.getTable(TableName.valueOf(TABLE));) {
212 Put put = new Put(row);
213 put.add("foo".getBytes(), row, row);
214 BufferedMutator m = c.getBufferedMutator(TableName.valueOf(TABLE));
215 m.mutate(put);
216 m.flush();
217
218 get = new Get(row);
219 r = htable.get(get);
220 assertTrue(Arrays.equals(r.getRow(), row));
221
222
223 util.getHBaseClusterInterface().startMaster(master.getHostname(), 0);
224 util.getHBaseClusterInterface().startRegionServer(primary.getHostname(), 0);
225 util.getHBaseClusterInterface().waitForActiveAndReadyMaster();
226 ((ClusterConnection)c).clearRegionCache();
227 }
228 conf.setBoolean(HConstants.USE_META_REPLICAS, false);
229 try (Table htable = c.getTable(TableName.valueOf(TABLE));) {
230 r = htable.get(get);
231 assertTrue(Arrays.equals(r.getRow(), row));
232 }
233 }
234 }
235
236 @Test
237 public void testMetaLookupThreadPoolCreated() throws Exception {
238 byte[] TABLE = Bytes.toBytes("testMetaLookupThreadPoolCreated");
239 byte[][] FAMILIES = new byte[][] { Bytes.toBytes("foo") };
240 if (TEST_UTIL.getHBaseAdmin().tableExists(TABLE)) {
241 TEST_UTIL.getHBaseAdmin().disableTable(TABLE);
242 TEST_UTIL.getHBaseAdmin().deleteTable(TABLE);
243 }
244 try (Table htable =
245 TEST_UTIL.createTable(TABLE, FAMILIES, TEST_UTIL.getConfiguration());) {
246 byte[] row = "test".getBytes();
247 HConnectionImplementation c = ((HConnectionImplementation)((HTable)htable).connection);
248
249 c.relocateRegion(TABLE, row);
250 ExecutorService ex = c.getCurrentMetaLookupPool();
251 assert(ex != null);
252 }
253 }
254
255 @Test
256 public void testChangingReplicaCount() throws Exception {
257
258
259 stopMasterAndValidateReplicaCount(3, 2);
260
261 stopMasterAndValidateReplicaCount(2, 3);
262 }
263
264 private void stopMasterAndValidateReplicaCount(int originalReplicaCount, int newReplicaCount)
265 throws Exception {
266 ServerName sn = TEST_UTIL.getHBaseClusterInterface().getClusterStatus().getMaster();
267 TEST_UTIL.getHBaseClusterInterface().stopMaster(sn);
268 TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(sn, 60000);
269 List<String> metaZnodes = TEST_UTIL.getZooKeeperWatcher().getMetaReplicaNodes();
270 assert(metaZnodes.size() == originalReplicaCount);
271 TEST_UTIL.getHBaseClusterInterface().getConf().setInt(HConstants.META_REPLICAS_NUM,
272 newReplicaCount);
273 TEST_UTIL.getHBaseClusterInterface().startMaster(sn.getHostname(), 0);
274 TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster();
275 int count = 0;
276 do {
277 metaZnodes = TEST_UTIL.getZooKeeperWatcher().getMetaReplicaNodes();
278 Thread.sleep(10);
279 count++;
280
281
282
283 } while (metaZnodes.size() == originalReplicaCount && count < 1000);
284 assert(metaZnodes.size() == newReplicaCount);
285
286 TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM,
287 newReplicaCount);
288 HBaseFsck hbck = HbckTestingUtil.doFsck(TEST_UTIL.getConfiguration(), false);
289 HbckTestingUtil.assertNoErrors(hbck);
290 }
291
292 @Test
293 public void testHBaseFsckWithMetaReplicas() throws Exception {
294 HBaseFsck hbck = HbckTestingUtil.doFsck(TEST_UTIL.getConfiguration(), false);
295 HbckTestingUtil.assertNoErrors(hbck);
296 }
297
298 @Test
299 public void testHBaseFsckWithFewerMetaReplicas() throws Exception {
300 ClusterConnection c = (ClusterConnection)ConnectionFactory.createConnection(
301 TEST_UTIL.getConfiguration());
302 RegionLocations rl = c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW,
303 false, false);
304 HBaseFsckRepair.closeRegionSilentlyAndWait(c,
305 rl.getRegionLocation(1).getServerName(), rl.getRegionLocation(1).getRegionInfo());
306
307 HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false);
308 assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN,ERROR_CODE.NO_META_REGION});
309
310 hbck = doFsck(TEST_UTIL.getConfiguration(), true);
311
312 hbck = doFsck(TEST_UTIL.getConfiguration(), false);
313 assertErrors(hbck, new ERROR_CODE[]{});
314 }
315
316 @Test
317 public void testHBaseFsckWithFewerMetaReplicaZnodes() throws Exception {
318 ClusterConnection c = (ClusterConnection)ConnectionFactory.createConnection(
319 TEST_UTIL.getConfiguration());
320 RegionLocations rl = c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW,
321 false, false);
322 HBaseFsckRepair.closeRegionSilentlyAndWait(c,
323 rl.getRegionLocation(2).getServerName(), rl.getRegionLocation(2).getRegionInfo());
324 ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
325 ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(2));
326
327 HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false);
328 assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN,ERROR_CODE.NO_META_REGION});
329
330 hbck = doFsck(TEST_UTIL.getConfiguration(), true);
331
332 hbck = doFsck(TEST_UTIL.getConfiguration(), false);
333 assertErrors(hbck, new ERROR_CODE[]{});
334 }
335
336 @Test
337 public void testAccessingUnknownTables() throws Exception {
338 Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
339 conf.setBoolean(HConstants.USE_META_REPLICAS, true);
340 Table table = TEST_UTIL.getConnection().getTable(TableName.valueOf("RandomTable"));
341 Get get = new Get(Bytes.toBytes("foo"));
342 try {
343 table.get(get);
344 } catch (TableNotFoundException t) {
345 return;
346 }
347 fail("Expected TableNotFoundException");
348 }
349
350 @Test
351 public void testMetaAddressChange() throws Exception {
352
353
354
355 Configuration conf = TEST_UTIL.getConfiguration();
356 ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
357 String baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
358 HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
359 String primaryMetaZnode = ZKUtil.joinZNode(baseZNode,
360 conf.get("zookeeper.znode.metaserver", "meta-region-server"));
361
362 byte[] data = ZKUtil.getData(zkw, primaryMetaZnode);
363 ServerName currentServer = ServerName.parseFrom(data);
364 Collection<ServerName> liveServers = TEST_UTIL.getHBaseAdmin().getClusterStatus().getServers();
365 ServerName moveToServer = null;
366 for (ServerName s : liveServers) {
367 if (!currentServer.equals(s)) {
368 moveToServer = s;
369 }
370 }
371 assert(moveToServer != null);
372 String tableName = "randomTable5678";
373 TEST_UTIL.createTable(TableName.valueOf(tableName), "f");
374 assertTrue(TEST_UTIL.getHBaseAdmin().tableExists(tableName));
375 TEST_UTIL.getHBaseAdmin().move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
376 Bytes.toBytes(moveToServer.getServerName()));
377 int i = 0;
378 do {
379 Thread.sleep(10);
380 data = ZKUtil.getData(zkw, primaryMetaZnode);
381 currentServer = ServerName.parseFrom(data);
382 i++;
383 } while (!moveToServer.equals(currentServer) && i < 1000);
384 assert(i != 1000);
385 TEST_UTIL.getHBaseAdmin().disableTable("randomTable5678");
386 assertTrue(TEST_UTIL.getHBaseAdmin().isTableDisabled("randomTable5678"));
387 }
388
389 @Test
390 public void testShutdownOfReplicaHolder() throws Exception {
391
392
393 RegionLocations rl = ConnectionManager.getConnectionInternal(TEST_UTIL.getConfiguration()).
394 locateRegion(TableName.META_TABLE_NAME, Bytes.toBytes(""), false, true);
395 HRegionLocation hrl = rl.getRegionLocation(1);
396 ServerName oldServer = hrl.getServerName();
397 TEST_UTIL.getHBaseClusterInterface().killRegionServer(oldServer);
398 int i = 0;
399 do {
400 LOG.debug("Waiting for the replica " + hrl.getRegionInfo() + " to come up");
401 Thread.sleep(30000);
402 rl = ConnectionManager.getConnectionInternal(TEST_UTIL.getConfiguration()).
403 locateRegion(TableName.META_TABLE_NAME, Bytes.toBytes(""), false, true);
404 hrl = rl.getRegionLocation(1);
405 i++;
406 } while ((hrl == null || hrl.getServerName().equals(oldServer)) && i < 3);
407 assertTrue(i != 3);
408 }
409
410 @Test
411 public void testHBaseFsckWithExcessMetaReplicas() throws Exception {
412
413 HRegionInfo h = RegionReplicaUtil.getRegionInfoForReplica(
414 HRegionInfo.FIRST_META_REGIONINFO, 3);
415
416 TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager()
417 .getRegionStates().createRegionState(h);
418 TEST_UTIL.assignRegion(h);
419 HBaseFsckRepair.waitUntilAssigned(TEST_UTIL.getHBaseAdmin(), h);
420
421 HBaseFsck hbck = doFsck(TEST_UTIL.getConfiguration(), false);
422 assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.UNKNOWN, ERROR_CODE.SHOULD_NOT_BE_DEPLOYED});
423
424 hbck = doFsck(TEST_UTIL.getConfiguration(), true);
425
426 hbck = doFsck(TEST_UTIL.getConfiguration(), false);
427 assertErrors(hbck, new ERROR_CODE[]{});
428 }
429
430 @Test
431 public void testMetaTableReplicaAssignment() throws Exception {
432 final ClusterConnection c =
433 ConnectionManager.getConnectionInternal(TEST_UTIL.getConfiguration());
434 final RegionLocations rl =
435 c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, false, true);
436
437 final ServerName meta0SN = rl.getRegionLocation(0).getServerName();
438 LOG.debug("The hbase:meta default replica region is in server: " + meta0SN);
439 final ServerName meta1SN = rl.getRegionLocation(1).getServerName();
440 LOG.debug("The hbase:meta replica 1 region " + rl.getRegionLocation(1).getRegionInfo() +
441 " is in server: " + meta1SN);
442
443 LOG.debug("Killing the region server " + meta1SN +
444 " that hosts hbase:meta replica 1 region " + rl.getRegionLocation(1).getRegionInfo());
445 TEST_UTIL.getHBaseClusterInterface().killRegionServer(meta1SN);
446 TEST_UTIL.getHBaseClusterInterface().waitForRegionServerToStop(meta1SN, 60000);
447
448 ServerName masterSN = TEST_UTIL.getHBaseClusterInterface().getClusterStatus().getMaster();
449 LOG.debug("Killing the master server " + masterSN);
450 TEST_UTIL.getHBaseClusterInterface().stopMaster(masterSN);
451 TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(masterSN, 60000);
452 LOG.debug("Restarting the master server " + masterSN);
453 TEST_UTIL.getHBaseClusterInterface().startMaster(masterSN.getHostname(), masterSN.getPort());
454 TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster();
455
456
457 TEST_UTIL.waitFor(60000, 100, new ExplainingPredicate<IOException>() {
458
459 @Override
460 public boolean evaluate() throws IOException {
461 RegionLocations rls =
462 c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, false, true);
463 HRegionLocation loc = rls.getRegionLocation(1);
464 if (loc != null && !meta1SN.equals(loc.getServerName())) {
465 LOG.debug("The hbase:meta replica 1 region " + rls.getRegionLocation(1).getRegionInfo() +
466 " is now moved from server " + meta1SN + " to server " + loc.getServerName());
467 return true;
468 } else {
469 return false;
470 }
471 }
472
473 @Override
474 public String explainFailure() throws IOException {
475 return "The hbase:meta replica 1 region " + rl.getRegionLocation(1).getRegionInfo() +
476 " has not been assigned in time";
477 }
478 });
479
480 LOG.debug("Killing the region server " + meta0SN +
481 " that hosts hbase:meta default replica region " + rl.getRegionLocation(0).getRegionInfo());
482 TEST_UTIL.getHBaseClusterInterface().killRegionServer(meta0SN);
483 TEST_UTIL.getHBaseClusterInterface().waitForRegionServerToStop(meta0SN, 60000);
484
485 TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(
486 HRegionInfo.FIRST_META_REGIONINFO);
487
488
489 TEST_UTIL.waitFor(60000, 100, new ExplainingPredicate<IOException>() {
490
491 @Override
492 public boolean evaluate() throws IOException {
493 RegionLocations rls =
494 c.locateRegion(TableName.META_TABLE_NAME, HConstants.EMPTY_START_ROW, false, true);
495 HRegionLocation loc = rls.getRegionLocation(0);
496 if (loc != null && !meta0SN.equals(loc.getServerName())) {
497 LOG.debug(
498 "The hbase:meta default replica region " + rls.getRegionLocation(0).getRegionInfo() +
499 " is now moved from server " + meta0SN + " to server " + loc.getServerName());
500 return true;
501 } else {
502 return false;
503 }
504 }
505
506 @Override
507 public String explainFailure() throws IOException {
508 return "The hbase:meta default replica region " + rl.getRegionLocation(0).getRegionInfo() +
509 " has not been assigned in time";
510 }
511 });
512 }
513 }