1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.replication;
21
22 import static org.junit.Assert.assertArrayEquals;
23 import static org.junit.Assert.assertEquals;
24 import static org.junit.Assert.assertFalse;
25 import static org.junit.Assert.assertNotNull;
26 import static org.junit.Assert.assertTrue;
27 import static org.junit.Assert.fail;
28
29 import java.io.IOException;
30 import java.util.Arrays;
31 import java.util.List;
32 import java.util.concurrent.CountDownLatch;
33
34 import org.apache.commons.logging.Log;
35 import org.apache.commons.logging.LogFactory;
36 import org.apache.hadoop.conf.Configuration;
37 import org.apache.hadoop.fs.Path;
38 import org.apache.hadoop.hbase.*;
39 import org.apache.hadoop.hbase.client.Delete;
40 import org.apache.hadoop.hbase.client.Get;
41 import org.apache.hadoop.hbase.client.Admin;
42 import org.apache.hadoop.hbase.client.HBaseAdmin;
43 import org.apache.hadoop.hbase.client.HTable;
44 import org.apache.hadoop.hbase.client.Put;
45 import org.apache.hadoop.hbase.client.Result;
46 import org.apache.hadoop.hbase.client.Table;
47 import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
48 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
49 import org.apache.hadoop.hbase.master.cleaner.ReplicationZKLockCleanerChore;
50 import org.apache.hadoop.hbase.regionserver.HRegion;
51 import org.apache.hadoop.hbase.regionserver.HRegionServer;
52 import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
53 import org.apache.hadoop.hbase.testclassification.LargeTests;
54 import org.apache.hadoop.hbase.util.Bytes;
55 import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
56 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
57 import org.junit.After;
58 import org.junit.AfterClass;
59 import org.junit.Before;
60 import org.junit.BeforeClass;
61 import org.junit.Test;
62 import org.junit.experimental.categories.Category;
63
64 @Category(LargeTests.class)
65 public class TestMultiSlaveReplication {
66
67 private static final Log LOG = LogFactory.getLog(TestReplicationBase.class);
68
69 private static Configuration conf1;
70 private static Configuration conf2;
71 private static Configuration conf3;
72
73 private static HBaseTestingUtility utility1;
74 private static HBaseTestingUtility utility2;
75 private static HBaseTestingUtility utility3;
76 private static final long SLEEP_TIME = 500;
77 private static final int NB_RETRIES = 100;
78
79 private static final TableName tableName = TableName.valueOf("test");
80 private static final byte[] famName = Bytes.toBytes("f");
81 private static final byte[] row = Bytes.toBytes("row");
82 private static final byte[] row1 = Bytes.toBytes("row1");
83 private static final byte[] row2 = Bytes.toBytes("row2");
84 private static final byte[] row3 = Bytes.toBytes("row3");
85 private static final byte[] noRepfamName = Bytes.toBytes("norep");
86
87 private static HTableDescriptor table;
88
89 @BeforeClass
90 public static void setUpBeforeClass() throws Exception {
91 conf1 = HBaseConfiguration.create();
92 conf1.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/1");
93
94
95 conf1.setInt("hbase.regionserver.hlog.blocksize", 1024*20);
96 conf1.setInt("replication.source.size.capacity", 1024);
97 conf1.setLong("replication.source.sleepforretries", 100);
98 conf1.setInt("hbase.regionserver.maxlogs", 10);
99 conf1.setLong("hbase.master.logcleaner.ttl", 10);
100 conf1.setBoolean(HConstants.REPLICATION_ENABLE_KEY, HConstants.REPLICATION_ENABLE_DEFAULT);
101 conf1.setBoolean("dfs.support.append", true);
102 conf1.setLong(HConstants.THREAD_WAKE_FREQUENCY, 100);
103 conf1.setStrings(CoprocessorHost.USER_REGION_COPROCESSOR_CONF_KEY,
104 "org.apache.hadoop.hbase.replication.TestMasterReplication$CoprocessorCounter");
105 conf1.setBoolean(HConstants.ZOOKEEPER_USEMULTI , false);
106 conf1.setInt("hbase.master.cleaner.interval", 5 * 1000);
107 conf1.setClass("hbase.region.replica.replication.replicationQueues.class",
108 ReplicationQueuesZKImpl.class, ReplicationQueues.class);
109 conf1.setLong(ReplicationZKLockCleanerChore.TTL_CONFIG_KEY, 0L);
110
111 utility1 = new HBaseTestingUtility(conf1);
112
113 conf2 = new Configuration(conf1);
114 conf2.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/2");
115
116 conf3 = new Configuration(conf1);
117 conf3.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/3");
118
119 utility2 = new HBaseTestingUtility(conf2);
120
121 utility3 = new HBaseTestingUtility(conf3);
122
123 table = new HTableDescriptor(tableName);
124 HColumnDescriptor fam = new HColumnDescriptor(famName);
125 fam.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
126 table.addFamily(fam);
127 fam = new HColumnDescriptor(noRepfamName);
128 table.addFamily(fam);
129 }
130
131 @Before
132 public void startup() throws Exception {
133 utility1.startMiniZKCluster();
134 MiniZooKeeperCluster miniZK = utility1.getZkCluster();
135 utility1.setZkCluster(miniZK);
136 new ZooKeeperWatcher(conf1, "cluster1", null, true);
137
138 utility2.setZkCluster(miniZK);
139 new ZooKeeperWatcher(conf2, "cluster2", null, true);
140
141 utility3.setZkCluster(miniZK);
142 new ZooKeeperWatcher(conf3, "cluster3", null, true);
143 }
144
145 @After
146 public void tearDown() throws Exception {
147 utility1.shutdownMiniZKCluster();
148 }
149
150 @Test(timeout=300000)
151 public void testMultiSlaveReplication() throws Exception {
152 LOG.info("Start the testMultiSlaveReplication Test");
153 MiniHBaseCluster master = utility1.startMiniCluster();
154 utility2.startMiniCluster();
155 utility3.startMiniCluster();
156 ReplicationAdmin admin1 = new ReplicationAdmin(conf1);
157
158 new HBaseAdmin(conf1).createTable(table);
159 new HBaseAdmin(conf2).createTable(table);
160 new HBaseAdmin(conf3).createTable(table);
161 Table htable1 = new HTable(conf1, tableName);
162 htable1.setWriteBufferSize(1024);
163 Table htable2 = new HTable(conf2, tableName);
164 htable2.setWriteBufferSize(1024);
165 Table htable3 = new HTable(conf3, tableName);
166 htable3.setWriteBufferSize(1024);
167
168 ReplicationPeerConfig rpc = new ReplicationPeerConfig();
169 rpc.setClusterKey(utility2.getClusterKey());
170 admin1.addPeer("1", rpc);
171
172
173 putAndWait(row, famName, htable1, htable2);
174 deleteAndWait(row, htable1, htable2);
175
176 checkRow(row,0,htable3);
177
178 putAndWait(row2, famName, htable1, htable2);
179
180
181 rollWALAndWait(utility1, htable1.getName(), row2);
182
183
184 putAndWait(row3, famName, htable1, htable2);
185
186 rpc = new ReplicationPeerConfig();
187 rpc.setClusterKey(utility3.getClusterKey());
188 admin1.addPeer("2", rpc);
189
190
191 putAndWait(row1, famName, htable1, htable2, htable3);
192
193 deleteAndWait(row1, htable1, htable2, htable3);
194
195
196
197 checkRow(row2,0,htable3);
198
199
200
201 checkRow(row3,1,htable3);
202
203 Put p = new Put(row);
204 p.add(famName, row, row);
205 htable1.put(p);
206
207 rollWALAndWait(utility1, htable1.getName(), row);
208
209
210
211 deleteAndWait(row2, htable1, htable2, htable3);
212
213
214 checkRow(row, 1, htable2);
215
216
217 checkWithWait(row, 1, htable3);
218
219
220 deleteAndWait(row, htable1, htable2, htable3);
221 deleteAndWait(row3, htable1, htable2, htable3);
222
223 utility3.shutdownMiniCluster();
224 utility2.shutdownMiniCluster();
225 utility1.shutdownMiniCluster();
226 }
227
228 @Test
229 public void testZKLockCleaner() throws Exception {
230 MiniHBaseCluster cluster = utility1.startMiniCluster(1, 2);
231 HTableDescriptor table = new HTableDescriptor(TableName.valueOf(Bytes.toBytes("zk")));
232 HColumnDescriptor fam = new HColumnDescriptor(famName);
233 fam.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
234 table.addFamily(fam);
235 new HBaseAdmin(conf1).createTable(table);
236 ReplicationAdmin replicationAdmin = new ReplicationAdmin(conf1);
237 ReplicationPeerConfig rpc = new ReplicationPeerConfig();
238 rpc.setClusterKey(utility2.getClusterKey());
239 replicationAdmin.addPeer("cluster2", rpc, null);
240 HRegionServer rs = cluster.getRegionServer(0);
241 ReplicationQueuesZKImpl zk = new ReplicationQueuesZKImpl(rs.getZooKeeper(), conf1, rs);
242 zk.init(rs.getServerName().toString());
243 List<String> replicators = zk.getListOfReplicators();
244 assertEquals(2, replicators.size());
245 String zNode = cluster.getRegionServer(1).getServerName().toString();
246
247 assertTrue(zk.lockOtherRS(zNode));
248 assertTrue(zk.checkLockExists(zNode));
249 Thread.sleep(10000);
250 assertTrue(zk.checkLockExists(zNode));
251 cluster.abortRegionServer(0);
252 Thread.sleep(10000);
253 HRegionServer rs1 = cluster.getRegionServer(1);
254 zk = new ReplicationQueuesZKImpl(rs1.getZooKeeper(), conf1, rs1);
255 zk.init(rs1.getServerName().toString());
256 assertFalse(zk.checkLockExists(zNode));
257
258 utility1.shutdownMiniCluster();
259 }
260
261 private void rollWALAndWait(final HBaseTestingUtility utility, final TableName table,
262 final byte[] row) throws IOException {
263 final Admin admin = utility.getHBaseAdmin();
264 final MiniHBaseCluster cluster = utility.getMiniHBaseCluster();
265
266
267 HRegion region = null;
268 for (HRegion candidate : cluster.getRegions(table)) {
269 if (HRegion.rowIsInRange(candidate.getRegionInfo(), row)) {
270 region = candidate;
271 break;
272 }
273 }
274 assertNotNull("Couldn't find the region for row '" + Arrays.toString(row) + "'", region);
275
276 final CountDownLatch latch = new CountDownLatch(1);
277
278
279 final WALActionsListener listener = new WALActionsListener.Base() {
280 @Override
281 public void postLogRoll(final Path oldPath, final Path newPath) throws IOException {
282 latch.countDown();
283 }
284 };
285 region.getWAL().registerWALActionsListener(listener);
286
287
288 admin.rollWALWriter(cluster.getServerHoldingRegion(region.getTableDesc().getTableName(),
289 region.getRegionInfo().getRegionName()));
290
291
292 try {
293 latch.await();
294 } catch (InterruptedException exception) {
295 LOG.warn("Interrupted while waiting for the wal of '" + region + "' to roll. If later " +
296 "replication tests fail, it's probably because we should still be waiting.");
297 Thread.currentThread().interrupt();
298 }
299 region.getWAL().unregisterWALActionsListener(listener);
300 }
301
302
303 private void checkWithWait(byte[] row, int count, Table table) throws Exception {
304 Get get = new Get(row);
305 for (int i = 0; i < NB_RETRIES; i++) {
306 if (i == NB_RETRIES - 1) {
307 fail("Waited too much time while getting the row.");
308 }
309 boolean rowReplicated = false;
310 Result res = table.get(get);
311 if (res.size() >= 1) {
312 LOG.info("Row is replicated");
313 rowReplicated = true;
314 assertEquals("Table '" + table + "' did not have the expected number of results.",
315 count, res.size());
316 break;
317 }
318 if (rowReplicated) {
319 break;
320 } else {
321 Thread.sleep(SLEEP_TIME);
322 }
323 }
324 }
325
326 private void checkRow(byte[] row, int count, Table... tables) throws IOException {
327 Get get = new Get(row);
328 for (Table table : tables) {
329 Result res = table.get(get);
330 assertEquals("Table '" + table + "' did not have the expected number of results.",
331 count, res.size());
332 }
333 }
334
335 private void deleteAndWait(byte[] row, Table source, Table... targets)
336 throws Exception {
337 Delete del = new Delete(row);
338 source.delete(del);
339
340 Get get = new Get(row);
341 for (int i = 0; i < NB_RETRIES; i++) {
342 if (i==NB_RETRIES-1) {
343 fail("Waited too much time for del replication");
344 }
345 boolean removedFromAll = true;
346 for (Table target : targets) {
347 Result res = target.get(get);
348 if (res.size() >= 1) {
349 LOG.info("Row not deleted");
350 removedFromAll = false;
351 break;
352 }
353 }
354 if (removedFromAll) {
355 break;
356 } else {
357 Thread.sleep(SLEEP_TIME);
358 }
359 }
360 }
361
362 private void putAndWait(byte[] row, byte[] fam, Table source, Table... targets)
363 throws Exception {
364 Put put = new Put(row);
365 put.add(fam, row, row);
366 source.put(put);
367
368 Get get = new Get(row);
369 for (int i = 0; i < NB_RETRIES; i++) {
370 if (i==NB_RETRIES-1) {
371 fail("Waited too much time for put replication");
372 }
373 boolean replicatedToAll = true;
374 for (Table target : targets) {
375 Result res = target.get(get);
376 if (res.size() == 0) {
377 LOG.info("Row not available");
378 replicatedToAll = false;
379 break;
380 } else {
381 assertArrayEquals(res.value(), row);
382 }
383 }
384 if (replicatedToAll) {
385 break;
386 } else {
387 Thread.sleep(SLEEP_TIME);
388 }
389 }
390 }
391
392 }
393