1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master.procedure;
20
21 import java.io.IOException;
22 import java.util.concurrent.atomic.AtomicInteger;
23 import java.util.concurrent.CountDownLatch;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27 import org.apache.hadoop.conf.Configuration;
28 import org.apache.hadoop.fs.FileSystem;
29 import org.apache.hadoop.fs.Path;
30 import org.apache.hadoop.hbase.HBaseTestingUtility;
31 import org.apache.hadoop.hbase.HRegionInfo;
32 import org.apache.hadoop.hbase.HTableDescriptor;
33 import org.apache.hadoop.hbase.MiniHBaseCluster;
34 import org.apache.hadoop.hbase.TableName;
35 import org.apache.hadoop.hbase.master.HMaster;
36 import org.apache.hadoop.hbase.procedure2.Procedure;
37 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
38 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
39 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure;
40 import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
41 import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
42 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.CreateTableState;
43 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DeleteTableState;
44 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState;
45 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.EnableTableState;
46 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.TruncateTableState;
47 import org.apache.hadoop.hbase.testclassification.LargeTests;
48 import org.apache.hadoop.hbase.util.Bytes;
49 import org.apache.hadoop.hbase.util.FSUtils;
50 import org.apache.hadoop.hbase.util.ModifyRegionUtils;
51 import org.apache.hadoop.hbase.util.Threads;
52 import org.apache.hadoop.hdfs.MiniDFSCluster;
53 import org.apache.hadoop.hdfs.server.datanode.DataNode;
54
55 import org.junit.Before;
56 import org.junit.Ignore;
57 import org.junit.Test;
58 import org.junit.experimental.categories.Category;
59 import org.mockito.Mockito;
60
61 import static org.junit.Assert.assertEquals;
62 import static org.junit.Assert.assertFalse;
63 import static org.junit.Assert.assertTrue;
64 import static org.junit.Assert.fail;
65
66 @Category(LargeTests.class)
67 public class TestWALProcedureStoreOnHDFS {
68 private static final Log LOG = LogFactory.getLog(TestWALProcedureStoreOnHDFS.class);
69
70 protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
71
72 private WALProcedureStore store;
73
74 private ProcedureStore.ProcedureStoreListener stopProcedureListener = new ProcedureStore.ProcedureStoreListener() {
75 @Override
76 public void postSync() {}
77
78 @Override
79 public void abortProcess() {
80 LOG.fatal("Abort the Procedure Store");
81 store.stop(true);
82 }
83 };
84
85 @Before
86 public void initConfig() {
87 Configuration conf = UTIL.getConfiguration();
88 conf.setInt("dfs.replication", 3);
89 conf.setInt("dfs.namenode.replication.min", 3);
90
91
92 conf.setInt(WALProcedureStore.WAIT_BEFORE_ROLL_CONF_KEY, 1000);
93 conf.setInt(WALProcedureStore.ROLL_RETRIES_CONF_KEY, 10);
94 conf.setInt(WALProcedureStore.MAX_SYNC_FAILURE_ROLL_CONF_KEY, 10);
95 }
96
97
98 private void setup() throws Exception {
99 MiniDFSCluster dfs = UTIL.startMiniDFSCluster(3);
100
101 Path logDir = new Path(new Path(dfs.getFileSystem().getUri()), "/test-logs");
102 store = ProcedureTestingUtility.createWalStore(UTIL.getConfiguration(), logDir);
103 store.registerListener(stopProcedureListener);
104 store.start(8);
105 store.recoverLease();
106 }
107
108
109 @SuppressWarnings("JUnit4TearDownNotRun")
110 public void tearDown() throws Exception {
111 store.stop(false);
112 UTIL.getDFSCluster().getFileSystem().delete(store.getWALDir(), true);
113
114 try {
115 UTIL.shutdownMiniCluster();
116 } catch (Exception e) {
117 LOG.warn("failure shutting down cluster", e);
118 }
119 }
120
121 @Test(timeout=60000, expected=RuntimeException.class)
122 public void testWalAbortOnLowReplication() throws Exception {
123 setup();
124 assertEquals(3, UTIL.getDFSCluster().getDataNodes().size());
125
126 LOG.info("Stop DataNode");
127 UTIL.getDFSCluster().stopDataNode(0);
128 assertEquals(2, UTIL.getDFSCluster().getDataNodes().size());
129
130 store.insert(new TestProcedure(1, -1), null);
131 for (long i = 2; store.isRunning(); ++i) {
132 assertEquals(2, UTIL.getDFSCluster().getDataNodes().size());
133 store.insert(new TestProcedure(i, -1), null);
134 Thread.sleep(100);
135 }
136 assertFalse(store.isRunning());
137 fail("The store.insert() should throw an exeption");
138 }
139
140 @Test(timeout=60000)
141 public void testWalAbortOnLowReplicationWithQueuedWriters() throws Exception {
142 setup();
143 assertEquals(3, UTIL.getDFSCluster().getDataNodes().size());
144 store.registerListener(new ProcedureStore.ProcedureStoreListener() {
145 @Override
146 public void postSync() {
147 Threads.sleepWithoutInterrupt(2000);
148 }
149
150 @Override
151 public void abortProcess() {}
152 });
153
154 final AtomicInteger reCount = new AtomicInteger(0);
155 Thread[] thread = new Thread[store.getNumThreads() * 2 + 1];
156 for (int i = 0; i < thread.length; ++i) {
157 final long procId = i + 1;
158 thread[i] = new Thread() {
159 public void run() {
160 try {
161 LOG.debug("[S] INSERT " + procId);
162 store.insert(new TestProcedure(procId, -1), null);
163 LOG.debug("[E] INSERT " + procId);
164 } catch (RuntimeException e) {
165 reCount.incrementAndGet();
166 LOG.debug("[F] INSERT " + procId + ": " + e.getMessage());
167 }
168 }
169 };
170 thread[i].start();
171 }
172
173 Thread.sleep(1000);
174 LOG.info("Stop DataNode");
175 UTIL.getDFSCluster().stopDataNode(0);
176 assertEquals(2, UTIL.getDFSCluster().getDataNodes().size());
177
178 for (int i = 0; i < thread.length; ++i) {
179 thread[i].join();
180 }
181
182 assertFalse(store.isRunning());
183 assertTrue(reCount.toString(), reCount.get() >= store.getNumThreads() &&
184 reCount.get() < thread.length);
185 }
186
187 @Test(timeout=60000)
188 public void testWalRollOnLowReplication() throws Exception {
189 UTIL.getConfiguration().setInt("dfs.namenode.replication.min", 1);
190 setup();
191 int dnCount = 0;
192 store.insert(new TestProcedure(1, -1), null);
193 UTIL.getDFSCluster().restartDataNode(dnCount);
194 for (long i = 2; i < 100; ++i) {
195 store.insert(new TestProcedure(i, -1), null);
196 waitForNumReplicas(3);
197 Thread.sleep(100);
198 if ((i % 30) == 0) {
199 LOG.info("Restart Data Node");
200 UTIL.getDFSCluster().restartDataNode(++dnCount % 3);
201 }
202 }
203 assertTrue(store.isRunning());
204 }
205
206 public void waitForNumReplicas(int numReplicas) throws Exception {
207 while (UTIL.getDFSCluster().getDataNodes().size() < numReplicas) {
208 Thread.sleep(100);
209 }
210
211 for (int i = 0; i < numReplicas; ++i) {
212 for (DataNode dn: UTIL.getDFSCluster().getDataNodes()) {
213 while (!dn.isDatanodeFullyStarted()) {
214 Thread.sleep(100);
215 }
216 }
217 }
218 }
219 }