View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.procedure;
20  
21  import java.io.IOException;
22  import java.util.concurrent.atomic.AtomicInteger;
23  import java.util.concurrent.CountDownLatch;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.fs.FileSystem;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.hbase.HBaseTestingUtility;
31  import org.apache.hadoop.hbase.HRegionInfo;
32  import org.apache.hadoop.hbase.HTableDescriptor;
33  import org.apache.hadoop.hbase.MiniHBaseCluster;
34  import org.apache.hadoop.hbase.TableName;
35  import org.apache.hadoop.hbase.master.HMaster;
36  import org.apache.hadoop.hbase.procedure2.Procedure;
37  import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
38  import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
39  import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.TestProcedure;
40  import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
41  import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
42  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.CreateTableState;
43  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DeleteTableState;
44  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState;
45  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.EnableTableState;
46  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.TruncateTableState;
47  import org.apache.hadoop.hbase.testclassification.LargeTests;
48  import org.apache.hadoop.hbase.util.Bytes;
49  import org.apache.hadoop.hbase.util.FSUtils;
50  import org.apache.hadoop.hbase.util.ModifyRegionUtils;
51  import org.apache.hadoop.hbase.util.Threads;
52  import org.apache.hadoop.hdfs.MiniDFSCluster;
53  import org.apache.hadoop.hdfs.server.datanode.DataNode;
54  
55  import org.junit.Before;
56  import org.junit.Ignore;
57  import org.junit.Test;
58  import org.junit.experimental.categories.Category;
59  import org.mockito.Mockito;
60  
61  import static org.junit.Assert.assertEquals;
62  import static org.junit.Assert.assertFalse;
63  import static org.junit.Assert.assertTrue;
64  import static org.junit.Assert.fail;
65  
66  @Category(LargeTests.class)
67  public class TestWALProcedureStoreOnHDFS {
68    private static final Log LOG = LogFactory.getLog(TestWALProcedureStoreOnHDFS.class);
69  
70    protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
71  
72    private WALProcedureStore store;
73  
74    private ProcedureStore.ProcedureStoreListener stopProcedureListener = new ProcedureStore.ProcedureStoreListener() {
75      @Override
76      public void postSync() {}
77  
78      @Override
79      public void abortProcess() {
80        LOG.fatal("Abort the Procedure Store");
81        store.stop(true);
82      }
83    };
84  
85    @Before
86    public void initConfig() {
87      Configuration conf = UTIL.getConfiguration();
88      conf.setInt("dfs.replication", 3);
89      conf.setInt("dfs.namenode.replication.min", 3);
90  
91      // increase the value for slow test-env
92      conf.setInt(WALProcedureStore.WAIT_BEFORE_ROLL_CONF_KEY, 1000);
93      conf.setInt(WALProcedureStore.ROLL_RETRIES_CONF_KEY, 10);
94      conf.setInt(WALProcedureStore.MAX_SYNC_FAILURE_ROLL_CONF_KEY, 10);
95    }
96  
97    // No @Before because some tests need to do additional config first
98    private void setup() throws Exception {
99      MiniDFSCluster dfs = UTIL.startMiniDFSCluster(3);
100 
101     Path logDir = new Path(new Path(dfs.getFileSystem().getUri()), "/test-logs");
102     store = ProcedureTestingUtility.createWalStore(UTIL.getConfiguration(), logDir);
103     store.registerListener(stopProcedureListener);
104     store.start(8);
105     store.recoverLease();
106   }
107 
108   // No @After
109   @SuppressWarnings("JUnit4TearDownNotRun")
110   public void tearDown() throws Exception {
111     store.stop(false);
112     UTIL.getDFSCluster().getFileSystem().delete(store.getWALDir(), true);
113 
114     try {
115       UTIL.shutdownMiniCluster();
116     } catch (Exception e) {
117       LOG.warn("failure shutting down cluster", e);
118     }
119   }
120 
121   @Test(timeout=60000, expected=RuntimeException.class)
122   public void testWalAbortOnLowReplication() throws Exception {
123     setup();
124     assertEquals(3, UTIL.getDFSCluster().getDataNodes().size());
125 
126     LOG.info("Stop DataNode");
127     UTIL.getDFSCluster().stopDataNode(0);
128     assertEquals(2, UTIL.getDFSCluster().getDataNodes().size());
129 
130     store.insert(new TestProcedure(1, -1), null);
131     for (long i = 2; store.isRunning(); ++i) {
132       assertEquals(2, UTIL.getDFSCluster().getDataNodes().size());
133       store.insert(new TestProcedure(i, -1), null);
134       Thread.sleep(100);
135     }
136     assertFalse(store.isRunning());
137     fail("The store.insert() should throw an exeption");
138   }
139 
140   @Test(timeout=60000)
141   public void testWalAbortOnLowReplicationWithQueuedWriters() throws Exception {
142     setup();
143     assertEquals(3, UTIL.getDFSCluster().getDataNodes().size());
144     store.registerListener(new ProcedureStore.ProcedureStoreListener() {
145       @Override
146       public void postSync() {
147         Threads.sleepWithoutInterrupt(2000);
148       }
149 
150       @Override
151       public void abortProcess() {}
152     });
153 
154     final AtomicInteger reCount = new AtomicInteger(0);
155     Thread[] thread = new Thread[store.getNumThreads() * 2 + 1];
156     for (int i = 0; i < thread.length; ++i) {
157       final long procId = i + 1;
158       thread[i] = new Thread() {
159         public void run() {
160           try {
161             LOG.debug("[S] INSERT " + procId);
162             store.insert(new TestProcedure(procId, -1), null);
163             LOG.debug("[E] INSERT " + procId);
164           } catch (RuntimeException e) {
165             reCount.incrementAndGet();
166             LOG.debug("[F] INSERT " + procId + ": " + e.getMessage());
167           }
168         }
169       };
170       thread[i].start();
171     }
172 
173     Thread.sleep(1000);
174     LOG.info("Stop DataNode");
175     UTIL.getDFSCluster().stopDataNode(0);
176     assertEquals(2, UTIL.getDFSCluster().getDataNodes().size());
177 
178     for (int i = 0; i < thread.length; ++i) {
179       thread[i].join();
180     }
181 
182     assertFalse(store.isRunning());
183     assertTrue(reCount.toString(), reCount.get() >= store.getNumThreads() &&
184                                    reCount.get() < thread.length);
185   }
186 
187   @Test(timeout=60000)
188   public void testWalRollOnLowReplication() throws Exception {
189     UTIL.getConfiguration().setInt("dfs.namenode.replication.min", 1);
190     setup();
191     int dnCount = 0;
192     store.insert(new TestProcedure(1, -1), null);
193     UTIL.getDFSCluster().restartDataNode(dnCount);
194     for (long i = 2; i < 100; ++i) {
195       store.insert(new TestProcedure(i, -1), null);
196       waitForNumReplicas(3);
197       Thread.sleep(100);
198       if ((i % 30) == 0) {
199         LOG.info("Restart Data Node");
200         UTIL.getDFSCluster().restartDataNode(++dnCount % 3);
201       }
202     }
203     assertTrue(store.isRunning());
204   }
205 
206   public void waitForNumReplicas(int numReplicas) throws Exception {
207     while (UTIL.getDFSCluster().getDataNodes().size() < numReplicas) {
208       Thread.sleep(100);
209     }
210 
211     for (int i = 0; i < numReplicas; ++i) {
212       for (DataNode dn: UTIL.getDFSCluster().getDataNodes()) {
213         while (!dn.isDatanodeFullyStarted()) {
214           Thread.sleep(100);
215         }
216       }
217     }
218   }
219 }