1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.wal;
20
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
31
32
33 @Category(MediumTests.class)
34 public class TestWALOpenAfterDNRollingStart {
35 final Log LOG = LogFactory.getLog(getClass());
36 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
37 private static long DataNodeRestartInterval;
38
39 @BeforeClass
40 public static void setUpBeforeClass() throws Exception {
41 // Sleep time before restart next dn, we need to wait the current dn to finish start up
42 DataNodeRestartInterval = 15000;
43 // interval of checking low replication. The sleep time must smaller than DataNodeRestartInterval
44 // so a low replication case will be detected and the wal will be rolled
45 long checkLowReplicationInterval = 10000;
46 //don't let hdfs client to choose a new replica when dn down
47 TEST_UTIL.getConfiguration().setBoolean("dfs.client.block.write.replace-datanode-on-failure.enable",
48 false);
49 TEST_UTIL.getConfiguration().setLong("hbase.regionserver.hlog.check.lowreplication.interval",
50 checkLowReplicationInterval);
51 TEST_UTIL.startMiniDFSCluster(3);
52 TEST_UTIL.startMiniCluster(1);
53
54 }
55
56 /**
57 * see HBASE-18132
58 * This is a test case of failing open a wal(for replication for example) after all datanode
59 * restarted (rolling upgrade, for example).
60 * Before this patch, low replication detection is only used when syncing wal.
61 * But if the wal haven't had any entry whiten, it will never know all the replica of the wal
62 * is broken(because of dn restarting). And this wal can never be open
63 * @throws Exception
64 */
65 @Test(timeout = 300000)
66 public void test() throws Exception {
67 HRegionServer server = TEST_UTIL.getHBaseCluster().getRegionServer(0);
68 FSHLog hlog = (FSHLog)server.getWAL(null);
69 Path currentFile = hlog.getCurrentFileName();
70 //restart every dn to simulate a dn rolling upgrade
71 for(int i = 0; i < TEST_UTIL.getDFSCluster().getDataNodes().size(); i++) {
72 //This is NOT a bug, when restart dn in miniDFSCluster, it will remove the stopped dn from
73 //the dn list and then add to the tail of this list, we need to always restart the first one
74 //to simulate rolling upgrade of every dn.
75 TEST_UTIL.getDFSCluster().restartDataNode(0);
76 //sleep enough time so log roller can detect the pipeline break and roll log
77 Thread.sleep(DataNodeRestartInterval);
78 }
79
80 //if the log is not rolled, then we can never open this wal forever.
81 WAL.Reader reader = WALFactory
82 .createReader(TEST_UTIL.getTestFileSystem(), currentFile, TEST_UTIL.getConfiguration());
83 }
84
85
86 }