View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  
22  import static org.junit.Assert.assertTrue;
23  import static org.junit.Assert.fail;
24  import static org.mockito.Mockito.mock;
25  import static org.mockito.Mockito.when;
26  
27  import java.io.IOException;
28  import java.util.concurrent.atomic.AtomicLong;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.fs.FileSystem;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.DroppedSnapshotException;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.HConstants;
38  import org.apache.hadoop.hbase.Server;
39  import org.apache.hadoop.hbase.TableName;
40  import org.apache.hadoop.hbase.client.Durability;
41  import org.apache.hadoop.hbase.client.Put;
42  import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
43  import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
44  import org.apache.hadoop.hbase.testclassification.MediumTests;
45  import org.apache.hadoop.hbase.util.Bytes;
46  import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
47  import org.apache.hadoop.hbase.util.Threads;
48  import org.apache.hadoop.hbase.wal.WAL;
49  import org.apache.hadoop.hbase.wal.WALProvider.Writer;
50  import org.junit.After;
51  import org.junit.Before;
52  import org.junit.Rule;
53  import org.junit.Test;
54  import org.junit.experimental.categories.Category;
55  import org.junit.rules.TestName;
56  import org.mockito.Mockito;
57  import org.mockito.exceptions.verification.WantedButNotInvoked;
58  
59  /**
60   * Testing sync/append failures.
61   * Copied from TestHRegion.
62   */
63  @Category({MediumTests.class})
64  public class TestFailedAppendAndSync {
65    private static final Log LOG = LogFactory.getLog(TestFailedAppendAndSync.class);
66    @Rule public TestName name = new TestName();
67  
68    private static final String COLUMN_FAMILY = "MyCF";
69    private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
70  
71    HRegion region = null;
72    // Do not run unit tests in parallel (? Why not?  It don't work?  Why not?  St.Ack)
73    private static HBaseTestingUtility TEST_UTIL;
74    public static Configuration CONF ;
75    private String dir;
76  
77    // Test names
78    protected TableName tableName;
79  
80    @Before
81    public void setup() throws IOException {
82      TEST_UTIL = HBaseTestingUtility.createLocalHTU();
83      CONF = TEST_UTIL.getConfiguration();
84      // Disable block cache.
85      CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
86      dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
87      tableName = TableName.valueOf(name.getMethodName());
88    }
89  
90    @After
91    public void tearDown() throws Exception {
92      EnvironmentEdgeManagerTestHelper.reset();
93      LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
94      TEST_UTIL.cleanupTestDir();
95    }
96  
97    String getName() {
98      return name.getMethodName();
99    }
100 
101   /**
102    * Reproduce locking up that happens when we get an exceptions appending and syncing.
103    * See HBASE-14317.
104    * First I need to set up some mocks for Server and RegionServerServices. I also need to
105    * set up a dodgy WAL that will throw an exception when we go to append to it.
106    */
107   @Test (timeout=300000)
108   public void testLockupAroundBadAssignSync() throws IOException {
109     final AtomicLong rolls = new AtomicLong(0);
110     // Dodgy WAL. Will throw exceptions when flags set.
111     class DodgyFSLog extends FSHLog {
112       volatile boolean throwSyncException = false;
113       volatile boolean throwAppendException = false;
114 
115       public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
116       throws IOException {
117         super(fs, root, logDir, conf);
118       }
119 
120       @Override
121       public byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException {
122         byte [][] regions = super.rollWriter(force);
123         rolls.getAndIncrement();
124         return regions;
125       }
126 
127       @Override
128       protected Writer createWriterInstance(Path path) throws IOException {
129         final Writer w = super.createWriterInstance(path);
130           return new Writer() {
131             @Override
132             public void close() throws IOException {
133               w.close();
134             }
135 
136           @Override
137           public void sync(boolean forceSync) throws IOException {
138             if (throwSyncException) {
139               throw new IOException("FAKE! Failed to replace a bad datanode...");
140             }
141             w.sync(forceSync);
142           }
143 
144             @Override
145             public void append(Entry entry) throws IOException {
146               if (throwAppendException) {
147                 throw new IOException("FAKE! Failed to replace a bad datanode...");
148               }
149               w.append(entry);
150             }
151 
152             @Override
153             public long getLength() throws IOException {
154               return w.getLength();
155               }
156             };
157           }
158       }
159 
160     // Make up mocked server and services.
161     Server server = mock(Server.class);
162     when(server.getConfiguration()).thenReturn(CONF);
163     when(server.isStopped()).thenReturn(false);
164     when(server.isAborted()).thenReturn(false);
165     RegionServerServices services = mock(RegionServerServices.class);
166     // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
167     // the test.
168     FileSystem fs = FileSystem.get(CONF);
169     Path rootDir = new Path(dir + getName());
170     DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
171     LogRoller logRoller = new LogRoller(server, services);
172     logRoller.addWAL(dodgyWAL);
173     logRoller.start();
174 
175     boolean threwOnSync = false;
176     boolean threwOnAppend = false;
177     boolean threwOnBoth = false;
178 
179     HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL);
180     region.setRegionServerServices(services);
181     try {
182       // Get some random bytes.
183       byte[] value = Bytes.toBytes(getName());
184       try {
185         // First get something into memstore
186         Put put = new Put(value);
187         put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
188         region.put(put);
189       } catch (IOException ioe) {
190         fail();
191       }
192       long rollsCount = rolls.get();
193       try {
194         dodgyWAL.throwAppendException = true;
195         dodgyWAL.throwSyncException = false;
196         Put put = new Put(value);
197         put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
198         region.put(put);
199       } catch (IOException ioe) {
200         threwOnAppend = true;
201       }
202       while (rollsCount == rolls.get()) Threads.sleep(100);
203       rollsCount = rolls.get();
204 
205       // When we get to here.. we should be ok. A new WAL has been put in place. There were no
206       // appends to sync. We should be able to continue.
207 
208       try {
209         dodgyWAL.throwAppendException = true;
210         dodgyWAL.throwSyncException = true;
211         Put put = new Put(value);
212         put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
213         region.put(put);
214       } catch (IOException ioe) {
215         threwOnBoth = true;
216       }
217       while (rollsCount == rolls.get()) Threads.sleep(100);
218 
219       // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
220       // to just continue.
221 
222       // So, should be no abort at this stage. Verify.
223       Mockito.verify(server, Mockito.atLeast(0)).
224         abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
225       try {
226         dodgyWAL.throwAppendException = false;
227         dodgyWAL.throwSyncException = true;
228         Put put = new Put(value);
229         put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
230         region.put(put);
231       } catch (IOException ioe) {
232         threwOnSync = true;
233       }
234       // An append in the WAL but the sync failed is a server abort condition. That is our
235       // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
236       // happens. If it don't we'll timeout the whole test. That is fine.
237       while (true) {
238         try {
239           Mockito.verify(server, Mockito.atLeast(1)).
240             abort(Mockito.anyString(), (Throwable)Mockito.anyObject());
241           break;
242         } catch (WantedButNotInvoked t) {
243           Threads.sleep(1);
244         }
245       }
246     } finally {
247       // To stop logRoller, its server has to say it is stopped.
248       Mockito.when(server.isStopped()).thenReturn(true);
249       if (logRoller != null) logRoller.interrupt();
250       if (region != null) {
251         try {
252           region.close(true);
253         } catch (DroppedSnapshotException e) {
254           LOG.info("On way out; expected!", e);
255         }
256       }
257       if (dodgyWAL != null) dodgyWAL.close();
258       assertTrue("The regionserver should have thrown an exception", threwOnBoth);
259       assertTrue("The regionserver should have thrown an exception", threwOnAppend);
260       assertTrue("The regionserver should have thrown an exception", threwOnSync);
261     }
262   }
263 
264   /**
265    * @return A region on which you must call
266    *         {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)} when done.
267    */
268   public HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
269       Configuration conf, WAL wal) throws IOException {
270     return TEST_UTIL.createLocalHRegion(tableName.getName(), startKey, stopKey,
271       getName(), conf, false, Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
272   }
273 }