1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import static org.junit.Assert.assertTrue;
22
23 import java.io.IOException;
24 import java.util.concurrent.CountDownLatch;
25 import java.util.concurrent.TimeUnit;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.conf.Configuration;
30 import org.apache.hadoop.fs.FileSystem;
31 import org.apache.hadoop.fs.Path;
32 import org.apache.hadoop.hbase.CellScanner;
33 import org.apache.hadoop.hbase.ChoreService;
34 import org.apache.hadoop.hbase.CoordinatedStateManager;
35 import org.apache.hadoop.hbase.HBaseTestingUtility;
36 import org.apache.hadoop.hbase.HConstants;
37 import org.apache.hadoop.hbase.HTableDescriptor;
38 import org.apache.hadoop.hbase.Server;
39 import org.apache.hadoop.hbase.ServerName;
40 import org.apache.hadoop.hbase.TableName;
41 import org.apache.hadoop.hbase.client.ClusterConnection;
42 import org.apache.hadoop.hbase.client.Durability;
43 import org.apache.hadoop.hbase.client.Put;
44 import org.apache.hadoop.hbase.regionserver.wal.DamagedWALException;
45 import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
46 import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
47 import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
48 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
49 import org.apache.hadoop.hbase.testclassification.MediumTests;
50 import org.apache.hadoop.hbase.util.Bytes;
51 import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
52 import org.apache.hadoop.hbase.util.Threads;
53 import org.apache.hadoop.hbase.wal.WAL;
54 import org.apache.hadoop.hbase.wal.WALKey;
55 import org.apache.hadoop.hbase.wal.WALProvider.Writer;
56 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
57 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
58 import org.junit.After;
59 import org.junit.Before;
60 import org.junit.Rule;
61 import org.junit.Test;
62 import org.junit.experimental.categories.Category;
63 import org.junit.rules.TestName;
64 import org.mockito.Mockito;
65
66
67
68
69
70 @Category({MediumTests.class})
71 public class TestWALLockup {
72 private static final Log LOG = LogFactory.getLog(TestWALLockup.class);
73 @Rule public TestName name = new TestName();
74
75 private static final String COLUMN_FAMILY = "MyCF";
76 private static final byte [] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);
77
78 HRegion region = null;
79
80 private static HBaseTestingUtility TEST_UTIL;
81 private static Configuration CONF ;
82 private String dir;
83
84
85 protected TableName tableName;
86
87 @Before
88 public void setup() throws IOException {
89 TEST_UTIL = HBaseTestingUtility.createLocalHTU();
90 CONF = TEST_UTIL.getConfiguration();
91
92 CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
93 dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
94 tableName = TableName.valueOf(name.getMethodName());
95 }
96
97 @After
98 public void tearDown() throws Exception {
99 EnvironmentEdgeManagerTestHelper.reset();
100 LOG.info("Cleaning test directory: " + TEST_UTIL.getDataTestDir());
101 TEST_UTIL.cleanupTestDir();
102 }
103
104 String getName() {
105 return name.getMethodName();
106 }
107
108
109
110
111
112
113
114
115 @Test (timeout=20000)
116 public void testLockupWhenSyncInMiddleOfZigZagSetup() throws IOException {
117
118 class DodgyFSLog extends FSHLog {
119
120 volatile boolean throwException = false;
121
122
123 CountDownLatch latch = new CountDownLatch(1);
124
125 public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
126 throws IOException {
127 super(fs, root, logDir, conf);
128 }
129
130 @Override
131 protected void afterCreatingZigZagLatch() {
132
133
134
135
136 if (throwException) {
137 try {
138 LOG.info("LATCHED");
139
140
141
142
143 if (!this.latch.await(5, TimeUnit.SECONDS)) {
144 LOG.warn("GIVE UP! Failed waiting on latch...Test is ABORTED!");
145 }
146 } catch (InterruptedException e) {
147
148 e.printStackTrace();
149 }
150 }
151 }
152
153 @Override
154 protected void beforeWaitOnSafePoint() {
155 if (throwException) {
156 LOG.info("COUNTDOWN");
157
158
159
160 while (this.latch.getCount() <= 0) Threads.sleep(1);
161 this.latch.countDown();
162 }
163 }
164
165 @Override
166 protected Writer createWriterInstance(Path path) throws IOException {
167 final Writer w = super.createWriterInstance(path);
168 return new Writer() {
169 @Override
170 public void close() throws IOException {
171 w.close();
172 }
173
174 @Override
175 public void sync(boolean forceSync) throws IOException {
176 if (throwException) {
177 throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
178 }
179 w.sync(forceSync);
180 }
181
182 @Override
183 public void append(Entry entry) throws IOException {
184 if (throwException) {
185 throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
186 }
187 w.append(entry);
188 }
189
190 @Override
191 public long getLength() throws IOException {
192 return w.getLength();
193 }
194 };
195 }
196 }
197
198
199 Server server = Mockito.mock(Server.class);
200 Mockito.when(server.getConfiguration()).thenReturn(CONF);
201 Mockito.when(server.isStopped()).thenReturn(false);
202 Mockito.when(server.isAborted()).thenReturn(false);
203 RegionServerServices services = Mockito.mock(RegionServerServices.class);
204
205
206 FileSystem fs = FileSystem.get(CONF);
207 Path rootDir = new Path(dir + getName());
208 DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
209 Path originalWAL = dodgyWAL.getCurrentFileName();
210
211 LogRoller logRoller = new LogRoller(server, services);
212 logRoller.addWAL(dodgyWAL);
213
214 logRoller.start();
215
216 HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
217 final HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL);
218 byte [] bytes = Bytes.toBytes(getName());
219 MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
220 try {
221
222
223
224 Put put = new Put(bytes);
225 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
226 WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(),
227 htd.getTableName(), System.currentTimeMillis(), mvcc);
228 WALEdit edit = new WALEdit();
229 CellScanner CellScanner = put.cellScanner();
230 assertTrue(CellScanner.advance());
231 edit.add(CellScanner.current());
232
233
234 for (int i = 0; i < 1000; i++) {
235 region.put(put);
236 }
237
238 LOG.info("SET throwing of exception on append");
239 dodgyWAL.throwException = true;
240
241 dodgyWAL.append(htd, region.getRegionInfo(), key, edit, true);
242 boolean exception = false;
243 try {
244 dodgyWAL.sync();
245 } catch (Exception e) {
246 exception = true;
247 }
248 assertTrue("Did not get sync exception", exception);
249
250
251
252
253 Thread t = new Thread ("Flusher") {
254 public void run() {
255 try {
256 if (region.getMemstoreSize() <= 0) {
257 throw new IOException("memstore size=" + region.getMemstoreSize());
258 }
259 region.flush(false);
260 } catch (IOException e) {
261
262
263 LOG.info("In flush", e);
264 }
265 LOG.info("Exiting");
266 };
267 };
268 t.setDaemon(true);
269 t.start();
270
271 while (dodgyWAL.latch.getCount() > 0) Threads.sleep(1);
272
273 assertTrue(originalWAL != dodgyWAL.getCurrentFileName());
274
275 dodgyWAL.throwException = false;
276 try {
277 region.put(put);
278 } catch (Exception e) {
279 LOG.info("In the put", e);
280 }
281 } finally {
282
283 Mockito.when(server.isStopped()).thenReturn(true);
284 if (logRoller != null) logRoller.interrupt();
285 try {
286 if (region != null) region.close();
287 if (dodgyWAL != null) dodgyWAL.close();
288 } catch (Exception e) {
289 LOG.info("On way out", e);
290 }
291 }
292 }
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308 @Test(timeout = 20000)
309 public void testLockup16960() throws IOException {
310
311 class DodgyFSLog extends FSHLog {
312
313 volatile boolean throwException = false;
314
315 public DodgyFSLog(FileSystem fs, Path root, String logDir,
316 Configuration conf) throws IOException {
317 super(fs, root, logDir, conf);
318 }
319
320 @Override
321 protected Writer createWriterInstance(Path path) throws IOException {
322 final Writer w = super.createWriterInstance(path);
323 return new Writer() {
324 @Override
325 public void close() throws IOException {
326 w.close();
327 }
328
329 @Override
330 public void sync(boolean forceSync) throws IOException {
331 if (throwException) {
332 throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
333 }
334 w.sync(forceSync);
335 }
336
337 @Override
338 public void append(Entry entry) throws IOException {
339 if (throwException) {
340 throw new IOException(
341 "FAKE! Failed to replace a bad datanode...APPEND");
342 }
343 w.append(entry);
344 }
345
346 @Override
347 public long getLength() throws IOException {
348 return w.getLength();
349 }
350 };
351 }
352
353 @Override
354 public byte[][] rollWriter(boolean force) throws FailedLogCloseException,
355 IOException {
356 if (throwException) {
357 throw new FailedLogCloseException("testLockup16960");
358 }
359 return super.rollWriter(force);
360 }
361 }
362
363
364 Server server = new DummyServer(CONF, ServerName.valueOf(
365 "hostname1.example.org", 1234, 1L).toString());
366 RegionServerServices services = Mockito.mock(RegionServerServices.class);
367
368 CONF.setLong("hbase.regionserver.hlog.sync.timeout", 10000);
369
370
371
372 FileSystem fs = FileSystem.get(CONF);
373 Path rootDir = new Path(dir + getName());
374 DodgyFSLog dodgyWAL1 = new DodgyFSLog(fs, rootDir, getName(), CONF);
375
376 Path rootDir2 = new Path(dir + getName() + "2");
377 final DodgyFSLog dodgyWAL2 = new DodgyFSLog(fs, rootDir2, getName() + "2",
378 CONF);
379
380 dodgyWAL2.registerWALActionsListener(new DummyWALActionsListener());
381
382
383 LogRoller logRoller = new LogRoller(server, services);
384 logRoller.addWAL(dodgyWAL1);
385 logRoller.addWAL(dodgyWAL2);
386
387 logRoller.start();
388
389 HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
390 final HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL1);
391 byte[] bytes = Bytes.toBytes(getName());
392 MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
393 try {
394 Put put = new Put(bytes);
395 put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
396 WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(),
397 htd.getTableName(), System.currentTimeMillis(), mvcc);
398 WALEdit edit = new WALEdit();
399 CellScanner CellScanner = put.cellScanner();
400 assertTrue(CellScanner.advance());
401 edit.add(CellScanner.current());
402
403 LOG.info("SET throwing of exception on append");
404 dodgyWAL1.throwException = true;
405
406 dodgyWAL1.append(htd, region.getRegionInfo(), key, edit, true);
407 boolean exception = false;
408 try {
409 dodgyWAL1.sync();
410 } catch (Exception e) {
411 exception = true;
412 }
413 assertTrue("Did not get sync exception", exception);
414
415
416
417 try {
418
419 Thread.sleep(50);
420 } catch (InterruptedException e) {
421 e.printStackTrace();
422 }
423
424 final CountDownLatch latch = new CountDownLatch(1);
425
426
427
428 key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(),
429 TableName.valueOf("sleep"), System.currentTimeMillis(), mvcc);
430 dodgyWAL2.append(htd, region.getRegionInfo(), key, edit, true);
431
432 Thread t = new Thread("Sync") {
433 public void run() {
434 try {
435 dodgyWAL2.sync();
436 } catch (IOException e) {
437 LOG.info("In sync", e);
438 }
439 latch.countDown();
440 LOG.info("Sync exiting");
441 };
442 };
443 t.setDaemon(true);
444 t.start();
445 try {
446
447 Thread.sleep(100);
448 } catch (InterruptedException e1) {
449 e1.printStackTrace();
450 }
451
452 key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(),
453 TableName.valueOf("DamagedWALException"), System.currentTimeMillis(), mvcc);
454 dodgyWAL2.append(htd, region.getRegionInfo(), key, edit, true);
455
456 while (latch.getCount() > 0) {
457 Threads.sleep(100);
458 }
459 assertTrue(server.isAborted());
460 } finally {
461 if (logRoller != null) {
462 logRoller.interrupt();
463 }
464 try {
465 if (region != null) {
466 region.close();
467 }
468 if (dodgyWAL1 != null) {
469 dodgyWAL1.close();
470 }
471 if (dodgyWAL2 != null) {
472 dodgyWAL2.close();
473 }
474 } catch (Exception e) {
475 LOG.info("On way out", e);
476 }
477 }
478 }
479
480 static class DummyServer implements Server {
481 private Configuration conf;
482 private String serverName;
483 private boolean isAborted = false;
484
485 public DummyServer(Configuration conf, String serverName) {
486 this.conf = conf;
487 this.serverName = serverName;
488 }
489
490 @Override
491 public Configuration getConfiguration() {
492 return conf;
493 }
494
495 @Override
496 public ZooKeeperWatcher getZooKeeper() {
497 return null;
498 }
499
500 @Override
501 public CoordinatedStateManager getCoordinatedStateManager() {
502 return null;
503 }
504
505 @Override
506 public ClusterConnection getConnection() {
507 return null;
508 }
509
510 @Override
511 public MetaTableLocator getMetaTableLocator() {
512 return null;
513 }
514
515 @Override
516 public ServerName getServerName() {
517 return ServerName.valueOf(this.serverName);
518 }
519
520 @Override
521 public void abort(String why, Throwable e) {
522 LOG.info("Aborting " + serverName);
523 this.isAborted = true;
524 }
525
526 @Override
527 public boolean isAborted() {
528 return this.isAborted;
529 }
530
531 @Override
532 public void stop(String why) {
533 this.isAborted = true;
534 }
535
536 @Override
537 public boolean isStopped() {
538 return this.isAborted;
539 }
540
541 @Override
542 public ChoreService getChoreService() {
543 return null;
544 }
545
546 }
547
548 static class DummyWALActionsListener extends WALActionsListener.Base {
549
550 @Override
551 public void visitLogEntryBeforeWrite(HTableDescriptor htd, WALKey logKey,
552 WALEdit logEdit) throws IOException {
553 if (logKey.getTablename().getNameAsString().equalsIgnoreCase("sleep")) {
554 try {
555 Thread.sleep(1000);
556 } catch (InterruptedException e) {
557 e.printStackTrace();
558 }
559 }
560 if (logKey.getTablename().getNameAsString()
561 .equalsIgnoreCase("DamagedWALException")) {
562 throw new DamagedWALException("Failed appending");
563 }
564 }
565
566 }
567
568
569
570
571
572 public HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
573 Configuration conf, WAL wal) throws IOException {
574 return TEST_UTIL.createLocalHRegion(tableName.getName(), startKey, stopKey,
575 getName(), conf, false, Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
576 }
577 }