View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.wal;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import com.google.common.base.Joiner;
27  import com.google.common.collect.ImmutableList;
28  import com.google.common.collect.ImmutableMap;
29  import com.google.protobuf.ByteString;
30  
31  import java.io.FileNotFoundException;
32  import java.io.IOException;
33  import java.lang.reflect.Method;
34  import java.security.PrivilegedExceptionAction;
35  import java.util.ArrayList;
36  import java.util.Arrays;
37  import java.util.Collections;
38  import java.util.HashMap;
39  import java.util.HashSet;
40  import java.util.List;
41  import java.util.Map;
42  import java.util.NavigableSet;
43  import java.util.Set;
44  import java.util.concurrent.atomic.AtomicBoolean;
45  import java.util.concurrent.atomic.AtomicInteger;
46  import java.util.concurrent.atomic.AtomicLong;
47  
48  import org.apache.commons.logging.Log;
49  import org.apache.commons.logging.LogFactory;
50  import org.apache.hadoop.conf.Configuration;
51  import org.apache.hadoop.fs.FSDataInputStream;
52  import org.apache.hadoop.fs.FSDataOutputStream;
53  import org.apache.hadoop.fs.FileStatus;
54  import org.apache.hadoop.fs.FileSystem;
55  import org.apache.hadoop.fs.FileUtil;
56  import org.apache.hadoop.fs.Path;
57  import org.apache.hadoop.fs.PathFilter;
58  import org.apache.hadoop.hbase.Cell;
59  import org.apache.hadoop.hbase.HBaseConfiguration;
60  import org.apache.hadoop.hbase.HBaseTestingUtility;
61  import org.apache.hadoop.hbase.HConstants;
62  import org.apache.hadoop.hbase.HRegionInfo;
63  import org.apache.hadoop.hbase.KeyValue;
64  import org.apache.hadoop.hbase.ServerName;
65  import org.apache.hadoop.hbase.TableName;
66  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
67  import org.apache.hadoop.hbase.protobuf.generated.WALProtos;
68  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
69  import org.apache.hadoop.hbase.regionserver.wal.FaultySequenceFileLogReader;
70  import org.apache.hadoop.hbase.regionserver.wal.InstrumentedLogWriter;
71  import org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader;
72  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
73  import org.apache.hadoop.hbase.security.User;
74  import org.apache.hadoop.hbase.testclassification.LargeTests;
75  import org.apache.hadoop.hbase.testclassification.RegionServerTests;
76  import org.apache.hadoop.hbase.util.Bytes;
77  import org.apache.hadoop.hbase.util.CancelableProgressable;
78  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
79  import org.apache.hadoop.hbase.util.FSUtils;
80  import org.apache.hadoop.hbase.util.Threads;
81  import org.apache.hadoop.hbase.wal.WAL.Entry;
82  import org.apache.hadoop.hbase.wal.WAL.Reader;
83  import org.apache.hadoop.hbase.wal.WALProvider.Writer;
84  import org.apache.hadoop.hbase.wal.WALSplitter.CorruptedLogFileException;
85  import org.apache.hadoop.hdfs.DFSTestUtil;
86  import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
87  import org.apache.hadoop.ipc.RemoteException;
88  import org.junit.After;
89  import org.junit.AfterClass;
90  import org.junit.Before;
91  import org.junit.BeforeClass;
92  import org.junit.Rule;
93  import org.junit.Test;
94  import org.junit.experimental.categories.Category;
95  import org.junit.rules.TestName;
96  import org.mockito.Mockito;
97  import org.mockito.invocation.InvocationOnMock;
98  import org.mockito.stubbing.Answer;
99  
100 /**
101  * Testing {@link WAL} splitting code.
102  */
103 @Category({RegionServerTests.class, LargeTests.class})
104 public class TestWALSplit {
  {
    // Uncomment the following lines if more verbosity is needed for
    // debugging (see HBASE-12285 for details).
    //((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
    //((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
    //((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL);
  }
  private final static Log LOG = LogFactory.getLog(TestWALSplit.class);

  // Shared test configuration; populated once in setUpBeforeClass().
  private static Configuration conf;
  // Filesystem of the mini DFS cluster; refreshed for each test in setUp().
  private FileSystem fs;

  protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  // Paths recomputed per test in setUp().
  private Path HBASEDIR;      // HBase root directory
  private Path HBASELOGDIR;   // WAL root directory
  private Path WALDIR;        // per-test WAL dir (named after the test method)
  private Path OLDLOGDIR;     // archive dir for successfully split WALs
  private Path CORRUPTDIR;    // sideline dir for WALs deemed corrupt
  private Path TABLEDIR;      // directory of TABLE_NAME under HBASEDIR
  private String TMPDIRNAME;  // temporary FS dir name consulted by the splitter

  private static final int NUM_WRITERS = 10;
  private static final int ENTRIES = 10; // entries per writer per region

  private static final String FILENAME_BEING_SPLIT = "testfile";
  private static final TableName TABLE_NAME =
      TableName.valueOf("t1");
  private static final byte[] FAMILY = "f1".getBytes();
  private static final byte[] QUALIFIER = "q1".getBytes();
  private static final byte[] VALUE = "v1".getBytes();
  private static final String WAL_FILE_PREFIX = "wal.dat.";
  // Regions edits are written for; reset to {"bbb","ccc"} in setUp(), but
  // individual tests overwrite it with their own region list.
  private static List<String> REGIONS = new ArrayList<String>();
  private static final String HBASE_SKIP_ERRORS = "hbase.hlog.split.skip.errors";
  // Test user names (derived from the current user in setUpBeforeClass()):
  // ROBBER runs the split, ZOMBIE keeps writing the WAL being split.
  private static String ROBBER;
  private static String ZOMBIE;
  private static String [] GROUP = new String [] {"supergroup"};
  private RecoveryMode mode;

  // Ways corruptWAL(...) can damage a WAL file.
  // TRUNCATE drops the tail of the file (loses the last entry; see
  // testEOFisIgnored), TRUNCATE_TRAILER removes only the trailer (no entries
  // lost; see testCorruptWALTrailer).
  static enum Corruptions {
    INSERT_GARBAGE_ON_FIRST_LINE,
    INSERT_GARBAGE_IN_THE_MIDDLE,
    APPEND_GARBAGE,
    TRUNCATE,
    TRUNCATE_TRAILER
  }
151 
  /** One-time setup: configure the shared conf and start a 2-node mini DFS cluster. */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    conf = TEST_UTIL.getConfiguration();
    // Use the instrumented writer so individual tests can inject write failures.
    conf.setClass("hbase.regionserver.hlog.writer.impl",
        InstrumentedLogWriter.class, Writer.class);
    // This is how you turn off shortcircuit read currently.  TODO: Fix.  Should read config.
    System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
    // Create fake maping user to group and set it to the conf.
    Map<String, String []> u2g_map = new HashMap<String, String []>(2);
    ROBBER = User.getCurrent().getName() + "-robber";
    ZOMBIE = User.getCurrent().getName() + "-zombie";
    u2g_map.put(ROBBER, GROUP);
    u2g_map.put(ZOMBIE, GROUP);
    DFSTestUtil.updateConfWithFakeGroupMapping(conf, u2g_map);
    // Fast heartbeats so the mini cluster reacts quickly in these tests.
    conf.setInt("dfs.heartbeat.interval", 1);
    TEST_UTIL.startMiniDFSCluster(2);
  }
169 
  /** Shuts down the mini DFS cluster started in {@link #setUpBeforeClass()}. */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniDFSCluster();
  }
174 
  // Supplies the current test method name; used to derive per-test WAL
  // factory names and WAL directories so tests don't collide.
  @Rule
  public TestName name = new TestName();
  // Per-test WAL factory; created in setUp(), best-effort closed in tearDown().
  private WALFactory wals = null;
178 
  /** Per-test setup: recompute all working paths and build a fresh WALFactory. */
  @Before
  public void setUp() throws Exception {
    LOG.info("Cleaning up cluster for new test.");
    fs = TEST_UTIL.getDFSCluster().getFileSystem();
    HBASEDIR = TEST_UTIL.createRootDir();
    HBASELOGDIR = TEST_UTIL.createWALRootDir();
    OLDLOGDIR = new Path(HBASELOGDIR, HConstants.HREGION_OLDLOGDIR_NAME);
    CORRUPTDIR = new Path(HBASELOGDIR, HConstants.CORRUPT_DIR_NAME);
    TABLEDIR = FSUtils.getTableDir(HBASEDIR, TABLE_NAME);
    TMPDIRNAME = conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
      HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
    // Default region set; many tests replace this with their own regions.
    REGIONS.clear();
    Collections.addAll(REGIONS, "bbb", "ccc");
    InstrumentedLogWriter.activateFailure = false;
    this.mode = (conf.getBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false) ?
        RecoveryMode.LOG_REPLAY : RecoveryMode.LOG_SPLITTING);
    // Name factory and WAL dir after the test method to keep runs isolated.
    wals = new WALFactory(conf, null, name.getMethodName());
    WALDIR = new Path(HBASELOGDIR, DefaultWALProvider.getWALDirectoryName(name.getMethodName()));
    //fs.mkdirs(WALDIR);
  }
199 
  /**
   * Per-test cleanup: close the WALFactory (errors tolerated, since some tests
   * deliberately move WALs out from under it) and wipe both root dirs.
   */
  @After
  public void tearDown() throws Exception {
    try {
      wals.close();
    } catch(IOException exception) {
      // Some tests will move WALs out from under us. In those cases, we'll get an error on close.
      LOG.info("Ignoring an error while closing down our WALFactory. Fine for some tests, but if" +
          " you see a failure look here.");
      LOG.debug("exception details", exception);
    } finally {
      wals = null;
      fs.delete(HBASEDIR, true);
      fs.delete(HBASELOGDIR, true);
    }
  }
215 
  /**
   * Simulates splitting a WAL out from under a regionserver that is still trying to write it.
   * Ensures we do not lose edits.
   * @throws IOException
   * @throws InterruptedException
   */
  @Test (timeout=300000)
  public void testLogCannotBeWrittenOnceParsed() throws IOException, InterruptedException {
    final AtomicLong counter = new AtomicLong(0);
    AtomicBoolean stop = new AtomicBoolean(false);
    // Region we'll write edits too and then later examine to make sure they all made it in.
    final String region = REGIONS.get(0);
    final int numWriters = 3;
    Thread zombie = new ZombieLastLogWriterRegionServer(counter, stop, region, numWriters);
    try {
      long startCount = counter.get();
      zombie.start();
      // Wait till writer starts going.
      while (startCount == counter.get()) Threads.sleep(1);
      // Give it a second to write a few appends.
      Threads.sleep(1000);
      // Split as a different user ("robber") with its own Configuration, to
      // mimic another server taking over the presumed-dead server's logs.
      final Configuration conf2 = HBaseConfiguration.create(this.conf);
      final User robber = User.createUserForTesting(conf2, ROBBER, GROUP);
      int count = robber.runAs(new PrivilegedExceptionAction<Integer>() {
        @Override
        public Integer run() throws Exception {
          StringBuilder ls = new StringBuilder("Contents of WALDIR (").append(WALDIR)
              .append("):\n");
          for (FileStatus status : fs.listStatus(WALDIR)) {
            ls.append("\t").append(status.toString()).append("\n");
          }
          LOG.debug(ls);
          LOG.info("Splitting WALs out from under zombie. Expecting " + numWriters + " files.");
          WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf2, wals);
          LOG.info("Finished splitting out from under zombie.");
          Path[] logfiles = getLogForRegion(TABLE_NAME, region);
          assertEquals("wrong number of split files for region", numWriters, logfiles.length);
          int count = 0;
          for (Path logfile: logfiles) {
            count += countWAL(logfile);
          }
          return count;
        }
      });
      LOG.info("zombie=" + counter.get() + ", robber=" + count);
      // The zombie's counter may trail the on-disk count by at most one edit:
      // an append can land after the counter was read but before the split.
      assertTrue("The log file could have at most 1 extra log entry, but can't have less. " +
              "Zombie could write " + counter.get() + " and logfile had only " + count,
          counter.get() == count || counter.get() + 1 == count);
    } finally {
      stop.set(true);
      zombie.interrupt();
      Threads.threadDumpingIsAlive(zombie);
    }
  }
270 
  /**
   * This thread will keep writing to a 'wal' file even after the split process has started.
   * It simulates a region server that was considered dead but woke up and wrote some more to the
   * last log entry. Does its writing as an alternate user in another filesystem instance to
   * simulate better it being a regionserver.
   */
  class ZombieLastLogWriterRegionServer extends Thread {
    // Shared with the test thread; incremented for every edit written.
    final AtomicLong editsCount;
    // Set by the test thread to ask this writer to stop.
    final AtomicBoolean stop;
    // Number of WAL files generateWALs() creates; the last is kept open.
    final int numOfWriters;
    /**
     * Region to write edits for.
     */
    final String region;
    // Alternate ("zombie") user; runAs gives it its own FileSystem instance.
    final User user;

    public ZombieLastLogWriterRegionServer(AtomicLong counter, AtomicBoolean stop,
        final String region, final int writers)
        throws IOException, InterruptedException {
      super("ZombieLastLogWriterRegionServer");
      setDaemon(true);
      this.stop = stop;
      this.editsCount = counter;
      this.region = region;
      this.user = User.createUserForTesting(conf, ZOMBIE, GROUP);
      numOfWriters = writers;
    }

    @Override
    public void run() {
      try {
        doWriting();
      } catch (IOException e) {
        LOG.warn(getName() + " Writer exiting " + e);
      } catch (InterruptedException e) {
        LOG.warn(getName() + " Writer exiting " + e);
      }
    }

    // Generates the initial WALs as the zombie user, then keeps appending to
    // the still-open last WAL until stopped or the split yanks it away.
    private void doWriting() throws IOException, InterruptedException {
      this.user.runAs(new PrivilegedExceptionAction<Object>() {
        @Override
        public Object run() throws Exception {
          // Index of the WAL we want to keep open.  generateWALs will leave open the WAL whose
          // index we supply here.
          int walToKeepOpen = numOfWriters - 1;
          // The below method writes numOfWriters files each with ENTRIES entries for a total of
          // numOfWriters * ENTRIES added per column family in the region.
          Writer writer = null;
          try {
            writer = generateWALs(numOfWriters, ENTRIES, walToKeepOpen);
          } catch (IOException e1) {
            throw new RuntimeException("Failed", e1);
          }
          // Update counter so has all edits written so far.
          editsCount.addAndGet(numOfWriters * ENTRIES);
          loop(writer);
          // If we've been interruped, then things should have shifted out from under us.
          // closing should error
          try {
            writer.close();
            fail("Writing closing after parsing should give an error.");
          } catch (IOException exception) {
            LOG.debug("ignoring error when closing final writer.", exception);
          }
          return null;
        }
      });
    }

    // Appends one edit at a time until stopped; an IOException from the append
    // is the expected exit once the split has taken over the file.
    private void loop(final Writer writer) {
      byte [] regionBytes = Bytes.toBytes(this.region);
      while (!stop.get()) {
        try {
          long seq = appendEntry(writer, TABLE_NAME, regionBytes,
              ("r" + editsCount.get()).getBytes(), regionBytes, QUALIFIER, VALUE, 0);
          long count = editsCount.incrementAndGet();
          LOG.info(getName() + " sync count=" + count + ", seq=" + seq);
          try {
            Thread.sleep(1);
          } catch (InterruptedException e) {
            //
          }
        } catch (IOException ex) {
          LOG.error(getName() + " ex " + ex.toString());
          if (ex instanceof RemoteException) {
            LOG.error("Juliet: got RemoteException " + ex.getMessage() +
                " while writing " + (editsCount.get() + 1));
          } else {
            LOG.error(getName() + " failed to write....at " + editsCount.get());
            fail("Failed to write " + editsCount.get());
          }
          break;
        } catch (Throwable t) {
          LOG.error(getName() + " HOW? " + t);
          LOG.debug("exception details", t);
          break;
        }
      }
      LOG.info(getName() + " Writer exiting");
    }
  }
373 
374   /**
375    * @throws IOException
376    * @see https://issues.apache.org/jira/browse/HBASE-3020
377    */
378   @Test (timeout=300000)
379   public void testRecoveredEditsPathForMeta() throws IOException {
380     byte [] encoded = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
381     Path tdir = FSUtils.getTableDir(HBASEDIR, TableName.META_TABLE_NAME);
382     Path regiondir = new Path(tdir,
383         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
384     fs.mkdirs(regiondir);
385     long now = System.currentTimeMillis();
386     Entry entry =
387         new Entry(new WALKey(encoded,
388             TableName.META_TABLE_NAME, 1, now, HConstants.DEFAULT_CLUSTER_ID),
389             new WALEdit());
390     Path p = WALSplitter.getRegionSplitEditsPath(entry,
391         FILENAME_BEING_SPLIT, TMPDIRNAME, conf);
392     String parentOfParent = p.getParent().getParent().getName();
393     assertEquals(parentOfParent, HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
394   }
395 
396   /**
397    * Test old recovered edits file doesn't break WALSplitter.
398    * This is useful in upgrading old instances.
399    */
400   @Test (timeout=300000)
401   public void testOldRecoveredEditsFileSidelined() throws IOException {
402     byte [] encoded = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
403     Path tdir = FSUtils.getTableDir(HBASEDIR, TableName.META_TABLE_NAME);
404     Path regiondir = new Path(tdir,
405         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
406     fs.mkdirs(regiondir);
407     long now = System.currentTimeMillis();
408     Entry entry =
409         new Entry(new WALKey(encoded,
410             TableName.META_TABLE_NAME, 1, now, HConstants.DEFAULT_CLUSTER_ID),
411             new WALEdit());
412     Path parent = WALSplitter.getRegionDirRecoveredEditsDir(regiondir);
413     assertEquals(parent.getName(), HConstants.RECOVERED_EDITS_DIR);
414     fs.createNewFile(parent); // create a recovered.edits file
415 
416     Path p = WALSplitter.getRegionSplitEditsPath(entry,
417         FILENAME_BEING_SPLIT, TMPDIRNAME, conf);
418     String parentOfParent = p.getParent().getParent().getName();
419     assertEquals(parentOfParent, HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
420     WALFactory.createRecoveredEditsWriter(fs, p, conf).close();
421   }
422 
  /**
   * Re-initializes {@link #fs} so it behaves as a brand-new DFS client (new
   * client ID), the way the server performing the split would appear to HDFS.
   */
  private void useDifferentDFSClient() throws IOException {
    // make fs act as a different client now
    // initialize will create a new DFSClient with a new client ID
    fs.initialize(fs.getUri(), conf);
  }
428 
429   @Test (timeout=300000)
430   public void testSplitPreservesEdits() throws IOException{
431     final String REGION = "region__1";
432     REGIONS.clear();
433     REGIONS.add(REGION);
434 
435     generateWALs(1, 10, -1, 0);
436     useDifferentDFSClient();
437     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
438     Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath();
439     Path[] splitLog = getLogForRegion(TABLE_NAME, REGION);
440     assertEquals(1, splitLog.length);
441 
442     assertTrue("edits differ after split", logsAreEqual(originalLog, splitLog[0]));
443   }
444 
445   @Test (timeout=300000)
446   public void testSplitRemovesRegionEventsEdits() throws IOException{
447     final String REGION = "region__1";
448     REGIONS.clear();
449     REGIONS.add(REGION);
450 
451     generateWALs(1, 10, -1, 100);
452     useDifferentDFSClient();
453     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
454     Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath();
455     Path[] splitLog = getLogForRegion(TABLE_NAME, REGION);
456     assertEquals(1, splitLog.length);
457 
458     assertFalse("edits differ after split", logsAreEqual(originalLog, splitLog[0]));
459     // split log should only have the test edits
460     assertEquals(10, countWAL(splitLog[0]));
461   }
462 
463 
  /**
   * Compaction marker edits, unlike region-event markers, must survive the
   * split so the region can complete the compaction on replay.
   */
  @Test (timeout=300000)
  public void testSplitLeavesCompactionEventsEdits() throws IOException{
    HRegionInfo hri = new HRegionInfo(TABLE_NAME);
    REGIONS.clear();
    REGIONS.add(hri.getEncodedName());
    Path regionDir = new Path(FSUtils.getTableDir(HBASEDIR, TABLE_NAME), hri.getEncodedName());
    LOG.info("Creating region directory: " + regionDir);
    assertTrue(fs.mkdirs(regionDir));

    // One WAL with 10 row edits and 10 region-event markers, left open so the
    // compaction marker can be appended below.
    Writer writer = generateWALs(1, 10, 0, 10);
    String[] compactInputs = new String[]{"file1", "file2", "file3"};
    String compactOutput = "file4";
    appendCompactionEvent(writer, hri, compactInputs, compactOutput);
    writer.close();

    useDifferentDFSClient();
    WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);

    Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath();
    // original log should have 10 test edits, 10 region markers, 1 compaction marker
    assertEquals(21, countWAL(originalLog));

    Path[] splitLog = getLogForRegion(TABLE_NAME, hri.getEncodedName());
    assertEquals(1, splitLog.length);

    assertFalse("edits differ after split", logsAreEqual(originalLog, splitLog[0]));
    // split log should have 10 test edits plus 1 compaction marker
    assertEquals(11, countWAL(splitLog[0]));
  }
493 
494   /**
495    * @param expectedEntries -1 to not assert
496    * @return the count across all regions
497    */
498   private int splitAndCount(final int expectedFiles, final int expectedEntries)
499       throws IOException {
500     useDifferentDFSClient();
501     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
502     int result = 0;
503     for (String region : REGIONS) {
504       Path[] logfiles = getLogForRegion(TABLE_NAME, region);
505       assertEquals(expectedFiles, logfiles.length);
506       int count = 0;
507       for (Path logfile: logfiles) {
508         count += countWAL(logfile);
509       }
510       if (-1 != expectedEntries) {
511         assertEquals(expectedEntries, count);
512       }
513       result += count;
514     }
515     return result;
516   }
517 
  /** Empty, properly closed WAL files mixed into the dir must be skipped. */
  @Test (timeout=300000)
  public void testEmptyLogFiles() throws IOException {
    testEmptyLogFiles(true);
  }
522 
  /** Empty WAL files left open (never properly closed) must also be skipped. */
  @Test (timeout=300000)
  public void testEmptyOpenLogFiles() throws IOException {
    testEmptyLogFiles(false);
  }
527 
  /**
   * Splits a full set of WALs with two empty files injected (one sorting first,
   * one last) and verifies the empty files don't affect the recovered count.
   * @param close whether the injected empty files are properly closed
   */
  private void testEmptyLogFiles(final boolean close) throws IOException {
    // we won't create the hlog dir until getWAL got called, so
    // make dir here when testing empty log file
    fs.mkdirs(WALDIR);
    injectEmptyFile(".empty", close);
    generateWALs(Integer.MAX_VALUE);
    injectEmptyFile("empty", close);
    splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES); // skip 2 empty
  }
537 
  /**
   * A WAL still open for write (so the namenode may report zero length) but
   * containing data must still be split without losing entries.
   */
  @Test (timeout=300000)
  public void testOpenZeroLengthReportedFileButWithDataGetsSplit() throws IOException {
    // generate logs but leave wal.dat.5 open.
    generateWALs(5);
    splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES);
  }
544 
  /**
   * Garbage appended after the last entry must not lose any entries when
   * skip-errors is on. (NOTE(review): method name carries a long-standing
   * "Traling" typo; left unchanged since test names may be referenced
   * externally, e.g. in test filters.)
   */
  @Test (timeout=300000)
  public void testTralingGarbageCorruptionFileSkipErrorsPasses() throws IOException {
    conf.setBoolean(HBASE_SKIP_ERRORS, true);
    generateWALs(Integer.MAX_VALUE);
    corruptWAL(new Path(WALDIR, WAL_FILE_PREFIX + "5"),
        Corruptions.APPEND_GARBAGE, true);
    splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES);
  }
553 
  /**
   * Garbage at the very start of a WAL makes the entire file unreadable; with
   * skip-errors on, that one file is dropped and the rest split normally.
   */
  @Test (timeout=300000)
  public void testFirstLineCorruptionLogFileSkipErrorsPasses() throws IOException {
    conf.setBoolean(HBASE_SKIP_ERRORS, true);
    generateWALs(Integer.MAX_VALUE);
    corruptWAL(new Path(WALDIR, WAL_FILE_PREFIX + "5"),
        Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true);
    splitAndCount(NUM_WRITERS - 1, (NUM_WRITERS - 1) * ENTRIES); //1 corrupt
  }
562 
563   @Test (timeout=300000)
564   public void testMiddleGarbageCorruptionSkipErrorsReadsHalfOfFile() throws IOException {
565     conf.setBoolean(HBASE_SKIP_ERRORS, true);
566     generateWALs(Integer.MAX_VALUE);
567     corruptWAL(new Path(WALDIR, WAL_FILE_PREFIX + "5"),
568         Corruptions.INSERT_GARBAGE_IN_THE_MIDDLE, false);
569     // the entries in the original logs are alternating regions
570     // considering the sequence file header, the middle corruption should
571     // affect at least half of the entries
572     int goodEntries = (NUM_WRITERS - 1) * ENTRIES;
573     int firstHalfEntries = (int) Math.ceil(ENTRIES / 2) - 1;
574     int allRegionsCount = splitAndCount(NUM_WRITERS, -1);
575     assertTrue("The file up to the corrupted area hasn't been parsed",
576         REGIONS.size() * (goodEntries + firstHalfEntries) <= allRegionsCount);
577   }
578 
  /**
   * For every faulty-reader failure mode, all WALs rendered unreadable must be
   * sidelined into CORRUPTDIR when skip-errors is on.
   */
  @Test (timeout=300000)
  public void testCorruptedFileGetsArchivedIfSkipErrors() throws IOException {
    conf.setBoolean(HBASE_SKIP_ERRORS, true);
    for (FaultySequenceFileLogReader.FailureType  failureType :
        FaultySequenceFileLogReader.FailureType.values()) {
      final Set<String> walDirContents = splitCorruptWALs(failureType);
      final Set<String> archivedLogs = new HashSet<String>();
      final StringBuilder archived = new StringBuilder("Archived logs in CORRUPTDIR:");
      for (FileStatus log : fs.listStatus(CORRUPTDIR)) {
        archived.append("\n\t").append(log.toString());
        archivedLogs.add(log.getPath().getName());
      }
      LOG.debug(archived.toString());
      // Exactly the files that were in the WAL dir must now sit in CORRUPTDIR.
      assertEquals(failureType.name() + ": expected to find all of our wals corrupt.",
          walDirContents, archivedLogs);
    }
  }
596 
  /**
   * Swaps in the faulty WAL reader, regenerates a full set of WALs, and runs a
   * split over them; the original reader class is restored afterwards.
   * @return set of wal names present prior to split attempt.
   * @throws IOException if the split process fails
   */
  private Set<String> splitCorruptWALs(final FaultySequenceFileLogReader.FailureType failureType)
      throws IOException {
    // Remember the real reader impl so the finally block can restore it.
    Class<?> backupClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
        Reader.class);
    InstrumentedLogWriter.activateFailure = false;

    try {
      conf.setClass("hbase.regionserver.hlog.reader.impl",
          FaultySequenceFileLogReader.class, Reader.class);
      conf.set("faultysequencefilelogreader.failuretype", failureType.name());
      // Clean up from previous tests or previous loop
      try {
        wals.shutdown();
      } catch (IOException exception) {
        // since we're splitting out from under the factory, we should expect some closing failures.
        LOG.debug("Ignoring problem closing WALFactory.", exception);
      }
      wals.close();
      try {
        for (FileStatus log : fs.listStatus(CORRUPTDIR)) {
          fs.delete(log.getPath(), true);
        }
      } catch (FileNotFoundException exception) {
        LOG.debug("no previous CORRUPTDIR to clean.");
      }
      // change to the faulty reader
      wals = new WALFactory(conf, null, name.getMethodName());
      generateWALs(-1);
      // Our reader will render all of these files corrupt.
      final Set<String> walDirContents = new HashSet<String>();
      for (FileStatus status : fs.listStatus(WALDIR)) {
        walDirContents.add(status.getPath().getName());
      }
      useDifferentDFSClient();
      WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
      return walDirContents;
    } finally {
      conf.setClass("hbase.regionserver.hlog.reader.impl", backupClass,
          Reader.class);
    }
  }
642 
  /**
   * With skip-errors off, encountering a corrupt WAL must abort the entire
   * split with an IOException.
   */
  @Test (timeout=300000, expected = IOException.class)
  public void testTrailingGarbageCorruptionLogFileSkipErrorsFalseThrows()
      throws IOException {
    conf.setBoolean(HBASE_SKIP_ERRORS, false);
    splitCorruptWALs(FaultySequenceFileLogReader.FailureType.BEGINNING);
  }
649 
650   @Test (timeout=300000)
651   public void testCorruptedLogFilesSkipErrorsFalseDoesNotTouchLogs()
652       throws IOException {
653     conf.setBoolean(HBASE_SKIP_ERRORS, false);
654     try {
655       splitCorruptWALs(FaultySequenceFileLogReader.FailureType.BEGINNING);
656     } catch (IOException e) {
657       LOG.debug("split with 'skip errors' set to 'false' correctly threw");
658     }
659     assertEquals("if skip.errors is false all files should remain in place",
660         NUM_WRITERS, fs.listStatus(WALDIR).length);
661   }
662 
663   private void ignoreCorruption(final Corruptions corruption, final int entryCount,
664       final int expectedCount) throws IOException {
665     conf.setBoolean(HBASE_SKIP_ERRORS, false);
666 
667     final String REGION = "region__1";
668     REGIONS.clear();
669     REGIONS.add(REGION);
670 
671     Path c1 = new Path(WALDIR, WAL_FILE_PREFIX + "0");
672     generateWALs(1, entryCount, -1, 0);
673     corruptWAL(c1, corruption, true);
674 
675     useDifferentDFSClient();
676     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
677 
678     Path[] splitLog = getLogForRegion(TABLE_NAME, REGION);
679     assertEquals(1, splitLog.length);
680 
681     int actualCount = 0;
682     Reader in = wals.createReader(fs, splitLog[0]);
683     @SuppressWarnings("unused")
684     Entry entry;
685     while ((entry = in.next()) != null) ++actualCount;
686     assertEquals(expectedCount, actualCount);
687     in.close();
688 
689     // should not have stored the EOF files as corrupt
690     FileStatus[] archivedLogs = fs.listStatus(CORRUPTDIR);
691     assertEquals(archivedLogs.length, 0);
692 
693   }
694 
  /** A WAL truncated mid-entry loses only the final, partial entry. */
  @Test (timeout=300000)
  public void testEOFisIgnored() throws IOException {
    int entryCount = 10;
    ignoreCorruption(Corruptions.TRUNCATE, entryCount, entryCount-1);
  }
700 
  /** Losing only the WAL trailer must not lose any entries. */
  @Test (timeout=300000)
  public void testCorruptWALTrailer() throws IOException {
    int entryCount = 10;
    ignoreCorruption(Corruptions.TRUNCATE_TRAILER, entryCount, entryCount);
  }
706 
707   @Test (timeout=300000)
708   public void testLogsGetArchivedAfterSplit() throws IOException {
709     conf.setBoolean(HBASE_SKIP_ERRORS, false);
710     generateWALs(-1);
711     useDifferentDFSClient();
712     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
713     FileStatus[] archivedLogs = fs.listStatus(OLDLOGDIR);
714     assertEquals("wrong number of files in the archive log", NUM_WRITERS, archivedLogs.length);
715   }
716 
  /** Baseline: a full set of healthy WALs splits with no entries lost. */
  @Test (timeout=300000)
  public void testSplit() throws IOException {
    generateWALs(-1);
    splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES);
  }
722 
  /** After a successful split the source WAL directory itself must be gone. */
  @Test (timeout=300000)
  public void testLogDirectoryShouldBeDeletedAfterSuccessfulSplit()
      throws IOException {
    generateWALs(-1);
    useDifferentDFSClient();
    WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
    FileStatus [] statuses = null;
    try {
      statuses = fs.listStatus(WALDIR);
      if (statuses != null) {
        fail("Files left in log dir: " +
            Joiner.on(",").join(FileUtil.stat2Paths(statuses)));
      }
    } catch (FileNotFoundException e) {
      // hadoop 0.21 throws FNFE whereas hadoop 0.20 returns null
    }
  }
740 
  /**
   * If writing a recovered-edits file fails (simulated via the instrumented
   * writer), the split as a whole must fail rather than silently drop edits.
   */
  @Test(timeout=300000, expected = IOException.class)
  public void testSplitWillFailIfWritingToRegionFails() throws Exception {
    //leave 5th log open so we could append the "trap"
    Writer writer = generateWALs(4);
    useDifferentDFSClient();

    // NOTE(review): presumably InstrumentedLogWriter keys its injected failure
    // off this region name / appended entry — confirm in InstrumentedLogWriter.
    String region = "break";
    Path regiondir = new Path(TABLEDIR, region);
    fs.mkdirs(regiondir);

    InstrumentedLogWriter.activateFailure = false;
    appendEntry(writer, TABLE_NAME, Bytes.toBytes(region),
        ("r" + 999).getBytes(), FAMILY, QUALIFIER, VALUE, 0);
    writer.close();

    try {
      InstrumentedLogWriter.activateFailure = true;
      WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
    } catch (IOException e) {
      // Verify we failed for the instrumented reason, then rethrow to satisfy
      // the @Test(expected=...) contract.
      assertTrue(e.getMessage().
          contains("This exception is instrumented and should only be thrown for testing"));
      throw e;
    } finally {
      InstrumentedLogWriter.activateFailure = false;
    }
  }
767 
768   @Test (timeout=300000)
769   public void testSplitDeletedRegion() throws IOException {
770     REGIONS.clear();
771     String region = "region_that_splits";
772     REGIONS.add(region);
773 
774     generateWALs(1);
775     useDifferentDFSClient();
776 
777     Path regiondir = new Path(TABLEDIR, region);
778     fs.delete(regiondir, true);
779     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
780     assertFalse(fs.exists(regiondir));
781   }
782 
783   @Test (timeout=300000)
784   public void testIOEOnOutputThread() throws Exception {
785     conf.setBoolean(HBASE_SKIP_ERRORS, false);
786 
787     generateWALs(-1);
788     useDifferentDFSClient();
789     FileStatus[] logfiles = fs.listStatus(WALDIR);
790     assertTrue("There should be some log file",
791         logfiles != null && logfiles.length > 0);
792     // wals with no entries (like the one we don't use in the factory)
793     // won't cause a failure since nothing will ever be written.
794     // pick the largest one since it's most likely to have entries.
795     int largestLogFile = 0;
796     long largestSize = 0;
797     for (int i = 0; i < logfiles.length; i++) {
798       if (logfiles[i].getLen() > largestSize) {
799         largestLogFile = i;
800         largestSize = logfiles[i].getLen();
801       }
802     }
803     assertTrue("There should be some log greater than size 0.", 0 < largestSize);
804     // Set up a splitter that will throw an IOE on the output side
805     WALSplitter logSplitter = new WALSplitter(wals,
806         conf, HBASEDIR, fs, HBASEDIR, fs, null, null, this.mode) {
807       @Override
808       protected Writer createWriter(Path logfile) throws IOException {
809         Writer mockWriter = Mockito.mock(Writer.class);
810         Mockito.doThrow(new IOException("Injected")).when(
811             mockWriter).append(Mockito.<Entry>any());
812         return mockWriter;
813       }
814     };
815     // Set up a background thread dumper.  Needs a thread to depend on and then we need to run
816     // the thread dumping in a background thread so it does not hold up the test.
817     final AtomicBoolean stop = new AtomicBoolean(false);
818     final Thread someOldThread = new Thread("Some-old-thread") {
819       @Override
820       public void run() {
821         while(!stop.get()) Threads.sleep(10);
822       }
823     };
824     someOldThread.setDaemon(true);
825     someOldThread.start();
826     final Thread t = new Thread("Background-thread-dumper") {
827       public void run() {
828         try {
829           Threads.threadDumpingIsAlive(someOldThread);
830         } catch (InterruptedException e) {
831           e.printStackTrace();
832         }
833       }
834     };
835     t.setDaemon(true);
836     t.start();
837     try {
838       logSplitter.splitLogFile(logfiles[largestLogFile], null);
839       fail("Didn't throw!");
840     } catch (IOException ioe) {
841       assertTrue(ioe.toString().contains("Injected"));
842     } finally {
843       // Setting this to true will turn off the background thread dumper.
844       stop.set(true);
845     }
846   }
847 
848   /**
849    * @param spiedFs should be instrumented for failure.
850    */
851   private void retryOverHdfsProblem(final FileSystem spiedFs) throws Exception {
852     generateWALs(-1);
853     useDifferentDFSClient();
854 
855     try {
856       WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, spiedFs, conf, wals);
857       assertEquals(NUM_WRITERS, fs.listStatus(OLDLOGDIR).length);
858       assertFalse(fs.exists(WALDIR));
859     } catch (IOException e) {
860       fail("There shouldn't be any exception but: " + e.toString());
861     }
862   }
863 
864   // Test for HBASE-3412
865   @Test (timeout=300000)
866   public void testMovedWALDuringRecovery() throws Exception {
867     // This partial mock will throw LEE for every file simulating
868     // files that were moved
869     FileSystem spiedFs = Mockito.spy(fs);
870     // The "File does not exist" part is very important,
871     // that's how it comes out of HDFS
872     Mockito.doThrow(new LeaseExpiredException("Injected: File does not exist")).
873         when(spiedFs).append(Mockito.<Path>any());
874     retryOverHdfsProblem(spiedFs);
875   }
876 
877   @Test (timeout=300000)
878   public void testRetryOpenDuringRecovery() throws Exception {
879     FileSystem spiedFs = Mockito.spy(fs);
880     // The "Cannot obtain block length", "Could not obtain the last block",
881     // and "Blocklist for [^ ]* has changed.*" part is very important,
882     // that's how it comes out of HDFS. If HDFS changes the exception
883     // message, this test needs to be adjusted accordingly.
884     //
885     // When DFSClient tries to open a file, HDFS needs to locate
886     // the last block of the file and get its length. However, if the
887     // last block is under recovery, HDFS may have problem to obtain
888     // the block length, in which case, retry may help.
889     Mockito.doAnswer(new Answer<FSDataInputStream>() {
890       private final String[] errors = new String[] {
891           "Cannot obtain block length", "Could not obtain the last block",
892           "Blocklist for " + OLDLOGDIR + " has changed"};
893       private int count = 0;
894 
895       public FSDataInputStream answer(InvocationOnMock invocation) throws Throwable {
896         if (count < 3) {
897           throw new IOException(errors[count++]);
898         }
899         return (FSDataInputStream)invocation.callRealMethod();
900       }
901     }).when(spiedFs).open(Mockito.<Path>any(), Mockito.anyInt());
902     retryOverHdfsProblem(spiedFs);
903   }
904 
905   @Test (timeout=300000)
906   public void testTerminationAskedByReporter() throws IOException, CorruptedLogFileException {
907     generateWALs(1, 10, -1);
908     FileStatus logfile = fs.listStatus(WALDIR)[0];
909     useDifferentDFSClient();
910 
911     final AtomicInteger count = new AtomicInteger();
912 
913     CancelableProgressable localReporter
914         = new CancelableProgressable() {
915       @Override
916       public boolean progress() {
917         count.getAndIncrement();
918         return false;
919       }
920     };
921 
922     FileSystem spiedFs = Mockito.spy(fs);
923     Mockito.doAnswer(new Answer<FSDataInputStream>() {
924       public FSDataInputStream answer(InvocationOnMock invocation) throws Throwable {
925         Thread.sleep(1500); // Sleep a while and wait report status invoked
926         return (FSDataInputStream)invocation.callRealMethod();
927       }
928     }).when(spiedFs).open(Mockito.<Path>any(), Mockito.anyInt());
929 
930     try {
931       conf.setInt("hbase.splitlog.report.period", 1000);
932       boolean ret = WALSplitter.splitLogFile(
933           HBASEDIR, logfile, spiedFs, conf, localReporter, null, null, this.mode, wals);
934       assertFalse("Log splitting should failed", ret);
935       assertTrue(count.get() > 0);
936     } catch (IOException e) {
937       fail("There shouldn't be any exception but: " + e.toString());
938     } finally {
939       // reset it back to its default value
940       conf.setInt("hbase.splitlog.report.period", 59000);
941     }
942   }
943 
944   /**
945    * Test log split process with fake data and lots of edits to trigger threading
946    * issues.
947    */
948   @Test (timeout=300000)
949   public void testThreading() throws Exception {
950     doTestThreading(20000, 128*1024*1024, 0);
951   }
952 
953   /**
954    * Test blocking behavior of the log split process if writers are writing slower
955    * than the reader is reading.
956    */
957   @Test (timeout=300000)
958   public void testThreadingSlowWriterSmallBuffer() throws Exception {
959     doTestThreading(200, 1024, 50);
960   }
961 
962   /**
963    * Sets up a log splitter with a mock reader and writer. The mock reader generates
964    * a specified number of edits spread across 5 regions. The mock writer optionally
965    * sleeps for each edit it is fed.
966    * *
967    * After the split is complete, verifies that the statistics show the correct number
968    * of edits output into each region.
969    *
970    * @param numFakeEdits number of fake edits to push through pipeline
971    * @param bufferSize size of in-memory buffer
972    * @param writerSlowness writer threads will sleep this many ms per edit
973    */
974   private void doTestThreading(final int numFakeEdits,
975       final int bufferSize,
976       final int writerSlowness) throws Exception {
977 
978     Configuration localConf = new Configuration(conf);
979     localConf.setInt("hbase.regionserver.hlog.splitlog.buffersize", bufferSize);
980 
981     // Create a fake log file (we'll override the reader to produce a stream of edits)
982     Path logPath = new Path(WALDIR, WAL_FILE_PREFIX + ".fake");
983     FSDataOutputStream out = fs.create(logPath);
984     out.close();
985 
986     // Make region dirs for our destination regions so the output doesn't get skipped
987     final List<String> regions = ImmutableList.of("r0", "r1", "r2", "r3", "r4");
988     makeRegionDirs(regions);
989 
990     // Create a splitter that reads and writes the data without touching disk
991     WALSplitter logSplitter = new WALSplitter(wals,
992         localConf, HBASEDIR, fs, HBASEDIR, fs, null, null, this.mode) {
993 
994       /* Produce a mock writer that doesn't write anywhere */
995       @Override
996       protected Writer createWriter(Path logfile) throws IOException {
997         Writer mockWriter = Mockito.mock(Writer.class);
998         Mockito.doAnswer(new Answer<Void>() {
999           int expectedIndex = 0;
1000 
1001           @Override
1002           public Void answer(InvocationOnMock invocation) {
1003             if (writerSlowness > 0) {
1004               try {
1005                 Thread.sleep(writerSlowness);
1006               } catch (InterruptedException ie) {
1007                 Thread.currentThread().interrupt();
1008               }
1009             }
1010             Entry entry = (Entry) invocation.getArguments()[0];
1011             WALEdit edit = entry.getEdit();
1012             List<Cell> cells = edit.getCells();
1013             assertEquals(1, cells.size());
1014             Cell cell = cells.get(0);
1015 
1016             // Check that the edits come in the right order.
1017             assertEquals(expectedIndex, Bytes.toInt(cell.getRowArray(), cell.getRowOffset(),
1018                 cell.getRowLength()));
1019             expectedIndex++;
1020             return null;
1021           }
1022         }).when(mockWriter).append(Mockito.<Entry>any());
1023         return mockWriter;
1024       }
1025 
1026       /* Produce a mock reader that generates fake entries */
1027       @Override
1028       protected Reader getReader(Path curLogFile, CancelableProgressable reporter)
1029           throws IOException {
1030         Reader mockReader = Mockito.mock(Reader.class);
1031         Mockito.doAnswer(new Answer<Entry>() {
1032           int index = 0;
1033 
1034           @Override
1035           public Entry answer(InvocationOnMock invocation) throws Throwable {
1036             if (index >= numFakeEdits) return null;
1037 
1038             // Generate r0 through r4 in round robin fashion
1039             int regionIdx = index % regions.size();
1040             byte region[] = new byte[] {(byte)'r', (byte) (0x30 + regionIdx)};
1041 
1042             Entry ret = createTestEntry(TABLE_NAME, region,
1043                 Bytes.toBytes((int)(index / regions.size())),
1044                 FAMILY, QUALIFIER, VALUE, index);
1045             index++;
1046             return ret;
1047           }
1048         }).when(mockReader).next();
1049         return mockReader;
1050       }
1051     };
1052 
1053     logSplitter.splitLogFile(fs.getFileStatus(logPath), null);
1054 
1055     // Verify number of written edits per region
1056     Map<byte[], Long> outputCounts = logSplitter.outputSink.getOutputCounts();
1057     for (Map.Entry<byte[], Long> entry : outputCounts.entrySet()) {
1058       LOG.info("Got " + entry.getValue() + " output edits for region " +
1059           Bytes.toString(entry.getKey()));
1060       assertEquals((long)entry.getValue(), numFakeEdits / regions.size());
1061     }
1062     assertEquals("Should have as many outputs as regions", regions.size(), outputCounts.size());
1063   }
1064 
1065   // Does leaving the writer open in testSplitDeletedRegion matter enough for two tests?
1066   @Test (timeout=300000)
1067   public void testSplitLogFileDeletedRegionDir() throws IOException {
1068     LOG.info("testSplitLogFileDeletedRegionDir");
1069     final String REGION = "region__1";
1070     REGIONS.clear();
1071     REGIONS.add(REGION);
1072 
1073     generateWALs(1, 10, -1);
1074     useDifferentDFSClient();
1075 
1076     Path regiondir = new Path(TABLEDIR, REGION);
1077     LOG.info("Region directory is" + regiondir);
1078     fs.delete(regiondir, true);
1079     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
1080     assertFalse(fs.exists(regiondir));
1081   }
1082 
1083   @Test (timeout=300000)
1084   public void testSplitLogFileEmpty() throws IOException {
1085     LOG.info("testSplitLogFileEmpty");
1086     // we won't create the hlog dir until getWAL got called, so
1087     // make dir here when testing empty log file
1088     fs.mkdirs(WALDIR);
1089     injectEmptyFile(".empty", true);
1090     useDifferentDFSClient();
1091 
1092     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
1093     Path tdir = FSUtils.getTableDir(HBASEDIR, TABLE_NAME);
1094     assertFalse(fs.exists(tdir));
1095 
1096     assertEquals(0, countWAL(fs.listStatus(OLDLOGDIR)[0].getPath()));
1097   }
1098 
  // One writer, 10 edit rounds across the regions: expect 10 recovered
  // edits per region after the split.
  @Test (timeout=300000)
  public void testSplitLogFileMultipleRegions() throws IOException {
    LOG.info("testSplitLogFileMultipleRegions");
    generateWALs(1, 10, -1);
    splitAndCount(1, 10);
  }
1105 
1106   @Test (timeout=300000)
1107   public void testSplitLogFileFirstLineCorruptionLog()
1108       throws IOException {
1109     conf.setBoolean(HBASE_SKIP_ERRORS, true);
1110     generateWALs(1, 10, -1);
1111     FileStatus logfile = fs.listStatus(WALDIR)[0];
1112 
1113     corruptWAL(logfile.getPath(),
1114         Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true);
1115 
1116     useDifferentDFSClient();
1117     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
1118 
1119     final Path corruptDir = new Path(FSUtils.getWALRootDir(conf), HConstants.CORRUPT_DIR_NAME);
1120     assertEquals(1, fs.listStatus(corruptDir).length);
1121   }
1122 
1123   /**
1124    * @throws IOException
1125    * @see https://issues.apache.org/jira/browse/HBASE-4862
1126    */
1127   @Test (timeout=300000)
1128   public void testConcurrentSplitLogAndReplayRecoverEdit() throws IOException {
1129     LOG.info("testConcurrentSplitLogAndReplayRecoverEdit");
1130     // Generate wals for our destination region
1131     String regionName = "r0";
1132     final Path regiondir = new Path(TABLEDIR, regionName);
1133     REGIONS.clear();
1134     REGIONS.add(regionName);
1135     generateWALs(-1);
1136 
1137     wals.getWAL(Bytes.toBytes(regionName), null);
1138     FileStatus[] logfiles = fs.listStatus(WALDIR);
1139     assertTrue("There should be some log file",
1140         logfiles != null && logfiles.length > 0);
1141 
1142     WALSplitter logSplitter = new WALSplitter(wals,
1143         conf, HBASEDIR, fs, HBASEDIR, fs, null, null, this.mode) {
1144       @Override
1145       protected Writer createWriter(Path logfile)
1146           throws IOException {
1147         Writer writer = wals.createRecoveredEditsWriter(this.walFS, logfile);
1148         // After creating writer, simulate region's
1149         // replayRecoveredEditsIfAny() which gets SplitEditFiles of this
1150         // region and delete them, excluding files with '.temp' suffix.
1151         NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(fs, regiondir);
1152         if (files != null && !files.isEmpty()) {
1153           for (Path file : files) {
1154             if (!this.walFS.delete(file, false)) {
1155               LOG.error("Failed delete of " + file);
1156             } else {
1157               LOG.debug("Deleted recovered.edits file=" + file);
1158             }
1159           }
1160         }
1161         return writer;
1162       }
1163     };
1164     try{
1165       logSplitter.splitLogFile(logfiles[0], null);
1166     } catch (IOException e) {
1167       LOG.info(e);
1168       fail("Throws IOException when spliting "
1169           + "log, it is most likely because writing file does not "
1170           + "exist which is caused by concurrent replayRecoveredEditsIfAny()");
1171     }
1172     if (fs.exists(CORRUPTDIR)) {
1173       if (fs.listStatus(CORRUPTDIR).length > 0) {
1174         fail("There are some corrupt logs, "
1175             + "it is most likely caused by concurrent replayRecoveredEditsIfAny()");
1176       }
1177     }
1178   }
1179 
  /**
   * Generates NUM_WRITERS WALs with ENTRIES edit rounds each and no region
   * events interleaved.
   * @param leaveOpen index of the writer to leave un-closed; -1 closes all.
   * @return the writer left open, or null if all were closed.
   */
  private Writer generateWALs(int leaveOpen) throws IOException {
    return generateWALs(NUM_WRITERS, ENTRIES, leaveOpen, 0);
  }
1183 
  /**
   * Generates WALs, interleaving 7 region-open meta events with the edits.
   * @param leaveOpen index of the writer to leave un-closed; -1 closes all.
   * @return the writer left open, or null if all were closed.
   */
  private Writer generateWALs(int writers, int entries, int leaveOpen) throws IOException {
    return generateWALs(writers, entries, leaveOpen, 7);
  }
1187 
1188   private void makeRegionDirs(List<String> regions) throws IOException {
1189     for (String region : regions) {
1190       LOG.debug("Creating dir for region " + region);
1191       fs.mkdirs(new Path(TABLEDIR, region));
1192     }
1193   }
1194 
1195   /**
1196    * @param leaveOpen index to leave un-closed. -1 to close all.
1197    * @return the writer that's still open, or null if all were closed.
1198    */
1199   private Writer generateWALs(int writers, int entries, int leaveOpen, int regionEvents) throws IOException {
1200     makeRegionDirs(REGIONS);
1201     fs.mkdirs(WALDIR);
1202     Writer [] ws = new Writer[writers];
1203     int seq = 0;
1204     int numRegionEventsAdded = 0;
1205     for (int i = 0; i < writers; i++) {
1206       ws[i] = wals.createWALWriter(fs, new Path(WALDIR, WAL_FILE_PREFIX + i));
1207       for (int j = 0; j < entries; j++) {
1208         int prefix = 0;
1209         for (String region : REGIONS) {
1210           String row_key = region + prefix++ + i + j;
1211           appendEntry(ws[i], TABLE_NAME, region.getBytes(), row_key.getBytes(), FAMILY, QUALIFIER,
1212               VALUE, seq++);
1213 
1214           if (numRegionEventsAdded < regionEvents) {
1215             numRegionEventsAdded ++;
1216             appendRegionEvent(ws[i], region);
1217           }
1218         }
1219       }
1220       if (i != leaveOpen) {
1221         ws[i].close();
1222         LOG.info("Closing writer " + i);
1223       }
1224     }
1225     if (leaveOpen < 0 || leaveOpen >= writers) {
1226       return null;
1227     }
1228     return ws[leaveOpen];
1229   }
1230 
1231 
1232 
1233   private Path[] getLogForRegion(TableName table, String region)
1234       throws IOException {
1235     Path tdir = FSUtils.getWALTableDir(conf, table);
1236     @SuppressWarnings("deprecation")
1237     Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(new Path(tdir,
1238         Bytes.toString(region.getBytes())));
1239     FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
1240       @Override
1241       public boolean accept(Path p) {
1242         if (WALSplitter.isSequenceIdFile(p)) {
1243           return false;
1244         }
1245         return true;
1246       }
1247     });
1248     Path[] paths = new Path[files.length];
1249     for (int i = 0; i < files.length; i++) {
1250       paths[i] = files[i].getPath();
1251     }
1252     return paths;
1253   }
1254 
  /**
   * Rewrites the WAL at {@code path} with one of several corruptions
   * applied, to exercise the splitter's error handling.
   * @param path WAL file to corrupt (read fully, deleted, then rewritten)
   * @param corruption the kind of damage to inflict
   * @param close whether to close the rewritten file (vs. only flush it)
   */
  private void corruptWAL(Path path, Corruptions corruption, boolean close) throws IOException {
    FSDataOutputStream out;
    int fileSize = (int) fs.listStatus(path)[0].getLen();

    // Slurp the whole file into memory so it can be rewritten damaged.
    FSDataInputStream in = fs.open(path);
    byte[] corrupted_bytes = new byte[fileSize];
    in.readFully(0, corrupted_bytes, 0, fileSize);
    in.close();

    switch (corruption) {
    case APPEND_GARBAGE:
      // Original content followed by junk bytes.
      fs.delete(path, false);
      out = fs.create(path);
      out.write(corrupted_bytes);
      out.write("-----".getBytes());
      closeOrFlush(close, out);
      break;

    case INSERT_GARBAGE_ON_FIRST_LINE:
      // A junk byte before any valid content breaks the WAL header.
      fs.delete(path, false);
      out = fs.create(path);
      out.write(0);
      out.write(corrupted_bytes);
      closeOrFlush(close, out);
      break;

    case INSERT_GARBAGE_IN_THE_MIDDLE:
      // A junk byte spliced into the middle of the entry stream.
      fs.delete(path, false);
      out = fs.create(path);
      int middle = (int) Math.floor(corrupted_bytes.length / 2);
      out.write(corrupted_bytes, 0, middle);
      out.write(0);
      out.write(corrupted_bytes, middle, corrupted_bytes.length - middle);
      closeOrFlush(close, out);
      break;

    case TRUNCATE:
      // Drop the trailer plus part of the final entry, simulating a
      // writer that died mid-append.
      fs.delete(path, false);
      out = fs.create(path);
      out.write(corrupted_bytes, 0, fileSize
          - (32 + ProtobufLogReader.PB_WAL_COMPLETE_MAGIC.length + Bytes.SIZEOF_INT));
      closeOrFlush(close, out);
      break;

    case TRUNCATE_TRAILER:
      fs.delete(path, false);
      out = fs.create(path);
      out.write(corrupted_bytes, 0, fileSize - Bytes.SIZEOF_INT);// trailer is truncated.
      closeOrFlush(close, out);
      break;
    }
  }
1307 
  /**
   * Either closes the stream, or pushes its contents to the file system
   * without closing — by reflectively invoking Syncable.hflush() (falling
   * back to the older sync()) so the code works across Hadoop versions.
   * @param close true to close; false to only flush/sync, leaving the file
   *     open (simulating a writer that never closed its WAL)
   */
  private void closeOrFlush(boolean close, FSDataOutputStream out)
      throws IOException {
    if (close) {
      out.close();
    } else {
      // Look up hflush() first (newer Hadoop), then sync() (older Hadoop).
      Method syncMethod = null;
      try {
        syncMethod = out.getClass().getMethod("hflush", new Class<?> []{});
      } catch (NoSuchMethodException e) {
        try {
          syncMethod = out.getClass().getMethod("sync", new Class<?> []{});
        } catch (NoSuchMethodException ex) {
          throw new IOException("This version of Hadoop supports " +
              "neither Syncable.sync() nor Syncable.hflush().");
        }
      }
      try {
        syncMethod.invoke(out, new Object[]{});
      } catch (Exception e) {
        throw new IOException(e);
      }
      // Intentionally no close() here: the stream stays open.
    }
  }
1332 
1333   private int countWAL(Path log) throws IOException {
1334     int count = 0;
1335     Reader in = wals.createReader(fs, log);
1336     while (in.next() != null) {
1337       count++;
1338     }
1339     in.close();
1340     return count;
1341   }
1342 
1343   private static void appendCompactionEvent(Writer w, HRegionInfo hri, String[] inputs,
1344       String output) throws IOException {
1345     WALProtos.CompactionDescriptor.Builder desc = WALProtos.CompactionDescriptor.newBuilder();
1346     desc.setTableName(ByteString.copyFrom(hri.getTable().toBytes()))
1347         .setEncodedRegionName(ByteString.copyFrom(hri.getEncodedNameAsBytes()))
1348         .setRegionName(ByteString.copyFrom(hri.getRegionName()))
1349         .setFamilyName(ByteString.copyFrom(FAMILY))
1350         .setStoreHomeDir(hri.getEncodedName() + "/" + Bytes.toString(FAMILY))
1351         .addAllCompactionInput(Arrays.asList(inputs))
1352         .addCompactionOutput(output);
1353 
1354     WALEdit edit = WALEdit.createCompaction(hri, desc.build());
1355     WALKey key = new WALKey(hri.getEncodedNameAsBytes(), TABLE_NAME, 1,
1356         EnvironmentEdgeManager.currentTime(), HConstants.DEFAULT_CLUSTER_ID);
1357     w.append(new Entry(key, edit));
1358     w.sync(false);
1359   }
1360 
1361   private static void appendRegionEvent(Writer w, String region) throws IOException {
1362     WALProtos.RegionEventDescriptor regionOpenDesc = ProtobufUtil.toRegionEventDescriptor(
1363         WALProtos.RegionEventDescriptor.EventType.REGION_OPEN,
1364         TABLE_NAME.toBytes(),
1365         region.getBytes(),
1366         String.valueOf(region.hashCode()).getBytes(),
1367         1,
1368         ServerName.parseServerName("ServerName:9099"), ImmutableMap.<byte[], List<Path>>of());
1369     final long time = EnvironmentEdgeManager.currentTime();
1370     KeyValue kv = new KeyValue(region.getBytes(), WALEdit.METAFAMILY, WALEdit.REGION_EVENT,
1371         time, regionOpenDesc.toByteArray());
1372     final WALKey walKey = new WALKey(region.getBytes(), TABLE_NAME, 1, time,
1373         HConstants.DEFAULT_CLUSTER_ID);
1374     w.append(
1375         new Entry(walKey, new WALEdit().add(kv)));
1376     w.sync(false);
1377   }
1378 
  /**
   * Appends one test edit to the writer and syncs it.
   * @param seq sequence number the entry is based on (the created entry
   *     actually carries seq + 1; see createTestEntry)
   * @return the seq value that was passed in, unchanged
   */
  public static long appendEntry(Writer writer, TableName table, byte[] region,
      byte[] row, byte[] family, byte[] qualifier,
      byte[] value, long seq)
      throws IOException {
    LOG.info(Thread.currentThread().getName() + " append");
    writer.append(createTestEntry(table, region, row, family, qualifier, value, seq));
    LOG.info(Thread.currentThread().getName() + " sync");
    writer.sync(false);
    return seq;
  }
1389 
1390   private static Entry createTestEntry(
1391       TableName table, byte[] region,
1392       byte[] row, byte[] family, byte[] qualifier,
1393       byte[] value, long seq) {
1394     long time = System.nanoTime();
1395 
1396     seq++;
1397     final KeyValue cell = new KeyValue(row, family, qualifier, time, KeyValue.Type.Put, value);
1398     WALEdit edit = new WALEdit();
1399     edit.add(cell);
1400     return new Entry(new WALKey(region, table, seq, time,
1401         HConstants.DEFAULT_CLUSTER_ID), edit);
1402   }
1403 
1404   private void injectEmptyFile(String suffix, boolean closeFile)
1405       throws IOException {
1406     Writer writer = wals.createWALWriter(fs, new Path(WALDIR, WAL_FILE_PREFIX + suffix),
1407         conf);
1408     if (closeFile) writer.close();
1409   }
1410 
1411   private boolean logsAreEqual(Path p1, Path p2) throws IOException {
1412     Reader in1, in2;
1413     in1 = wals.createReader(fs, p1);
1414     in2 = wals.createReader(fs, p2);
1415     Entry entry1;
1416     Entry entry2;
1417     while ((entry1 = in1.next()) != null) {
1418       entry2 = in2.next();
1419       if ((entry1.getKey().compareTo(entry2.getKey()) != 0) ||
1420           (!entry1.getEdit().toString().equals(entry2.getEdit().toString()))) {
1421         return false;
1422       }
1423     }
1424     in1.close();
1425     in2.close();
1426     return true;
1427   }
1428 }