View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.wal;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertTrue;
24  import static org.junit.Assert.fail;
25  
26  import com.google.common.base.Joiner;
27  import com.google.common.collect.ImmutableList;
28  import com.google.common.collect.ImmutableMap;
29  import com.google.protobuf.ByteString;
30  
31  import java.io.FileNotFoundException;
32  import java.io.IOException;
33  import java.lang.reflect.Method;
34  import java.security.PrivilegedExceptionAction;
35  import java.util.ArrayList;
36  import java.util.Arrays;
37  import java.util.Collections;
38  import java.util.HashMap;
39  import java.util.HashSet;
40  import java.util.List;
41  import java.util.Map;
42  import java.util.NavigableSet;
43  import java.util.Set;
44  import java.util.concurrent.atomic.AtomicBoolean;
45  import java.util.concurrent.atomic.AtomicInteger;
46  import java.util.concurrent.atomic.AtomicLong;
47  
48  import org.apache.commons.logging.Log;
49  import org.apache.commons.logging.LogFactory;
50  import org.apache.hadoop.conf.Configuration;
51  import org.apache.hadoop.fs.FSDataInputStream;
52  import org.apache.hadoop.fs.FSDataOutputStream;
53  import org.apache.hadoop.fs.FileStatus;
54  import org.apache.hadoop.fs.FileSystem;
55  import org.apache.hadoop.fs.FileUtil;
56  import org.apache.hadoop.fs.Path;
57  import org.apache.hadoop.fs.PathFilter;
58  import org.apache.hadoop.hbase.Cell;
59  import org.apache.hadoop.hbase.HBaseConfiguration;
60  import org.apache.hadoop.hbase.HBaseTestingUtility;
61  import org.apache.hadoop.hbase.HConstants;
62  import org.apache.hadoop.hbase.HRegionInfo;
63  import org.apache.hadoop.hbase.KeyValue;
64  import org.apache.hadoop.hbase.ServerName;
65  import org.apache.hadoop.hbase.TableName;
66  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
67  import org.apache.hadoop.hbase.protobuf.generated.WALProtos;
68  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
69  import org.apache.hadoop.hbase.regionserver.wal.FaultySequenceFileLogReader;
70  import org.apache.hadoop.hbase.regionserver.wal.InstrumentedLogWriter;
71  import org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader;
72  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
73  import org.apache.hadoop.hbase.security.User;
74  import org.apache.hadoop.hbase.testclassification.LargeTests;
75  import org.apache.hadoop.hbase.testclassification.RegionServerTests;
76  import org.apache.hadoop.hbase.util.Bytes;
77  import org.apache.hadoop.hbase.util.CancelableProgressable;
78  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
79  import org.apache.hadoop.hbase.util.FSUtils;
80  import org.apache.hadoop.hbase.util.Threads;
81  import org.apache.hadoop.hbase.wal.WAL.Entry;
82  import org.apache.hadoop.hbase.wal.WAL.Reader;
83  import org.apache.hadoop.hbase.wal.WALProvider.Writer;
84  import org.apache.hadoop.hbase.wal.WALSplitter.CorruptedLogFileException;
85  import org.apache.hadoop.hdfs.DFSTestUtil;
86  import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
87  import org.apache.hadoop.ipc.RemoteException;
88  import org.junit.After;
89  import org.junit.AfterClass;
90  import org.junit.Before;
91  import org.junit.BeforeClass;
92  import org.junit.Rule;
93  import org.junit.Test;
94  import org.junit.experimental.categories.Category;
95  import org.junit.rules.TestName;
96  import org.mockito.Mockito;
97  import org.mockito.invocation.InvocationOnMock;
98  import org.mockito.stubbing.Answer;
99  
100 /**
101  * Testing {@link WAL} splitting code.
102  */
103 @Category({RegionServerTests.class, LargeTests.class})
104 public class TestWALSplit {
{
  // Debugging aid (see HBASE-12285 for details): uncomment the lines below when more
  // verbose logging is needed.
  //((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
  //((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
  //((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL);
}
private static final Log LOG = LogFactory.getLog(TestWALSplit.class);

/** Shared configuration; assigned from TEST_UTIL in setUpBeforeClass. */
private static Configuration conf;
/** File system of the mini DFS cluster; re-read in setUp. */
private FileSystem fs;

protected static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

// Directory layout of the running test; every entry is (re)assigned in setUp.
private Path HBASEDIR;
private Path HBASELOGDIR;
private Path WALDIR;
private Path OLDLOGDIR;
private Path CORRUPTDIR;
private Path TABLEDIR;
private String TMPDIRNAME;

private static final int NUM_WRITERS = 10;
/** Entries per writer per region. */
private static final int ENTRIES = 10;

private static final String FILENAME_BEING_SPLIT = "testfile";
private static final TableName TABLE_NAME = TableName.valueOf("t1");
private static final byte[] FAMILY = "f1".getBytes();
private static final byte[] QUALIFIER = "q1".getBytes();
private static final byte[] VALUE = "v1".getBytes();
private static final String WAL_FILE_PREFIX = "wal.dat.";
/** Region names the tests write edits for; reset to "bbb" and "ccc" in setUp. */
private static List<String> REGIONS = new ArrayList<String>();
private static final String HBASE_SKIP_ERRORS = "hbase.hlog.split.skip.errors";
// Names of the alternate test users; derived from the current user in setUpBeforeClass.
private static String ROBBER;
private static String ZOMBIE;
private static String[] GROUP = {"supergroup"};
/** Recovery mode selected in setUp from the distributed-log-replay setting. */
private RecoveryMode mode;
143 
/** Kinds of damage the tests pass to corruptWAL when corrupting a WAL file. */
enum Corruptions {
  INSERT_GARBAGE_ON_FIRST_LINE,
  INSERT_GARBAGE_IN_THE_MIDDLE,
  APPEND_GARBAGE,
  TRUNCATE,
  TRUNCATE_TRAILER
}
151 
152   @BeforeClass
153   public static void setUpBeforeClass() throws Exception {
154     conf = TEST_UTIL.getConfiguration();
155     conf.setClass("hbase.regionserver.hlog.writer.impl",
156         InstrumentedLogWriter.class, Writer.class);
157     // This is how you turn off shortcircuit read currently.  TODO: Fix.  Should read config.
158     System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
159     // Create fake maping user to group and set it to the conf.
160     Map<String, String []> u2g_map = new HashMap<String, String []>(2);
161     ROBBER = User.getCurrent().getName() + "-robber";
162     ZOMBIE = User.getCurrent().getName() + "-zombie";
163     u2g_map.put(ROBBER, GROUP);
164     u2g_map.put(ZOMBIE, GROUP);
165     DFSTestUtil.updateConfWithFakeGroupMapping(conf, u2g_map);
166     conf.setInt("dfs.heartbeat.interval", 1);
167     TEST_UTIL.startMiniDFSCluster(2);
168   }
169 
170   @AfterClass
171   public static void tearDownAfterClass() throws Exception {
172     TEST_UTIL.shutdownMiniDFSCluster();
173   }
174 
175   @Rule
176   public TestName name = new TestName();
177   private WALFactory wals = null;
178 
179   @Before
180   public void setUp() throws Exception {
181     LOG.info("Cleaning up cluster for new test.");
182     fs = TEST_UTIL.getDFSCluster().getFileSystem();
183     HBASEDIR = TEST_UTIL.createRootDir();
184     HBASELOGDIR = TEST_UTIL.createWALRootDir();
185     OLDLOGDIR = new Path(HBASELOGDIR, HConstants.HREGION_OLDLOGDIR_NAME);
186     CORRUPTDIR = new Path(HBASELOGDIR, HConstants.CORRUPT_DIR_NAME);
187     TABLEDIR = FSUtils.getTableDir(HBASEDIR, TABLE_NAME);
188     TMPDIRNAME = conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
189       HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
190     REGIONS.clear();
191     Collections.addAll(REGIONS, "bbb", "ccc");
192     InstrumentedLogWriter.activateFailure = false;
193     this.mode = (conf.getBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false) ?
194         RecoveryMode.LOG_REPLAY : RecoveryMode.LOG_SPLITTING);
195     wals = new WALFactory(conf, null, name.getMethodName());
196     WALDIR = new Path(HBASELOGDIR, DefaultWALProvider.getWALDirectoryName(name.getMethodName()));
197     //fs.mkdirs(WALDIR);
198   }
199 
200   @After
201   public void tearDown() throws Exception {
202     try {
203       wals.close();
204     } catch(IOException exception) {
205       // Some tests will move WALs out from under us. In those cases, we'll get an error on close.
206       LOG.info("Ignoring an error while closing down our WALFactory. Fine for some tests, but if" +
207           " you see a failure look here.");
208       LOG.debug("exception details", exception);
209     } finally {
210       wals = null;
211       fs.delete(HBASEDIR, true);
212       fs.delete(HBASELOGDIR, true);
213     }
214   }
215 
216   /**
217    * Simulates splitting a WAL out from under a regionserver that is still trying to write it.
218    * Ensures we do not lose edits.
219    * @throws IOException
220    * @throws InterruptedException
221    */
222   @Test (timeout=300000)
223   public void testLogCannotBeWrittenOnceParsed() throws IOException, InterruptedException {
224     final AtomicLong counter = new AtomicLong(0);
225     AtomicBoolean stop = new AtomicBoolean(false);
226     // Region we'll write edits too and then later examine to make sure they all made it in.
227     final String region = REGIONS.get(0);
228     final int numWriters = 3;
229     Thread zombie = new ZombieLastLogWriterRegionServer(counter, stop, region, numWriters);
230     try {
231       long startCount = counter.get();
232       zombie.start();
233       // Wait till writer starts going.
234       while (startCount == counter.get()) Threads.sleep(1);
235       // Give it a second to write a few appends.
236       Threads.sleep(1000);
237       final Configuration conf2 = HBaseConfiguration.create(this.conf);
238       final User robber = User.createUserForTesting(conf2, ROBBER, GROUP);
239       int count = robber.runAs(new PrivilegedExceptionAction<Integer>() {
240         @Override
241         public Integer run() throws Exception {
242           StringBuilder ls = new StringBuilder("Contents of WALDIR (").append(WALDIR)
243               .append("):\n");
244           for (FileStatus status : fs.listStatus(WALDIR)) {
245             ls.append("\t").append(status.toString()).append("\n");
246           }
247           LOG.debug(ls);
248           LOG.info("Splitting WALs out from under zombie. Expecting " + numWriters + " files.");
249           WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf2, wals);
250           LOG.info("Finished splitting out from under zombie.");
251           Path[] logfiles = getLogForRegion(TABLE_NAME, region);
252           assertEquals("wrong number of split files for region", numWriters, logfiles.length);
253           int count = 0;
254           for (Path logfile: logfiles) {
255             count += countWAL(logfile);
256           }
257           return count;
258         }
259       });
260       LOG.info("zombie=" + counter.get() + ", robber=" + count);
261       assertTrue("The log file could have at most 1 extra log entry, but can't have less. " +
262               "Zombie could write " + counter.get() + " and logfile had only " + count,
263           counter.get() == count || counter.get() + 1 == count);
264     } finally {
265       stop.set(true);
266       zombie.interrupt();
267       Threads.threadDumpingIsAlive(zombie);
268     }
269   }
270 
271   /**
272    * This thread will keep writing to a 'wal' file even after the split process has started.
273    * It simulates a region server that was considered dead but woke up and wrote some more to the
274    * last log entry. Does its writing as an alternate user in another filesystem instance to
275    * simulate better it being a regionserver.
276    */
277   class ZombieLastLogWriterRegionServer extends Thread {
278     final AtomicLong editsCount;
279     final AtomicBoolean stop;
280     final int numOfWriters;
281     /**
282      * Region to write edits for.
283      */
284     final String region;
285     final User user;
286 
287     public ZombieLastLogWriterRegionServer(AtomicLong counter, AtomicBoolean stop,
288         final String region, final int writers)
289         throws IOException, InterruptedException {
290       super("ZombieLastLogWriterRegionServer");
291       setDaemon(true);
292       this.stop = stop;
293       this.editsCount = counter;
294       this.region = region;
295       this.user = User.createUserForTesting(conf, ZOMBIE, GROUP);
296       numOfWriters = writers;
297     }
298 
299     @Override
300     public void run() {
301       try {
302         doWriting();
303       } catch (IOException e) {
304         LOG.warn(getName() + " Writer exiting " + e);
305       } catch (InterruptedException e) {
306         LOG.warn(getName() + " Writer exiting " + e);
307       }
308     }
309 
310     private void doWriting() throws IOException, InterruptedException {
311       this.user.runAs(new PrivilegedExceptionAction<Object>() {
312         @Override
313         public Object run() throws Exception {
314           // Index of the WAL we want to keep open.  generateWALs will leave open the WAL whose
315           // index we supply here.
316           int walToKeepOpen = numOfWriters - 1;
317           // The below method writes numOfWriters files each with ENTRIES entries for a total of
318           // numOfWriters * ENTRIES added per column family in the region.
319           Writer writer = null;
320           try {
321             writer = generateWALs(numOfWriters, ENTRIES, walToKeepOpen);
322           } catch (IOException e1) {
323             throw new RuntimeException("Failed", e1);
324           }
325           // Update counter so has all edits written so far.
326           editsCount.addAndGet(numOfWriters * ENTRIES);
327           loop(writer);
328           // If we've been interruped, then things should have shifted out from under us.
329           // closing should error
330           try {
331             writer.close();
332             fail("Writing closing after parsing should give an error.");
333           } catch (IOException exception) {
334             LOG.debug("ignoring error when closing final writer.", exception);
335           }
336           return null;
337         }
338       });
339     }
340 
341     private void loop(final Writer writer) {
342       byte [] regionBytes = Bytes.toBytes(this.region);
343       while (!stop.get()) {
344         try {
345           long seq = appendEntry(writer, TABLE_NAME, regionBytes,
346               ("r" + editsCount.get()).getBytes(), regionBytes, QUALIFIER, VALUE, 0);
347           long count = editsCount.incrementAndGet();
348           LOG.info(getName() + " sync count=" + count + ", seq=" + seq);
349           try {
350             Thread.sleep(1);
351           } catch (InterruptedException e) {
352             //
353           }
354         } catch (IOException ex) {
355           LOG.error(getName() + " ex " + ex.toString());
356           if (ex instanceof RemoteException) {
357             LOG.error("Juliet: got RemoteException " + ex.getMessage() +
358                 " while writing " + (editsCount.get() + 1));
359           } else {
360             LOG.error(getName() + " failed to write....at " + editsCount.get());
361             fail("Failed to write " + editsCount.get());
362           }
363           break;
364         } catch (Throwable t) {
365           LOG.error(getName() + " HOW? " + t);
366           LOG.debug("exception details", t);
367           break;
368         }
369       }
370       LOG.info(getName() + " Writer exiting");
371     }
372   }
373 
374   /**
375    * @throws IOException
376    * @see https://issues.apache.org/jira/browse/HBASE-3020
377    */
378   @Test (timeout=300000)
379   public void testRecoveredEditsPathForMeta() throws IOException {
380     byte [] encoded = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
381     Path tdir = FSUtils.getTableDir(HBASEDIR, TableName.META_TABLE_NAME);
382     Path regiondir = new Path(tdir,
383         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
384     fs.mkdirs(regiondir);
385     long now = System.currentTimeMillis();
386     Entry entry =
387         new Entry(new WALKey(encoded,
388             TableName.META_TABLE_NAME, 1, now, HConstants.DEFAULT_CLUSTER_ID),
389             new WALEdit());
390     Path p = WALSplitter.getRegionSplitEditsPath(entry,
391         FILENAME_BEING_SPLIT, TMPDIRNAME, conf);
392     String parentOfParent = p.getParent().getParent().getName();
393     assertEquals(parentOfParent, HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
394   }
395 
396   /**
397    * Test old recovered edits file doesn't break WALSplitter.
398    * This is useful in upgrading old instances.
399    */
400   @Test (timeout=300000)
401   public void testOldRecoveredEditsFileSidelined() throws IOException {
402     byte [] encoded = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
403     Path tdir = FSUtils.getTableDir(HBASEDIR, TableName.META_TABLE_NAME);
404     Path regiondir = new Path(tdir,
405         HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
406     fs.mkdirs(regiondir);
407     long now = System.currentTimeMillis();
408     Entry entry =
409         new Entry(new WALKey(encoded,
410             TableName.META_TABLE_NAME, 1, now, HConstants.DEFAULT_CLUSTER_ID),
411             new WALEdit());
412     Path parent = WALSplitter.getRegionDirRecoveredEditsDir(regiondir);
413     assertEquals(parent.getName(), HConstants.RECOVERED_EDITS_DIR);
414     fs.createNewFile(parent); // create a recovered.edits file
415 
416     Path p = WALSplitter.getRegionSplitEditsPath(entry,
417         FILENAME_BEING_SPLIT, TMPDIRNAME, conf);
418     String parentOfParent = p.getParent().getParent().getName();
419     assertEquals(parentOfParent, HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
420     WALFactory.createRecoveredEditsWriter(fs, p, conf).close();
421   }
422 
423   private void useDifferentDFSClient() throws IOException {
424     // make fs act as a different client now
425     // initialize will create a new DFSClient with a new client ID
426     fs.initialize(fs.getUri(), conf);
427   }
428 
429   @Test (timeout=300000)
430   public void testSplitPreservesEdits() throws IOException{
431     final String REGION = "region__1";
432     REGIONS.clear();
433     REGIONS.add(REGION);
434 
435     generateWALs(1, 10, -1, 0);
436     useDifferentDFSClient();
437     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
438     Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath();
439     Path[] splitLog = getLogForRegion(TABLE_NAME, REGION);
440     assertEquals(1, splitLog.length);
441 
442     assertTrue("edits differ after split", logsAreEqual(originalLog, splitLog[0]));
443   }
444 
445   @Test (timeout=300000)
446   public void testSplitRemovesRegionEventsEdits() throws IOException{
447     final String REGION = "region__1";
448     REGIONS.clear();
449     REGIONS.add(REGION);
450 
451     generateWALs(1, 10, -1, 100);
452     useDifferentDFSClient();
453     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
454     Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath();
455     Path[] splitLog = getLogForRegion(TABLE_NAME, REGION);
456     assertEquals(1, splitLog.length);
457 
458     assertFalse("edits differ after split", logsAreEqual(originalLog, splitLog[0]));
459     // split log should only have the test edits
460     assertEquals(10, countWAL(splitLog[0]));
461   }
462 
463 
464   @Test (timeout=300000)
465   public void testSplitLeavesCompactionEventsEdits() throws IOException{
466     HRegionInfo hri = new HRegionInfo(TABLE_NAME);
467     REGIONS.clear();
468     REGIONS.add(hri.getEncodedName());
469     Path regionDir = new Path(FSUtils.getTableDir(HBASEDIR, TABLE_NAME), hri.getEncodedName());
470     LOG.info("Creating region directory: " + regionDir);
471     assertTrue(fs.mkdirs(regionDir));
472 
473     Writer writer = generateWALs(1, 10, 0, 10);
474     String[] compactInputs = new String[]{"file1", "file2", "file3"};
475     String compactOutput = "file4";
476     appendCompactionEvent(writer, hri, compactInputs, compactOutput);
477     writer.close();
478 
479     useDifferentDFSClient();
480     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
481 
482     Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath();
483     // original log should have 10 test edits, 10 region markers, 1 compaction marker
484     assertEquals(21, countWAL(originalLog));
485 
486     Path[] splitLog = getLogForRegion(TABLE_NAME, hri.getEncodedName());
487     assertEquals(1, splitLog.length);
488 
489     assertFalse("edits differ after split", logsAreEqual(originalLog, splitLog[0]));
490     // split log should have 10 test edits plus 1 compaction marker
491     assertEquals(11, countWAL(splitLog[0]));
492   }
493 
494   /**
495    * @param expectedEntries -1 to not assert
496    * @return the count across all regions
497    */
498   private int splitAndCount(final int expectedFiles, final int expectedEntries)
499       throws IOException {
500     useDifferentDFSClient();
501     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
502     int result = 0;
503     for (String region : REGIONS) {
504       Path[] logfiles = getLogForRegion(TABLE_NAME, region);
505       assertEquals(expectedFiles, logfiles.length);
506       int count = 0;
507       for (Path logfile: logfiles) {
508         count += countWAL(logfile);
509       }
510       if (-1 != expectedEntries) {
511         assertEquals(expectedEntries, count);
512       }
513       result += count;
514     }
515     return result;
516   }
517 
518   @Test (timeout=300000)
519   public void testEmptyLogFiles() throws IOException {
520     testEmptyLogFiles(true);
521   }
522 
523   @Test (timeout=300000)
524   public void testEmptyOpenLogFiles() throws IOException {
525     testEmptyLogFiles(false);
526   }
527 
528   private void testEmptyLogFiles(final boolean close) throws IOException {
529     // we won't create the hlog dir until getWAL got called, so
530     // make dir here when testing empty log file
531     fs.mkdirs(WALDIR);
532     injectEmptyFile(".empty", close);
533     generateWALs(Integer.MAX_VALUE);
534     injectEmptyFile("empty", close);
535     splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES); // skip 2 empty
536   }
537 
538   @Test (timeout=300000)
539   public void testOpenZeroLengthReportedFileButWithDataGetsSplit() throws IOException {
540     // generate logs but leave wal.dat.5 open.
541     generateWALs(5);
542     splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES);
543   }
544 
545   @Test (timeout=300000)
546   public void testTralingGarbageCorruptionFileSkipErrorsPasses() throws IOException {
547     conf.setBoolean(HBASE_SKIP_ERRORS, true);
548     generateWALs(Integer.MAX_VALUE);
549     corruptWAL(new Path(WALDIR, WAL_FILE_PREFIX + "5"),
550         Corruptions.APPEND_GARBAGE, true);
551     splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES);
552   }
553 
554   @Test (timeout=300000)
555   public void testFirstLineCorruptionLogFileSkipErrorsPasses() throws IOException {
556     conf.setBoolean(HBASE_SKIP_ERRORS, true);
557     generateWALs(Integer.MAX_VALUE);
558     corruptWAL(new Path(WALDIR, WAL_FILE_PREFIX + "5"),
559         Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true);
560     splitAndCount(NUM_WRITERS - 1, (NUM_WRITERS - 1) * ENTRIES); //1 corrupt
561   }
562 
563   @Test (timeout=300000)
564   public void testMiddleGarbageCorruptionSkipErrorsReadsHalfOfFile() throws IOException {
565     conf.setBoolean(HBASE_SKIP_ERRORS, true);
566     generateWALs(Integer.MAX_VALUE);
567     corruptWAL(new Path(WALDIR, WAL_FILE_PREFIX + "5"),
568         Corruptions.INSERT_GARBAGE_IN_THE_MIDDLE, false);
569     // the entries in the original logs are alternating regions
570     // considering the sequence file header, the middle corruption should
571     // affect at least half of the entries
572     int goodEntries = (NUM_WRITERS - 1) * ENTRIES;
573     int firstHalfEntries = (int) Math.ceil(ENTRIES / 2) - 1;
574     int allRegionsCount = splitAndCount(NUM_WRITERS, -1);
575     assertTrue("The file up to the corrupted area hasn't been parsed",
576         REGIONS.size() * (goodEntries + firstHalfEntries) <= allRegionsCount);
577   }
578 
579   @Test (timeout=300000)
580   public void testCorruptedFileGetsArchivedIfSkipErrors() throws IOException {
581     conf.setBoolean(HBASE_SKIP_ERRORS, true);
582     for (FaultySequenceFileLogReader.FailureType  failureType :
583         FaultySequenceFileLogReader.FailureType.values()) {
584       final Set<String> walDirContents = splitCorruptWALs(failureType);
585       final Set<String> archivedLogs = new HashSet<String>();
586       final StringBuilder archived = new StringBuilder("Archived logs in CORRUPTDIR:");
587       for (FileStatus log : fs.listStatus(CORRUPTDIR)) {
588         archived.append("\n\t").append(log.toString());
589         archivedLogs.add(log.getPath().getName());
590       }
591       LOG.debug(archived.toString());
592       assertEquals(failureType.name() + ": expected to find all of our wals corrupt.",
593           walDirContents, archivedLogs);
594     }
595   }
596 
597   /**
598    * @return set of wal names present prior to split attempt.
599    * @throws IOException if the split process fails
600    */
601   private Set<String> splitCorruptWALs(final FaultySequenceFileLogReader.FailureType failureType)
602       throws IOException {
603     Class<?> backupClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
604         Reader.class);
605     InstrumentedLogWriter.activateFailure = false;
606 
607     try {
608       conf.setClass("hbase.regionserver.hlog.reader.impl",
609           FaultySequenceFileLogReader.class, Reader.class);
610       conf.set("faultysequencefilelogreader.failuretype", failureType.name());
611       // Clean up from previous tests or previous loop
612       try {
613         wals.shutdown();
614       } catch (IOException exception) {
615         // since we're splitting out from under the factory, we should expect some closing failures.
616         LOG.debug("Ignoring problem closing WALFactory.", exception);
617       }
618       wals.close();
619       try {
620         for (FileStatus log : fs.listStatus(CORRUPTDIR)) {
621           fs.delete(log.getPath(), true);
622         }
623       } catch (FileNotFoundException exception) {
624         LOG.debug("no previous CORRUPTDIR to clean.");
625       }
626       // change to the faulty reader
627       wals = new WALFactory(conf, null, name.getMethodName());
628       generateWALs(-1);
629       // Our reader will render all of these files corrupt.
630       final Set<String> walDirContents = new HashSet<String>();
631       for (FileStatus status : fs.listStatus(WALDIR)) {
632         walDirContents.add(status.getPath().getName());
633       }
634       useDifferentDFSClient();
635       WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
636       return walDirContents;
637     } finally {
638       conf.setClass("hbase.regionserver.hlog.reader.impl", backupClass,
639           Reader.class);
640     }
641   }
642 
643   @Test (timeout=300000, expected = IOException.class)
644   public void testTrailingGarbageCorruptionLogFileSkipErrorsFalseThrows()
645       throws IOException {
646     conf.setBoolean(HBASE_SKIP_ERRORS, false);
647     splitCorruptWALs(FaultySequenceFileLogReader.FailureType.BEGINNING);
648   }
649 
650   @Test (timeout=300000)
651   public void testCorruptedLogFilesSkipErrorsFalseDoesNotTouchLogs()
652       throws IOException {
653     conf.setBoolean(HBASE_SKIP_ERRORS, false);
654     try {
655       splitCorruptWALs(FaultySequenceFileLogReader.FailureType.BEGINNING);
656     } catch (IOException e) {
657       LOG.debug("split with 'skip errors' set to 'false' correctly threw");
658     }
659     assertEquals("if skip.errors is false all files should remain in place",
660         NUM_WRITERS, fs.listStatus(WALDIR).length);
661   }
662 
663   private void ignoreCorruption(final Corruptions corruption, final int entryCount,
664       final int expectedCount) throws IOException {
665     conf.setBoolean(HBASE_SKIP_ERRORS, false);
666 
667     final String REGION = "region__1";
668     REGIONS.clear();
669     REGIONS.add(REGION);
670 
671     Path c1 = new Path(WALDIR, WAL_FILE_PREFIX + "0");
672     generateWALs(1, entryCount, -1, 0);
673     corruptWAL(c1, corruption, true);
674 
675     useDifferentDFSClient();
676     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
677 
678     Path[] splitLog = getLogForRegion(TABLE_NAME, REGION);
679     assertEquals(1, splitLog.length);
680 
681     int actualCount = 0;
682     Reader in = wals.createReader(fs, splitLog[0]);
683     @SuppressWarnings("unused")
684     Entry entry;
685     while ((entry = in.next()) != null) ++actualCount;
686     assertEquals(expectedCount, actualCount);
687     in.close();
688 
689     // should not have stored the EOF files as corrupt
690     FileStatus[] archivedLogs = fs.listStatus(CORRUPTDIR);
691     assertEquals(archivedLogs.length, 0);
692 
693   }
694 
695   @Test (timeout=300000)
696   public void testEOFisIgnored() throws IOException {
697     int entryCount = 10;
698     ignoreCorruption(Corruptions.TRUNCATE, entryCount, entryCount-1);
699   }
700 
701   @Test (timeout=300000)
702   public void testCorruptWALTrailer() throws IOException {
703     int entryCount = 10;
704     ignoreCorruption(Corruptions.TRUNCATE_TRAILER, entryCount, entryCount);
705   }
706 
707   @Test (timeout=300000)
708   public void testLogsGetArchivedAfterSplit() throws IOException {
709     conf.setBoolean(HBASE_SKIP_ERRORS, false);
710     generateWALs(-1);
711     useDifferentDFSClient();
712     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
713     FileStatus[] archivedLogs = fs.listStatus(OLDLOGDIR);
714     assertEquals("wrong number of files in the archive log", NUM_WRITERS, archivedLogs.length);
715   }
716 
717   @Test (timeout=300000)
718   public void testSplit() throws IOException {
719     generateWALs(-1);
720     splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES);
721   }
722 
723   @Test (timeout=300000)
724   public void testLogDirectoryShouldBeDeletedAfterSuccessfulSplit()
725       throws IOException {
726     generateWALs(-1);
727     useDifferentDFSClient();
728     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
729     FileStatus [] statuses = null;
730     try {
731       statuses = fs.listStatus(WALDIR);
732       if (statuses != null) {
733         fail("Files left in log dir: " +
734             Joiner.on(",").join(FileUtil.stat2Paths(statuses)));
735       }
736     } catch (FileNotFoundException e) {
737       // hadoop 0.21 throws FNFE whereas hadoop 0.20 returns null
738     }
739   }
740 
741   @Test(timeout=300000, expected = IOException.class)
742   public void testSplitWillFailIfWritingToRegionFails() throws Exception {
743     //leave 5th log open so we could append the "trap"
744     Writer writer = generateWALs(4);
745     useDifferentDFSClient();
746 
747     String region = "break";
748     Path regiondir = new Path(TABLEDIR, region);
749     fs.mkdirs(regiondir);
750 
751     InstrumentedLogWriter.activateFailure = false;
752     appendEntry(writer, TABLE_NAME, Bytes.toBytes(region),
753         ("r" + 999).getBytes(), FAMILY, QUALIFIER, VALUE, 0);
754     writer.close();
755 
756     try {
757       InstrumentedLogWriter.activateFailure = true;
758       WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
759     } catch (IOException e) {
760       assertTrue(e.getMessage().
761           contains("This exception is instrumented and should only be thrown for testing"));
762       throw e;
763     } finally {
764       InstrumentedLogWriter.activateFailure = false;
765     }
766   }
767 
768   @Test (timeout=300000)
769   public void testSplitDeletedRegion() throws IOException {
770     REGIONS.clear();
771     String region = "region_that_splits";
772     REGIONS.add(region);
773 
774     generateWALs(1);
775     useDifferentDFSClient();
776 
777     Path regiondir = new Path(TABLEDIR, region);
778     fs.delete(regiondir, true);
779     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
780     assertFalse(fs.exists(regiondir));
781   }
782 
783   @Test (timeout=300000)
784   public void testIOEOnOutputThread() throws Exception {
785     conf.setBoolean(HBASE_SKIP_ERRORS, false);
786 
787     generateWALs(-1);
788     useDifferentDFSClient();
789     FileStatus[] logfiles = fs.listStatus(WALDIR);
790     assertTrue("There should be some log file",
791         logfiles != null && logfiles.length > 0);
792     // wals with no entries (like the one we don't use in the factory)
793     // won't cause a failure since nothing will ever be written.
794     // pick the largest one since it's most likely to have entries.
795     int largestLogFile = 0;
796     long largestSize = 0;
797     for (int i = 0; i < logfiles.length; i++) {
798       if (logfiles[i].getLen() > largestSize) {
799         largestLogFile = i;
800         largestSize = logfiles[i].getLen();
801       }
802     }
803     assertTrue("There should be some log greater than size 0.", 0 < largestSize);
804     // Set up a splitter that will throw an IOE on the output side
805     WALSplitter logSplitter = new WALSplitter(wals,
806         conf, HBASEDIR, fs, HBASEDIR, fs, null, null, this.mode) {
807       @Override
808       protected Writer createWriter(Path logfile) throws IOException {
809         Writer mockWriter = Mockito.mock(Writer.class);
810         Mockito.doThrow(new IOException("Injected")).when(
811             mockWriter).append(Mockito.<Entry>any());
812         return mockWriter;
813       }
814     };
815     // Set up a background thread dumper.  Needs a thread to depend on and then we need to run
816     // the thread dumping in a background thread so it does not hold up the test.
817     final AtomicBoolean stop = new AtomicBoolean(false);
818     final Thread someOldThread = new Thread("Some-old-thread") {
819       @Override
820       public void run() {
821         while(!stop.get()) Threads.sleep(10);
822       }
823     };
824     someOldThread.setDaemon(true);
825     someOldThread.start();
826     final Thread t = new Thread("Background-thread-dumper") {
827       public void run() {
828         try {
829           Threads.threadDumpingIsAlive(someOldThread);
830         } catch (InterruptedException e) {
831           e.printStackTrace();
832         }
833       }
834     };
835     t.setDaemon(true);
836     t.start();
837     try {
838       logSplitter.splitLogFile(logfiles[largestLogFile], null);
839       fail("Didn't throw!");
840     } catch (IOException ioe) {
841       assertTrue(ioe.toString().contains("Injected"));
842     } finally {
843       // Setting this to true will turn off the background thread dumper.
844       stop.set(true);
845     }
846   }
847 
848   /**
849    * @param spiedFs should be instrumented for failure.
850    */
851   private void retryOverHdfsProblem(final FileSystem spiedFs) throws Exception {
852     generateWALs(-1);
853     useDifferentDFSClient();
854 
855     try {
856       WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, spiedFs, conf, wals);
857       assertEquals(NUM_WRITERS, fs.listStatus(OLDLOGDIR).length);
858       assertFalse(fs.exists(WALDIR));
859     } catch (IOException e) {
860       fail("There shouldn't be any exception but: " + e.toString());
861     }
862   }
863 
864   // Test for HBASE-3412
865   @Test (timeout=300000)
866   public void testMovedWALDuringRecovery() throws Exception {
867     // This partial mock will throw LEE for every file simulating
868     // files that were moved
869     FileSystem spiedFs = Mockito.spy(fs);
870     // The "File does not exist" part is very important,
871     // that's how it comes out of HDFS
872     Mockito.doThrow(new LeaseExpiredException("Injected: File does not exist")).
873         when(spiedFs).append(Mockito.<Path>any());
874     retryOverHdfsProblem(spiedFs);
875   }
876 
877   @Test (timeout=300000)
878   public void testRetryOpenDuringRecovery() throws Exception {
879     FileSystem spiedFs = Mockito.spy(fs);
880     // The "Cannot obtain block length", "Could not obtain the last block",
881     // and "Blocklist for [^ ]* has changed.*" part is very important,
882     // that's how it comes out of HDFS. If HDFS changes the exception
883     // message, this test needs to be adjusted accordingly.
884     //
885     // When DFSClient tries to open a file, HDFS needs to locate
886     // the last block of the file and get its length. However, if the
887     // last block is under recovery, HDFS may have problem to obtain
888     // the block length, in which case, retry may help.
889     Mockito.doAnswer(new Answer<FSDataInputStream>() {
890       private final String[] errors = new String[] {
891           "Cannot obtain block length", "Could not obtain the last block",
892           "Blocklist for " + OLDLOGDIR + " has changed"};
893       private int count = 0;
894 
895       public FSDataInputStream answer(InvocationOnMock invocation) throws Throwable {
896         if (count < 3) {
897           throw new IOException(errors[count++]);
898         }
899         return (FSDataInputStream)invocation.callRealMethod();
900       }
901     }).when(spiedFs).open(Mockito.<Path>any(), Mockito.anyInt());
902     retryOverHdfsProblem(spiedFs);
903   }
904 
905   @Test (timeout=300000)
906   public void testTerminationAskedByReporter() throws IOException, CorruptedLogFileException {
907     generateWALs(1, 10, -1);
908     FileStatus logfile = fs.listStatus(WALDIR)[0];
909     useDifferentDFSClient();
910 
911     final AtomicInteger count = new AtomicInteger();
912 
913     CancelableProgressable localReporter
914         = new CancelableProgressable() {
915       @Override
916       public boolean progress() {
917         count.getAndIncrement();
918         return false;
919       }
920     };
921 
922     FileSystem spiedFs = Mockito.spy(fs);
923     Mockito.doAnswer(new Answer<FSDataInputStream>() {
924       public FSDataInputStream answer(InvocationOnMock invocation) throws Throwable {
925         Thread.sleep(1500); // Sleep a while and wait report status invoked
926         return (FSDataInputStream)invocation.callRealMethod();
927       }
928     }).when(spiedFs).open(Mockito.<Path>any(), Mockito.anyInt());
929 
930     try {
931       conf.setInt("hbase.splitlog.report.period", 1000);
932       boolean ret = WALSplitter.splitLogFile(
933           HBASEDIR, logfile, spiedFs, conf, localReporter, null, null, this.mode, wals);
934       assertFalse("Log splitting should failed", ret);
935       assertTrue(count.get() > 0);
936     } catch (IOException e) {
937       fail("There shouldn't be any exception but: " + e.toString());
938     } finally {
939       // reset it back to its default value
940       conf.setInt("hbase.splitlog.report.period", 59000);
941     }
942   }
943 
944   /**
945    * Test log split process with fake data and lots of edits to trigger threading
946    * issues.
947    */
948   @Test (timeout=300000)
949   public void testThreading() throws Exception {
950     doTestThreading(20000, 128*1024*1024, 0);
951   }
952 
953   /**
954    * Test blocking behavior of the log split process if writers are writing slower
955    * than the reader is reading.
956    */
957   @Test (timeout=300000)
958   public void testThreadingSlowWriterSmallBuffer() throws Exception {
959     doTestThreading(200, 1024, 50);
960   }
961 
962   /**
963    * Sets up a log splitter with a mock reader and writer. The mock reader generates
964    * a specified number of edits spread across 5 regions. The mock writer optionally
965    * sleeps for each edit it is fed.
966    * *
967    * After the split is complete, verifies that the statistics show the correct number
968    * of edits output into each region.
969    *
970    * @param numFakeEdits number of fake edits to push through pipeline
971    * @param bufferSize size of in-memory buffer
972    * @param writerSlowness writer threads will sleep this many ms per edit
973    */
974   private void doTestThreading(final int numFakeEdits,
975       final int bufferSize,
976       final int writerSlowness) throws Exception {
977 
978     Configuration localConf = new Configuration(conf);
979     localConf.setInt("hbase.regionserver.hlog.splitlog.buffersize", bufferSize);
980 
981     // Create a fake log file (we'll override the reader to produce a stream of edits)
982     Path logPath = new Path(WALDIR, WAL_FILE_PREFIX + ".fake");
983     FSDataOutputStream out = fs.create(logPath);
984     out.close();
985 
986     // Make region dirs for our destination regions so the output doesn't get skipped
987     final List<String> regions = ImmutableList.of("r0", "r1", "r2", "r3", "r4");
988     makeRegionDirs(regions);
989 
990     // Create a splitter that reads and writes the data without touching disk
991     WALSplitter logSplitter = new WALSplitter(wals,
992         localConf, HBASEDIR, fs, HBASEDIR, fs, null, null, this.mode) {
993 
994       /* Produce a mock writer that doesn't write anywhere */
995       @Override
996       protected Writer createWriter(Path logfile) throws IOException {
997         Writer mockWriter = Mockito.mock(Writer.class);
998         Mockito.doAnswer(new Answer<Void>() {
999           int expectedIndex = 0;
1000 
1001           @Override
1002           public Void answer(InvocationOnMock invocation) {
1003             if (writerSlowness > 0) {
1004               try {
1005                 Thread.sleep(writerSlowness);
1006               } catch (InterruptedException ie) {
1007                 Thread.currentThread().interrupt();
1008               }
1009             }
1010             Entry entry = (Entry) invocation.getArguments()[0];
1011             WALEdit edit = entry.getEdit();
1012             List<Cell> cells = edit.getCells();
1013             assertEquals(1, cells.size());
1014             Cell cell = cells.get(0);
1015 
1016             // Check that the edits come in the right order.
1017             assertEquals(expectedIndex, Bytes.toInt(cell.getRowArray(), cell.getRowOffset(),
1018                 cell.getRowLength()));
1019             expectedIndex++;
1020             return null;
1021           }
1022         }).when(mockWriter).append(Mockito.<Entry>any());
1023         return mockWriter;
1024       }
1025 
1026       /* Produce a mock reader that generates fake entries */
1027       @Override
1028       protected Reader getReader(Path curLogFile, CancelableProgressable reporter)
1029           throws IOException {
1030         Reader mockReader = Mockito.mock(Reader.class);
1031         Mockito.doAnswer(new Answer<Entry>() {
1032           int index = 0;
1033 
1034           @Override
1035           public Entry answer(InvocationOnMock invocation) throws Throwable {
1036             if (index >= numFakeEdits) return null;
1037 
1038             // Generate r0 through r4 in round robin fashion
1039             int regionIdx = index % regions.size();
1040             byte region[] = new byte[] {(byte)'r', (byte) (0x30 + regionIdx)};
1041 
1042             Entry ret = createTestEntry(TABLE_NAME, region,
1043                 Bytes.toBytes((int)(index / regions.size())),
1044                 FAMILY, QUALIFIER, VALUE, index);
1045             index++;
1046             return ret;
1047           }
1048         }).when(mockReader).next();
1049         return mockReader;
1050       }
1051     };
1052 
1053     logSplitter.splitLogFile(fs.getFileStatus(logPath), null);
1054 
1055     // Verify number of written edits per region
1056     Map<byte[], Long> outputCounts = logSplitter.outputSink.getOutputCounts();
1057     for (Map.Entry<byte[], Long> entry : outputCounts.entrySet()) {
1058       LOG.info("Got " + entry.getValue() + " output edits for region " +
1059           Bytes.toString(entry.getKey()));
1060       assertEquals((long)entry.getValue(), numFakeEdits / regions.size());
1061     }
1062     assertEquals("Should have as many outputs as regions", regions.size(), outputCounts.size());
1063   }
1064 
1065   // Does leaving the writer open in testSplitDeletedRegion matter enough for two tests?
1066   @Test (timeout=300000)
1067   public void testSplitLogFileDeletedRegionDir() throws IOException {
1068     LOG.info("testSplitLogFileDeletedRegionDir");
1069     final String REGION = "region__1";
1070     REGIONS.clear();
1071     REGIONS.add(REGION);
1072 
1073     generateWALs(1, 10, -1);
1074     useDifferentDFSClient();
1075 
1076     Path regiondir = new Path(TABLEDIR, REGION);
1077     LOG.info("Region directory is" + regiondir);
1078     fs.delete(regiondir, true);
1079     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
1080     assertFalse(fs.exists(regiondir));
1081   }
1082 
1083   @Test (timeout=300000)
1084   public void testSplitLogFileEmpty() throws IOException {
1085     LOG.info("testSplitLogFileEmpty");
1086     // we won't create the hlog dir until getWAL got called, so
1087     // make dir here when testing empty log file
1088     fs.mkdirs(WALDIR);
1089     injectEmptyFile(".empty", true);
1090     useDifferentDFSClient();
1091 
1092     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
1093     Path tdir = FSUtils.getTableDir(HBASEDIR, TABLE_NAME);
1094     assertFalse(fs.exists(tdir));
1095 
1096     assertEquals(0, countWAL(fs.listStatus(OLDLOGDIR)[0].getPath()));
1097   }
1098 
1099   @Test (timeout=300000)
1100   public void testSplitLogFileMultipleRegions() throws IOException {
1101     LOG.info("testSplitLogFileMultipleRegions");
1102     generateWALs(1, 10, -1);
1103     splitAndCount(1, 10);
1104   }
1105 
1106   @Test (timeout=300000)
1107   public void testSplitLogFileFirstLineCorruptionLog()
1108       throws IOException {
1109     conf.setBoolean(HBASE_SKIP_ERRORS, true);
1110     generateWALs(1, 10, -1);
1111     FileStatus logfile = fs.listStatus(WALDIR)[0];
1112 
1113     corruptWAL(logfile.getPath(),
1114         Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true);
1115 
1116     useDifferentDFSClient();
1117     WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals);
1118 
1119     final Path corruptDir = new Path(FSUtils.getWALRootDir(conf), HConstants.CORRUPT_DIR_NAME);
1120     assertEquals(1, fs.listStatus(corruptDir).length);
1121   }
1122 
1123   /**
1124    * @throws IOException
1125    * @see https://issues.apache.org/jira/browse/HBASE-4862
1126    */
1127   @Test (timeout=300000)
1128   public void testConcurrentSplitLogAndReplayRecoverEdit() throws IOException {
1129     LOG.info("testConcurrentSplitLogAndReplayRecoverEdit");
1130     // Generate wals for our destination region
1131     String regionName = "r0";
1132     final Path regiondir = new Path(TABLEDIR, regionName);
1133     REGIONS.clear();
1134     REGIONS.add(regionName);
1135     generateWALs(-1);
1136 
1137     wals.getWAL(Bytes.toBytes(regionName), null);
1138     FileStatus[] logfiles = fs.listStatus(WALDIR);
1139     assertTrue("There should be some log file",
1140         logfiles != null && logfiles.length > 0);
1141 
1142     WALSplitter logSplitter = new WALSplitter(wals,
1143         conf, HBASEDIR, fs, HBASEDIR, fs, null, null, this.mode) {
1144       @Override
1145       protected Writer createWriter(Path logfile)
1146           throws IOException {
1147         Writer writer = wals.createRecoveredEditsWriter(this.walFS, logfile);
1148         // After creating writer, simulate region's
1149         // replayRecoveredEditsIfAny() which gets SplitEditFiles of this
1150         // region and delete them, excluding files with '.temp' suffix.
1151         NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(fs, regiondir);
1152         if (files != null && !files.isEmpty()) {
1153           for (Path file : files) {
1154             if (!this.walFS.delete(file, false)) {
1155               LOG.error("Failed delete of " + file);
1156             } else {
1157               LOG.debug("Deleted recovered.edits file=" + file);
1158             }
1159           }
1160         }
1161         return writer;
1162       }
1163     };
1164     try{
1165       logSplitter.splitLogFile(logfiles[0], null);
1166     } catch (IOException e) {
1167       LOG.info(e);
1168       fail("Throws IOException when spliting "
1169           + "log, it is most likely because writing file does not "
1170           + "exist which is caused by concurrent replayRecoveredEditsIfAny()");
1171     }
1172     if (fs.exists(CORRUPTDIR)) {
1173       if (fs.listStatus(CORRUPTDIR).length > 0) {
1174         fail("There are some corrupt logs, "
1175             + "it is most likely caused by concurrent replayRecoveredEditsIfAny()");
1176       }
1177     }
1178   }
1179 
1180   @Test
1181   public void testRecoveredEditsStoragePolicy() throws IOException {
1182     conf.set(HConstants.WAL_STORAGE_POLICY, "ALL_SSD");
1183     try {
1184       Path path = createRecoveredEditsPathForRegion();
1185       assertEquals("ALL_SSD", fs.getStoragePolicy(path.getParent()).getName());
1186     } finally {
1187       conf.unset(HConstants.WAL_STORAGE_POLICY);
1188     }
1189   }
1190 
1191   private Writer generateWALs(int leaveOpen) throws IOException {
1192     return generateWALs(NUM_WRITERS, ENTRIES, leaveOpen, 0);
1193   }
1194 
1195   private Writer generateWALs(int writers, int entries, int leaveOpen) throws IOException {
1196     return generateWALs(writers, entries, leaveOpen, 7);
1197   }
1198 
1199   private void makeRegionDirs(List<String> regions) throws IOException {
1200     for (String region : regions) {
1201       LOG.debug("Creating dir for region " + region);
1202       fs.mkdirs(new Path(TABLEDIR, region));
1203     }
1204   }
1205 
1206   /**
1207    * @param leaveOpen index to leave un-closed. -1 to close all.
1208    * @return the writer that's still open, or null if all were closed.
1209    */
1210   private Writer generateWALs(int writers, int entries, int leaveOpen, int regionEvents) throws IOException {
1211     makeRegionDirs(REGIONS);
1212     fs.mkdirs(WALDIR);
1213     Writer [] ws = new Writer[writers];
1214     int seq = 0;
1215     int numRegionEventsAdded = 0;
1216     for (int i = 0; i < writers; i++) {
1217       ws[i] = wals.createWALWriter(fs, new Path(WALDIR, WAL_FILE_PREFIX + i));
1218       for (int j = 0; j < entries; j++) {
1219         int prefix = 0;
1220         for (String region : REGIONS) {
1221           String row_key = region + prefix++ + i + j;
1222           appendEntry(ws[i], TABLE_NAME, region.getBytes(), row_key.getBytes(), FAMILY, QUALIFIER,
1223               VALUE, seq++);
1224 
1225           if (numRegionEventsAdded < regionEvents) {
1226             numRegionEventsAdded ++;
1227             appendRegionEvent(ws[i], region);
1228           }
1229         }
1230       }
1231       if (i != leaveOpen) {
1232         ws[i].close();
1233         LOG.info("Closing writer " + i);
1234       }
1235     }
1236     if (leaveOpen < 0 || leaveOpen >= writers) {
1237       return null;
1238     }
1239     return ws[leaveOpen];
1240   }
1241 
1242 
1243 
1244   private Path[] getLogForRegion(TableName table, String region)
1245       throws IOException {
1246     Path tdir = FSUtils.getWALTableDir(conf, table);
1247     @SuppressWarnings("deprecation")
1248     Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(new Path(tdir,
1249         Bytes.toString(region.getBytes())));
1250     FileStatus[] files = fs.listStatus(editsdir, new PathFilter() {
1251       @Override
1252       public boolean accept(Path p) {
1253         if (WALSplitter.isSequenceIdFile(p)) {
1254           return false;
1255         }
1256         return true;
1257       }
1258     });
1259     Path[] paths = new Path[files.length];
1260     for (int i = 0; i < files.length; i++) {
1261       paths[i] = files[i].getPath();
1262     }
1263     return paths;
1264   }
1265 
1266   private void corruptWAL(Path path, Corruptions corruption, boolean close) throws IOException {
1267     FSDataOutputStream out;
1268     int fileSize = (int) fs.listStatus(path)[0].getLen();
1269 
1270     FSDataInputStream in = fs.open(path);
1271     byte[] corrupted_bytes = new byte[fileSize];
1272     in.readFully(0, corrupted_bytes, 0, fileSize);
1273     in.close();
1274 
1275     switch (corruption) {
1276     case APPEND_GARBAGE:
1277       fs.delete(path, false);
1278       out = fs.create(path);
1279       out.write(corrupted_bytes);
1280       out.write("-----".getBytes());
1281       closeOrFlush(close, out);
1282       break;
1283 
1284     case INSERT_GARBAGE_ON_FIRST_LINE:
1285       fs.delete(path, false);
1286       out = fs.create(path);
1287       out.write(0);
1288       out.write(corrupted_bytes);
1289       closeOrFlush(close, out);
1290       break;
1291 
1292     case INSERT_GARBAGE_IN_THE_MIDDLE:
1293       fs.delete(path, false);
1294       out = fs.create(path);
1295       int middle = (int) Math.floor(corrupted_bytes.length / 2);
1296       out.write(corrupted_bytes, 0, middle);
1297       out.write(0);
1298       out.write(corrupted_bytes, middle, corrupted_bytes.length - middle);
1299       closeOrFlush(close, out);
1300       break;
1301 
1302     case TRUNCATE:
1303       fs.delete(path, false);
1304       out = fs.create(path);
1305       out.write(corrupted_bytes, 0, fileSize
1306           - (32 + ProtobufLogReader.PB_WAL_COMPLETE_MAGIC.length + Bytes.SIZEOF_INT));
1307       closeOrFlush(close, out);
1308       break;
1309 
1310     case TRUNCATE_TRAILER:
1311       fs.delete(path, false);
1312       out = fs.create(path);
1313       out.write(corrupted_bytes, 0, fileSize - Bytes.SIZEOF_INT);// trailer is truncated.
1314       closeOrFlush(close, out);
1315       break;
1316     }
1317   }
1318 
1319   private void closeOrFlush(boolean close, FSDataOutputStream out)
1320       throws IOException {
1321     if (close) {
1322       out.close();
1323     } else {
1324       Method syncMethod = null;
1325       try {
1326         syncMethod = out.getClass().getMethod("hflush", new Class<?> []{});
1327       } catch (NoSuchMethodException e) {
1328         try {
1329           syncMethod = out.getClass().getMethod("sync", new Class<?> []{});
1330         } catch (NoSuchMethodException ex) {
1331           throw new IOException("This version of Hadoop supports " +
1332               "neither Syncable.sync() nor Syncable.hflush().");
1333         }
1334       }
1335       try {
1336         syncMethod.invoke(out, new Object[]{});
1337       } catch (Exception e) {
1338         throw new IOException(e);
1339       }
1340       // Not in 0out.hflush();
1341     }
1342   }
1343 
1344   private int countWAL(Path log) throws IOException {
1345     int count = 0;
1346     Reader in = wals.createReader(fs, log);
1347     while (in.next() != null) {
1348       count++;
1349     }
1350     in.close();
1351     return count;
1352   }
1353 
1354   private static void appendCompactionEvent(Writer w, HRegionInfo hri, String[] inputs,
1355       String output) throws IOException {
1356     WALProtos.CompactionDescriptor.Builder desc = WALProtos.CompactionDescriptor.newBuilder();
1357     desc.setTableName(ByteString.copyFrom(hri.getTable().toBytes()))
1358         .setEncodedRegionName(ByteString.copyFrom(hri.getEncodedNameAsBytes()))
1359         .setRegionName(ByteString.copyFrom(hri.getRegionName()))
1360         .setFamilyName(ByteString.copyFrom(FAMILY))
1361         .setStoreHomeDir(hri.getEncodedName() + "/" + Bytes.toString(FAMILY))
1362         .addAllCompactionInput(Arrays.asList(inputs))
1363         .addCompactionOutput(output);
1364 
1365     WALEdit edit = WALEdit.createCompaction(hri, desc.build());
1366     WALKey key = new WALKey(hri.getEncodedNameAsBytes(), TABLE_NAME, 1,
1367         EnvironmentEdgeManager.currentTime(), HConstants.DEFAULT_CLUSTER_ID);
1368     w.append(new Entry(key, edit));
1369     w.sync(false);
1370   }
1371 
1372   private static void appendRegionEvent(Writer w, String region) throws IOException {
1373     WALProtos.RegionEventDescriptor regionOpenDesc = ProtobufUtil.toRegionEventDescriptor(
1374         WALProtos.RegionEventDescriptor.EventType.REGION_OPEN,
1375         TABLE_NAME.toBytes(),
1376         region.getBytes(),
1377         String.valueOf(region.hashCode()).getBytes(),
1378         1,
1379         ServerName.parseServerName("ServerName:9099"), ImmutableMap.<byte[], List<Path>>of());
1380     final long time = EnvironmentEdgeManager.currentTime();
1381     KeyValue kv = new KeyValue(region.getBytes(), WALEdit.METAFAMILY, WALEdit.REGION_EVENT,
1382         time, regionOpenDesc.toByteArray());
1383     final WALKey walKey = new WALKey(region.getBytes(), TABLE_NAME, 1, time,
1384         HConstants.DEFAULT_CLUSTER_ID);
1385     w.append(
1386         new Entry(walKey, new WALEdit().add(kv)));
1387     w.sync(false);
1388   }
1389 
1390   public static long appendEntry(Writer writer, TableName table, byte[] region,
1391       byte[] row, byte[] family, byte[] qualifier,
1392       byte[] value, long seq)
1393       throws IOException {
1394     LOG.info(Thread.currentThread().getName() + " append");
1395     writer.append(createTestEntry(table, region, row, family, qualifier, value, seq));
1396     LOG.info(Thread.currentThread().getName() + " sync");
1397     writer.sync(false);
1398     return seq;
1399   }
1400 
1401   private static Entry createTestEntry(
1402       TableName table, byte[] region,
1403       byte[] row, byte[] family, byte[] qualifier,
1404       byte[] value, long seq) {
1405     long time = System.nanoTime();
1406 
1407     seq++;
1408     final KeyValue cell = new KeyValue(row, family, qualifier, time, KeyValue.Type.Put, value);
1409     WALEdit edit = new WALEdit();
1410     edit.add(cell);
1411     return new Entry(new WALKey(region, table, seq, time,
1412         HConstants.DEFAULT_CLUSTER_ID), edit);
1413   }
1414 
1415   private void injectEmptyFile(String suffix, boolean closeFile)
1416       throws IOException {
1417     Writer writer = wals.createWALWriter(fs, new Path(WALDIR, WAL_FILE_PREFIX + suffix),
1418         conf);
1419     if (closeFile) writer.close();
1420   }
1421 
1422   private boolean logsAreEqual(Path p1, Path p2) throws IOException {
1423     Reader in1, in2;
1424     in1 = wals.createReader(fs, p1);
1425     in2 = wals.createReader(fs, p2);
1426     Entry entry1;
1427     Entry entry2;
1428     while ((entry1 = in1.next()) != null) {
1429       entry2 = in2.next();
1430       if ((entry1.getKey().compareTo(entry2.getKey()) != 0) ||
1431           (!entry1.getEdit().toString().equals(entry2.getEdit().toString()))) {
1432         return false;
1433       }
1434     }
1435     in1.close();
1436     in2.close();
1437     return true;
1438   }
1439 
1440   private Path createRecoveredEditsPathForRegion() throws IOException {
1441     byte[] encoded = HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes();
1442     long now = EnvironmentEdgeManager.currentTime();
1443     Entry entry = new Entry(
1444       new WALKey(encoded, TableName.META_TABLE_NAME, 1, now, HConstants.DEFAULT_CLUSTER_ID),
1445       new WALEdit());
1446     return WALSplitter
1447       .getRegionSplitEditsPath(entry, FILENAME_BEING_SPLIT, TMPDIRNAME, conf);
1448   }
1449 }