View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.cleaner;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotNull;
23  import static org.junit.Assert.assertTrue;
24  import static org.mockito.Mockito.atLeast;
25  import static org.mockito.Mockito.doNothing;
26  import static org.mockito.Mockito.doReturn;
27  import static org.mockito.Mockito.doThrow;
28  import static org.mockito.Mockito.mock;
29  import static org.mockito.Mockito.spy;
30  import static org.mockito.Mockito.doAnswer;
31  import static org.mockito.Mockito.times;
32  import static org.mockito.Mockito.verify;
33  
34  import java.io.IOException;
35  import java.lang.reflect.Field;
36  import java.net.URLEncoder;
37  import java.util.HashMap;
38  import java.util.Iterator;
39  import java.util.LinkedList;
40  import java.util.List;
41  import java.util.Map;
42  import java.util.Random;
43  import java.util.concurrent.atomic.AtomicBoolean;
44  
45  import com.google.common.collect.ImmutableMap;
46  import com.google.common.collect.Lists;
47  import org.apache.hadoop.conf.Configuration;
48  import org.apache.hadoop.fs.FSDataOutputStream;
49  import org.apache.hadoop.fs.FileStatus;
50  import org.apache.hadoop.fs.FileSystem;
51  import org.apache.hadoop.fs.Path;
52  import org.apache.hadoop.hbase.Abortable;
53  import org.apache.hadoop.hbase.ChoreService;
54  import org.apache.hadoop.hbase.CoordinatedStateManager;
55  import org.apache.hadoop.hbase.HBaseTestingUtility;
56  import org.apache.hadoop.hbase.HConstants;
57  import org.apache.hadoop.hbase.master.HMaster;
58  import org.apache.hadoop.hbase.testclassification.MediumTests;
59  import org.apache.hadoop.hbase.Server;
60  import org.apache.hadoop.hbase.ServerName;
61  import org.apache.hadoop.hbase.Waiter;
62  import org.apache.hadoop.hbase.client.ClusterConnection;
63  import org.apache.hadoop.hbase.replication.ReplicationFactory;
64  import org.apache.hadoop.hbase.replication.ReplicationQueues;
65  import org.apache.hadoop.hbase.replication.ReplicationQueuesClient;
66  import org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner;
67  import org.apache.hadoop.hbase.replication.regionserver.Replication;
68  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
69  import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
70  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
71  import org.apache.zookeeper.KeeperException;
72  import org.apache.zookeeper.Watcher;
73  import org.apache.zookeeper.data.Stat;
74  import org.junit.AfterClass;
75  import org.junit.BeforeClass;
76  import org.junit.Test;
77  import org.junit.experimental.categories.Category;
78  import org.mockito.ArgumentCaptor;
79  import org.mockito.Mockito;
80  import org.mockito.invocation.InvocationOnMock;
81  import org.mockito.stubbing.Answer;
82  
83  @Category(MediumTests.class)
84  public class TestLogsCleaner {
85  
86    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
87    private static DirScanPool POOL;
88  
89    /**
90     * @throws java.lang.Exception
91     */
92    @BeforeClass
93    public static void setUpBeforeClass() throws Exception {
94      TEST_UTIL.startMiniZKCluster();
95      TEST_UTIL.startMiniDFSCluster(1);
96      POOL = new DirScanPool(TEST_UTIL.getConfiguration());
97    }
98  
99    /**
100    * @throws java.lang.Exception
101    */
102   @AfterClass
103   public static void tearDownAfterClass() throws Exception {
104     TEST_UTIL.shutdownMiniZKCluster();
105     TEST_UTIL.shutdownMiniDFSCluster();
106     POOL.shutdownNow();
107   }
108 
  /**
   * End-to-end check of the LogCleaner chore: lays down a mix of invalid,
   * recent, future-dated, old, and replication-scheduled WAL files in the
   * oldWALs dir, runs the chore, and verifies exactly 5 files survive
   * (1 "current", 1 future-dated, 3 scheduled for replication).
   */
  @Test
  public void testLogCleaning() throws Exception{
    Configuration conf = TEST_UTIL.getConfiguration();
    // set TTL
    long ttl = 10000;
    conf.setLong("hbase.master.logcleaner.ttl", ttl);
    conf.setBoolean(HConstants.REPLICATION_ENABLE_KEY, HConstants.REPLICATION_ENABLE_DEFAULT);
    Replication.decorateMasterConfiguration(conf);
    Server server = new DummyServer();
    ReplicationQueues repQueues =
        ReplicationFactory.getReplicationQueues(server.getZooKeeper(), conf, server);
    repQueues.init(server.getServerName().toString());
    final Path oldLogDir = new Path(TEST_UTIL.getDataTestDir(),
        HConstants.HREGION_OLDLOGDIR_NAME);
    // WAL file names are prefixed with the URL-encoded server name.
    String fakeMachineName =
      URLEncoder.encode(server.getServerName().toString(), "UTF8");

    final FileSystem fs = FileSystem.get(conf);

    // Create 2 invalid files, 1 "recent" file, 1 very new file and 30 old files
    long now = System.currentTimeMillis();
    fs.delete(oldLogDir, true);
    fs.mkdirs(oldLogDir);
    // Case 1: 2 invalid files, which would be deleted directly
    fs.createNewFile(new Path(oldLogDir, "a"));
    fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + "a"));
    // Case 2: 1 "recent" file, not even deletable for the first log cleaner
    // (TimeToLiveLogCleaner), so we are not going down the chain
    System.out.println("Now is: " + now);
    for (int i = 1; i < 31; i++) {
      // Case 3: old files which would be deletable for the first log cleaner
      // (TimeToLiveLogCleaner), and also for the second (ReplicationLogCleaner)
      Path fileName = new Path(oldLogDir, fakeMachineName + "." + (now - i) );
      fs.createNewFile(fileName);
      // Case 4: put 3 old log files in ZK indicating that they are scheduled
      // for replication so these files would pass the first log cleaner
      // (TimeToLiveLogCleaner) but would be rejected by the second
      // (ReplicationLogCleaner)
      if (i % (30/3) == 1) {
        repQueues.addLog(fakeMachineName, fileName.getName());
        System.out.println("Replication log file: " + fileName);
      }
    }

    // sleep for sometime to get newer modifcation time
    // (sleep past the TTL so files created above are definitely expired,
    // while the two files created below are definitely not)
    Thread.sleep(ttl);
    fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + now));

    // Case 2: 1 newer file, not even deletable for the first log cleaner
    // (TimeToLiveLogCleaner), so we are not going down the chain
    fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + (now + 10000) ));

    for (FileStatus stat : fs.listStatus(oldLogDir)) {
      System.out.println(stat.getPath().toString());
    }

    // 2 invalid + 30 old + 1 recent + 1 future-dated = 34 files before cleaning
    assertEquals(34, fs.listStatus(oldLogDir).length);

    LogCleaner cleaner  = new LogCleaner(1000, server, conf, fs, oldLogDir, POOL, null);

    cleaner.chore();

    // We end up with the current log file, a newer one and the 3 old log
    // files which are scheduled for replication
    // (deletion runs async on the pool, so wait for it to converge)
    TEST_UTIL.waitFor(1000, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return 5 == fs.listStatus(oldLogDir).length;
      }
    });

    for (FileStatus file : fs.listStatus(oldLogDir)) {
      System.out.println("Kept log files: " + file.getPath().getName());
    }
  }
184 
185   @Test(timeout=5000)
186   public void testZnodeCversionChange() throws Exception {
187     Configuration conf = TEST_UTIL.getConfiguration();
188     ReplicationLogCleaner cleaner = new ReplicationLogCleaner();
189     cleaner.setConf(conf);
190 
191     ReplicationQueuesClient rqcMock = mock(ReplicationQueuesClient.class);
192     Mockito.when(rqcMock.getQueuesZNodeCversion()).thenReturn(1, 2, 3, 4);
193     // Avoid direct return because there no replicator.
194     Mockito.when(rqcMock.getListOfReplicators())
195       .thenReturn(Lists.newArrayList("s1", "s2"));
196 
197     Field rqc = ReplicationLogCleaner.class.getDeclaredField("replicationQueues");
198     rqc.setAccessible(true);
199 
200     rqc.set(cleaner, rqcMock);
201 
202     // This should return eventually when cversion stabilizes
203     cleaner.getDeletableFiles(new LinkedList<FileStatus>());
204     // Test did get an optimistic lock
205     Mockito.verify(rqcMock, atLeast(5)).getQueuesZNodeCversion();
206   }
207 
208   @Test
209   public void testReplicatorZnodeCversionChange()
210     throws KeeperException, NoSuchFieldException, IllegalAccessException {
211     Configuration conf = TEST_UTIL.getConfiguration();
212     ReplicationLogCleaner cleaner = new ReplicationLogCleaner();
213     cleaner.setConf(conf);
214 
215     ReplicationQueuesClient rqcMock = mock(ReplicationQueuesClient.class);
216     // Avoid direct return because there no replicator.
217     Mockito.when(rqcMock.getListOfReplicators()).thenReturn(Lists.newArrayList("s1", "s2"));
218     Mockito.when(rqcMock.getReplicatorsZNodeCversion()).thenReturn(
219       ImmutableMap.of("s1", 0, "s2", 0),
220       ImmutableMap.of("s1", 1, "s2", 1),
221       ImmutableMap.of("s1", 2, "s2", 2),
222       ImmutableMap.of("s1", 3, "s2", 3));
223 
224     Field rqc = ReplicationLogCleaner.class.getDeclaredField("replicationQueues");
225     rqc.setAccessible(true);
226 
227     rqc.set(cleaner, rqcMock);
228 
229     // This should return eventually when cversion stabilizes
230     cleaner.getDeletableFiles(new LinkedList<FileStatus>());
231     // Test did get an optimistic lock
232     Mockito.verify(rqcMock, atLeast(5)).getReplicatorsZNodeCversion();
233   }
234 
  /**
   * When listing the replicators fails with a ConnectionLossException, the
   * cleaner must keep all candidate files (delete nothing), the abortable must
   * be aborted, and the cleaner itself must not be stopped.
   */
  @Test(timeout=10000)
  public void testZooKeeperAbortDuringGetListOfReplicators() throws Exception {
    Configuration conf = TEST_UTIL.getConfiguration();

    ReplicationLogCleaner cleaner = new ReplicationLogCleaner();

    // Two arbitrary candidate files; both must survive the failure.
    List<FileStatus> dummyFiles = Lists.newArrayList(
        new FileStatus(100, false, 3, 100, System.currentTimeMillis(), new Path("log1")),
        new FileStatus(100, false, 3, 100, System.currentTimeMillis(), new Path("log2"))
    );

    FaultyZooKeeperWatcher faultyZK =
        new FaultyZooKeeperWatcher(conf, "testZooKeeperAbort-faulty", null);
    final AtomicBoolean getListOfReplicatorsFailed = new AtomicBoolean(false);
    TestAbortable abortable = new TestAbortable();

    try {
      faultyZK.init();
      // Spy on the real client so we can observe (not alter) the failure path.
      ReplicationQueuesClient replicationQueuesClient = spy(ReplicationFactory.getReplicationQueuesClient(
        faultyZK, conf, abortable));
      // Record that the expected ConnectionLossException actually occurred,
      // then rethrow so the cleaner sees the original failure.
      doAnswer(new Answer<Object>() {
        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
          try {
            return invocation.callRealMethod();
          } catch (KeeperException.ConnectionLossException e) {
            getListOfReplicatorsFailed.set(true);
            throw e;
          }
        }
      }).when(replicationQueuesClient).getListOfReplicators();
      replicationQueuesClient.init();

      cleaner.init(conf, faultyZK, replicationQueuesClient);
      // should keep all files due to a ConnectionLossException getting the queues znodes
      Iterable<FileStatus> toDelete = cleaner.getDeletableFiles(dummyFiles);

      assertTrue(getListOfReplicatorsFailed.get());
      assertTrue(abortable.isAborted());
      assertFalse(toDelete.iterator().hasNext());
      // A connection loss is recoverable: the cleaner itself must stay alive.
      assertFalse(cleaner.isStopped());
    } finally {
      faultyZK.close();
    }
  }
280 
281   /**
282    * When zk is working both files should be returned
283    * @throws Exception
284    */
285   @Test(timeout=10000)
286   public void testZooKeeperNormal() throws Exception {
287     Configuration conf = TEST_UTIL.getConfiguration();
288     ReplicationLogCleaner cleaner = new ReplicationLogCleaner();
289 
290     List<FileStatus> dummyFiles = Lists.newArrayList(
291         new FileStatus(100, false, 3, 100, System.currentTimeMillis(), new Path("log1")),
292         new FileStatus(100, false, 3, 100, System.currentTimeMillis(), new Path("log2"))
293     );
294 
295     ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "testZooKeeperAbort-normal", null);
296     try {
297       cleaner.init(conf, zkw, null);
298       Iterable<FileStatus> filesToDelete = cleaner.getDeletableFiles(dummyFiles);
299       Iterator<FileStatus> iter = filesToDelete.iterator();
300       assertTrue(iter.hasNext());
301       assertEquals(new Path("log1"), iter.next().getPath());
302       assertTrue(iter.hasNext());
303       assertEquals(new Path("log2"), iter.next().getPath());
304       assertFalse(iter.hasNext());
305     } finally {
306       zkw.close();
307     }
308   }
309 
  /**
   * Verifies that the LogCleaner picks up pool-size / timeout / check-interval
   * changes dynamically via onConfigurationChange() while a chore is running,
   * and that the chore still drains the oldWALs dir afterwards.
   */
  @Test
  public void testOnConfigurationChange() throws Exception {
    Configuration conf = TEST_UTIL.getConfiguration();
    // Start from the documented defaults so the later change is observable.
    conf.setInt(LogCleaner.OLD_WALS_CLEANER_THREAD_SIZE,
        LogCleaner.DEFAULT_OLD_WALS_CLEANER_THREAD_SIZE);
    conf.setLong(LogCleaner.OLD_WALS_CLEANER_THREAD_TIMEOUT_MSEC,
        LogCleaner.DEFAULT_OLD_WALS_CLEANER_THREAD_TIMEOUT_MSEC);
    conf.setLong(LogCleaner.OLD_WALS_CLEANER_THREAD_CHECK_INTERVAL_MSEC,
        LogCleaner.DEFAULT_OLD_WALS_CLEANER_THREAD_CHECK_INTERVAL_MSEC);
    // Prepare environments
    Server server = new DummyServer();
    Path oldWALsDir = new Path(TEST_UTIL.getDefaultRootDirPath(),
        HConstants.HREGION_OLDLOGDIR_NAME);
    FileSystem fs = TEST_UTIL.getDFSCluster().getFileSystem();
    final LogCleaner cleaner = new LogCleaner(3000, server, conf, fs, oldWALsDir, POOL, null);
    assertEquals(LogCleaner.DEFAULT_OLD_WALS_CLEANER_THREAD_SIZE, cleaner.getSizeOfCleaners());
    assertEquals(LogCleaner.DEFAULT_OLD_WALS_CLEANER_THREAD_TIMEOUT_MSEC,
        cleaner.getCleanerThreadTimeoutMsec());
    assertEquals(LogCleaner.DEFAULT_OLD_WALS_CLEANER_THREAD_CHECK_INTERVAL_MSEC,
        cleaner.getCleanerThreadCheckIntervalMsec());
    // Create dir and files for test
    fs.delete(oldWALsDir, true);
    fs.mkdirs(oldWALsDir);
    int numOfFiles = 10;
    createFiles(fs, oldWALsDir, numOfFiles);
    FileStatus[] status = fs.listStatus(oldWALsDir);
    assertEquals(numOfFiles, status.length);
    // Start cleaner chore on its own thread so we can reconfigure concurrently
    Thread thread = new Thread(new Runnable() {
      @Override
      public void run() {
        cleaner.chore();
      }
    });
    thread.setDaemon(true);
    thread.start();
    // change size of cleaners dynamically
    int sizeToChange = 4;
    long threadTimeoutToChange = 30 * 1000L;
    long threadCheckIntervalToChange = 250L;
    conf.setInt(LogCleaner.OLD_WALS_CLEANER_THREAD_SIZE, sizeToChange);
    conf.setLong(LogCleaner.OLD_WALS_CLEANER_THREAD_TIMEOUT_MSEC, threadTimeoutToChange);
    conf.setLong(LogCleaner.OLD_WALS_CLEANER_THREAD_CHECK_INTERVAL_MSEC,
        threadCheckIntervalToChange);
    cleaner.onConfigurationChange(conf);
    assertEquals(sizeToChange, cleaner.getSizeOfCleaners());
    assertEquals(threadTimeoutToChange, cleaner.getCleanerThreadTimeoutMsec());
    assertEquals(threadCheckIntervalToChange, cleaner.getCleanerThreadCheckIntervalMsec());
    // Stop chore
    // (join blocks until chore() returns; the @Test default timeout guards it)
    thread.join();
    status = fs.listStatus(oldWALsDir);
    // All files were TTL-expired, so the chore must have deleted every one.
    assertEquals(0, status.length);
  }
363 
364   private void createFiles(FileSystem fs, Path parentDir, int numOfFiles) throws IOException {
365     Random random = new Random();
366     for (int i = 0; i < numOfFiles; i++) {
367       int xMega = 1 + random.nextInt(3); // size of each file is between 1~3M
368       try (FSDataOutputStream fsdos = fs.create(new Path(parentDir, "file-" + i))) {
369         for (int m = 0; m < xMega; m++) {
370           byte[] M = new byte[1024 * 1024];
371           random.nextBytes(M);
372           fsdos.write(M);
373         }
374       }
375     }
376   }
377 
378   static class DummyServer implements Server {
379 
380     @Override
381     public Configuration getConfiguration() {
382       return TEST_UTIL.getConfiguration();
383     }
384 
385     @Override
386     public ZooKeeperWatcher getZooKeeper() {
387       try {
388         return new ZooKeeperWatcher(getConfiguration(), "dummy server", this);
389       } catch (IOException e) {
390         e.printStackTrace();
391       }
392       return null;
393     }
394 
395     @Override
396     public CoordinatedStateManager getCoordinatedStateManager() {
397       return null;
398     }
399 
400     @Override
401     public ClusterConnection getConnection() {
402       return null;
403     }
404 
405     @Override
406     public MetaTableLocator getMetaTableLocator() {
407       return null;
408     }
409 
410     @Override
411     public ServerName getServerName() {
412       return ServerName.valueOf("regionserver,60020,000000");
413     }
414 
415     @Override
416     public void abort(String why, Throwable e) {}
417 
418     @Override
419     public boolean isAborted() {
420       return false;
421     }
422 
423     @Override
424     public void stop(String why) {}
425 
426     @Override
427     public boolean isStopped() {
428       return false;
429     }
430 
431     @Override
432     public ChoreService getChoreService() {
433       return null;
434     }
435   }
436 
437   static class FaultyZooKeeperWatcher extends ZooKeeperWatcher {
438     private RecoverableZooKeeper zk;
439 
440     public FaultyZooKeeperWatcher(Configuration conf, String identifier, Abortable abortable)
441         throws IOException {
442       super(conf, identifier, abortable);
443     }
444 
445     public void init() throws Exception {
446       this.zk = spy(super.getRecoverableZooKeeper());
447       doThrow(new KeeperException.ConnectionLossException())
448         .when(zk).getChildren("/hbase/replication/rs", null);
449     }
450 
451     public RecoverableZooKeeper getRecoverableZooKeeper() {
452       return zk;
453     }
454   }
455 
456   /**
457    * An {@link Abortable} implementation for tests.
458    */
459   class TestAbortable implements Abortable {
460     private volatile boolean aborted = false;
461 
462     @Override
463     public void abort(String why, Throwable e) {
464       this.aborted = true;
465     }
466 
467     @Override
468     public boolean isAborted() {
469       return this.aborted;
470     }
471   }
472 
473   /**
474    * Throw SessionExpiredException when zk#getData is called.
475    */
476   static class SessionExpiredZooKeeperWatcher extends ZooKeeperWatcher {
477     private RecoverableZooKeeper zk;
478 
479     public SessionExpiredZooKeeperWatcher(Configuration conf, String identifier,
480                                           Abortable abortable) throws IOException {
481       super(conf, identifier, abortable);
482     }
483 
484     public void init() throws Exception {
485       this.zk = spy(super.getRecoverableZooKeeper());
486       doThrow(new KeeperException.SessionExpiredException())
487         .when(zk).getData(Mockito.anyString(), Mockito.any(Watcher.class), Mockito.any(Stat.class));
488     }
489 
490     @Override
491     public RecoverableZooKeeper getRecoverableZooKeeper() {
492       return zk;
493     }
494   }
495 
496   /**
497    * Tests that HMaster#abort will be called if ReplicationLogCleaner
498    * encounters SessionExpiredException which is unrecoverable.
499    * @throws Exception Exception
500    */
501   @Test
502   public void testZookeeperSessionExpired() throws Exception {
503     Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
504     try(SessionExpiredZooKeeperWatcher sessionExpiredZK =
505           new SessionExpiredZooKeeperWatcher(conf, "testSessionExpiredZk-faulty", null)) {
506       sessionExpiredZK.init();
507       ReplicationLogCleaner cleaner = new ReplicationLogCleaner();
508       cleaner.setConf(conf);
509       // Mock HMaster
510       HMaster master  = mock(HMaster.class);
511       // Return SessionExpired Zookeeper.
512       doReturn(sessionExpiredZK).when(master).getZooKeeper();
513       doNothing().when(master).abort(Mockito.anyString(), Mockito.any(Throwable.class));
514       Map<String, Object> params = new HashMap<>();
515       params.put(HMaster.MASTER, master);
516       cleaner.init(params);
517       // This will throw SessionExpiredException
518       cleaner.getDeletableFiles(new LinkedList<FileStatus>());
519       // make sure that HMaster#abort was called.
520       ArgumentCaptor<Throwable> throwableCaptor = ArgumentCaptor.forClass(Throwable.class);
521 
522       verify(master, times(1))
523         .abort(Mockito.anyString(), throwableCaptor.capture());
524       assertNotNull(throwableCaptor.getValue());
525       assertTrue("Should be SessionExpiredException",
526         throwableCaptor.getValue() instanceof KeeperException.SessionExpiredException);
527     }
528   }
529 }