// "View Javadoc" — navigation artifact from the rendered source page, not part of the code.

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.cleaner;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.assertNotNull;
23  import static org.junit.Assert.assertTrue;
24  import static org.mockito.Mockito.doNothing;
25  import static org.mockito.Mockito.doReturn;
26  import static org.mockito.Mockito.doThrow;
27  import static org.mockito.Mockito.mock;
28  import static org.mockito.Mockito.spy;
29  import static org.mockito.Mockito.doAnswer;
30  import static org.mockito.Mockito.times;
31  import static org.mockito.Mockito.verify;
32  
33  import java.io.IOException;
34  import java.lang.reflect.Field;
35  import java.net.URLEncoder;
36  import java.util.HashMap;
37  import java.util.Iterator;
38  import java.util.LinkedList;
39  import java.util.List;
40  import java.util.Map;
41  import java.util.Random;
42  import java.util.concurrent.atomic.AtomicBoolean;
43  
44  import com.google.common.collect.Lists;
45  import org.apache.hadoop.conf.Configuration;
46  import org.apache.hadoop.fs.FSDataOutputStream;
47  import org.apache.hadoop.fs.FileStatus;
48  import org.apache.hadoop.fs.FileSystem;
49  import org.apache.hadoop.fs.Path;
50  import org.apache.hadoop.hbase.Abortable;
51  import org.apache.hadoop.hbase.ChoreService;
52  import org.apache.hadoop.hbase.CoordinatedStateManager;
53  import org.apache.hadoop.hbase.HBaseTestingUtility;
54  import org.apache.hadoop.hbase.HConstants;
55  import org.apache.hadoop.hbase.master.HMaster;
56  import org.apache.hadoop.hbase.testclassification.MediumTests;
57  import org.apache.hadoop.hbase.Server;
58  import org.apache.hadoop.hbase.ServerName;
59  import org.apache.hadoop.hbase.Waiter;
60  import org.apache.hadoop.hbase.client.ClusterConnection;
61  import org.apache.hadoop.hbase.replication.ReplicationFactory;
62  import org.apache.hadoop.hbase.replication.ReplicationQueues;
63  import org.apache.hadoop.hbase.replication.ReplicationQueuesClient;
64  import org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner;
65  import org.apache.hadoop.hbase.replication.regionserver.Replication;
66  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
67  import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
68  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
69  import org.apache.zookeeper.KeeperException;
70  import org.apache.zookeeper.Watcher;
71  import org.apache.zookeeper.data.Stat;
72  import org.junit.AfterClass;
73  import org.junit.BeforeClass;
74  import org.junit.Test;
75  import org.junit.experimental.categories.Category;
76  import org.mockito.ArgumentCaptor;
77  import org.mockito.Mockito;
78  import org.mockito.invocation.InvocationOnMock;
79  import org.mockito.stubbing.Answer;
80  
@Category(MediumTests.class)
public class TestLogsCleaner {

  // Shared mini-cluster utility; ZK and DFS clusters are started once per class.
  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  // Shared directory-scan pool handed to every LogCleaner built by the tests.
  private static DirScanPool POOL;
  /**
   * Starts the mini ZooKeeper and DFS clusters shared by every test in this
   * class, and creates the shared {@link DirScanPool}.
   * @throws Exception if either mini cluster fails to start
   */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    TEST_UTIL.startMiniZKCluster();
    TEST_UTIL.startMiniDFSCluster(1);
    POOL = new DirScanPool(TEST_UTIL.getConfiguration());
  }
96  
  /**
   * Shuts down the shared mini clusters and the cleaner scan pool created in
   * {@link #setUpBeforeClass()}.
   * @throws Exception if shutdown of either mini cluster fails
   */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniZKCluster();
    TEST_UTIL.shutdownMiniDFSCluster();
    POOL.shutdownNow();
  }
106 
  /**
   * End-to-end check of the LogCleaner chore: fills the oldWALs directory with
   * a mix of invalid, recent, new and old WAL files (registering 3 old ones
   * with the replication queues) and verifies that only files protected by TTL
   * or by replication survive the chore.
   */
  @Test
  public void testLogCleaning() throws Exception{
    Configuration conf = TEST_UTIL.getConfiguration();
    // set TTL
    long ttl = 10000;
    conf.setLong("hbase.master.logcleaner.ttl", ttl);
    conf.setBoolean(HConstants.REPLICATION_ENABLE_KEY, HConstants.REPLICATION_ENABLE_DEFAULT);
    Replication.decorateMasterConfiguration(conf);
    Server server = new DummyServer();
    ReplicationQueues repQueues =
        ReplicationFactory.getReplicationQueues(server.getZooKeeper(), conf, server);
    repQueues.init(server.getServerName().toString());
    final Path oldLogDir = new Path(TEST_UTIL.getDataTestDir(),
        HConstants.HREGION_OLDLOGDIR_NAME);
    // WAL file names embed the (URL-encoded) server name, mirroring how region
    // servers name their logs.
    String fakeMachineName =
      URLEncoder.encode(server.getServerName().toString(), "UTF8");

    final FileSystem fs = FileSystem.get(conf);

    // Create 2 invalid files, 1 "recent" file, 1 very new file and 30 old files
    long now = System.currentTimeMillis();
    fs.delete(oldLogDir, true);
    fs.mkdirs(oldLogDir);
    // Case 1: 2 invalid files, which would be deleted directly
    fs.createNewFile(new Path(oldLogDir, "a"));
    fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + "a"));
    // Case 2: 1 "recent" file, not even deletable for the first log cleaner
    // (TimeToLiveLogCleaner), so we are not going down the chain
    System.out.println("Now is: " + now);
    for (int i = 1; i < 31; i++) {
      // Case 3: old files which would be deletable for the first log cleaner
      // (TimeToLiveLogCleaner), and also for the second (ReplicationLogCleaner)
      Path fileName = new Path(oldLogDir, fakeMachineName + "." + (now - i) );
      fs.createNewFile(fileName);
      // Case 4: put 3 old log files in ZK indicating that they are scheduled
      // for replication so these files would pass the first log cleaner
      // (TimeToLiveLogCleaner) but would be rejected by the second
      // (ReplicationLogCleaner)
      if (i % (30/3) == 1) {
        repQueues.addLog(fakeMachineName, fileName.getName());
        System.out.println("Replication log file: " + fileName);
      }
    }

    // sleep for some time to get a newer modification time
    Thread.sleep(ttl);
    fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + now));

    // Case 2: 1 newer file, not even deletable for the first log cleaner
    // (TimeToLiveLogCleaner), so we are not going down the chain
    fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + (now + 10000) ));

    for (FileStatus stat : fs.listStatus(oldLogDir)) {
      System.out.println(stat.getPath().toString());
    }

    // 2 invalid + 30 old + 1 recent + 1 newer = 34 files before cleaning
    assertEquals(34, fs.listStatus(oldLogDir).length);

    LogCleaner cleaner  = new LogCleaner(1000, server, conf, fs, oldLogDir, POOL, null);

    cleaner.chore();

    // We end up with the current log file, a newer one and the 3 old log
    // files which are scheduled for replication
    TEST_UTIL.waitFor(1000, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return 5 == fs.listStatus(oldLogDir).length;
      }
    });

    for (FileStatus file : fs.listStatus(oldLogDir)) {
      System.out.println("Kept log files: " + file.getPath().getName());
    }
  }
182 
183   @Test(timeout=5000)
184   public void testZnodeCversionChange() throws Exception {
185     Configuration conf = TEST_UTIL.getConfiguration();
186     ReplicationLogCleaner cleaner = new ReplicationLogCleaner();
187     cleaner.setConf(conf);
188 
189     ReplicationQueuesClient rqcMock = mock(ReplicationQueuesClient.class);
190     Mockito.when(rqcMock.getQueuesZNodeCversion()).thenReturn(1, 2, 3, 4);
191 
192     Field rqc = ReplicationLogCleaner.class.getDeclaredField("replicationQueues");
193     rqc.setAccessible(true);
194 
195     rqc.set(cleaner, rqcMock);
196 
197     // This should return eventually when cversion stabilizes
198     cleaner.getDeletableFiles(new LinkedList<FileStatus>());
199   }
200 
  /**
   * Simulates a ZooKeeper ConnectionLossException while the cleaner lists the
   * replicators: the cleaner must keep every candidate file (return nothing
   * deletable), the abortable must be aborted, and the cleaner itself must
   * not be stopped.
   */
  @Test(timeout=10000)
  public void testZooKeeperAbortDuringGetListOfReplicators() throws Exception {
    Configuration conf = TEST_UTIL.getConfiguration();

    ReplicationLogCleaner cleaner = new ReplicationLogCleaner();

    // Two arbitrary candidate WAL files; neither may be marked deletable.
    List<FileStatus> dummyFiles = Lists.newArrayList(
        new FileStatus(100, false, 3, 100, System.currentTimeMillis(), new Path("log1")),
        new FileStatus(100, false, 3, 100, System.currentTimeMillis(), new Path("log2"))
    );

    FaultyZooKeeperWatcher faultyZK =
        new FaultyZooKeeperWatcher(conf, "testZooKeeperAbort-faulty", null);
    final AtomicBoolean getListOfReplicatorsFailed = new AtomicBoolean(false);
    TestAbortable abortable = new TestAbortable();

    try {
      faultyZK.init();
      // Spy on the real client so the test can observe that
      // getListOfReplicators() failed with ConnectionLossException
      // specifically, not in some other way.
      ReplicationQueuesClient replicationQueuesClient = spy(ReplicationFactory.getReplicationQueuesClient(
        faultyZK, conf, abortable));
      doAnswer(new Answer<Object>() {
        @Override
        public Object answer(InvocationOnMock invocation) throws Throwable {
          try {
            return invocation.callRealMethod();
          } catch (KeeperException.ConnectionLossException e) {
            // Record the expected failure, then rethrow so the cleaner sees it.
            getListOfReplicatorsFailed.set(true);
            throw e;
          }
        }
      }).when(replicationQueuesClient).getListOfReplicators();
      replicationQueuesClient.init();

      cleaner.init(conf, faultyZK, replicationQueuesClient);
      // should keep all files due to a ConnectionLossException getting the queues znodes
      Iterable<FileStatus> toDelete = cleaner.getDeletableFiles(dummyFiles);

      assertTrue(getListOfReplicatorsFailed.get());
      assertTrue(abortable.isAborted());
      assertFalse(toDelete.iterator().hasNext());
      assertFalse(cleaner.isStopped());
    } finally {
      faultyZK.close();
    }
  }
246 
247   /**
248    * When zk is working both files should be returned
249    * @throws Exception
250    */
251   @Test(timeout=10000)
252   public void testZooKeeperNormal() throws Exception {
253     Configuration conf = TEST_UTIL.getConfiguration();
254     ReplicationLogCleaner cleaner = new ReplicationLogCleaner();
255 
256     List<FileStatus> dummyFiles = Lists.newArrayList(
257         new FileStatus(100, false, 3, 100, System.currentTimeMillis(), new Path("log1")),
258         new FileStatus(100, false, 3, 100, System.currentTimeMillis(), new Path("log2"))
259     );
260 
261     ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "testZooKeeperAbort-normal", null);
262     try {
263       cleaner.init(conf, zkw, null);
264       Iterable<FileStatus> filesToDelete = cleaner.getDeletableFiles(dummyFiles);
265       Iterator<FileStatus> iter = filesToDelete.iterator();
266       assertTrue(iter.hasNext());
267       assertEquals(new Path("log1"), iter.next().getPath());
268       assertTrue(iter.hasNext());
269       assertEquals(new Path("log2"), iter.next().getPath());
270       assertFalse(iter.hasNext());
271     } finally {
272       zkw.close();
273     }
274   }
275 
  /**
   * Verifies that LogCleaner reacts to runtime configuration changes: thread
   * count, thread timeout and check interval must all be updated by
   * onConfigurationChange(), and a chore already running concurrently must
   * still delete every old WAL.
   */
  @Test
  public void testOnConfigurationChange() throws Exception {
    Configuration conf = TEST_UTIL.getConfiguration();
    // Pin the defaults first so the later dynamic change is observable.
    conf.setInt(LogCleaner.OLD_WALS_CLEANER_THREAD_SIZE,
        LogCleaner.DEFAULT_OLD_WALS_CLEANER_THREAD_SIZE);
    conf.setLong(LogCleaner.OLD_WALS_CLEANER_THREAD_TIMEOUT_MSEC,
        LogCleaner.DEFAULT_OLD_WALS_CLEANER_THREAD_TIMEOUT_MSEC);
    conf.setLong(LogCleaner.OLD_WALS_CLEANER_THREAD_CHECK_INTERVAL_MSEC,
        LogCleaner.DEFAULT_OLD_WALS_CLEANER_THREAD_CHECK_INTERVAL_MSEC);
    // Prepare environments
    Server server = new DummyServer();
    Path oldWALsDir = new Path(TEST_UTIL.getDefaultRootDirPath(),
        HConstants.HREGION_OLDLOGDIR_NAME);
    FileSystem fs = TEST_UTIL.getDFSCluster().getFileSystem();
    final LogCleaner cleaner = new LogCleaner(3000, server, conf, fs, oldWALsDir, POOL, null);
    // The cleaner must start out with the default settings.
    assertEquals(LogCleaner.DEFAULT_OLD_WALS_CLEANER_THREAD_SIZE, cleaner.getSizeOfCleaners());
    assertEquals(LogCleaner.DEFAULT_OLD_WALS_CLEANER_THREAD_TIMEOUT_MSEC,
        cleaner.getCleanerThreadTimeoutMsec());
    assertEquals(LogCleaner.DEFAULT_OLD_WALS_CLEANER_THREAD_CHECK_INTERVAL_MSEC,
        cleaner.getCleanerThreadCheckIntervalMsec());
    // Create dir and files for test
    fs.delete(oldWALsDir, true);
    fs.mkdirs(oldWALsDir);
    int numOfFiles = 10;
    createFiles(fs, oldWALsDir, numOfFiles);
    FileStatus[] status = fs.listStatus(oldWALsDir);
    assertEquals(numOfFiles, status.length);
    // Start cleaner chore on its own thread so the configuration can be
    // changed while it is in flight.
    Thread thread = new Thread(new Runnable() {
      @Override
      public void run() {
        cleaner.chore();
      }
    });
    thread.setDaemon(true);
    thread.start();
    // change size of cleaners dynamically
    int sizeToChange = 4;
    long threadTimeoutToChange = 30 * 1000L;
    long threadCheckIntervalToChange = 250L;
    conf.setInt(LogCleaner.OLD_WALS_CLEANER_THREAD_SIZE, sizeToChange);
    conf.setLong(LogCleaner.OLD_WALS_CLEANER_THREAD_TIMEOUT_MSEC, threadTimeoutToChange);
    conf.setLong(LogCleaner.OLD_WALS_CLEANER_THREAD_CHECK_INTERVAL_MSEC,
        threadCheckIntervalToChange);
    cleaner.onConfigurationChange(conf);
    assertEquals(sizeToChange, cleaner.getSizeOfCleaners());
    assertEquals(threadTimeoutToChange, cleaner.getCleanerThreadTimeoutMsec());
    assertEquals(threadCheckIntervalToChange, cleaner.getCleanerThreadCheckIntervalMsec());
    // Stop chore: wait for the in-flight chore to finish, then check that
    // every old WAL was deleted.
    thread.join();
    status = fs.listStatus(oldWALsDir);
    assertEquals(0, status.length);
  }
329 
330   private void createFiles(FileSystem fs, Path parentDir, int numOfFiles) throws IOException {
331     Random random = new Random();
332     for (int i = 0; i < numOfFiles; i++) {
333       int xMega = 1 + random.nextInt(3); // size of each file is between 1~3M
334       try (FSDataOutputStream fsdos = fs.create(new Path(parentDir, "file-" + i))) {
335         for (int m = 0; m < xMega; m++) {
336           byte[] M = new byte[1024 * 1024];
337           random.nextBytes(M);
338           fsdos.write(M);
339         }
340       }
341     }
342   }
343 
  /**
   * Minimal {@link Server} stub for driving the cleaners: supplies the shared
   * test configuration, a fresh ZooKeeperWatcher and a fixed server name;
   * every other accessor returns null and the lifecycle methods are no-ops.
   */
  static class DummyServer implements Server {

    @Override
    public Configuration getConfiguration() {
      return TEST_UTIL.getConfiguration();
    }

    @Override
    public ZooKeeperWatcher getZooKeeper() {
      // A new watcher is created on every call; a failure is printed and
      // surfaces as a null return (acceptable for this test-only stub).
      try {
        return new ZooKeeperWatcher(getConfiguration(), "dummy server", this);
      } catch (IOException e) {
        e.printStackTrace();
      }
      return null;
    }

    @Override
    public CoordinatedStateManager getCoordinatedStateManager() {
      return null;
    }

    @Override
    public ClusterConnection getConnection() {
      return null;
    }

    @Override
    public MetaTableLocator getMetaTableLocator() {
      return null;
    }

    @Override
    public ServerName getServerName() {
      // Fixed name; the tests derive WAL file names from it.
      return ServerName.valueOf("regionserver,60020,000000");
    }

    @Override
    public void abort(String why, Throwable e) {}

    @Override
    public boolean isAborted() {
      return false;
    }

    @Override
    public void stop(String why) {}

    @Override
    public boolean isStopped() {
      return false;
    }

    @Override
    public ChoreService getChoreService() {
      return null;
    }
  }
402 
403   static class FaultyZooKeeperWatcher extends ZooKeeperWatcher {
404     private RecoverableZooKeeper zk;
405 
406     public FaultyZooKeeperWatcher(Configuration conf, String identifier, Abortable abortable)
407         throws IOException {
408       super(conf, identifier, abortable);
409     }
410 
411     public void init() throws Exception {
412       this.zk = spy(super.getRecoverableZooKeeper());
413       doThrow(new KeeperException.ConnectionLossException())
414         .when(zk).getChildren("/hbase/replication/rs", null);
415     }
416 
417     public RecoverableZooKeeper getRecoverableZooKeeper() {
418       return zk;
419     }
420   }
421 
422   /**
423    * An {@link Abortable} implementation for tests.
424    */
425   class TestAbortable implements Abortable {
426     private volatile boolean aborted = false;
427 
428     @Override
429     public void abort(String why, Throwable e) {
430       this.aborted = true;
431     }
432 
433     @Override
434     public boolean isAborted() {
435       return this.aborted;
436     }
437   }
438 
  /**
   * Throw SessionExpiredException when zk#getData is called, simulating an
   * unrecoverable expired ZooKeeper session.
   */
  static class SessionExpiredZooKeeperWatcher extends ZooKeeperWatcher {
    // Spy around the real RecoverableZooKeeper; installed by init().
    private RecoverableZooKeeper zk;

    public SessionExpiredZooKeeperWatcher(Configuration conf, String identifier,
                                          Abortable abortable) throws IOException {
      super(conf, identifier, abortable);
    }

    /**
     * Installs a spy whose getData() always throws SessionExpiredException;
     * must be called before the watcher is used.
     */
    public void init() throws Exception {
      this.zk = spy(super.getRecoverableZooKeeper());
      doThrow(new KeeperException.SessionExpiredException())
        .when(zk).getData(Mockito.anyString(), Mockito.any(Watcher.class), Mockito.any(Stat.class));
    }

    @Override
    public RecoverableZooKeeper getRecoverableZooKeeper() {
      return zk;
    }
  }
461 
  /**
   * Tests that HMaster#abort will be called if ReplicationLogCleaner
   * encounters SessionExpiredException, which is unrecoverable, and that the
   * Throwable handed to abort() is that SessionExpiredException.
   * @throws Exception Exception
   */
  @Test
  public void testZookeeperSessionExpired() throws Exception {
    // Use a copy so the expired-session setup cannot leak into other tests.
    Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
    try(SessionExpiredZooKeeperWatcher sessionExpiredZK =
          new SessionExpiredZooKeeperWatcher(conf, "testSessionExpiredZk-faulty", null)) {
      sessionExpiredZK.init();
      ReplicationLogCleaner cleaner = new ReplicationLogCleaner();
      cleaner.setConf(conf);
      // Mock HMaster
      HMaster master  = mock(HMaster.class);
      // Return SessionExpired Zookeeper.
      doReturn(sessionExpiredZK).when(master).getZooKeeper();
      doNothing().when(master).abort(Mockito.anyString(), Mockito.any(Throwable.class));
      // The cleaner fetches the master from its init params.
      Map<String, Object> params = new HashMap<>();
      params.put(HMaster.MASTER, master);
      cleaner.init(params);
      // This will throw SessionExpiredException
      cleaner.getDeletableFiles(new LinkedList<FileStatus>());
      // make sure that HMaster#abort was called.
      ArgumentCaptor<Throwable> throwableCaptor = ArgumentCaptor.forClass(Throwable.class);

      verify(master, times(1))
        .abort(Mockito.anyString(), throwableCaptor.capture());
      assertNotNull(throwableCaptor.getValue());
      assertTrue("Should be SessionExpiredException",
        throwableCaptor.getValue() instanceof KeeperException.SessionExpiredException);
    }
  }
495 }