View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.fs;
19  
20  import java.io.FileNotFoundException;
21  import java.io.IOException;
22  import java.lang.reflect.Field;
23  import java.util.List;
24  import java.util.concurrent.CountDownLatch;
25  import org.apache.hadoop.conf.Configuration;
26  import org.apache.hadoop.fs.BlockLocation;
27  import org.apache.hadoop.fs.FileStatus;
28  import org.apache.hadoop.fs.FileSystem;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.hbase.HBaseTestingUtility;
31  import org.apache.hadoop.hbase.HConstants;
32  import org.apache.hadoop.hbase.MiniHBaseCluster;
33  import org.apache.hadoop.hbase.TableName;
34  import org.apache.hadoop.hbase.client.Put;
35  import org.apache.hadoop.hbase.client.Table;
36  import org.apache.hadoop.hbase.regionserver.HRegion;
37  import org.apache.hadoop.hbase.regionserver.HRegionServer;
38  import org.apache.hadoop.hbase.regionserver.Region;
39  import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
40  import org.apache.hadoop.hbase.testclassification.LargeTests;
41  import org.apache.hadoop.hbase.testclassification.MiscTests;
42  import org.apache.hadoop.hbase.util.FSUtils;
43  import org.apache.hadoop.hdfs.DFSClient;
44  import org.apache.hadoop.hdfs.DistributedFileSystem;
45  import org.apache.hadoop.hdfs.MiniDFSCluster;
46  import org.apache.hadoop.hdfs.protocol.ClientProtocol;
47  import org.apache.hadoop.hdfs.protocol.DirectoryListing;
48  import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
49  import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
50  import org.apache.hadoop.ipc.RemoteException;
51  import org.junit.After;
52  import org.junit.Assert;
53  import org.junit.Before;
54  import org.junit.Rule;
55  import org.junit.Test;
56  import org.junit.experimental.categories.Category;
57  import org.junit.rules.TestName;
58  import org.slf4j.Logger;
59  import org.slf4j.LoggerFactory;
60  
/**
 * Tests for the hdfs fix from HBASE-6435.
 *
 * Please don't add new subtests that involve starting/stopping a MiniDFSCluster to this class.
 * When a MiniDFSCluster is stopped, shutdown hooks are cleared in hadoop's ShutdownHookManager
 *   in hadoop 3.
 * This leads to a 'Failed suppression of fs shutdown hook' error in the region server.
 */
@Category({MiscTests.class, LargeTests.class})
public class TestBlockReorderMultiBlocks {

  private static final Logger LOG = LoggerFactory.getLogger(TestBlockReorderMultiBlocks.class);

  // Cluster state shared across the test; created in setUp(), torn down after each test.
  private Configuration conf;
  private MiniDFSCluster cluster;
  private HBaseTestingUtility htu;
  private DistributedFileSystem dfs;
  // Hostnames of the three datanodes started in setUp(). The test itself adds a fourth
  // datanode whose hostname matches the region server's, so the reorder hook has a
  // "local" replica to detect.
  private static final String host1 = "host1";
  private static final String host2 = "host2";
  private static final String host3 = "host3";

  @Rule
  public TestName name = new TestName();

  @Before
  public void setUp() throws Exception {
    htu = new HBaseTestingUtility();
    // A tiny block size forces WAL files to span multiple blocks, which is the
    // case this test exists to exercise.
    htu.getConfiguration().setInt("dfs.blocksize", 1024);// For the test with multiple blocks
    htu.getConfiguration().setInt("dfs.replication", 3);
    htu.startMiniDFSCluster(3,
        new String[]{"/r1", "/r2", "/r3"}, new String[]{host1, host2, host3});

    conf = htu.getConfiguration();
    cluster = htu.getDFSCluster();
    dfs = (DistributedFileSystem) FileSystem.get(conf);
  }

  // NOTE(review): despite the "AfterClass"-style name, this is an instance-level
  // @After method and runs after every test method.
  @After
  public void tearDownAfterClass() throws Exception {
    htu.shutdownMiniCluster();
  }

  /**
   * Test that the hook works within HBase, including when there are multiple blocks.
   */
  @Test()
  public void testHBaseCluster() throws Exception {
    // Used as table name, family, qualifier and value throughout the test.
    byte[] sb = "sb".getBytes();
    htu.startMiniZKCluster();

    MiniHBaseCluster hbm = htu.startMiniHBaseCluster(1, 1);
    hbm.waitForActiveAndReadyMaster();
    hbm.getRegionServer(0).waitForServerOnline();
    HRegionServer targetRs = hbm.getRegionServer(0);

    // We want to have a datanode with the same name as the region server, so
    //  we're going to get the regionservername, and start a new datanode with this name.
    String host4 = targetRs.getServerName().getHostname();
    LOG.info("Starting a new datanode with the name=" + host4);
    cluster.startDataNodes(conf, 1, true, null, new String[]{"/r4"}, new String[]{host4}, null);
    cluster.waitClusterUp();

    final int repCount = 3;

    // We use the regionserver file system & conf as we expect it to have the hook.
    conf = targetRs.getConfiguration();
    HFileSystem rfs = (HFileSystem) targetRs.getFileSystem();
    Table h = htu.createTable(TableName.valueOf("table"), sb);

    // Now, we have 4 datanodes and a replication count of 3. So we don't know if the datanode
    // with the same node will be used. We can't really stop an existing datanode, this would
    // make us fall in nasty hdfs bugs/issues. So we're going to try multiple times.

    // Now we need to find the log file, its locations, and look at it

    String walDir = new Path(FSUtils.getWALRootDir(conf) + "/" + HConstants.HREGION_LOGDIR_NAME +
            "/" + targetRs.getServerName().toString()).toUri().getPath();

    DistributedFileSystem mdfs = (DistributedFileSystem)
        hbm.getMaster().getMasterFileSystem().getFileSystem();


    // Repeat (roll WAL, write, inspect locations) until we have seen 10 WAL files
    // whose first block's last replica sits on the new datanode (host4).
    int nbTest = 0;
    while (nbTest < 10) {
      final List<Region> regions = targetRs.getOnlineRegions(h.getName());
      final CountDownLatch latch = new CountDownLatch(regions.size());
      // listen for successful log rolls
      final WALActionsListener listener = new WALActionsListener.Base() {
            @Override
            public void postLogRoll(final Path oldPath, final Path newPath) throws IOException {
              latch.countDown();
            }
          };
      for (Region region : regions) {
        ((HRegion)region).getWAL().registerWALActionsListener(listener);
      }

      // Force a WAL roll so each iteration inspects a freshly created log file.
      htu.getHBaseAdmin().rollWALWriter(targetRs.getServerName());

      // wait for every region's WAL to report a completed roll
      try {
        latch.await();
      } catch (InterruptedException exception) {
        LOG.warn("Interrupted while waiting for the wal of '" + targetRs + "' to roll. If later " +
            "tests fail, it's probably because we should still be waiting.");
        Thread.currentThread().interrupt();
      }
      for (Region region : regions) {
        ((HRegion)region).getWAL().unregisterWALActionsListener(listener);
      }

      // We need a sleep as the namenode is informed asynchronously
      Thread.sleep(100);

      // insert one put to ensure a minimal size
      Put p = new Put(sb);
      p.addColumn(sb, sb, sb);
      h.put(p);

      // List the WAL directory through the raw DFS client, bypassing the hook.
      DirectoryListing dl = dfs.getClient().listPaths(walDir, HdfsFileStatus.EMPTY_NAME);
      HdfsFileStatus[] hfs = dl.getPartialListing();

      // As we wrote a put, we should have at least one log file.
      Assert.assertTrue(hfs.length >= 1);
      for (HdfsFileStatus hf : hfs) {
        // Because this is a live cluster, log files might get archived while we're processing
        try {
          LOG.info("Log file found: " + hf.getLocalName() + " in " + walDir);
          String logFile = walDir + "/" + hf.getLocalName();
          FileStatus fsLog = rfs.getFileStatus(new Path(logFile));

          LOG.info("Checking log file: " + logFile);
          // Now checking that the hook is up and running
          // We can't call directly getBlockLocations, it's not available in HFileSystem
          // We're trying multiple times to be sure, as the order is random

          BlockLocation[] bls = rfs.getFileBlockLocations(fsLog, 0, 1);
          if (bls.length > 0) {
            BlockLocation bl = bls[0];

            LOG.info(bl.getHosts().length + " replicas for block 0 in " + logFile + " ");
            // The hook should have moved host4 (the RS-local replica) to the end,
            // so no earlier entry may be host4.
            // NOTE(review): assertNotSame is a reference (==) check, not equals();
            // it presumably relies on hostname strings not being identical objects
            // here — an equals-based assertion would be stricter.
            for (int i = 0; i < bl.getHosts().length - 1; i++) {
              LOG.info(bl.getHosts()[i] + "    " + logFile);
              Assert.assertNotSame(bl.getHosts()[i], host4);
            }
            String last = bl.getHosts()[bl.getHosts().length - 1];
            LOG.info(last + "    " + logFile);
            if (host4.equals(last)) {
              // Only files that actually got a replica on host4 count toward the
              // 10 successful iterations; replica placement is not guaranteed.
              nbTest++;
              LOG.info(logFile + " is on the new datanode and is ok");
              if (bl.getHosts().length == 3) {
                // We can test this case from the file system as well
                // Checking the underlying file system. Multiple times as the order is random
                testFromDFS(dfs, logFile, repCount, host4);

                // now from the master
                testFromDFS(mdfs, logFile, repCount, host4);
              }
            }
          }
        } catch (FileNotFoundException exception) {
          LOG.debug("Failed to find log file '" + hf.getLocalName() + "'; it probably was " +
              "archived out from under us so we'll ignore and retry. If this test hangs " +
              "indefinitely you should treat this failure as a symptom.", exception);
        } catch (RemoteException exception) {
          // The same archival race can surface as a wrapped FileNotFoundException
          // from the namenode; anything else is a real failure.
          if (exception.unwrapRemoteException() instanceof FileNotFoundException) {
            LOG.debug("Failed to find log file '" + hf.getLocalName() + "'; it probably was " +
                "archived out from under us so we'll ignore and retry. If this test hangs " +
                "indefinitely you should treat this failure as a symptom.", exception);
          } else {
            throw exception;
          }
        }
      }
    }
  }

  /**
   * Asserts, via the given file system's namenode protocol, that every block of
   * {@code src} has {@code repCount} replica locations and that the last location's
   * hostname is {@code localhost}. Polls for up to 10 seconds per attempt because the
   * namenode learns block locations asynchronously, and repeats the whole check 10
   * times since the unhooked location order is random.
   */
  private void testFromDFS(DistributedFileSystem dfs, String src, int repCount, String localhost)
      throws Exception {
    // Multiple times as the order is random
    for (int i = 0; i < 10; i++) {
      LocatedBlocks l;
      // The NN gets the block list asynchronously, so we may need multiple tries to get the list
      final long max = System.currentTimeMillis() + 10000;
      boolean done;
      do {
        // ("enouth" typo is in the original assertion message)
        Assert.assertTrue("Can't get enouth replica.", System.currentTimeMillis() < max);
        l = getNamenode(dfs.getClient()).getBlockLocations(src, 0, 1);
        Assert.assertNotNull("Can't get block locations for " + src, l);
        Assert.assertNotNull(l.getLocatedBlocks());
        Assert.assertTrue(l.getLocatedBlocks().size() > 0);

        // done stays true only if every block already reports repCount locations.
        done = true;
        for (int y = 0; y < l.getLocatedBlocks().size() && done; y++) {
          done = (l.get(y).getLocations().length == repCount);
        }
      } while (!done);

      // All blocks fully replicated: the hook must have placed localhost last.
      for (int y = 0; y < l.getLocatedBlocks().size() && done; y++) {
        Assert.assertEquals(localhost, l.get(y).getLocations()[repCount - 1].getHostName());
      }
    }
  }

  /**
   * Extracts the namenode RPC proxy from a {@link DFSClient} by reflecting on its
   * private {@code namenode} field, since no public accessor is exposed.
   */
  private static ClientProtocol getNamenode(DFSClient dfsc) throws Exception {
    Field nf = DFSClient.class.getDeclaredField("namenode");
    nf.setAccessible(true);
    return (ClientProtocol) nf.get(dfsc);
  }

}