View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertTrue;
22  
23  import java.io.IOException;
24  import java.util.Collection;
25  import java.util.List;
26  import java.util.concurrent.CountDownLatch;
27  import java.util.concurrent.atomic.AtomicInteger;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.conf.Configuration;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.client.Admin;
35  import org.apache.hadoop.hbase.client.HBaseAdmin;
36  import org.apache.hadoop.hbase.client.Table;
37  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
38  import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
39  import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
40  import org.apache.hadoop.hbase.regionserver.HRegion;
41  import org.apache.hadoop.hbase.regionserver.HRegionServer;
42  import org.apache.hadoop.hbase.regionserver.HStore;
43  import org.apache.hadoop.hbase.regionserver.Region;
44  import org.apache.hadoop.hbase.regionserver.RegionServerServices;
45  import org.apache.hadoop.hbase.regionserver.Store;
46  import org.apache.hadoop.hbase.regionserver.StoreFile;
47  import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
48  import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController;
49  import org.apache.hadoop.hbase.regionserver.wal.WALUtil;
50  import org.apache.hadoop.hbase.security.User;
51  import org.apache.hadoop.hbase.testclassification.MediumTests;
52  import org.apache.hadoop.hbase.util.Bytes;
53  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
54  import org.apache.hadoop.hbase.wal.WAL;
55  import org.junit.Test;
56  import org.junit.experimental.categories.Category;
57  
58  import com.google.common.collect.Lists;
59  
60  /**
61   * Test for the case where a regionserver going down has enough cycles to do damage to regions
62   * that have actually been assigned elsewhere.
63   *
64   * <p>If we happen to assign a region before it is fully done in its old location -- i.e. it is on two servers at the
65   * same time -- all can work fine until the case where the region on the dying server decides to compact or otherwise
66   * change the region file set.  The region in its new location will then get a surprise when it tries to do something
67   * w/ a file removed by the region in its old location on dying server.
68   *
69   * <p>Making a test for this case is a little tough in that even if a file is deleted up on the namenode,
70   * if the file was opened before the delete, it will continue to let reads happen until something changes the
71   * state of cached blocks in the dfsclient that was already open (a block from the deleted file is cleaned
72   * from the datanode by NN).
73   *
74   * <p>What we will do below is do an explicit check for existence on the files listed in the region that
75   * has had some files removed because of a compaction.  This sort of hurries things along and makes certain what is a chance
76   * occurrence.
77   */
78  @Category(MediumTests.class)
79  public class TestIOFencing {
80    private static final Log LOG = LogFactory.getLog(TestIOFencing.class);
81    static {
82      // Uncomment the following lines if more verbosity is needed for
83      // debugging (see HBASE-12285 for details).
84      //((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL);
85      //((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
86      //((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
87      //((Log4JLogger)LogFactory.getLog("org.apache.hadoop.hdfs.server.namenode.FSNamesystem"))
88      //    .getLogger().setLevel(Level.ALL);
89      //((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL);
90    }
91  
92    public abstract static class CompactionBlockerRegion extends HRegion {
93      AtomicInteger compactCount = new AtomicInteger(0);
94      CountDownLatch compactionsBlocked = new CountDownLatch(0);
95      CountDownLatch compactionsWaiting = new CountDownLatch(0);
96  
97      @SuppressWarnings("deprecation")
98      public CompactionBlockerRegion(Path tableDir, WAL log,
99          FileSystem fs, Configuration confParam, HRegionInfo info,
100         HTableDescriptor htd, RegionServerServices rsServices) {
101       super(tableDir, log, fs, confParam, info, htd, rsServices);
102     }
103 
104     public void stopCompactions() {
105       compactionsBlocked = new CountDownLatch(1);
106       compactionsWaiting = new CountDownLatch(1);
107     }
108 
109     public void allowCompactions() {
110       LOG.debug("allowing compactions");
111       compactionsBlocked.countDown();
112     }
113     public void waitForCompactionToBlock() throws IOException {
114       try {
115         LOG.debug("waiting for compaction to block");
116         compactionsWaiting.await();
117         LOG.debug("compaction block reached");
118       } catch (InterruptedException ex) {
119         throw new IOException(ex);
120       }
121     }
122 
123     @Override
124     public boolean compact(CompactionContext compaction, Store store,
125         ThroughputController throughputController) throws IOException {
126       try {
127         return super.compact(compaction, store, throughputController);
128       } finally {
129         compactCount.incrementAndGet();
130       }
131     }
132 
133     @Override
134     public boolean compact(CompactionContext compaction, Store store,
135         ThroughputController throughputController, User user) throws IOException {
136       try {
137         return super.compact(compaction, store, throughputController, user);
138       } finally {
139         compactCount.incrementAndGet();
140       }
141     }
142 
143     public int countStoreFiles() {
144       int count = 0;
145       for (Store store : stores.values()) {
146         count += store.getStorefilesCount();
147       }
148       return count;
149     }
150   }
151 
152   /**
153    * An override of HRegion that allows us park compactions in a holding pattern and
154    * then when appropriate for the test, allow them proceed again.
155    */
156   public static class BlockCompactionsInPrepRegion extends CompactionBlockerRegion {
157 
158     public BlockCompactionsInPrepRegion(Path tableDir, WAL log,
159         FileSystem fs, Configuration confParam, HRegionInfo info,
160         HTableDescriptor htd, RegionServerServices rsServices) {
161       super(tableDir, log, fs, confParam, info, htd, rsServices);
162     }
163     @Override
164     protected void doRegionCompactionPrep() throws IOException {
165       compactionsWaiting.countDown();
166       try {
167         compactionsBlocked.await();
168       } catch (InterruptedException ex) {
169         throw new IOException();
170       }
171       super.doRegionCompactionPrep();
172     }
173   }
174 
175   /**
176    * An override of HRegion that allows us park compactions in a holding pattern and
177    * then when appropriate for the test, allow them proceed again. This allows the compaction
178    * entry to go the WAL before blocking, but blocks afterwards
179    */
180   public static class BlockCompactionsInCompletionRegion extends CompactionBlockerRegion {
181     public BlockCompactionsInCompletionRegion(Path tableDir, WAL log,
182         FileSystem fs, Configuration confParam, HRegionInfo info,
183         HTableDescriptor htd, RegionServerServices rsServices) {
184       super(tableDir, log, fs, confParam, info, htd, rsServices);
185     }
186     @Override
187     protected HStore instantiateHStore(final HColumnDescriptor family) throws IOException {
188       return new BlockCompactionsInCompletionHStore(this, family, this.conf);
189     }
190   }
191 
192   public static class BlockCompactionsInCompletionHStore extends HStore {
193     CompactionBlockerRegion r;
194     protected BlockCompactionsInCompletionHStore(HRegion region, HColumnDescriptor family,
195         Configuration confParam) throws IOException {
196       super(region, family, confParam);
197       r = (CompactionBlockerRegion) region;
198     }
199 
200     @Override
201     protected void completeCompaction(Collection<StoreFile> compactedFiles) throws IOException {
202       try {
203         r.compactionsWaiting.countDown();
204         r.compactionsBlocked.await();
205       } catch (InterruptedException ex) {
206         throw new IOException(ex);
207       }
208       super.completeCompaction(compactedFiles);
209     }
210   }
211 
212   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
213   private final static TableName TABLE_NAME =
214       TableName.valueOf("tabletest");
215   private final static byte[] FAMILY = Bytes.toBytes("family");
216   private static final int FIRST_BATCH_COUNT = 4000;
217   private static final int SECOND_BATCH_COUNT = FIRST_BATCH_COUNT;
218 
219   /**
220    * Test that puts up a regionserver, starts a compaction on a loaded region but holds the
221    * compaction until after we have killed the server and the region has come up on
222    * a new regionserver altogether.  This fakes the double assignment case where region in one
223    * location changes the files out from underneath a region being served elsewhere.
224    */
225   @Test
226   public void testFencingAroundCompaction() throws Exception {
227     doTest(BlockCompactionsInPrepRegion.class);
228   }
229 
230   /**
231    * Test that puts up a regionserver, starts a compaction on a loaded region but holds the
232    * compaction completion until after we have killed the server and the region has come up on
233    * a new regionserver altogether.  This fakes the double assignment case where region in one
234    * location changes the files out from underneath a region being served elsewhere.
235    */
236   @Test
237   public void testFencingAroundCompactionAfterWALSync() throws Exception {
238     doTest(BlockCompactionsInCompletionRegion.class);
239   }
240 
241   public void doTest(Class<?> regionClass) throws Exception {
242     Configuration c = TEST_UTIL.getConfiguration();
243     // Insert our custom region
244     c.setClass(HConstants.REGION_IMPL, regionClass, HRegion.class);
245     c.setBoolean("dfs.support.append", true);
246     // Encourage plenty of flushes
247     c.setLong("hbase.hregion.memstore.flush.size", 200000);
248     c.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName());
249     // Only run compaction when we tell it to
250     c.setInt("hbase.hstore.compactionThreshold", 1000);
251     c.setLong("hbase.hstore.blockingStoreFiles", 1000);
252     // Compact quickly after we tell it to!
253     c.setInt("hbase.regionserver.thread.splitcompactcheckfrequency", 1000);
254     LOG.info("Starting mini cluster");
255     TEST_UTIL.startMiniCluster(1);
256     CompactionBlockerRegion compactingRegion = null;
257     Admin admin = null;
258     try {
259       LOG.info("Creating admin");
260       admin = TEST_UTIL.getConnection().getAdmin();
261       LOG.info("Creating table");
262       TEST_UTIL.createTable(TABLE_NAME, FAMILY);
263       Table table = TEST_UTIL.getConnection().getTable(TABLE_NAME);
264       LOG.info("Loading test table");
265       // Find the region
266       List<HRegion> testRegions = TEST_UTIL.getMiniHBaseCluster().findRegionsForTable(TABLE_NAME);
267       assertEquals(1, testRegions.size());
268       compactingRegion = (CompactionBlockerRegion)testRegions.get(0);
269       LOG.info("Blocking compactions");
270       compactingRegion.stopCompactions();
271       long lastFlushTime = compactingRegion.getEarliestFlushTimeForAllStores();
272       // Load some rows
273       TEST_UTIL.loadNumericRows(table, FAMILY, 0, FIRST_BATCH_COUNT);
274 
275       // add a compaction from an older (non-existing) region to see whether we successfully skip
276       // those entries
277       HRegionInfo oldHri = new HRegionInfo(table.getName(),
278         HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
279       CompactionDescriptor compactionDescriptor = ProtobufUtil.toCompactionDescriptor(oldHri,
280         FAMILY, Lists.newArrayList(new Path("/a")), Lists.newArrayList(new Path("/b")),
281         new Path("store_dir"));
282       WALUtil.writeCompactionMarker(compactingRegion.getWAL(), table.getTableDescriptor(),
283         oldHri, compactionDescriptor, compactingRegion.getMVCC());
284 
285       // Wait till flush has happened, otherwise there won't be multiple store files
286       long startWaitTime = System.currentTimeMillis();
287       while (compactingRegion.getEarliestFlushTimeForAllStores() <= lastFlushTime ||
288           compactingRegion.countStoreFiles() <= 1) {
289         LOG.info("Waiting for the region to flush " +
290           compactingRegion.getRegionInfo().getRegionNameAsString());
291         Thread.sleep(1000);
292         assertTrue("Timed out waiting for the region to flush",
293           System.currentTimeMillis() - startWaitTime < 30000);
294       }
295       assertTrue(compactingRegion.countStoreFiles() > 1);
296       final byte REGION_NAME[] = compactingRegion.getRegionInfo().getRegionName();
297       LOG.info("Asking for compaction");
298       ((HBaseAdmin)admin).majorCompact(TABLE_NAME.getName());
299       LOG.info("Waiting for compaction to be about to start");
300       compactingRegion.waitForCompactionToBlock();
301       LOG.info("Starting a new server");
302       RegionServerThread newServerThread = TEST_UTIL.getMiniHBaseCluster().startRegionServer();
303       final HRegionServer newServer = newServerThread.getRegionServer();
304       LOG.info("Killing region server ZK lease");
305       TEST_UTIL.expireRegionServerSession(0);
306       CompactionBlockerRegion newRegion = null;
307       startWaitTime = System.currentTimeMillis();
308       LOG.info("Waiting for the new server to pick up the region " + Bytes.toString(REGION_NAME));
309 
310       // wait for region to be assigned and to go out of log replay if applicable
311       Waiter.waitFor(c, 60000, new Waiter.Predicate<Exception>() {
312         @Override
313         public boolean evaluate() throws Exception {
314           Region newRegion = newServer.getOnlineRegion(REGION_NAME);
315           return newRegion != null && !newRegion.isRecovering();
316         }
317       });
318 
319       newRegion = (CompactionBlockerRegion)newServer.getOnlineRegion(REGION_NAME);
320 
321       // After compaction of old region finishes on the server that was going down, make sure that
322       // all the files we expect are still working when region is up in new location.
323       FileSystem fs = newRegion.getFilesystem();
324       for (String f: newRegion.getStoreFileList(new byte [][] {FAMILY})) {
325         assertTrue("After compaction, does not exist: " + f, fs.exists(new Path(f)));
326       }
327       LOG.info("Allowing compaction to proceed");
328       compactingRegion.allowCompactions();
329       while (compactingRegion.compactCount.get() == 0) {
330         Thread.sleep(1000);
331       }
332       // The server we killed stays up until the compaction that was started before it was killed completes.  In logs
333       // you should see the old regionserver now going down.
334       LOG.info("Compaction finished");
335 
336       // If we survive the split keep going...
337       // Now we make sure that the region isn't totally confused.  Load up more rows.
338       TEST_UTIL.loadNumericRows(table, FAMILY, FIRST_BATCH_COUNT, FIRST_BATCH_COUNT + SECOND_BATCH_COUNT);
339       ((HBaseAdmin)admin).majorCompact(TABLE_NAME.getName());
340       startWaitTime = System.currentTimeMillis();
341       while (newRegion.compactCount.get() == 0) {
342         Thread.sleep(1000);
343         assertTrue("New region never compacted", System.currentTimeMillis() - startWaitTime < 180000);
344       }
345       assertEquals(FIRST_BATCH_COUNT + SECOND_BATCH_COUNT, TEST_UTIL.countRows(table));
346     } finally {
347       if (compactingRegion != null) {
348         compactingRegion.allowCompactions();
349       }
350       if (admin != null) {
351         admin.close();
352       }
353       TEST_UTIL.shutdownMiniCluster();
354     }
355   }
356 }