/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18  
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertNotNull;
24  import static org.junit.Assert.assertTrue;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.hbase.CellScanner;
30  import org.apache.hadoop.hbase.HBaseTestingUtility;
31  import org.apache.hadoop.hbase.MiniHBaseCluster;
32  import org.apache.hadoop.hbase.TableName;
33  import org.apache.hadoop.hbase.Waiter;
34  import org.apache.hadoop.hbase.client.Admin;
35  import org.apache.hadoop.hbase.client.Durability;
36  import org.apache.hadoop.hbase.client.Mutation;
37  import org.apache.hadoop.hbase.client.Put;
38  import org.apache.hadoop.hbase.client.Table;
39  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
40  import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
41  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
42  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
43  import org.apache.hadoop.hbase.coprocessor.RegionServerCoprocessorEnvironment;
44  import org.apache.hadoop.hbase.coprocessor.RegionServerObserver;
45  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
46  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
47  import org.apache.hadoop.hbase.replication.ReplicationEndpoint;
48  import org.apache.hadoop.hbase.testclassification.MediumTests;
49  import org.apache.hadoop.hbase.testclassification.RegionServerTests;
50  import org.apache.hadoop.hbase.util.Bytes;
51  import org.apache.hadoop.hbase.util.JVMClusterUtil;
52  import org.apache.hadoop.hdfs.DFSConfigKeys;
53  import org.apache.hadoop.hdfs.MiniDFSCluster;
54  import org.junit.After;
55  import org.junit.Before;
56  import org.junit.Test;
57  import org.junit.experimental.categories.Category;
58  
59  import java.io.IOException;
60  import java.util.List;
61  import java.util.concurrent.ExecutorService;
62  import java.util.concurrent.Executors;
63  import java.util.concurrent.atomic.AtomicInteger;
64  
65  /**
66   * Tests around regionserver shutdown and abort
67   */
68  @Category({RegionServerTests.class, MediumTests.class})
69  public class TestRegionServerAbort {
70    private static final byte[] FAMILY_BYTES = Bytes.toBytes("f");
71  
72    private static final Log LOG = LogFactory.getLog(TestRegionServerAbort.class);
73  
74    private HBaseTestingUtility testUtil;
75    private Configuration conf;
76    private MiniDFSCluster dfsCluster;
77    private MiniHBaseCluster cluster;
78  
79    @Before
80    public void setup() throws Exception {
81      testUtil = new HBaseTestingUtility();
82      conf = testUtil.getConfiguration();
83      conf.set(CoprocessorHost.REGIONSERVER_COPROCESSOR_CONF_KEY,
84          StopBlockingRegionObserver.class.getName());
85      conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
86          StopBlockingRegionObserver.class.getName());
87      // make sure we have multiple blocks so that the client does not prefetch all block locations
88      conf.set("dfs.blocksize", Long.toString(100 * 1024));
89      // prefetch the first block
90      conf.set(DFSConfigKeys.DFS_CLIENT_READ_PREFETCH_SIZE_KEY, Long.toString(100 * 1024));
91  
92      testUtil.startMiniZKCluster();
93      dfsCluster = testUtil.startMiniDFSCluster(2);
94      cluster = testUtil.startMiniHBaseCluster(1, 2);
95    }
96  
97    @After
98    public void tearDown() throws Exception {
99      for (JVMClusterUtil.RegionServerThread t : cluster.getRegionServerThreads()) {
100       HRegionServer rs = t.getRegionServer();
101       RegionServerCoprocessorHost cpHost = rs.getRegionServerCoprocessorHost();
102       StopBlockingRegionObserver cp = (StopBlockingRegionObserver)
103           cpHost.findCoprocessor(StopBlockingRegionObserver.class.getName());
104       cp.setStopAllowed(true);
105     }
106     testUtil.shutdownMiniCluster();
107   }
108 
109   /**
110    * Test that a regionserver is able to abort properly, even when a coprocessor
111    * throws an exception in preStopRegionServer().
112    */
113   @Test
114   public void testAbortFromRPC() throws Exception {
115     TableName tableName = TableName.valueOf("testAbortFromRPC");
116     // create a test table
117     Table table = testUtil.createTable(tableName, FAMILY_BYTES);
118 
119     // write some edits
120     testUtil.loadTable(table, FAMILY_BYTES);
121     LOG.info("Wrote data");
122     // force a flush
123     cluster.flushcache(tableName);
124     LOG.info("Flushed table");
125 
126     // Send a poisoned put to trigger the abort
127     Put put = new Put(new byte[]{0, 0, 0, 0});
128     put.addColumn(FAMILY_BYTES, Bytes.toBytes("c"), new byte[]{});
129     put.setAttribute(StopBlockingRegionObserver.DO_ABORT, new byte[]{1});
130 
131     table.put(put);
132     // should have triggered an abort due to FileNotFoundException
133 
134     // verify that the regionserver is stopped
135     HRegion firstRegion = cluster.findRegionsForTable(tableName).get(0);
136     assertNotNull(firstRegion);
137     assertNotNull(firstRegion.getRegionServerServices());
138     LOG.info("isAborted = " + firstRegion.getRegionServerServices().isAborted());
139     assertTrue(firstRegion.getRegionServerServices().isAborted());
140     LOG.info("isStopped = " + firstRegion.getRegionServerServices().isStopped());
141     assertTrue(firstRegion.getRegionServerServices().isStopped());
142   }
143 
144   /**
145    * Tests that only a single abort is processed when multiple aborts are requested.
146    */
147   @Test
148   public void testMultiAbort() {
149     assertTrue(cluster.getRegionServerThreads().size() > 0);
150     JVMClusterUtil.RegionServerThread t = cluster.getRegionServerThreads().get(0);
151     assertTrue(t.isAlive());
152     final HRegionServer rs = t.getRegionServer();
153     assertFalse(rs.isAborted());
154     RegionServerCoprocessorHost cpHost = rs.getRegionServerCoprocessorHost();
155     StopBlockingRegionObserver cp = (StopBlockingRegionObserver)cpHost.findCoprocessor(
156       StopBlockingRegionObserver.class.getName());
157     // Enable clean abort.
158     cp.setStopAllowed(true);
159     // Issue two aborts in quick succession.
160     // We need a thread pool here, otherwise the abort() runs into SecurityException when running
161     // from the fork join pool when setting the context classloader.
162     ExecutorService executor = Executors.newFixedThreadPool(2);
163     try {
164       executor.submit(new Runnable() {
165         @Override public void run() {
166           rs.abort("Abort 1");
167         }
168       });
169       executor.submit(new Runnable() {
170         @Override public void run() {
171           rs.abort("Abort 2");
172         }
173       });
174       long testTimeoutMs = 10 * 1000;
175       Waiter.waitFor(cluster.getConf(), testTimeoutMs, new Waiter.Predicate<Exception>() {
176         @Override public boolean evaluate() throws Exception {
177           return rs.isStopped();
178         }
179       });
180       // Make sure only one abort is received.
181       assertEquals(1, cp.getNumAbortsRequested());
182     } finally {
183       executor.shutdownNow();
184     }
185   }
186 
187   /**
188    * Test that a coprocessor is able to override a normal regionserver stop request.
189    */
190   @Test
191   public void testStopOverrideFromCoprocessor() throws Exception {
192     Admin admin = testUtil.getHBaseAdmin();
193     HRegionServer regionserver = cluster.getRegionServer(0);
194     admin.stopRegionServer(regionserver.getServerName().getHostAndPort());
195 
196     // regionserver should have failed to stop due to coprocessor
197     assertFalse(cluster.getRegionServer(0).isAborted());
198     assertFalse(cluster.getRegionServer(0).isStopped());
199   }
200 
201   public static class StopBlockingRegionObserver extends BaseRegionObserver
202       implements RegionServerObserver {
203     public static final String DO_ABORT = "DO_ABORT";
204     private boolean stopAllowed;
205     private AtomicInteger abortCount = new AtomicInteger();
206 
207     @Override
208     public void prePut(ObserverContext<RegionCoprocessorEnvironment> c, Put put, WALEdit edit,
209                        Durability durability) throws IOException {
210       if (put.getAttribute(DO_ABORT) != null) {
211         HRegionServer rs = (HRegionServer) c.getEnvironment().getRegionServerServices();
212         LOG.info("Triggering abort for regionserver " + rs.getServerName());
213         rs.abort("Aborting for test");
214       }
215     }
216 
217     @Override
218     public void preStopRegionServer(ObserverContext<RegionServerCoprocessorEnvironment> env)
219         throws IOException {
220       abortCount.incrementAndGet();
221       if (!stopAllowed) {
222         throw new IOException("Stop not allowed");
223       }
224     }
225 
226     public int getNumAbortsRequested() {
227       return abortCount.get();
228     }
229 
230     @Override
231     public void preMerge(ObserverContext<RegionServerCoprocessorEnvironment> ctx,
232                          Region regionA, Region regionB) throws IOException {
233       // no-op
234     }
235 
236     @Override
237     public void postMerge(ObserverContext<RegionServerCoprocessorEnvironment> c,
238                           Region regionA, Region regionB, Region mergedRegion) throws IOException {
239       // no-op
240     }
241 
242     @Override
243     public void preMergeCommit(ObserverContext<RegionServerCoprocessorEnvironment> ctx,
244                                Region regionA, Region regionB, List<Mutation> metaEntries)
245         throws IOException {
246       // no-op
247     }
248 
249     @Override
250     public void postMergeCommit(ObserverContext<RegionServerCoprocessorEnvironment> ctx,
251                                 Region regionA, Region regionB, Region mergedRegion)
252         throws IOException {
253       // no-op
254     }
255 
256     @Override
257     public void preRollBackMerge(ObserverContext<RegionServerCoprocessorEnvironment> ctx,
258                                  Region regionA, Region regionB) throws IOException {
259       // no-op
260     }
261 
262     @Override
263     public void postRollBackMerge(ObserverContext<RegionServerCoprocessorEnvironment> ctx,
264                                   Region regionA, Region regionB) throws IOException {
265       // no-op
266     }
267 
268     @Override
269     public void preRollWALWriterRequest(ObserverContext<RegionServerCoprocessorEnvironment> ctx)
270         throws IOException {
271       // no-op
272     }
273 
274     @Override
275     public void postRollWALWriterRequest(ObserverContext<RegionServerCoprocessorEnvironment> ctx)
276         throws IOException {
277       // no-op
278     }
279 
280     @Override
281     public ReplicationEndpoint postCreateReplicationEndPoint(
282         ObserverContext<RegionServerCoprocessorEnvironment> ctx, ReplicationEndpoint endpoint) {
283       return null;
284     }
285 
286     @Override
287     public void preReplicateLogEntries(ObserverContext<RegionServerCoprocessorEnvironment> ctx,
288                                        List<AdminProtos.WALEntry> entries, CellScanner cells)
289         throws IOException {
290       // no-op
291     }
292 
293     @Override
294     public void postReplicateLogEntries(ObserverContext<RegionServerCoprocessorEnvironment> ctx,
295                                         List<AdminProtos.WALEntry> entries, CellScanner cells)
296         throws IOException {
297       // no-op
298     }
299 
300     public void setStopAllowed(boolean allowed) {
301       this.stopAllowed = allowed;
302     }
303 
304     public boolean isStopAllowed() {
305       return stopAllowed;
306     }
307   }
308 }