View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertNull;
22  import static org.junit.Assert.assertTrue;
23  import static org.junit.Assert.fail;
24  
25  import com.google.protobuf.RpcController;
26  import com.google.protobuf.ServiceException;
27  
28  import java.io.IOException;
29  import java.util.ArrayList;
30  import java.util.List;
31  import java.util.concurrent.Callable;
32  
33  import org.apache.commons.lang.exception.ExceptionUtils;
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.commons.logging.impl.Log4JLogger;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.hbase.Cell;
41  import org.apache.hadoop.hbase.CellUtil;
42  import org.apache.hadoop.hbase.CoordinatedStateManager;
43  import org.apache.hadoop.hbase.HBaseTestingUtility;
44  import org.apache.hadoop.hbase.HConstants;
45  import org.apache.hadoop.hbase.HRegionInfo;
46  import org.apache.hadoop.hbase.HTableDescriptor;
47  import org.apache.hadoop.hbase.HTestConst;
48  import org.apache.hadoop.hbase.KeyValue;
49  import org.apache.hadoop.hbase.KeyValue.KVComparator;
50  import org.apache.hadoop.hbase.TableName;
51  import org.apache.hadoop.hbase.client.Put;
52  import org.apache.hadoop.hbase.client.Result;
53  import org.apache.hadoop.hbase.client.ResultScanner;
54  import org.apache.hadoop.hbase.client.Scan;
55  import org.apache.hadoop.hbase.client.ScannerCallable;
56  import org.apache.hadoop.hbase.client.Table;
57  import org.apache.hadoop.hbase.filter.Filter;
58  import org.apache.hadoop.hbase.filter.FilterBase;
59  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
60  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanResponse;
61  import org.apache.hadoop.hbase.regionserver.HRegion.RegionScannerImpl;
62  import org.apache.hadoop.hbase.testclassification.MediumTests;
63  import org.apache.hadoop.hbase.util.Bytes;
64  import org.apache.hadoop.hbase.util.Threads;
65  import org.apache.hadoop.hbase.wal.WAL;
66  import org.apache.log4j.Level;
67  import org.junit.After;
68  import org.junit.AfterClass;
69  import org.junit.Before;
70  import org.junit.BeforeClass;
71  import org.junit.Test;
72  import org.junit.experimental.categories.Category;
73  
74  /**
75   * Here we test to make sure that scans return the expected Results when the server is sending the
76   * Client heartbeat messages. Heartbeat messages are essentially keep-alive messages (they prevent
77   * the scanner on the client side from timing out). A heartbeat message is sent from the server to
78   * the client when the server has exceeded the time limit during the processing of the scan. When
79   * the time limit is reached, the server will return to the Client whatever Results it has
80   * accumulated (potentially empty).
81   */
82  @Category(MediumTests.class)
83  public class TestScannerHeartbeatMessages {
84    private static final Log LOG = LogFactory.getLog(TestScannerHeartbeatMessages.class);
85  
86    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
87  
88    private static Table TABLE = null;
89  
90    /**
91     * Table configuration
92     */
93    private static TableName TABLE_NAME = TableName.valueOf("testScannerHeartbeatMessagesTable");
94  
95    private static int NUM_ROWS = 5;
96    private static byte[] ROW = Bytes.toBytes("testRow");
97    private static byte[][] ROWS = HTestConst.makeNAscii(ROW, NUM_ROWS);
98  
99    private static int NUM_FAMILIES = 4;
100   private static byte[] FAMILY = Bytes.toBytes("testFamily");
101   private static byte[][] FAMILIES = HTestConst.makeNAscii(FAMILY, NUM_FAMILIES);
102 
103   private static int NUM_QUALIFIERS = 3;
104   private static byte[] QUALIFIER = Bytes.toBytes("testQualifier");
105   private static byte[][] QUALIFIERS = HTestConst.makeNAscii(QUALIFIER, NUM_QUALIFIERS);
106 
107   private static int VALUE_SIZE = 128;
108   private static byte[] VALUE = Bytes.createMaxByteArray(VALUE_SIZE);
109 
110   // The time limit should be based on the rpc timeout at client, or the client will regards
111   // the request as timeout before server return a heartbeat.
112   private static int SERVER_TIMEOUT = 60000;
113 
114   // Time, in milliseconds, that the client will wait for a response from the server before timing
115   // out. This value is used server side to determine when it is necessary to send a heartbeat
116   // message to the client. Time limit will be 500 ms.
117   private static int CLIENT_TIMEOUT = 1000;
118 
119   // In this test, we sleep after reading each row. So we should make sure after we get some number
120   // of rows and sleep same times we must reach time limit, and do not timeout after next sleeping.
121   private static int DEFAULT_ROW_SLEEP_TIME = 300;
122 
123   // Similar with row sleep time.
124   private static int DEFAULT_CF_SLEEP_TIME = 300;
125 
126   @BeforeClass
127   public static void setUpBeforeClass() throws Exception {
128     ((Log4JLogger) ScannerCallable.LOG).getLogger().setLevel(Level.ALL);
129     ((Log4JLogger) HeartbeatRPCServices.LOG).getLogger().setLevel(Level.ALL);
130     Configuration conf = TEST_UTIL.getConfiguration();
131 
132     conf.setStrings(HConstants.REGION_IMPL, HeartbeatHRegion.class.getName());
133     conf.setStrings(HConstants.REGION_SERVER_IMPL, HeartbeatHRegionServer.class.getName());
134     conf.setInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, SERVER_TIMEOUT);
135     conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, SERVER_TIMEOUT);
136     conf.setInt(HConstants.HBASE_CLIENT_PAUSE, 1);
137 
138     // Check the timeout condition after every cell
139     conf.setLong(StoreScanner.HBASE_CELLS_SCANNED_PER_HEARTBEAT_CHECK, 1);
140     TEST_UTIL.startMiniCluster(1);
141 
142     TABLE = createTestTable(TABLE_NAME, ROWS, FAMILIES, QUALIFIERS, VALUE);
143   }
144 
145   static Table createTestTable(TableName name, byte[][] rows, byte[][] families,
146       byte[][] qualifiers, byte[] cellValue) throws IOException {
147     Table ht = TEST_UTIL.createTable(name, families);
148     List<Put> puts = createPuts(rows, families, qualifiers, cellValue);
149     ht.put(puts);
150     ht.getConfiguration().setInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, CLIENT_TIMEOUT);
151     return ht;
152   }
153 
154   /**
155    * Make puts to put the input value into each combination of row, family, and qualifier
156    * @param rows
157    * @param families
158    * @param qualifiers
159    * @param value
160    * @return
161    * @throws IOException
162    */
163   static ArrayList<Put> createPuts(byte[][] rows, byte[][] families, byte[][] qualifiers,
164       byte[] value) throws IOException {
165     Put put;
166     ArrayList<Put> puts = new ArrayList<>();
167 
168     for (int row = 0; row < rows.length; row++) {
169       put = new Put(rows[row]);
170       for (int fam = 0; fam < families.length; fam++) {
171         for (int qual = 0; qual < qualifiers.length; qual++) {
172           KeyValue kv = new KeyValue(rows[row], families[fam], qualifiers[qual], qual, value);
173           put.add(kv);
174         }
175       }
176       puts.add(put);
177     }
178 
179     return puts;
180   }
181 
182   @AfterClass
183   public static void tearDownAfterClass() throws Exception {
184     TEST_UTIL.shutdownMiniCluster();
185   }
186 
187   @Before
188   public void setupBeforeTest() throws Exception {
189     disableSleeping();
190   }
191 
192   @After
193   public void teardownAfterTest() throws Exception {
194     disableSleeping();
195   }
196 
197   /**
198    * Run the test callable when heartbeats are enabled/disabled. We expect all tests to only pass
199    * when heartbeat messages are enabled (otherwise the test is pointless). When heartbeats are
200    * disabled, the test should throw an exception.
201    * @param testCallable
202    * @throws InterruptedException
203    */
204   private void testImportanceOfHeartbeats(Callable<Void> testCallable) throws InterruptedException {
205     HeartbeatRPCServices.heartbeatsEnabled = true;
206 
207     try {
208       testCallable.call();
209     } catch (Exception e) {
210       fail("Heartbeat messages are enabled, exceptions should NOT be thrown. Exception trace:"
211           + ExceptionUtils.getStackTrace(e));
212     }
213 
214     HeartbeatRPCServices.heartbeatsEnabled = false;
215     try {
216       testCallable.call();
217     } catch (Exception e) {
218       return;
219     } finally {
220       HeartbeatRPCServices.heartbeatsEnabled = true;
221     }
222     fail("Heartbeats messages are disabled, an exception should be thrown. If an exception "
223         + " is not thrown, the test case is not testing the importance of heartbeat messages");
224   }
225 
226   /**
227    * Test the case that the time limit for the scan is reached after each full row of cells is
228    * fetched.
229    * @throws Exception
230    */
231   @Test
232   public void testHeartbeatBetweenRows() throws Exception {
233     testImportanceOfHeartbeats(new Callable<Void>() {
234 
235       @Override
236       public Void call() throws Exception {
237         // Configure the scan so that it can read the entire table in a single RPC. We want to test
238         // the case where a scan stops on the server side due to a time limit
239         Scan scan = new Scan();
240         scan.setMaxResultSize(Long.MAX_VALUE);
241         scan.setCaching(Integer.MAX_VALUE);
242 
243         testEquivalenceOfScanWithHeartbeats(scan, DEFAULT_ROW_SLEEP_TIME, -1, false);
244         return null;
245       }
246     });
247   }
248 
249   /**
250    * Test the case that the time limit for scans is reached in between column families
251    * @throws Exception
252    */
253   @Test
254   public void testHeartbeatBetweenColumnFamilies() throws Exception {
255     testImportanceOfHeartbeats(new Callable<Void>() {
256       @Override
257       public Void call() throws Exception {
258         // Configure the scan so that it can read the entire table in a single RPC. We want to test
259         // the case where a scan stops on the server side due to a time limit
260         Scan baseScan = new Scan();
261         baseScan.setMaxResultSize(Long.MAX_VALUE);
262         baseScan.setCaching(Integer.MAX_VALUE);
263 
264         // Copy the scan before each test. When a scan object is used by a scanner, some of its
265         // fields may be changed such as start row
266         Scan scanCopy = new Scan(baseScan);
267         testEquivalenceOfScanWithHeartbeats(scanCopy, -1, DEFAULT_CF_SLEEP_TIME, false);
268         scanCopy = new Scan(baseScan);
269         testEquivalenceOfScanWithHeartbeats(scanCopy, -1, DEFAULT_CF_SLEEP_TIME, true);
270         return null;
271       }
272     });
273   }
274 
275   public static class SparseFilter extends FilterBase {
276 
277     @Override
278     public ReturnCode filterKeyValue(Cell v) throws IOException {
279       try {
280         Thread.sleep(CLIENT_TIMEOUT / 2 + 100);
281       } catch (InterruptedException e) {
282         Thread.currentThread().interrupt();
283       }
284       return Bytes.equals(CellUtil.cloneRow(v), ROWS[NUM_ROWS - 1]) ? ReturnCode.INCLUDE
285           : ReturnCode.SKIP;
286     }
287 
288     public static Filter parseFrom(final byte[] pbBytes) {
289       return new SparseFilter();
290     }
291   }
292 
293   /**
294    * Test the case that there is a filter which filters most of cells
295    * @throws Exception
296    */
297   @Test
298   public void testHeartbeatWithSparseFilter() throws Exception {
299     testImportanceOfHeartbeats(new Callable<Void>() {
300       @Override
301       public Void call() throws Exception {
302         Scan scan = new Scan();
303         scan.setMaxResultSize(Long.MAX_VALUE);
304         scan.setCaching(Integer.MAX_VALUE);
305         scan.setFilter(new SparseFilter());
306         ResultScanner scanner = TABLE.getScanner(scan);
307         int num = 0;
308         while (scanner.next() != null) {
309           num++;
310         }
311         assertEquals(1, num);
312         scanner.close();
313 
314         scan = new Scan();
315         scan.setMaxResultSize(Long.MAX_VALUE);
316         scan.setCaching(Integer.MAX_VALUE);
317         scan.setFilter(new SparseFilter());
318         scan.setAllowPartialResults(true);
319         scanner = TABLE.getScanner(scan);
320         num = 0;
321         while (scanner.next() != null) {
322           num++;
323         }
324         assertEquals(NUM_FAMILIES * NUM_QUALIFIERS, num);
325         scanner.close();
326 
327         return null;
328       }
329     });
330   }
331 
332   /**
333    * Test the equivalence of a scan versus the same scan executed when heartbeat messages are
334    * necessary
335    * @param scan The scan configuration being tested
336    * @param rowSleepTime The time to sleep between fetches of row cells
337    * @param cfSleepTime The time to sleep between fetches of column family cells
338    * @param sleepBeforeCf set to true when column family sleeps should occur before the cells for
339    *          that column family are fetched
340    * @throws Exception
341    */
342   private void testEquivalenceOfScanWithHeartbeats(final Scan scan, int rowSleepTime,
343       int cfSleepTime, boolean sleepBeforeCf) throws Exception {
344     disableSleeping();
345     final ResultScanner scanner = TABLE.getScanner(scan);
346     final ResultScanner scannerWithHeartbeats = TABLE.getScanner(scan);
347 
348     Result r1 = null;
349     Result r2 = null;
350 
351     while ((r1 = scanner.next()) != null) {
352       // Enforce the specified sleep conditions during calls to the heartbeat scanner
353       configureSleepTime(rowSleepTime, cfSleepTime, sleepBeforeCf);
354       r2 = scannerWithHeartbeats.next();
355       disableSleeping();
356 
357       assertTrue(r2 != null);
358       try {
359         Result.compareResults(r1, r2);
360       } catch (Exception e) {
361         fail(e.getMessage());
362       }
363     }
364 
365     assertTrue(scannerWithHeartbeats.next() == null);
366     scanner.close();
367     scannerWithHeartbeats.close();
368   }
369 
370   /**
371    * Helper method for setting the time to sleep between rows and column families. If a sleep time
372    * is negative then that sleep will be disabled
373    * @param rowSleepTime
374    * @param cfSleepTime
375    */
376   private static void configureSleepTime(int rowSleepTime, int cfSleepTime, boolean sleepBeforeCf) {
377     HeartbeatHRegion.sleepBetweenRows = rowSleepTime > 0;
378     HeartbeatHRegion.rowSleepTime = rowSleepTime;
379 
380     HeartbeatHRegion.sleepBetweenColumnFamilies = cfSleepTime > 0;
381     HeartbeatHRegion.columnFamilySleepTime = cfSleepTime;
382     HeartbeatHRegion.sleepBeforeColumnFamily = sleepBeforeCf;
383   }
384 
385   /**
386    * Disable the sleeping mechanism server side.
387    */
388   private static void disableSleeping() {
389     HeartbeatHRegion.sleepBetweenRows = false;
390     HeartbeatHRegion.sleepBetweenColumnFamilies = false;
391   }
392 
393   /**
394    * Custom HRegionServer instance that instantiates {@link HeartbeatRPCServices} in place of
395    * {@link RSRpcServices} to allow us to toggle support for heartbeat messages
396    */
397   private static class HeartbeatHRegionServer extends HRegionServer {
398     public HeartbeatHRegionServer(Configuration conf) throws IOException, InterruptedException {
399       super(conf);
400     }
401 
402     public HeartbeatHRegionServer(Configuration conf, CoordinatedStateManager csm)
403         throws IOException, InterruptedException {
404       super(conf, csm);
405     }
406 
407     @Override
408     protected RSRpcServices createRpcServices() throws IOException {
409       return new HeartbeatRPCServices(this);
410     }
411   }
412 
413   /**
414    * Custom RSRpcServices instance that allows heartbeat support to be toggled
415    */
416   private static class HeartbeatRPCServices extends RSRpcServices {
417     private static volatile boolean heartbeatsEnabled = true;
418 
419     public HeartbeatRPCServices(HRegionServer rs) throws IOException {
420       super(rs);
421     }
422 
423     @Override
424     public ScanResponse scan(RpcController controller, ScanRequest request)
425         throws ServiceException {
426       ScanRequest.Builder builder = ScanRequest.newBuilder(request);
427       builder.setClientHandlesHeartbeats(heartbeatsEnabled);
428       return super.scan(controller, builder.build());
429     }
430   }
431 
432   /**
433    * Custom HRegion class that instantiates {@link RegionScanner}s with configurable sleep times
434    * between fetches of row Results and/or column family cells. Useful for emulating an instance
435    * where the server is taking a long time to process a client's scan request
436    */
437   private static class HeartbeatHRegion extends HRegion {
438     // Row sleeps occur AFTER each row worth of cells is retrieved.
439     private static volatile int rowSleepTime = DEFAULT_ROW_SLEEP_TIME;
440     private static volatile boolean sleepBetweenRows = false;
441 
442     // The sleep for column families can be initiated before or after we fetch the cells for the
443     // column family. If the sleep occurs BEFORE then the time limits will be reached inside
444     // StoreScanner while we are fetching individual cells. If the sleep occurs AFTER then the time
445     // limit will be reached inside RegionScanner after all the cells for a column family have been
446     // retrieved.
447     private static volatile boolean sleepBeforeColumnFamily = false;
448     private static volatile int columnFamilySleepTime = DEFAULT_CF_SLEEP_TIME;
449     private static volatile boolean sleepBetweenColumnFamilies = false;
450 
451     public HeartbeatHRegion(Path tableDir, WAL wal, FileSystem fs, Configuration confParam,
452         HRegionInfo regionInfo, HTableDescriptor htd, RegionServerServices rsServices) {
453       super(tableDir, wal, fs, confParam, regionInfo, htd, rsServices);
454     }
455 
456     public HeartbeatHRegion(HRegionFileSystem fs, WAL wal, Configuration confParam,
457         HTableDescriptor htd, RegionServerServices rsServices) {
458       super(fs, wal, confParam, htd, rsServices);
459     }
460 
461     private static void columnFamilySleep() {
462       if (sleepBetweenColumnFamilies) {
463         Threads.sleepWithoutInterrupt(columnFamilySleepTime);
464       }
465     }
466 
467     private static void rowSleep() {
468       if (sleepBetweenRows) {
469         Threads.sleepWithoutInterrupt(rowSleepTime);
470       }
471     }
472 
473     // Instantiate the custom heartbeat region scanners
474     @Override
475     protected RegionScanner instantiateRegionScanner(Scan scan,
476         List<KeyValueScanner> additionalScanners, long nonceGroup, long nonce) throws IOException {
477       if (scan.isReversed()) {
478         if (scan.getFilter() != null) {
479           scan.getFilter().setReversed(true);
480         }
481         return new HeartbeatReversedRegionScanner(scan, additionalScanners, this);
482       }
483       return new HeartbeatRegionScanner(scan, additionalScanners, this);
484     }
485   }
486 
487   /**
488    * Custom ReversedRegionScanner that can be configured to sleep between retrievals of row Results
489    * and/or column family cells
490    */
491   private static class HeartbeatReversedRegionScanner extends ReversedRegionScannerImpl {
492     HeartbeatReversedRegionScanner(Scan scan, List<KeyValueScanner> additionalScanners,
493         HRegion region) throws IOException {
494       super(scan, additionalScanners, region);
495     }
496 
497     @Override
498     public boolean nextRaw(List<Cell> outResults, ScannerContext context) throws IOException {
499       boolean moreRows = super.nextRaw(outResults, context);
500       HeartbeatHRegion.rowSleep();
501       return moreRows;
502     }
503 
504     @Override
505     protected void initializeKVHeap(List<KeyValueScanner> scanners,
506         List<KeyValueScanner> joinedScanners, HRegion region) throws IOException {
507       this.storeHeap = new HeartbeatReversedKVHeap(scanners, region.getComparator());
508       if (!joinedScanners.isEmpty()) {
509         this.joinedHeap = new HeartbeatReversedKVHeap(joinedScanners, region.getComparator());
510       }
511     }
512   }
513 
514   /**
515    * Custom RegionScanner that can be configured to sleep between retrievals of row Results and/or
516    * column family cells
517    */
518   private static class HeartbeatRegionScanner extends RegionScannerImpl {
519     HeartbeatRegionScanner(Scan scan, List<KeyValueScanner> additionalScanners, HRegion region)
520         throws IOException {
521       region.super(scan, additionalScanners, region);
522     }
523 
524     @Override
525     public boolean nextRaw(List<Cell> outResults, ScannerContext context) throws IOException {
526       boolean moreRows = super.nextRaw(outResults, context);
527       HeartbeatHRegion.rowSleep();
528       return moreRows;
529     }
530 
531     @Override
532     protected void initializeKVHeap(List<KeyValueScanner> scanners,
533         List<KeyValueScanner> joinedScanners, HRegion region) throws IOException {
534       this.storeHeap = new HeartbeatKVHeap(scanners, region.getComparator());
535       if (!joinedScanners.isEmpty()) {
536         this.joinedHeap = new HeartbeatKVHeap(joinedScanners, region.getComparator());
537       }
538     }
539   }
540 
541   /**
542    * Custom KV Heap that can be configured to sleep/wait in between retrievals of column family
543    * cells. Useful for testing
544    */
545   private static final class HeartbeatKVHeap extends KeyValueHeap {
546     public HeartbeatKVHeap(List<? extends KeyValueScanner> scanners, KVComparator comparator)
547         throws IOException {
548       super(scanners, comparator);
549     }
550 
551     HeartbeatKVHeap(List<? extends KeyValueScanner> scanners, KVScannerComparator comparator)
552         throws IOException {
553       super(scanners, comparator);
554     }
555 
556     @Override
557     public boolean next(List<Cell> result, ScannerContext context) throws IOException {
558       if (HeartbeatHRegion.sleepBeforeColumnFamily) HeartbeatHRegion.columnFamilySleep();
559       boolean moreRows = super.next(result, context);
560       if (!HeartbeatHRegion.sleepBeforeColumnFamily) HeartbeatHRegion.columnFamilySleep();
561       return moreRows;
562     }
563   }
564 
565   /**
566    * Custom reversed KV Heap that can be configured to sleep in between retrievals of column family
567    * cells.
568    */
569   private static final class HeartbeatReversedKVHeap extends ReversedKeyValueHeap {
570     public HeartbeatReversedKVHeap(List<? extends KeyValueScanner> scanners,
571         KVComparator comparator) throws IOException {
572       super(scanners, comparator);
573     }
574 
575     @Override
576     public boolean next(List<Cell> result, ScannerContext context) throws IOException {
577       if (HeartbeatHRegion.sleepBeforeColumnFamily) HeartbeatHRegion.columnFamilySleep();
578       boolean moreRows = super.next(result, context);
579       if (!HeartbeatHRegion.sleepBeforeColumnFamily) HeartbeatHRegion.columnFamilySleep();
580       return moreRows;
581     }
582   }
583 }