View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.client;
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.HashMap;
25  import java.util.Iterator;
26  import java.util.List;
27  import java.util.Random;
28  import java.util.Set;
29  import java.util.concurrent.CountDownLatch;
30  import java.util.concurrent.TimeUnit;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  import java.util.concurrent.atomic.AtomicInteger;
33  import java.util.concurrent.atomic.AtomicLong;
34  import java.util.concurrent.atomic.AtomicReference;
35  
36  import org.apache.commons.logging.Log;
37  import org.apache.commons.logging.LogFactory;
38  import org.apache.commons.logging.impl.Log4JLogger;
39  import org.apache.hadoop.conf.Configuration;
40  import org.apache.hadoop.hbase.Cell;
41  import org.apache.hadoop.hbase.HBaseTestingUtility;
42  import org.apache.hadoop.hbase.HConstants;
43  import org.apache.hadoop.hbase.HRegionInfo;
44  import org.apache.hadoop.hbase.HTableDescriptor;
45  import org.apache.hadoop.hbase.KeyValue;
46  import org.apache.hadoop.hbase.NotServingRegionException;
47  import org.apache.hadoop.hbase.RegionLocations;
48  import org.apache.hadoop.hbase.TableNotFoundException;
49  import org.apache.hadoop.hbase.client.AsyncProcess.AsyncRequestFuture;
50  import org.apache.hadoop.hbase.client.AsyncProcess.AsyncRequestFutureImpl;
51  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
52  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
53  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
54  import org.apache.hadoop.hbase.protobuf.RequestConverter;
55  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
56  import org.apache.hadoop.hbase.regionserver.HRegionServer;
57  import org.apache.hadoop.hbase.regionserver.InternalScanner;
58  import org.apache.hadoop.hbase.regionserver.RegionScanner;
59  import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
60  import org.apache.hadoop.hbase.regionserver.TestRegionServerNoMaster;
61  import org.apache.hadoop.hbase.testclassification.MediumTests;
62  import org.apache.hadoop.hbase.util.Bytes;
63  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
64  import org.apache.log4j.Level;
65  import org.apache.zookeeper.KeeperException;
66  import org.junit.After;
67  import org.junit.AfterClass;
68  import org.junit.Assert;
69  import org.junit.Before;
70  import org.junit.BeforeClass;
71  import org.junit.Test;
72  import org.junit.experimental.categories.Category;
73  
74  import com.yammer.metrics.core.Counter;
75  
76  /**
77   * Tests for region replicas. Sad that we cannot isolate these without bringing up a whole
78   * cluster. See {@link org.apache.hadoop.hbase.regionserver.TestRegionServerNoMaster}.
79   */
80  @Category(MediumTests.class)
81  public class TestReplicasClient {
private static final Log LOG = LogFactory.getLog(TestReplicasClient.class);

static {
  // Turn the retrying caller's logger up to ALL for these tests.
  ((Log4JLogger)RpcRetryingCaller.LOG).getLogger().setLevel(Level.ALL);
}

/** Number of region servers passed to {@code startMiniCluster}. */
private static final int NB_SERVERS = 1;
/** Table shared by every test; created in {@code beforeClass}, closed in {@code afterClass}. */
private static HTable table = null;
/**
 * Row key used to look up the region and by the flush tests. Encoded with
 * {@link Bytes#toBytes(String)} rather than the platform-default charset of
 * {@code String.getBytes()}, like the other row keys built in this class.
 */
private static final byte[] row = Bytes.toBytes(TestReplicasClient.class.getName());

/** Region info of the primary replica, read from the table in {@code beforeClass}. */
private static HRegionInfo hriPrimary;
/** Hand-built region info with replica id 1, mirroring {@link #hriPrimary}. */
private static HRegionInfo hriSecondary;

private static final HBaseTestingUtility HTU = new HBaseTestingUtility();
/** Column family used for every put in this class. */
private static final byte[] f = HConstants.CATALOG_FAMILY;

/** Store file refresh period given to the region server configuration. */
private final static int REFRESH_PERIOD = 1000;
99  
100   /**
101    * This copro is used to synchronize the tests.
102    */
103   public static class SlowMeCopro extends BaseRegionObserver {
104     static final AtomicLong sleepTime = new AtomicLong(0);
105     static final AtomicBoolean slowDownNext = new AtomicBoolean(false);
106     static final AtomicInteger countOfNext = new AtomicInteger(0);
107     private static final AtomicReference<CountDownLatch> primaryCdl =
108         new AtomicReference<>(new CountDownLatch(0));
109     private static final AtomicReference<CountDownLatch> secondaryCdl =
110         new AtomicReference<>(new CountDownLatch(0));
111     Random r = new Random();
112     public SlowMeCopro() {
113     }
114 
115     @Override
116     public void preGetOp(final ObserverContext<RegionCoprocessorEnvironment> e,
117                          final Get get, final List<Cell> results) throws IOException {
118       slowdownCode(e);
119     }
120 
121     @Override
122     public RegionScanner preScannerOpen(final ObserverContext<RegionCoprocessorEnvironment> e,
123         final Scan scan, final RegionScanner s) throws IOException {
124       slowdownCode(e);
125       return s;
126     }
127 
128     @Override
129     public boolean preScannerNext(final ObserverContext<RegionCoprocessorEnvironment> e,
130         final InternalScanner s, final List<Result> results,
131         final int limit, final boolean hasMore) throws IOException {
132       //this will slow down a certain next operation if the conditions are met. The slowness
133       //will allow the call to go to a replica
134       if (slowDownNext.get()) {
135         //have some "next" return successfully from the primary; hence countOfNext checked
136         if (countOfNext.incrementAndGet() == 2) {
137           sleepTime.set(2000);
138           slowdownCode(e);
139         }
140       }
141       return true;
142     }
143 
144     private void slowdownCode(final ObserverContext<RegionCoprocessorEnvironment> e) {
145       if (e.getEnvironment().getRegion().getRegionInfo().getReplicaId() == 0) {
146         LOG.info("We're the primary replicas.");
147         CountDownLatch latch = getPrimaryCdl().get();
148         try {
149           if (sleepTime.get() > 0) {
150             LOG.info("Sleeping for " + sleepTime.get() + " ms");
151             Thread.sleep(sleepTime.get());
152           } else if (latch.getCount() > 0) {
153             LOG.info("Waiting for the counterCountDownLatch");
154             latch.await(2, TimeUnit.MINUTES); // To help the tests to finish.
155             if (latch.getCount() > 0) {
156               throw new RuntimeException("Can't wait more");
157             }
158           }
159         } catch (InterruptedException e1) {
160           LOG.error(e1);
161         }
162       } else {
163         LOG.info("We're not the primary replicas.");
164         CountDownLatch latch = getSecondaryCdl().get();
165         try {
166           if (latch.getCount() > 0) {
167             LOG.info("Waiting for the secondary counterCountDownLatch");
168             latch.await(2, TimeUnit.MINUTES); // To help the tests to finish.
169             if (latch.getCount() > 0) {
170               throw new RuntimeException("Can't wait more");
171             }
172           }
173         } catch (InterruptedException e1) {
174           LOG.error(e1);
175         }
176       }
177     }
178 
179     public static AtomicReference<CountDownLatch> getPrimaryCdl() {
180       return primaryCdl;
181     }
182 
183     public static AtomicReference<CountDownLatch> getSecondaryCdl() {
184       return secondaryCdl;
185     }
186   }
187 
188   @BeforeClass
189   public static void beforeClass() throws Exception {
190     // enable store file refreshing
191     HTU.getConfiguration().setInt(
192         StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, REFRESH_PERIOD);
193     HTU.getConfiguration().setBoolean("hbase.client.log.scanner.activity", true);
194     HTU.getConfiguration().setBoolean(MetricsConnection.CLIENT_SIDE_METRICS_ENABLED_KEY, true);
195     ConnectionUtils.setupMasterlessConnection(HTU.getConfiguration());
196     HTU.startMiniCluster(NB_SERVERS);
197 
198     // Create table then get the single region for our new table.
199     HTableDescriptor hdt = HTU.createTableDescriptor(TestReplicasClient.class.getSimpleName());
200     hdt.addCoprocessor(SlowMeCopro.class.getName());
201     table = HTU.createTable(hdt, new byte[][]{f}, HTU.getConfiguration());
202 
203     hriPrimary = table.getRegionLocation(row, false).getRegionInfo();
204 
205     // mock a secondary region info to open
206     hriSecondary = new HRegionInfo(hriPrimary.getTable(), hriPrimary.getStartKey(),
207         hriPrimary.getEndKey(), hriPrimary.isSplit(), hriPrimary.getRegionId(), 1);
208 
209     // No master
210     LOG.info("Master is going to be stopped");
211     TestRegionServerNoMaster.stopMasterAndAssignMeta(HTU);
212     Configuration c = new Configuration(HTU.getConfiguration());
213     c.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
214     LOG.info("Master has stopped");
215   }
216 
217   @AfterClass
218   public static void afterClass() throws Exception {
219     if (table != null) table.close();
220     HTU.shutdownMiniCluster();
221   }
222 
223   @Before
224   public void before() throws IOException {
225     HTU.getHBaseAdmin().getConnection().clearRegionCache();
226     try {
227       openRegion(hriPrimary);
228     } catch (Exception ignored) {
229     }
230     try {
231       openRegion(hriSecondary);
232     } catch (Exception ignored) {
233     }
234   }
235 
236   @After
237   public void after() throws IOException, KeeperException {
238     try {
239       closeRegion(hriSecondary);
240     } catch (Exception ignored) {
241     }
242     try {
243       closeRegion(hriPrimary);
244     } catch (Exception ignored) {
245     }
246     ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hriPrimary);
247     ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hriSecondary);
248 
249     HTU.getHBaseAdmin().getConnection().clearRegionCache();
250   }
251 
252   private HRegionServer getRS() {
253     return HTU.getMiniHBaseCluster().getRegionServer(0);
254   }
255 
256   private void openRegion(HRegionInfo hri) throws Exception {
257     try {
258       if (isRegionOpened(hri)) return;
259     } catch (Exception e){}
260     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
261     // first version is '0'
262     AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(
263       getRS().getServerName(), hri, 0, null, null);
264     AdminProtos.OpenRegionResponse responseOpen = getRS().getRSRpcServices().openRegion(null, orr);
265     Assert.assertEquals(responseOpen.getOpeningStateCount(), 1);
266     Assert.assertEquals(responseOpen.getOpeningState(0),
267       AdminProtos.OpenRegionResponse.RegionOpeningState.OPENED);
268     checkRegionIsOpened(hri);
269   }
270 
271   private void closeRegion(HRegionInfo hri) throws Exception {
272     ZKAssign.createNodeClosing(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
273 
274     AdminProtos.CloseRegionRequest crr = RequestConverter.buildCloseRegionRequest(
275       getRS().getServerName(), hri.getEncodedName(), true);
276     AdminProtos.CloseRegionResponse responseClose = getRS()
277         .getRSRpcServices().closeRegion(null, crr);
278     Assert.assertTrue(responseClose.getClosed());
279 
280     checkRegionIsClosed(hri.getEncodedName());
281 
282     ZKAssign.deleteClosedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(), null);
283   }
284 
285   private void checkRegionIsOpened(HRegionInfo hri) throws Exception {
286 
287     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
288       Thread.sleep(1);
289     }
290 
291     Assert.assertTrue(
292         ZKAssign.deleteOpenedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(), null));
293   }
294 
295   private boolean isRegionOpened(HRegionInfo hri) throws Exception {
296     return getRS().getRegionByEncodedName(hri.getEncodedName()).isAvailable();
297   }
298 
299   private void checkRegionIsClosed(String encodedRegionName) throws Exception {
300 
301     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
302       Thread.sleep(1);
303     }
304 
305     try {
306       Assert.assertFalse(getRS().getRegionByEncodedName(encodedRegionName).isAvailable());
307     } catch (NotServingRegionException expected) {
308       // That's how it work: if the region is closed we have an exception.
309     }
310 
311     // We don't delete the znode here, because there is not always a znode.
312   }
313 
314   private void flushRegion(HRegionInfo regionInfo) throws IOException {
315     TestRegionServerNoMaster.flushRegion(HTU, regionInfo);
316   }
317 
318   @Test
319   public void testUseRegionWithoutReplica() throws Exception {
320     byte[] b1 = "testUseRegionWithoutReplica".getBytes();
321     openRegion(hriSecondary);
322     SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(0));
323     try {
324       Get g = new Get(b1);
325       Result r = table.get(g);
326       Assert.assertFalse(r.isStale());
327     } finally {
328       closeRegion(hriSecondary);
329     }
330   }
331 
332   @Test
333   public void testLocations() throws Exception {
334     byte[] b1 = "testLocations".getBytes();
335     openRegion(hriSecondary);
336     ClusterConnection hc = (ClusterConnection) HTU.getHBaseAdmin().getConnection();
337 
338     try {
339       hc.clearRegionCache();
340       RegionLocations rl = hc.locateRegion(table.getName(), b1, false, false);
341       Assert.assertEquals(2, rl.size());
342 
343       rl = hc.locateRegion(table.getName(), b1, true, false);
344       Assert.assertEquals(2, rl.size());
345 
346       hc.clearRegionCache();
347       rl = hc.locateRegion(table.getName(), b1, true, false);
348       Assert.assertEquals(2, rl.size());
349 
350       rl = hc.locateRegion(table.getName(), b1, false, false);
351       Assert.assertEquals(2, rl.size());
352     } finally {
353       closeRegion(hriSecondary);
354     }
355   }
356 
357   @Test
358   public void testGetNoResultNoStaleRegionWithReplica() throws Exception {
359     byte[] b1 = "testGetNoResultNoStaleRegionWithReplica".getBytes();
360     openRegion(hriSecondary);
361 
362     try {
363       // A get works and is not stale
364       Get g = new Get(b1);
365       Result r = table.get(g);
366       Assert.assertFalse(r.isStale());
367     } finally {
368       closeRegion(hriSecondary);
369     }
370   }
371 
372 
373   @Test
374   public void testGetNoResultStaleRegionWithReplica() throws Exception {
375     byte[] b1 = "testGetNoResultStaleRegionWithReplica".getBytes();
376     openRegion(hriSecondary);
377 
378     SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1));
379     try {
380       Get g = new Get(b1);
381       g.setConsistency(Consistency.TIMELINE);
382       Result r = table.get(g);
383       Assert.assertTrue(r.isStale());
384     } finally {
385       SlowMeCopro.getPrimaryCdl().get().countDown();
386       closeRegion(hriSecondary);
387     }
388   }
389 
390   @Test
391   public void testGetNoResultNotStaleSleepRegionWithReplica() throws Exception {
392     byte[] b1 = "testGetNoResultNotStaleSleepRegionWithReplica".getBytes();
393     openRegion(hriSecondary);
394 
395     try {
396       // We sleep; but we won't go to the stale region as we don't get the stale by default.
397       SlowMeCopro.sleepTime.set(2000);
398       Get g = new Get(b1);
399       Result r = table.get(g);
400       Assert.assertFalse(r.isStale());
401 
402     } finally {
403       SlowMeCopro.sleepTime.set(0);
404       closeRegion(hriSecondary);
405     }
406   }
407 
408   @Test
409   public void testFlushTable() throws Exception {
410     openRegion(hriSecondary);
411     try {
412       flushRegion(hriPrimary);
413       flushRegion(hriSecondary);
414 
415       Put p = new Put(row);
416       p.add(f, row, row);
417       table.put(p);
418 
419       flushRegion(hriPrimary);
420       flushRegion(hriSecondary);
421     } finally {
422       Delete d = new Delete(row);
423       table.delete(d);
424       closeRegion(hriSecondary);
425     }
426   }
427 
428   @Test
429   public void testFlushPrimary() throws Exception {
430     openRegion(hriSecondary);
431 
432     try {
433       flushRegion(hriPrimary);
434 
435       Put p = new Put(row);
436       p.add(f, row, row);
437       table.put(p);
438 
439       flushRegion(hriPrimary);
440     } finally {
441       Delete d = new Delete(row);
442       table.delete(d);
443       closeRegion(hriSecondary);
444     }
445   }
446 
447   @Test
448   public void testFlushSecondary() throws Exception {
449     openRegion(hriSecondary);
450     try {
451       flushRegion(hriSecondary);
452 
453       Put p = new Put(row);
454       p.add(f, row, row);
455       table.put(p);
456 
457       flushRegion(hriSecondary);
458     } catch (TableNotFoundException expected) {
459     } finally {
460       Delete d = new Delete(row);
461       table.delete(d);
462       closeRegion(hriSecondary);
463     }
464   }
465 
466   @Test
467   public void testUseRegionWithReplica() throws Exception {
468     byte[] b1 = "testUseRegionWithReplica".getBytes();
469     openRegion(hriSecondary);
470 
471     try {
472       // A simple put works, even if there here a second replica
473       Put p = new Put(b1);
474       p.add(f, b1, b1);
475       table.put(p);
476       LOG.info("Put done");
477 
478       // A get works and is not stale
479       Get g = new Get(b1);
480       Result r = table.get(g);
481       Assert.assertFalse(r.isStale());
482       Assert.assertFalse(r.getColumnCells(f, b1).isEmpty());
483       LOG.info("get works and is not stale done");
484 
485       // Even if it we have to wait a little on the main region
486       SlowMeCopro.sleepTime.set(2000);
487       g = new Get(b1);
488       r = table.get(g);
489       Assert.assertFalse(r.isStale());
490       Assert.assertFalse(r.getColumnCells(f, b1).isEmpty());
491       SlowMeCopro.sleepTime.set(0);
492       LOG.info("sleep and is not stale done");
493 
494       // But if we ask for stale we will get it
495       SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1));
496       g = new Get(b1);
497       g.setConsistency(Consistency.TIMELINE);
498       r = table.get(g);
499       Assert.assertTrue(r.isStale());
500       Assert.assertTrue(r.getColumnCells(f, b1).isEmpty());
501       SlowMeCopro.getPrimaryCdl().get().countDown();
502 
503       LOG.info("stale done");
504 
505       // exists works and is not stale
506       g = new Get(b1);
507       g.setCheckExistenceOnly(true);
508       r = table.get(g);
509       Assert.assertFalse(r.isStale());
510       Assert.assertTrue(r.getExists());
511       LOG.info("exists not stale done");
512 
513       // exists works on stale but don't see the put
514       SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1));
515       g = new Get(b1);
516       g.setCheckExistenceOnly(true);
517       g.setConsistency(Consistency.TIMELINE);
518       r = table.get(g);
519       Assert.assertTrue(r.isStale());
520       Assert.assertFalse("The secondary has stale data", r.getExists());
521       SlowMeCopro.getPrimaryCdl().get().countDown();
522       LOG.info("exists stale before flush done");
523 
524       flushRegion(hriPrimary);
525       flushRegion(hriSecondary);
526       LOG.info("flush done");
527       Thread.sleep(1000 + REFRESH_PERIOD * 2);
528 
529       // get works and is not stale
530       SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1));
531       g = new Get(b1);
532       g.setConsistency(Consistency.TIMELINE);
533       r = table.get(g);
534       Assert.assertTrue(r.isStale());
535       Assert.assertFalse(r.isEmpty());
536       SlowMeCopro.getPrimaryCdl().get().countDown();
537       LOG.info("stale done");
538 
539       // exists works on stale and we see the put after the flush
540       SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1));
541       g = new Get(b1);
542       g.setCheckExistenceOnly(true);
543       g.setConsistency(Consistency.TIMELINE);
544       r = table.get(g);
545       Assert.assertTrue(r.isStale());
546       Assert.assertTrue(r.getExists());
547       SlowMeCopro.getPrimaryCdl().get().countDown();
548       LOG.info("exists stale after flush done");
549 
550     } finally {
551       SlowMeCopro.getPrimaryCdl().get().countDown();
552       SlowMeCopro.sleepTime.set(0);
553       Delete d = new Delete(b1);
554       table.delete(d);
555       closeRegion(hriSecondary);
556     }
557   }
558 
559   @Test
560   public void testHedgedRead() throws Exception {
561     byte[] b1 = "testHedgedRead".getBytes();
562     openRegion(hriSecondary);
563 
564     try {
565       // A simple put works, even if there here a second replica
566       Put p = new Put(b1);
567       p.addColumn(f, b1, b1);
568       table.put(p);
569       LOG.info("Put done");
570 
571       // A get works and is not stale
572       Get g = new Get(b1);
573       Result r = table.get(g);
574       Assert.assertFalse(r.isStale());
575       Assert.assertFalse(r.getColumnCells(f, b1).isEmpty());
576       LOG.info("get works and is not stale done");
577 
578       //reset
579       ClusterConnection connection = (ClusterConnection) HTU.getConnection();
580       Counter hedgedReadOps = connection.getConnectionMetrics().hedgedReadOps;
581       Counter hedgedReadWin = connection.getConnectionMetrics().hedgedReadWin;
582       hedgedReadOps.dec(hedgedReadOps.count());
583       hedgedReadWin.dec(hedgedReadWin.count());
584 
585       // Wait a little on the main region, just enough to happen once hedged read
586       // and hedged read did not returned faster
587       int primaryCallTimeoutMicroSecond = connection.getConnectionConfiguration().getPrimaryCallTimeoutMicroSecond();
588       SlowMeCopro.sleepTime.set(TimeUnit.MICROSECONDS.toMillis(primaryCallTimeoutMicroSecond));
589       SlowMeCopro.getSecondaryCdl().set(new CountDownLatch(1));
590       g = new Get(b1);
591       g.setConsistency(Consistency.TIMELINE);
592       r = table.get(g);
593       Assert.assertFalse(r.isStale());
594       Assert.assertFalse(r.getColumnCells(f, b1).isEmpty());
595       Assert.assertEquals(hedgedReadOps.count(), 1);
596       Assert.assertEquals(hedgedReadWin.count(), 0);
597       SlowMeCopro.sleepTime.set(0);
598       SlowMeCopro.getSecondaryCdl().get().countDown();
599       LOG.info("hedged read occurred but not faster");
600 
601 
602       // But if we ask for stale we will get it and hedged read returned faster
603       SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1));
604       g = new Get(b1);
605       g.setConsistency(Consistency.TIMELINE);
606       r = table.get(g);
607       Assert.assertTrue(r.isStale());
608       Assert.assertTrue(r.getColumnCells(f, b1).isEmpty());
609       Assert.assertEquals(hedgedReadOps.count(), 2);
610       Assert.assertEquals(hedgedReadWin.count(), 1);
611       SlowMeCopro.getPrimaryCdl().get().countDown();
612       LOG.info("hedged read occurred and faster");
613 
614     } finally {
615       SlowMeCopro.getPrimaryCdl().get().countDown();
616       SlowMeCopro.getSecondaryCdl().get().countDown();
617       SlowMeCopro.sleepTime.set(0);
618       Delete d = new Delete(b1);
619       table.delete(d);
620       closeRegion(hriSecondary);
621     }
622   }
623 
624   @Test
625   public void testCancelOfMultiGet() throws Exception {
626     openRegion(hriSecondary);
627     try {
628       List<Put> puts = new ArrayList<Put>(2);
629       byte[] b1 = Bytes.toBytes("testCancelOfMultiGet" + 0);
630       Put p = new Put(b1);
631       p.add(f, b1, b1);
632       puts.add(p);
633 
634       byte[] b2 = Bytes.toBytes("testCancelOfMultiGet" + 1);
635       p = new Put(b2);
636       p.add(f, b2, b2);
637       puts.add(p);
638       table.put(puts);
639       LOG.debug("PUT done");
640       flushRegion(hriPrimary);
641       LOG.info("flush done");
642 
643       Thread.sleep(1000 + REFRESH_PERIOD * 2);
644 
645       AsyncProcess ap = ((ClusterConnection) HTU.getHBaseAdmin().getConnection())
646           .getAsyncProcess();
647 
648       // Make primary slowdown
649       SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1));
650 
651       List<Get> gets = new ArrayList<Get>();
652       Get g = new Get(b1);
653       g.setCheckExistenceOnly(true);
654       g.setConsistency(Consistency.TIMELINE);
655       gets.add(g);
656       g = new Get(b2);
657       g.setCheckExistenceOnly(true);
658       g.setConsistency(Consistency.TIMELINE);
659       gets.add(g);
660       Object[] results = new Object[2];
661       AsyncRequestFuture reqs = ap.submitAll(table.getPool(), table.getName(),
662           gets, null, results);
663       reqs.waitUntilDone();
664       // verify we got the right results back
665       for (Object r : results) {
666         Assert.assertTrue(((Result)r).isStale());
667         Assert.assertTrue(((Result)r).getExists());
668       }
669       Set<PayloadCarryingServerCallable> set =
670           ((AsyncRequestFutureImpl<?>)reqs).getCallsInProgress();
671       // verify we did cancel unneeded calls
672       Assert.assertTrue(!set.isEmpty());
673       for (PayloadCarryingServerCallable m : set) {
674         Assert.assertTrue(m.isCancelled());
675       }
676     } finally {
677       SlowMeCopro.getPrimaryCdl().get().countDown();
678       SlowMeCopro.sleepTime.set(0);
679       SlowMeCopro.slowDownNext.set(false);
680       SlowMeCopro.countOfNext.set(0);
681       for (int i = 0; i < 2; i++) {
682         byte[] b1 = Bytes.toBytes("testCancelOfMultiGet" + i);
683         Delete d = new Delete(b1);
684         table.delete(d);
685       }
686       closeRegion(hriSecondary);
687     }
688   }
689 
690   @Test
691   public void testScanWithReplicas() throws Exception {
692     //simple scan
693     runMultipleScansOfOneType(false, false);
694   }
695 
696   @Test
697   public void testSmallScanWithReplicas() throws Exception {
698     //small scan
699     runMultipleScansOfOneType(false, true);
700   }
701 
702   @Test
703   public void testReverseScanWithReplicas() throws Exception {
704     //reverse scan
705     runMultipleScansOfOneType(true, false);
706   }
707 
708   @Test
709   public void testCancelOfScan() throws Exception {
710     openRegion(hriSecondary);
711     int NUMROWS = 100;
712     try {
713       for (int i = 0; i < NUMROWS; i++) {
714         byte[] b1 = Bytes.toBytes("testUseRegionWithReplica" + i);
715         Put p = new Put(b1);
716         p.add(f, b1, b1);
717         table.put(p);
718       }
719       LOG.debug("PUT done");
720       int caching = 20;
721       byte[] start;
722       start = Bytes.toBytes("testUseRegionWithReplica" + 0);
723 
724       flushRegion(hriPrimary);
725       LOG.info("flush done");
726       Thread.sleep(1000 + REFRESH_PERIOD * 2);
727 
728       // now make some 'next' calls slow
729       SlowMeCopro.slowDownNext.set(true);
730       SlowMeCopro.countOfNext.set(0);
731       SlowMeCopro.sleepTime.set(5000);
732 
733       Scan scan = new Scan(start);
734       scan.setCaching(caching);
735       scan.setConsistency(Consistency.TIMELINE);
736       ResultScanner scanner = table.getScanner(scan);
737       Iterator<Result> iter = scanner.iterator();
738       iter.next();
739       Assert.assertTrue(((ClientScanner)scanner).isAnyRPCcancelled());
740       SlowMeCopro.slowDownNext.set(false);
741       SlowMeCopro.countOfNext.set(0);
742     } finally {
743       SlowMeCopro.getPrimaryCdl().get().countDown();
744       SlowMeCopro.sleepTime.set(0);
745       SlowMeCopro.slowDownNext.set(false);
746       SlowMeCopro.countOfNext.set(0);
747       for (int i = 0; i < NUMROWS; i++) {
748         byte[] b1 = Bytes.toBytes("testUseRegionWithReplica" + i);
749         Delete d = new Delete(b1);
750         table.delete(d);
751       }
752       closeRegion(hriSecondary);
753     }
754   }
755 
756   private void runMultipleScansOfOneType(boolean reversed, boolean small) throws Exception {
757     openRegion(hriSecondary);
758     int NUMROWS = 100;
759     int NUMCOLS = 10;
760     try {
761       for (int i = 0; i < NUMROWS; i++) {
762         byte[] b1 = Bytes.toBytes("testUseRegionWithReplica" + i);
763         for (int col = 0; col < NUMCOLS; col++) {
764           Put p = new Put(b1);
765           String qualifier = "qualifer" + col;
766           KeyValue kv = new KeyValue(b1, f, qualifier.getBytes());
767           p.add(kv);
768           table.put(p);
769         }
770       }
771       LOG.debug("PUT done");
772       int caching = 20;
773       long maxResultSize = Long.MAX_VALUE;
774 
775       byte[] start;
776       if (reversed) start = Bytes.toBytes("testUseRegionWithReplica" + (NUMROWS - 1));
777       else start = Bytes.toBytes("testUseRegionWithReplica" + 0);
778 
779       scanWithReplicas(reversed, small, Consistency.TIMELINE, caching, maxResultSize,
780         start, NUMROWS, NUMCOLS, false, false);
781 
782       // Even if we were to slow the server down, unless we ask for stale
783       // we won't get it
784       SlowMeCopro.sleepTime.set(5000);
785       scanWithReplicas(reversed, small, Consistency.STRONG, caching, maxResultSize, start, NUMROWS,
786         NUMCOLS, false, false);
787       SlowMeCopro.sleepTime.set(0);
788 
789       flushRegion(hriPrimary);
790       LOG.info("flush done");
791       Thread.sleep(1000 + REFRESH_PERIOD * 2);
792 
793       //Now set the flag to get a response even if stale
794       SlowMeCopro.sleepTime.set(5000);
795       scanWithReplicas(reversed, small, Consistency.TIMELINE, caching, maxResultSize,
796         start, NUMROWS, NUMCOLS, true, false);
797       SlowMeCopro.sleepTime.set(0);
798 
799       // now make some 'next' calls slow
800       SlowMeCopro.slowDownNext.set(true);
801       SlowMeCopro.countOfNext.set(0);
802       scanWithReplicas(reversed, small, Consistency.TIMELINE, caching, maxResultSize, start,
803         NUMROWS, NUMCOLS, true, true);
804       SlowMeCopro.slowDownNext.set(false);
805       SlowMeCopro.countOfNext.set(0);
806 
807       // Make sure we do not get stale data..
808       SlowMeCopro.sleepTime.set(5000);
809       scanWithReplicas(reversed, small, Consistency.STRONG, caching, maxResultSize,
810         start, NUMROWS, NUMCOLS, false, false);
811       SlowMeCopro.sleepTime.set(0);
812 
813       // While the next calls are slow, set maxResultSize to 1 so that some partial results will be
814       // returned from the server before the replica switch occurs.
815       maxResultSize = 1;
816       SlowMeCopro.slowDownNext.set(true);
817       SlowMeCopro.countOfNext.set(0);
818       scanWithReplicas(reversed, small, Consistency.TIMELINE, caching, maxResultSize, start,
819         NUMROWS, NUMCOLS, true, true);
820       maxResultSize = Long.MAX_VALUE;
821       SlowMeCopro.slowDownNext.set(false);
822       SlowMeCopro.countOfNext.set(0);
823     } finally {
824       SlowMeCopro.getPrimaryCdl().get().countDown();
825       SlowMeCopro.sleepTime.set(0);
826       SlowMeCopro.slowDownNext.set(false);
827       SlowMeCopro.countOfNext.set(0);
828       for (int i = 0; i < NUMROWS; i++) {
829         byte[] b1 = Bytes.toBytes("testUseRegionWithReplica" + i);
830         Delete d = new Delete(b1);
831         table.delete(d);
832       }
833       closeRegion(hriSecondary);
834     }
835   }
836 
837   private void scanWithReplicas(boolean reversed, boolean small, Consistency consistency,
838       int caching, long maxResultSize, byte[] startRow, int numRows, int numCols,
839       boolean staleExpected, boolean slowNext)
840           throws Exception {
841     Scan scan = new Scan(startRow);
842     scan.setCaching(caching);
843     scan.setMaxResultSize(maxResultSize);
844     scan.setReversed(reversed);
845     scan.setSmall(small);
846     scan.setConsistency(consistency);
847     ResultScanner scanner = table.getScanner(scan);
848     Iterator<Result> iter = scanner.iterator();
849 
850     // Maps of row keys that we have seen so far
851     HashMap<String, Boolean> map = new HashMap<String, Boolean>();
852 
853     // Tracked metrics
854     int rowCount = 0;
855     int cellCount = 0;
856     int countOfStale = 0;
857 
858     while (iter.hasNext()) {
859       rowCount++;
860       Result r = iter.next();
861       String row = new String(r.getRow());
862 
863       if (map.containsKey(row)) {
864         throw new Exception("Unexpected scan result. Repeated row " + Bytes.toString(r.getRow()));
865       }
866 
867       map.put(row, true);
868 
869       for (Cell cell : r.rawCells()) {
870         cellCount++;
871       }
872 
873       if (!slowNext) Assert.assertTrue(r.isStale() == staleExpected);
874       if (r.isStale()) countOfStale++;
875     }
876     Assert.assertTrue("Count of rows " + rowCount + " num rows expected " + numRows,
877       rowCount == numRows);
878     Assert.assertTrue("Count of cells: " + cellCount + " cells expected: " + numRows * numCols,
879       cellCount == (numRows * numCols));
880 
881     if (slowNext) {
882       LOG.debug("Count of Stale " + countOfStale);
883       Assert.assertTrue(countOfStale > 1);
884 
885       // If the scan was configured in such a way that a full row was NOT retrieved before the
886       // replica switch occurred, then it is possible that all rows were stale
887       if (maxResultSize != Long.MAX_VALUE) {
888         Assert.assertTrue(countOfStale <= numRows);
889       } else {
890         Assert.assertTrue(countOfStale < numRows);
891       }
892     }
893   }
894 }