View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.client;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertFalse;
22  import static org.junit.Assert.fail;
23  
24  import java.io.IOException;
25  import java.io.InterruptedIOException;
26  import java.util.HashSet;
27  import java.util.List;
28  import java.util.Set;
29  import java.util.concurrent.TimeUnit;
30  
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.CategoryBasedTimeout;
33  import org.apache.hadoop.hbase.HBaseTestingUtility;
34  import org.apache.hadoop.hbase.HColumnDescriptor;
35  import org.apache.hadoop.hbase.HConstants;
36  import org.apache.hadoop.hbase.HRegionInfo;
37  import org.apache.hadoop.hbase.HTableDescriptor;
38  import org.apache.hadoop.hbase.MetaTableAccessor;
39  import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
40  import org.apache.hadoop.hbase.coprocessor.ObserverContext;
41  import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
42  import org.apache.hadoop.hbase.master.RegionState;
43  import org.apache.hadoop.hbase.regionserver.InternalScanner;
44  import org.apache.hadoop.hbase.regionserver.ScanType;
45  import org.apache.hadoop.hbase.regionserver.Store;
46  import org.apache.hadoop.hbase.testclassification.LargeTests;
47  import org.apache.hadoop.hbase.TableName;
48  import org.apache.hadoop.hbase.Waiter;
49  import org.apache.hadoop.hbase.master.AssignmentManager;
50  import org.apache.hadoop.hbase.master.MasterFileSystem;
51  import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
52  import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
53  import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
54  import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException;
55  import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
56  import org.apache.hadoop.hbase.util.Bytes;
57  import org.apache.hadoop.hbase.util.FSUtils;
58  import org.junit.After;
59  import org.junit.AfterClass;
60  import org.junit.Before;
61  import org.junit.BeforeClass;
62  import org.junit.Rule;
63  import org.junit.Test;
64  import org.junit.experimental.categories.Category;
65  import org.junit.rules.TestRule;
66  
67  /**
68   * Test restore snapshots from the client
69   */
70  @Category(LargeTests.class)
71  public class TestRestoreSnapshotFromClient {
72    @Rule public final TestRule timeout = CategoryBasedTimeout.builder()
73        .withTimeout(this.getClass())
74        .withLookingForStuckThread(true)
75        .build();
76  
77    protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
78  
79    protected final byte[] FAMILY = Bytes.toBytes("cf");
80  
81    protected byte[] emptySnapshot;
82    protected byte[] snapshotName0;
83    protected byte[] snapshotName1;
84    protected byte[] snapshotName2;
85    protected int snapshot0Rows;
86    protected int snapshot1Rows;
87    protected TableName tableName;
88    protected Admin admin;
89  
90    @BeforeClass
91    public static void setUpBeforeClass() throws Exception {
92      TEST_UTIL.getConfiguration().setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
93      TEST_UTIL.getConfiguration().setBoolean("hbase.online.schema.update.enable", true);
94      TEST_UTIL.getConfiguration().setInt("hbase.hstore.compactionThreshold", 10);
95      TEST_UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 100);
96      TEST_UTIL.getConfiguration().setInt("hbase.client.pause", 250);
97      TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 6);
98      TEST_UTIL.getConfiguration().setBoolean("hbase.master.enabletable.roundrobin", true);
99      // Setting bigger value to avoid catalog janitor execution (parent region cleanup)
100     TEST_UTIL.getConfiguration().setLong("hbase.catalogjanitor.interval", 1800000);
101     TEST_UTIL.startMiniCluster(3);
102   }
103 
104   @AfterClass
105   public static void tearDownAfterClass() throws Exception {
106     TEST_UTIL.shutdownMiniCluster();
107   }
108 
109   /**
110    * Initialize the tests with a table filled with some data
111    * and two snapshots (snapshotName0, snapshotName1) of different states.
112    * The tableName, snapshotNames and the number of rows in the snapshot are initialized.
113    */
114   @Before
115   public void setup() throws Exception {
116     this.admin = TEST_UTIL.getHBaseAdmin();
117 
118     long tid = System.currentTimeMillis();
119     tableName =
120         TableName.valueOf("testtb-" + tid);
121     emptySnapshot = Bytes.toBytes("emptySnaptb-" + tid);
122     snapshotName0 = Bytes.toBytes("snaptb0-" + tid);
123     snapshotName1 = Bytes.toBytes("snaptb1-" + tid);
124     snapshotName2 = Bytes.toBytes("snaptb2-" + tid);
125 
126     // create Table and disable it
127     SnapshotTestingUtils.createTable(TEST_UTIL, tableName, getNumReplicas(), FAMILY);
128     admin.disableTable(tableName);
129 
130     // take an empty snapshot
131     admin.snapshot(emptySnapshot, tableName);
132 
133     // enable table and insert data
134     admin.enableTable(tableName);
135     SnapshotTestingUtils.loadData(TEST_UTIL, tableName, 500, FAMILY);
136     try (Table table = TEST_UTIL.getConnection().getTable(tableName)) {
137       snapshot0Rows = TEST_UTIL.countRows(table);
138     }
139     admin.disableTable(tableName);
140 
141     // take a snapshot
142     admin.snapshot(snapshotName0, tableName);
143 
144     // enable table and insert more data
145     admin.enableTable(tableName);
146     SnapshotTestingUtils.loadData(TEST_UTIL, tableName, 500, FAMILY);
147     try (Table table = TEST_UTIL.getConnection().getTable(tableName)) {
148       snapshot1Rows = TEST_UTIL.countRows(table);
149     }
150   }
151 
152   @After
153   public void tearDown() throws Exception {
154     TEST_UTIL.deleteTable(tableName);
155     SnapshotTestingUtils.deleteAllSnapshots(TEST_UTIL.getHBaseAdmin());
156     SnapshotTestingUtils.deleteArchiveDirectory(TEST_UTIL);
157   }
158 
159   @Test
160   public void testRestoreSnapshot() throws IOException {
161     SnapshotTestingUtils.verifyRowCount(TEST_UTIL, tableName, snapshot1Rows);
162     admin.disableTable(tableName);
163     admin.snapshot(snapshotName1, tableName);
164     // Restore from snapshot-0
165     admin.restoreSnapshot(snapshotName0);
166     admin.enableTable(tableName);
167     SnapshotTestingUtils.verifyRowCount(TEST_UTIL, tableName, snapshot0Rows);
168     SnapshotTestingUtils.verifyReplicasCameOnline(tableName, admin, getNumReplicas());
169 
170     // Restore from emptySnapshot
171     admin.disableTable(tableName);
172     admin.restoreSnapshot(emptySnapshot);
173     admin.enableTable(tableName);
174     SnapshotTestingUtils.verifyRowCount(TEST_UTIL, tableName, 0);
175     SnapshotTestingUtils.verifyReplicasCameOnline(tableName, admin, getNumReplicas());
176 
177     // Restore from snapshot-1
178     admin.disableTable(tableName);
179     admin.restoreSnapshot(snapshotName1);
180     admin.enableTable(tableName);
181     SnapshotTestingUtils.verifyRowCount(TEST_UTIL, tableName, snapshot1Rows);
182     SnapshotTestingUtils.verifyReplicasCameOnline(tableName, admin, getNumReplicas());
183 
184     // Restore from snapshot-1
185     TEST_UTIL.deleteTable(tableName);
186     admin.restoreSnapshot(snapshotName1);
187     SnapshotTestingUtils.verifyRowCount(TEST_UTIL, tableName, snapshot1Rows);
188     SnapshotTestingUtils.verifyReplicasCameOnline(tableName, admin, getNumReplicas());
189   }
190 
191   @Test(timeout = 300000)
192   public void testRestoreSnapshotAfterSplit() throws Exception {
193     Admin admin = null;
194     try {
195       admin = TEST_UTIL.getHBaseAdmin();
196       final int regionReplication = admin.getTableDescriptor(tableName).getRegionReplication();
197       // Region count before split
198       final int primaryRegionCountBeforeSplit = MetaTableAccessor
199           .getTableRegions(TEST_UTIL.getZooKeeperWatcher(), TEST_UTIL.getConnection(), tableName)
200           .size() / regionReplication;
201 
202       admin.split(tableName, "m".getBytes());
203       final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
204       // Wait for replica region to become online
205       TEST_UTIL.waitFor(60000, 500, new Waiter.Predicate<IOException>() {
206         @Override
207         public boolean evaluate() throws IOException {
208           return am.getRegionStates().getRegionByStateOfTable(tableName).get(RegionState.State.OPEN)
209               .size() == ((primaryRegionCountBeforeSplit + 1) * regionReplication);
210         }
211       });
212 
213       int regionCountAfterSplit = MetaTableAccessor
214           .getTableRegions(TEST_UTIL.getZooKeeperWatcher(), TEST_UTIL.getConnection(), tableName)
215           .size() / regionReplication;
216       // regionCountAfterSplit will contain parent region, so primaryregionCountBeforeSplit + 2
217       assertEquals(primaryRegionCountBeforeSplit + 2, regionCountAfterSplit);
218 
219       String snapshotName = "testRestoreSnapshotAfterSplit-snap";
220       // Create snapshot after table split
221       admin.snapshot(snapshotName, tableName);
222       assertEquals(1,
223         admin.listTableSnapshots("testtb-.*", "testRestoreSnapshotAfterSplit-snap*").size());
224       // Restore snapshot
225       admin.disableTable(tableName);
226       admin.restoreSnapshot(snapshotName);
227 
228       int regionCountAfterRestoreSnapshot = MetaTableAccessor
229           .getTableRegions(TEST_UTIL.getZooKeeperWatcher(), TEST_UTIL.getConnection(), tableName)
230           .size();
231       assertEquals(primaryRegionCountBeforeSplit + 2, regionCountAfterRestoreSnapshot);
232 
233       // Enable the table
234       admin.enableTable(tableName);
235       assertEquals((primaryRegionCountBeforeSplit + 1) * regionReplication,
236         am.getRegionStates().getRegionByStateOfTable(tableName).get(RegionState.State.OPEN).size());
237 
238     } finally {
239       if (admin != null) {
240         try {
241           admin.deleteTableSnapshots("testtb-.*", "testRestoreSnapshotAfterSplit-snap*");
242         } catch (SnapshotDoesNotExistException ignore) {
243         }
244         admin.close();
245       }
246     }
247   }
248 
249   protected int getNumReplicas() {
250     return 1;
251   }
252 
253   @Test
254   public void testRestoreSchemaChange() throws Exception {
255     byte[] TEST_FAMILY2 = Bytes.toBytes("cf2");
256 
257     HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);
258 
259     // Add one column family and put some data in it
260     admin.disableTable(tableName);
261     admin.addColumn(tableName, new HColumnDescriptor(TEST_FAMILY2));
262     admin.enableTable(tableName);
263     assertEquals(2, table.getTableDescriptor().getFamilies().size());
264     HTableDescriptor htd = admin.getTableDescriptor(tableName);
265     assertEquals(2, htd.getFamilies().size());
266     SnapshotTestingUtils.loadData(TEST_UTIL, tableName, 500, TEST_FAMILY2);
267     long snapshot2Rows = snapshot1Rows + 500;
268     assertEquals(snapshot2Rows, TEST_UTIL.countRows(table));
269     assertEquals(500, TEST_UTIL.countRows(table, TEST_FAMILY2));
270     Set<String> fsFamilies = getFamiliesFromFS(tableName);
271     assertEquals(2, fsFamilies.size());
272 
273     // Take a snapshot
274     admin.disableTable(tableName);
275     admin.snapshot(snapshotName2, tableName);
276 
277     // Restore the snapshot (without the cf)
278     admin.restoreSnapshot(snapshotName0);
279     admin.enableTable(tableName);
280     assertEquals(1, table.getTableDescriptor().getFamilies().size());
281     try {
282       TEST_UTIL.countRows(table, TEST_FAMILY2);
283       fail("family '" + Bytes.toString(TEST_FAMILY2) + "' should not exists");
284     } catch (NoSuchColumnFamilyException e) {
285       // expected
286     }
287     assertEquals(snapshot0Rows, TEST_UTIL.countRows(table));
288     htd = admin.getTableDescriptor(tableName);
289     assertEquals(1, htd.getFamilies().size());
290     fsFamilies = getFamiliesFromFS(tableName);
291     assertEquals(1, fsFamilies.size());
292 
293     // Restore back the snapshot (with the cf)
294     admin.disableTable(tableName);
295     admin.restoreSnapshot(snapshotName2);
296     admin.enableTable(tableName);
297     htd = admin.getTableDescriptor(tableName);
298     assertEquals(2, htd.getFamilies().size());
299     assertEquals(2, table.getTableDescriptor().getFamilies().size());
300     assertEquals(500, TEST_UTIL.countRows(table, TEST_FAMILY2));
301     assertEquals(snapshot2Rows, TEST_UTIL.countRows(table));
302     fsFamilies = getFamiliesFromFS(tableName);
303     assertEquals(2, fsFamilies.size());
304     table.close();
305   }
306 
307   @Test
308   public void testCloneSnapshotOfCloned() throws IOException, InterruptedException {
309     TableName clonedTableName =
310         TableName.valueOf("clonedtb-" + System.currentTimeMillis());
311     admin.cloneSnapshot(snapshotName0, clonedTableName);
312     SnapshotTestingUtils.verifyRowCount(TEST_UTIL, clonedTableName, snapshot0Rows);
313     SnapshotTestingUtils.verifyReplicasCameOnline(clonedTableName, admin, getNumReplicas());
314     admin.disableTable(clonedTableName);
315     admin.snapshot(snapshotName2, clonedTableName);
316     TEST_UTIL.deleteTable(clonedTableName);
317     waitCleanerRun();
318 
319     admin.cloneSnapshot(snapshotName2, clonedTableName);
320     SnapshotTestingUtils.verifyRowCount(TEST_UTIL, clonedTableName, snapshot0Rows);
321     SnapshotTestingUtils.verifyReplicasCameOnline(clonedTableName, admin, getNumReplicas());
322     TEST_UTIL.deleteTable(clonedTableName);
323   }
324 
325   @Test
326   public void testCloneAndRestoreSnapshot() throws IOException, InterruptedException {
327     TEST_UTIL.deleteTable(tableName);
328     waitCleanerRun();
329 
330     admin.cloneSnapshot(snapshotName0, tableName);
331     SnapshotTestingUtils.verifyRowCount(TEST_UTIL, tableName, snapshot0Rows);
332     SnapshotTestingUtils.verifyReplicasCameOnline(tableName, admin, getNumReplicas());
333     waitCleanerRun();
334 
335     admin.disableTable(tableName);
336     admin.restoreSnapshot(snapshotName0);
337     admin.enableTable(tableName);
338     SnapshotTestingUtils.verifyRowCount(TEST_UTIL, tableName, snapshot0Rows);
339     SnapshotTestingUtils.verifyReplicasCameOnline(tableName, admin, getNumReplicas());
340   }
341 
342   @Test
343   public void testCorruptedSnapshot() throws IOException, InterruptedException {
344     SnapshotTestingUtils.corruptSnapshot(TEST_UTIL, Bytes.toString(snapshotName0));
345     TableName cloneName = TableName.valueOf("corruptedClone-" + System.currentTimeMillis());
346     try {
347       admin.cloneSnapshot(snapshotName0, cloneName);
348       fail("Expected CorruptedSnapshotException, got succeeded cloneSnapshot()");
349     } catch (CorruptedSnapshotException e) {
350       // Got the expected corruption exception.
351       // check for no references of the cloned table.
352       assertFalse(admin.tableExists(cloneName));
353     } catch (Exception e) {
354       fail("Expected CorruptedSnapshotException got: " + e);
355     }
356   }
357 
358   @Test
359   public void testRestoreSnapshotAfterSplittingRegions() throws IOException, InterruptedException {
360     // HBASE-20008: Add a coprocessor to delay compactions of the daughter regions. To reproduce
361     // the NullPointerException, we need to delay compactions of the daughter regions after
362     // splitting region.
363     HTableDescriptor tableDescriptor = admin.getTableDescriptor(tableName);
364     tableDescriptor.addCoprocessor(DelayCompactionObserver.class.getName());
365     admin.disableTable(tableName);
366     admin.modifyTable(tableName, tableDescriptor);
367     admin.enableTable(tableName);
368 
369     List<HRegionInfo> regionInfos = admin.getTableRegions(tableName);
370     RegionReplicaUtil.removeNonDefaultRegions(regionInfos);
371 
372     // Split the first region
373     splitRegion(regionInfos.get(0));
374 
375     // Take a snapshot
376     admin.snapshot(snapshotName1, tableName);
377 
378     // Restore the snapshot
379     admin.disableTable(tableName);
380     admin.restoreSnapshot(snapshotName1);
381     admin.enableTable(tableName);
382 
383     SnapshotTestingUtils.verifyRowCount(TEST_UTIL, tableName, snapshot1Rows);
384   }
385 
386   public static class DelayCompactionObserver extends BaseRegionObserver {
387     @Override public InternalScanner preCompact(ObserverContext<RegionCoprocessorEnvironment> e,
388         final Store store, final InternalScanner scanner, final ScanType scanType)
389         throws IOException {
390 
391       try {
392         // Delay 5 seconds.
393         TimeUnit.SECONDS.sleep(5);
394       } catch (InterruptedException ex) {
395         throw new InterruptedIOException(ex.getMessage());
396       }
397 
398       return scanner;
399     }
400   }
401 
402   @Test
403   public void testOfflineRegionsShouldBeZeroAfterRestoreSnapshot() throws IOException,
404       InterruptedException {
405     // Load more data to split regions
406     SnapshotTestingUtils.loadData(TEST_UTIL, tableName, 2000, FAMILY);
407 
408     // Split regions
409     List<HRegionInfo> regions = admin.getTableRegions(tableName);
410     RegionReplicaUtil.removeNonDefaultRegions(regions);
411     splitRegion(regions.get(0));
412 
413     // Restore the snapshot
414     admin.disableTable(tableName);
415     admin.restoreSnapshot(snapshotName0);
416     admin.enableTable(tableName);
417 
418     int offlineRegions = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
419       .getRegionStates().getRegionByStateOfTable(tableName).get(RegionState.State.OFFLINE).size();
420 
421     assertEquals(0, offlineRegions);
422   }
423 
424   // ==========================================================================
425   //  Helpers
426   // ==========================================================================
427   private void waitCleanerRun() throws InterruptedException {
428     TEST_UTIL.getMiniHBaseCluster().getMaster().getHFileCleaner().choreForTesting();
429   }
430 
431   private Set<String> getFamiliesFromFS(final TableName tableName) throws IOException {
432     MasterFileSystem mfs = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterFileSystem();
433     Set<String> families = new HashSet<String>();
434     Path tableDir = FSUtils.getTableDir(mfs.getRootDir(), tableName);
435     for (Path regionDir: FSUtils.getRegionDirs(mfs.getFileSystem(), tableDir)) {
436       for (Path familyDir: FSUtils.getFamilyDirs(mfs.getFileSystem(), regionDir)) {
437         families.add(familyDir.getName());
438       }
439     }
440     return families;
441   }
442 
443   protected void splitRegion(final HRegionInfo regionInfo) throws IOException {
444     byte[][] splitPoints = Bytes.split(regionInfo.getStartKey(), regionInfo.getEndKey(), 1);
445     admin.split(regionInfo.getTable(), splitPoints[1]);
446   }
447 }