View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver;
19  
20  import java.io.IOException;
21  import java.util.List;
22  import java.util.concurrent.CountDownLatch;
23  
24  import org.apache.hadoop.conf.Configuration;
25  import org.apache.hadoop.fs.FileSystem;
26  import org.apache.hadoop.fs.Path;
27  import org.apache.hadoop.hbase.HBaseTestingUtility;
28  import org.apache.hadoop.hbase.HColumnDescriptor;
29  import org.apache.hadoop.hbase.HTableDescriptor;
30  import org.apache.hadoop.hbase.KeyValue;
31  import org.apache.hadoop.hbase.testclassification.MediumTests;
32  import org.apache.hadoop.hbase.TableName;
33  import org.apache.hadoop.hbase.TableNotFoundException;
34  import org.apache.hadoop.hbase.client.Admin;
35  import org.apache.hadoop.hbase.client.Connection;
36  import org.apache.hadoop.hbase.client.HBaseAdmin;
37  import org.apache.hadoop.hbase.client.HTable;
38  import org.apache.hadoop.hbase.client.Put;
39  import org.apache.hadoop.hbase.client.Result;
40  import org.apache.hadoop.hbase.client.ResultScanner;
41  import org.apache.hadoop.hbase.client.Scan;
42  import org.apache.hadoop.hbase.io.hfile.HFile;
43  import org.apache.hadoop.hbase.io.hfile.HFileContext;
44  import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
45  import org.apache.hadoop.hbase.util.Bytes;
46  import org.junit.AfterClass;
47  import org.junit.Assert;
48  import org.junit.BeforeClass;
49  import org.junit.Test;
50  import org.junit.experimental.categories.Category;
51  
52  @Category(MediumTests.class)
53  public class TestScannerWithBulkload {
54    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
55  
56    @BeforeClass
57    public static void setUpBeforeClass() throws Exception {
58      TEST_UTIL.startMiniCluster(1);
59    }
60  
61    private static void createTable(Admin admin, TableName tableName) throws IOException {
62      HTableDescriptor desc = new HTableDescriptor(tableName);
63      HColumnDescriptor hcd = new HColumnDescriptor("col");
64      hcd.setMaxVersions(3);
65      desc.addFamily(hcd);
66      admin.createTable(desc);
67    }
68  
69    @Test
70    public void testBulkLoad() throws Exception {
71      TableName tableName = TableName.valueOf("testBulkLoad");
72      long l = System.currentTimeMillis();
73      HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
74      createTable(admin, tableName);
75      Scan scan = createScan();
76      final HTable table = init(admin, l, scan, tableName);
77      // use bulkload
78      final Path hfilePath = writeToHFile(l, "/temp/testBulkLoad/", "/temp/testBulkLoad/col/file",
79        false);
80      Configuration conf = TEST_UTIL.getConfiguration();
81      conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
82      final LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
83      bulkload.doBulkLoad(hfilePath, table);
84      ResultScanner scanner = table.getScanner(scan);
85      Result result = scanner.next();
86      result = scanAfterBulkLoad(scanner, result, "version2");
87      Put put0 = new Put(Bytes.toBytes("row1"));
88      put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
89          .toBytes("version3")));
90      table.put(put0);
91      admin.flush(tableName);
92      scanner = table.getScanner(scan);
93      result = scanner.next();
94      while (result != null) {
95        List<KeyValue> kvs = result.getColumn(Bytes.toBytes("col"), Bytes.toBytes("q"));
96        for (KeyValue _kv : kvs) {
97          if (Bytes.toString(_kv.getRow()).equals("row1")) {
98            System.out.println(Bytes.toString(_kv.getRow()));
99            System.out.println(Bytes.toString(_kv.getQualifier()));
100           System.out.println(Bytes.toString(_kv.getValue()));
101           Assert.assertEquals("version3", Bytes.toString(_kv.getValue()));
102         }
103       }
104       result = scanner.next();
105     }
106     scanner.close();
107     table.close();
108   }
109 
110   private Result scanAfterBulkLoad(ResultScanner scanner, Result result, String expctedVal)
111       throws IOException {
112     while (result != null) {
113       List<KeyValue> kvs = result.getColumn(Bytes.toBytes("col"), Bytes.toBytes("q"));
114       for (KeyValue _kv : kvs) {
115         if (Bytes.toString(_kv.getRow()).equals("row1")) {
116           System.out.println(Bytes.toString(_kv.getRow()));
117           System.out.println(Bytes.toString(_kv.getQualifier()));
118           System.out.println(Bytes.toString(_kv.getValue()));
119           Assert.assertEquals(expctedVal, Bytes.toString(_kv.getValue()));
120         }
121       }
122       result = scanner.next();
123     }
124     return result;
125   }
126 
127   // If nativeHFile is true, we will set cell seq id and MAX_SEQ_ID_KEY in the file.
128   // Else, we will set BULKLOAD_TIME_KEY.
129   private Path writeToHFile(long l, String hFilePath, String pathStr, boolean nativeHFile)
130       throws IOException {
131     FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration());
132     final Path hfilePath = new Path(hFilePath);
133     fs.mkdirs(hfilePath);
134     Path path = new Path(pathStr);
135     HFile.WriterFactory wf = HFile.getWriterFactoryNoCache(TEST_UTIL.getConfiguration());
136     Assert.assertNotNull(wf);
137     HFileContext context = new HFileContext();
138     HFile.Writer writer = wf.withPath(fs, path).withFileContext(context).create();
139     KeyValue kv = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l,
140         Bytes.toBytes("version2"));
141 
142     // Set cell seq id to test bulk load native hfiles.
143     if (nativeHFile) {
144       // Set a big seq id. Scan should not look at this seq id in a bulk loaded file.
145       // Scan should only look at the seq id appended at the bulk load time, and not skip
146       // this kv.
147       kv.setSequenceId(9999999);
148     }
149 
150     writer.append(kv);
151 
152     if (nativeHFile) {
153       // Set a big MAX_SEQ_ID_KEY. Scan should not look at this seq id in a bulk loaded file.
154       // Scan should only look at the seq id appended at the bulk load time, and not skip its
155       // kv.
156       writer.appendFileInfo(StoreFile.MAX_SEQ_ID_KEY, Bytes.toBytes(new Long(9999999)));
157     }
158     else {
159     writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
160     }
161     writer.close();
162     return hfilePath;
163   }
164 
165   private HTable init(HBaseAdmin admin, long l, Scan scan, TableName tableName) throws Exception {
166     Connection connection = TEST_UTIL.getConnection();
167     HTable table = (HTable) connection.getTable(tableName);
168     Put put0 = new Put(Bytes.toBytes("row1"));
169     put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
170         .toBytes("version0")));
171     table.put(put0);
172     admin.flush(tableName);
173     Put put1 = new Put(Bytes.toBytes("row2"));
174     put1.add(new KeyValue(Bytes.toBytes("row2"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
175         .toBytes("version0")));
176     table.put(put1);
177     admin.flush(tableName);
178     put0 = new Put(Bytes.toBytes("row1"));
179     put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
180         .toBytes("version1")));
181     table.put(put0);
182     admin.flush(tableName);
183     admin.compact(tableName);
184 
185     ResultScanner scanner = table.getScanner(scan);
186     Result result = scanner.next();
187     List<KeyValue> kvs = result.getColumn(Bytes.toBytes("col"), Bytes.toBytes("q"));
188     Assert.assertEquals(1, kvs.size());
189     Assert.assertEquals("version1", Bytes.toString(kvs.get(0).getValue()));
190     scanner.close();
191     return table;
192   }
193 
194   @Test
195   public void testBulkLoadWithParallelScan() throws Exception {
196     TableName tableName = TableName.valueOf("testBulkLoadWithParallelScan");
197       final long l = System.currentTimeMillis();
198     HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
199     createTable(admin, tableName);
200     Scan scan = createScan();
201     scan.setCaching(1);
202     final HTable table = init(admin, l, scan, tableName);
203     // use bulkload
204     final Path hfilePath = writeToHFile(l, "/temp/testBulkLoadWithParallelScan/",
205         "/temp/testBulkLoadWithParallelScan/col/file", false);
206     Configuration conf = TEST_UTIL.getConfiguration();
207     conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
208     final LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
209     ResultScanner scanner = table.getScanner(scan);
210     Result result = scanner.next();
211     // Create a scanner and then do bulk load
212     final CountDownLatch latch = new CountDownLatch(1);
213     new Thread() {
214       public void run() {
215         try {
216           Put put1 = new Put(Bytes.toBytes("row5"));
217           put1.add(new KeyValue(Bytes.toBytes("row5"), Bytes.toBytes("col"), Bytes.toBytes("q"), l,
218               Bytes.toBytes("version0")));
219           table.put(put1);
220           bulkload.doBulkLoad(hfilePath, (HTable) table);
221           latch.countDown();
222         } catch (TableNotFoundException e) {
223         } catch (IOException e) {
224         }
225       }
226     }.start();
227     latch.await();
228     // By the time we do next() the bulk loaded files are also added to the kv
229     // scanner
230     scanAfterBulkLoad(scanner, result, "version1");
231     scanner.close();
232     table.close();
233 
234   }
235 
236   @Test
237   public void testBulkLoadNativeHFile() throws Exception {
238     TableName tableName = TableName.valueOf("testBulkLoadNativeHFile");
239     long l = System.currentTimeMillis();
240     HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
241     createTable(admin, tableName);
242     Scan scan = createScan();
243     final HTable table = init(admin, l, scan, tableName);
244     // use bulkload
245     final Path hfilePath = writeToHFile(l, "/temp/testBulkLoadNativeHFile/",
246       "/temp/testBulkLoadNativeHFile/col/file", true);
247     Configuration conf = TEST_UTIL.getConfiguration();
248     conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
249     final LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
250     bulkload.doBulkLoad(hfilePath, table);
251     ResultScanner scanner = table.getScanner(scan);
252     Result result = scanner.next();
253     // We had 'version0', 'version1' for 'row1,col:q' in the table.
254     // Bulk load added 'version2'  scanner should be able to see 'version2'
255     result = scanAfterBulkLoad(scanner, result, "version2");
256     Put put0 = new Put(Bytes.toBytes("row1"));
257     put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l, Bytes
258         .toBytes("version3")));
259     table.put(put0);
260     admin.flush(tableName);
261     scanner = table.getScanner(scan);
262     result = scanner.next();
263     while (result != null) {
264       List<KeyValue> kvs = result.getColumn(Bytes.toBytes("col"), Bytes.toBytes("q"));
265       for (KeyValue _kv : kvs) {
266         if (Bytes.toString(_kv.getRow()).equals("row1")) {
267           System.out.println(Bytes.toString(_kv.getRow()));
268           System.out.println(Bytes.toString(_kv.getQualifier()));
269           System.out.println(Bytes.toString(_kv.getValue()));
270           Assert.assertEquals("version3", Bytes.toString(_kv.getValue()));
271         }
272       }
273       result = scanner.next();
274     }
275     scanner.close();
276     table.close();
277   }
278 
279   private Scan createScan() {
280     Scan scan = new Scan();
281     scan.setMaxVersions(3);
282     return scan;
283   }
284 
285   @AfterClass
286   public static void tearDownAfterClass() throws Exception {
287     TEST_UTIL.shutdownMiniCluster();
288   }
289 }