View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import static org.junit.Assert.assertEquals;
22  import static org.junit.Assert.assertFalse;
23  import static org.junit.Assert.assertTrue;
24  
25  import java.io.IOException;
26  import java.util.Arrays;
27  import java.util.HashMap;
28  import java.util.HashSet;
29  import java.util.Iterator;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.Set;
33  import java.util.UUID;
34  
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.conf.Configurable;
38  import org.apache.hadoop.conf.Configuration;
39  import org.apache.hadoop.fs.FSDataOutputStream;
40  import org.apache.hadoop.fs.FileStatus;
41  import org.apache.hadoop.fs.FileSystem;
42  import org.apache.hadoop.fs.Path;
43  import org.apache.hadoop.hbase.Cell;
44  import org.apache.hadoop.hbase.CellUtil;
45  import org.apache.hadoop.hbase.HBaseConfiguration;
46  import org.apache.hadoop.hbase.HBaseTestingUtility;
47  import org.apache.hadoop.hbase.HConstants;
48  import org.apache.hadoop.hbase.testclassification.LargeTests;
49  import org.apache.hadoop.hbase.TableName;
50  import org.apache.hadoop.hbase.TableNotFoundException;
51  import org.apache.hadoop.hbase.client.HTable;
52  import org.apache.hadoop.hbase.client.Result;
53  import org.apache.hadoop.hbase.client.ResultScanner;
54  import org.apache.hadoop.hbase.client.Scan;
55  import org.apache.hadoop.hbase.client.Table;
56  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
57  import org.apache.hadoop.hbase.io.hfile.HFile;
58  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
59  import org.apache.hadoop.hbase.util.Bytes;
60  import org.apache.hadoop.io.Text;
61  import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;
62  import org.apache.hadoop.mapreduce.Job;
63  import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
64  import org.apache.hadoop.util.GenericOptionsParser;
65  import org.apache.hadoop.util.Tool;
66  import org.apache.hadoop.util.ToolRunner;
67  import org.junit.AfterClass;
68  import org.junit.Before;
69  import org.junit.BeforeClass;
70  import org.junit.Rule;
71  import org.junit.Test;
72  import org.junit.experimental.categories.Category;
73  import org.junit.rules.ExpectedException;
74  
@Category(LargeTests.class)
public class TestImportTsv implements Configurable {

  private static final Log LOG = LogFactory.getLog(TestImportTsv.class);
  protected static final String NAME = TestImportTsv.class.getSimpleName();
  protected static HBaseTestingUtility util = new HBaseTestingUtility();

  // Delete the tmp directory after running doMROnTableTest. Boolean. Default is true.
  protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";

  /**
   * Force use of combiner in doMROnTableTest. Boolean. Default is true.
   */
  protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";

  // Column family name used by every table these tests create.
  private final String FAMILY = "FAM";
  // Per-test table name; regenerated in setup() so tests never collide.
  private String table;
  // "-Dkey=value" style arguments handed to ImportTsv by doMROnTableTest.
  private Map<String, String> args;

  @Rule
  public ExpectedException exception = ExpectedException.none();
96  
97    public Configuration getConf() {
98      return util.getConfiguration();
99    }
100 
101   public void setConf(Configuration conf) {
102     throw new IllegalArgumentException("setConf not supported");
103   }
104 
  /** Spin up the shared HBase mini-cluster once for the whole test class. */
  @BeforeClass
  public static void provisionCluster() throws Exception {
    // Presumably runs MR jobs in-process instead of on a MR mini-cluster — see
    // HBaseTestingUtility for the exact semantics.
    util.setJobWithoutMRCluster();
    util.startMiniCluster();
  }

  /** Tear down the mini-cluster after the last test has run. */
  @AfterClass
  public static void releaseCluster() throws Exception {
    util.shutdownMiniCluster();
  }
115 
  /** Create a fresh table name and default ImportTsv arguments before each test. */
  @Before
  public void setup() throws Exception {
    table = "test-" + UUID.randomUUID();
    args = new HashMap<String, String>();
    // Prepare the arguments required for the test: two-column family FAM and
    // the default ESC (\u001b) separator.
    args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,FAM:A,FAM:B");
    args.put(ImportTsv.SEPARATOR_CONF_KEY, "\u001b");
  }
124 
125   @Test
126   public void testMROnTable() throws Exception {
127     util.createTable(TableName.valueOf(table), FAMILY);
128     doMROnTableTest(null, 1);
129     util.deleteTable(table);
130   }
131   
132   @Test
133   public void testMROnTableWithTimestamp() throws Exception {
134     util.createTable(TableName.valueOf(table), FAMILY);
135     args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
136     args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
137     String data = "KEY,1234,VALUE1,VALUE2\n";
138 
139     doMROnTableTest(data, 1);
140     util.deleteTable(table);
141   }
142 
143   @Test
144   public void testMROnTableWithCustomMapper()
145   throws Exception {
146     util.createTable(TableName.valueOf(table), FAMILY);
147     args.put(ImportTsv.MAPPER_CONF_KEY,
148         "org.apache.hadoop.hbase.mapreduce.TsvImporterCustomTestMapper");
149 
150     doMROnTableTest(null, 3);
151     util.deleteTable(table);
152   }
153   
154   @Test
155   public void testBulkOutputWithoutAnExistingTable() throws Exception {
156     // Prepare the arguments required for the test.
157     Path hfiles = new Path(util.getDataTestDirOnTestFS(table), "hfiles");
158     args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
159 
160     doMROnTableTest(null, 3);
161     util.deleteTable(table);
162   }
163 
164   @Test
165   public void testBulkOutputWithAnExistingTable() throws Exception {
166     util.createTable(TableName.valueOf(table), FAMILY);
167 
168     // Prepare the arguments required for the test.
169     Path hfiles = new Path(util.getDataTestDirOnTestFS(table), "hfiles");
170     args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
171 
172     doMROnTableTest(null, 3);
173     util.deleteTable(table);
174   }
175   
176   @Test
177   public void testBulkOutputWithAnExistingTableNoStrictTrue() throws Exception {
178     util.createTable(TableName.valueOf(table), FAMILY);
179 
180     // Prepare the arguments required for the test.
181     Path hfiles = new Path(util.getDataTestDirOnTestFS(table), "hfiles");
182     args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
183     args.put(ImportTsv.NO_STRICT_COL_FAMILY, "true");
184     doMROnTableTest(null, 3);
185     util.deleteTable(table);
186   }
187 
188   @Test
189   public void testJobConfigurationsWithTsvImporterTextMapper() throws Exception {
190     Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(table),"hfiles");
191     String INPUT_FILE = "InputFile1.csv";
192     // Prepare the arguments required for the test.
193     String[] args =
194         new String[] {
195             "-D" + ImportTsv.MAPPER_CONF_KEY
196                 + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
197             "-D" + ImportTsv.COLUMNS_CONF_KEY
198                 + "=HBASE_ROW_KEY,FAM:A,FAM:B",
199             "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,",
200             "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(), table,
201             INPUT_FILE
202             };
203     Configuration conf = new Configuration(util.getConfiguration());
204     GenericOptionsParser opts = new GenericOptionsParser(conf, args);
205     args = opts.getRemainingArgs();
206     assertEquals("running test job configuration failed.", 0,
207         ToolRunner.run(conf, new ImportTsv() {
208           @Override
209           public int run(String[] args) throws Exception {
210             Job job = createSubmittableJob(getConf(), args);
211             assertTrue(job.getMapperClass().equals(TsvImporterTextMapper.class));
212             assertTrue(job.getReducerClass().equals(TextSortReducer.class));
213             assertTrue(job.getMapOutputValueClass().equals(Text.class));
214             return 0;
215           }
216         }, args));
217     // Delete table created by createSubmittableJob.
218     util.deleteTable(table);
219   }
220 
221   @Test
222   public void testBulkOutputWithTsvImporterTextMapper() throws Exception {
223     Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(table),"hfiles");
224     args.put(ImportTsv.MAPPER_CONF_KEY, "org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper");
225     args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
226     String data = "KEY\u001bVALUE4\u001bVALUE8\n";
227     doMROnTableTest(data, 4);
228     util.deleteTable(table);
229   }
230 
231   @Test
232   public void testWithoutAnExistingTableAndCreateTableSetToNo() throws Exception {
233     String[] args = new String[] { table, "/inputFile" };
234 
235     Configuration conf = new Configuration(util.getConfiguration());
236     conf.set(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,FAM:A");
237     conf.set(ImportTsv.BULK_OUTPUT_CONF_KEY, "/output");
238     conf.set(ImportTsv.CREATE_TABLE_CONF_KEY, "no");
239     exception.expect(TableNotFoundException.class);
240     assertEquals("running test job configuration failed.", 0,
241         ToolRunner.run(conf, new ImportTsv() {
242           @Override public int run(String[] args) throws Exception {
243             createSubmittableJob(getConf(), args);
244             return 0;
245           }
246         }, args));
247   }
248 
249   @Test
250   public void testMRWithoutAnExistingTable() throws Exception {
251     String[] args =
252         new String[] { table, "/inputFile" };
253 
254     exception.expect(TableNotFoundException.class);
255     assertEquals("running test job configuration failed.", 0, ToolRunner.run(
256         new Configuration(util.getConfiguration()),
257         new ImportTsv() {
258           @Override
259           public int run(String[] args) throws Exception {
260             createSubmittableJob(getConf(), args);
261             return 0;
262           }
263         }, args));
264   }
265 
266   @Test
267   public void testJobConfigurationsWithDryMode() throws Exception {
268     Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(table),"hfiles");
269     String INPUT_FILE = "InputFile1.csv";
270     // Prepare the arguments required for the test.
271     String[] argsArray = new String[] {
272         "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B",
273         "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=,",
274         "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(),
275         "-D" + ImportTsv.DRY_RUN_CONF_KEY + "=true",
276         table,
277         INPUT_FILE };
278     assertEquals("running test job configuration failed.", 0, ToolRunner.run(
279         new Configuration(util.getConfiguration()),
280         new ImportTsv() {
281           @Override
282           public int run(String[] args) throws Exception {
283             Job job = createSubmittableJob(getConf(), args);
284             assertTrue(job.getOutputFormatClass().equals(NullOutputFormat.class));
285             return 0;
286           }
287         }, argsArray));
288     // Delete table created by createSubmittableJob.
289     util.deleteTable(table);
290   }
291 
292   @Test
293   public void testDryModeWithoutBulkOutputAndTableExists() throws Exception {
294     util.createTable(TableName.valueOf(table), FAMILY);
295     args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
296     doMROnTableTest(null, 1);
297     // Dry mode should not delete an existing table. If it's not present,
298     // this will throw TableNotFoundException.
299     util.deleteTable(table);
300   }
301 
302   /**
303    * If table is not present in non-bulk mode, dry run should fail just like
304    * normal mode.
305    */
306   @Test
307   public void testDryModeWithoutBulkOutputAndTableDoesNotExists() throws Exception {
308     args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
309     exception.expect(TableNotFoundException.class);
310     doMROnTableTest(null, 1);
311   }
312 
313   @Test public void testDryModeWithBulkOutputAndTableExists() throws Exception {
314     util.createTable(TableName.valueOf(table), FAMILY);
315     // Prepare the arguments required for the test.
316     Path hfiles = new Path(util.getDataTestDirOnTestFS(table), "hfiles");
317     args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
318     args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
319     doMROnTableTest(null, 1);
320     // Dry mode should not delete an existing table. If it's not present,
321     // this will throw TableNotFoundException.
322     util.deleteTable(table);
323   }
324 
325   /**
326    * If table is not present in bulk mode and create.table is not set to yes,
327    * import should fail with TableNotFoundException.
328    */
329   @Test
330   public void testDryModeWithBulkOutputAndTableDoesNotExistsCreateTableSetToNo() throws
331       Exception {
332     // Prepare the arguments required for the test.
333     Path hfiles = new Path(util.getDataTestDirOnTestFS(table), "hfiles");
334     args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
335     args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
336     args.put(ImportTsv.CREATE_TABLE_CONF_KEY, "no");
337     exception.expect(TableNotFoundException.class);
338     doMROnTableTest(null, 1);
339   }
340 
341   @Test
342   public void testDryModeWithBulkModeAndTableDoesNotExistsCreateTableSetToYes() throws Exception {
343     // Prepare the arguments required for the test.
344     Path hfiles = new Path(util.getDataTestDirOnTestFS(table), "hfiles");
345     args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, hfiles.toString());
346     args.put(ImportTsv.DRY_RUN_CONF_KEY, "true");
347     args.put(ImportTsv.CREATE_TABLE_CONF_KEY, "yes");
348     doMROnTableTest(null, 1);
349     // Verify temporary table was deleted.
350     exception.expect(TableNotFoundException.class);
351     util.deleteTable(table);
352   }
353 
354   /**
355    * If there are invalid data rows as inputs, then only those rows should be ignored.
356    */
357   @Test
358   public void testTsvImporterTextMapperWithInvalidData() throws Exception {
359     Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(table), "hfiles");
360     args.put(ImportTsv.MAPPER_CONF_KEY, "org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper");
361     args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
362     args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
363     args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
364     // 3 Rows of data as input. 2 Rows are valid and 1 row is invalid as it doesn't have TS
365     String data = "KEY,1234,VALUE1,VALUE2\nKEY\nKEY,1235,VALUE1,VALUE2\n";
366     doMROnTableTest(data, 1, 4);
367     util.deleteTable(table);
368   }
369   
370   @Test
371   public void testSkipEmptyColumns() throws Exception {
372     Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(table), "hfiles");
373     args.put(ImportTsv.BULK_OUTPUT_CONF_KEY, bulkOutputPath.toString());
374     args.put(ImportTsv.COLUMNS_CONF_KEY, "HBASE_ROW_KEY,HBASE_TS_KEY,FAM:A,FAM:B");
375     args.put(ImportTsv.SEPARATOR_CONF_KEY, ",");
376     args.put(ImportTsv.SKIP_EMPTY_COLUMNS, "true");
377     // 2 Rows of data as input. Both rows are valid and only 3 columns are no-empty among 4
378     String data = "KEY,1234,VALUE1,VALUE2\nKEY,1235,,VALUE2\n";
379     doMROnTableTest(util, table, FAMILY, data, args, 1, 3);
380     util.deleteTable(table);
381   }
382 
383   private Tool doMROnTableTest(String data, int valueMultiplier,int expectedKVCount)
384       throws Exception {
385     return doMROnTableTest(util, table, FAMILY, data, args, valueMultiplier,expectedKVCount);
386   }
387 
388   private Tool doMROnTableTest(String data, int valueMultiplier) throws Exception {
389     return doMROnTableTest(util, table, FAMILY, data, args, valueMultiplier,-1);
390   }
391 
392   protected static Tool doMROnTableTest(HBaseTestingUtility util, String table,
393       String family, String data, Map<String, String> args) throws Exception {
394     return doMROnTableTest(util, table, family, data, args, 1,-1);
395   }
396 
397   /**
398    * Run an ImportTsv job and perform basic validation on the results.
399    * Returns the ImportTsv <code>Tool</code> instance so that other tests can
400    * inspect it for further validation as necessary. This method is static to
401    * insure non-reliance on instance's util/conf facilities.
402    * @param args Any arguments to pass BEFORE inputFile path is appended.
403    * @return The Tool instance used to run the test.
404    */
405   protected static Tool doMROnTableTest(HBaseTestingUtility util, String table,
406       String family, String data, Map<String, String> args, int valueMultiplier,int expectedKVCount)
407   throws Exception {
408     Configuration conf = new Configuration(util.getConfiguration());
409 
410     // populate input file
411     FileSystem fs = FileSystem.get(conf);
412     Path inputPath = fs.makeQualified(new Path(util.getDataTestDirOnTestFS(table), "input.dat"));
413     FSDataOutputStream op = fs.create(inputPath, true);
414     if (data == null) {
415       data = "KEY\u001bVALUE1\u001bVALUE2\n";
416     }
417     op.write(Bytes.toBytes(data));
418     op.close();
419     LOG.debug(String.format("Wrote test data to file: %s", inputPath));
420 
421     if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
422       LOG.debug("Forcing combiner.");
423       conf.setInt("mapreduce.map.combine.minspills", 1);
424     }
425 
426     // Build args array.
427     String[] argsArray = new String[args.size() + 2];
428     Iterator it = args.entrySet().iterator();
429     int i = 0;
430     while (it.hasNext()) {
431       Map.Entry pair = (Map.Entry) it.next();
432       argsArray[i] = "-D" + pair.getKey() + "=" + pair.getValue();
433       i++;
434     }
435     argsArray[i] = table;
436     argsArray[i + 1] = inputPath.toString();
437 
438     // run the import
439     Tool tool = new ImportTsv();
440     LOG.debug("Running ImportTsv with arguments: " + Arrays.toString(argsArray));
441     assertEquals(0, ToolRunner.run(conf, tool, argsArray));
442 
443     // Perform basic validation. If the input args did not include
444     // ImportTsv.BULK_OUTPUT_CONF_KEY then validate data in the table.
445     // Otherwise, validate presence of hfiles.
446     boolean isDryRun = args.containsKey(ImportTsv.DRY_RUN_CONF_KEY) &&
447         "true".equalsIgnoreCase(args.get(ImportTsv.DRY_RUN_CONF_KEY));
448     if (args.containsKey(ImportTsv.BULK_OUTPUT_CONF_KEY)) {
449       if (isDryRun) {
450         assertFalse(String.format("Dry run mode, %s should not have been created.",
451                  ImportTsv.BULK_OUTPUT_CONF_KEY),
452             fs.exists(new Path(ImportTsv.BULK_OUTPUT_CONF_KEY)));
453       } else {
454         validateHFiles(fs, args.get(ImportTsv.BULK_OUTPUT_CONF_KEY), family,expectedKVCount);
455       }
456     } else {
457       validateTable(conf, TableName.valueOf(table), family, valueMultiplier, isDryRun);
458     }
459 
460     if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
461       LOG.debug("Deleting test subdirectory");
462       util.cleanupDataTestDirOnTestFS(table);
463     }
464     return tool;
465   }
466 
467   /**
468    * Confirm ImportTsv via data in online table.
469    */
470   private static void validateTable(Configuration conf, TableName tableName,
471       String family, int valueMultiplier, boolean isDryRun) throws IOException {
472 
473     LOG.debug("Validating table.");
474     Table table = new HTable(conf, tableName);
475     boolean verified = false;
476     long pause = conf.getLong("hbase.client.pause", 5 * 1000);
477     int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
478     for (int i = 0; i < numRetries; i++) {
479       try {
480         Scan scan = new Scan();
481         // Scan entire family.
482         scan.addFamily(Bytes.toBytes(family));
483         ResultScanner resScanner = table.getScanner(scan);
484         int numRows = 0;
485         for (Result res : resScanner) {
486           numRows++;
487           assertEquals(2, res.size());
488           List<Cell> kvs = res.listCells();
489           assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
490           assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
491           assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
492           assertTrue(CellUtil.matchingValue(kvs.get(1), Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
493           // Only one result set is expected, so let it loop.
494         }
495         if (isDryRun) {
496           assertEquals(0, numRows);
497         } else {
498           assertEquals(1, numRows);
499         }
500         verified = true;
501         break;
502       } catch (NullPointerException e) {
503         // If here, a cell was empty. Presume its because updates came in
504         // after the scanner had been opened. Wait a while and retry.
505       }
506       try {
507         Thread.sleep(pause);
508       } catch (InterruptedException e) {
509         // continue
510       }
511     }
512     table.close();
513     assertTrue(verified);
514   }
515 
516   /**
517    * Confirm ImportTsv via HFiles on fs.
518    */
519   private static void validateHFiles(FileSystem fs, String outputPath, String family,
520       int expectedKVCount) throws IOException {
521     // validate number and content of output columns
522     LOG.debug("Validating HFiles.");
523     Set<String> configFamilies = new HashSet<String>();
524     configFamilies.add(family);
525     Set<String> foundFamilies = new HashSet<String>();
526     int actualKVCount = 0;
527     for (FileStatus cfStatus : fs.listStatus(new Path(outputPath), new OutputFilesFilter())) {
528       String[] elements = cfStatus.getPath().toString().split(Path.SEPARATOR);
529       String cf = elements[elements.length - 1];
530       foundFamilies.add(cf);
531       assertTrue(
532         String.format(
533           "HFile output contains a column family (%s) not present in input families (%s)",
534           cf, configFamilies),
535           configFamilies.contains(cf));
536       for (FileStatus hfile : fs.listStatus(cfStatus.getPath())) {
537         assertTrue(
538           String.format("HFile %s appears to contain no data.", hfile.getPath()),
539           hfile.getLen() > 0);
540         // count the number of KVs from all the hfiles
541         if (expectedKVCount > -1) {
542           actualKVCount += getKVCountFromHfile(fs, hfile.getPath());
543         }
544       }
545     }
546     assertTrue(String.format("HFile output does not contain the input family '%s'.", family),
547         foundFamilies.contains(family));
548     if (expectedKVCount > -1) {
549       assertTrue(String.format(
550         "KV count in ouput hfile=<%d> doesn't match with expected KV count=<%d>", actualKVCount,
551         expectedKVCount), actualKVCount == expectedKVCount);
552     }
553   }
554 
555   /**
556    * Method returns the total KVs in given hfile
557    * @param fs File System
558    * @param p HFile path
559    * @return KV count in the given hfile
560    * @throws IOException
561    */
562   private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
563     Configuration conf = util.getConfiguration();
564     HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), conf);
565     reader.loadFileInfo();
566     HFileScanner scanner = reader.getScanner(false, false);
567     scanner.seekTo();
568     int count = 0;
569     do {
570       count++;
571     } while (scanner.next());
572     reader.close();
573     return count;
574   }
575 }
576