View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.mapreduce;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertTrue;
22  
23  import java.io.IOException;
24  import java.security.PrivilegedExceptionAction;
25  import java.util.ArrayList;
26  import java.util.Arrays;
27  import java.util.HashSet;
28  import java.util.List;
29  import java.util.Set;
30  import java.util.UUID;
31  
32  import org.apache.commons.logging.Log;
33  import org.apache.commons.logging.LogFactory;
34  import org.apache.hadoop.conf.Configurable;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.fs.FSDataOutputStream;
37  import org.apache.hadoop.fs.FileStatus;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.hbase.Cell;
41  import org.apache.hadoop.hbase.CellUtil;
42  import org.apache.hadoop.hbase.HBaseTestingUtility;
43  import org.apache.hadoop.hbase.HConstants;
44  import org.apache.hadoop.hbase.TableName;
45  import org.apache.hadoop.hbase.client.Admin;
46  import org.apache.hadoop.hbase.client.Connection;
47  import org.apache.hadoop.hbase.client.ConnectionFactory;
48  import org.apache.hadoop.hbase.client.Delete;
49  import org.apache.hadoop.hbase.client.HBaseAdmin;
50  import org.apache.hadoop.hbase.client.HTable;
51  import org.apache.hadoop.hbase.client.Result;
52  import org.apache.hadoop.hbase.client.ResultScanner;
53  import org.apache.hadoop.hbase.client.Scan;
54  import org.apache.hadoop.hbase.client.Table;
55  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
56  import org.apache.hadoop.hbase.io.hfile.HFile;
57  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
58  import org.apache.hadoop.hbase.protobuf.generated.VisibilityLabelsProtos.VisibilityLabelsResponse;
59  import org.apache.hadoop.hbase.security.User;
60  import org.apache.hadoop.hbase.security.visibility.Authorizations;
61  import org.apache.hadoop.hbase.security.visibility.CellVisibility;
62  import org.apache.hadoop.hbase.security.visibility.ScanLabelGenerator;
63  import org.apache.hadoop.hbase.security.visibility.SimpleScanLabelGenerator;
64  import org.apache.hadoop.hbase.security.visibility.VisibilityClient;
65  import org.apache.hadoop.hbase.security.visibility.VisibilityConstants;
66  import org.apache.hadoop.hbase.security.visibility.VisibilityController;
67  import org.apache.hadoop.hbase.security.visibility.VisibilityTestUtil;
68  import org.apache.hadoop.hbase.security.visibility.VisibilityUtils;
69  import org.apache.hadoop.hbase.testclassification.LargeTests;
70  import org.apache.hadoop.hbase.util.Bytes;
71  import org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter;
72  import org.apache.hadoop.util.Tool;
73  import org.apache.hadoop.util.ToolRunner;
74  import org.junit.AfterClass;
75  import org.junit.BeforeClass;
76  import org.junit.Test;
77  import org.junit.experimental.categories.Category;
78  
79  @Category(LargeTests.class)
80  public class TestImportTSVWithVisibilityLabels implements Configurable {
81  
82    private static final Log LOG = LogFactory.getLog(TestImportTSVWithVisibilityLabels.class);
83    protected static final String NAME = TestImportTsv.class.getSimpleName();
84    protected static HBaseTestingUtility util = new HBaseTestingUtility();
85  
86    /**
87     * Delete the tmp directory after running doMROnTableTest. Boolean. Default is
88     * false.
89     */
90    protected static final String DELETE_AFTER_LOAD_CONF = NAME + ".deleteAfterLoad";
91  
92    /**
93     * Force use of combiner in doMROnTableTest. Boolean. Default is true.
94     */
95    protected static final String FORCE_COMBINER_CONF = NAME + ".forceCombiner";
96  
97    private final String FAMILY = "FAM";
98    private final static String TOPSECRET = "topsecret";
99    private final static String PUBLIC = "public";
100   private final static String PRIVATE = "private";
101   private final static String CONFIDENTIAL = "confidential";
102   private final static String SECRET = "secret";
103   private static User SUPERUSER;
104   private static Configuration conf;
105 
106   @Override
107   public Configuration getConf() {
108     return util.getConfiguration();
109   }
110 
111   @Override
112   public void setConf(Configuration conf) {
113     throw new IllegalArgumentException("setConf not supported");
114   }
115 
116   @BeforeClass
117   public static void provisionCluster() throws Exception {
118     conf = util.getConfiguration();
119     SUPERUSER = User.createUserForTesting(conf, "admin", new String[] { "supergroup" });
120     conf.set("hbase.superuser", "admin,"+User.getCurrent().getName());
121     VisibilityTestUtil.enableVisiblityLabels(conf);
122     conf.setClass(VisibilityUtils.VISIBILITY_LABEL_GENERATOR_CLASS, SimpleScanLabelGenerator.class,
123         ScanLabelGenerator.class);
124     util.setJobWithoutMRCluster();
125     util.startMiniCluster();
126     // Wait for the labels table to become available
127     util.waitTableEnabled(VisibilityConstants.LABELS_TABLE_NAME.getName(), 50000);
128     createLabels();
129   }
130 
131   private static void createLabels() throws IOException, InterruptedException {
132     PrivilegedExceptionAction<VisibilityLabelsResponse> action =
133         new PrivilegedExceptionAction<VisibilityLabelsResponse>() {
134       @Override
135       public VisibilityLabelsResponse run() throws Exception {
136         String[] labels = { SECRET, TOPSECRET, CONFIDENTIAL, PUBLIC, PRIVATE };
137         try (Connection conn = ConnectionFactory.createConnection(conf)) {
138           VisibilityClient.addLabels(conn, labels);
139           LOG.info("Added labels ");
140         } catch (Throwable t) {
141           LOG.error("Error in adding labels" , t);
142           throw new IOException(t);
143         }
144         return null;
145       }
146     };
147     SUPERUSER.runAs(action);
148   }
149 
150   @AfterClass
151   public static void releaseCluster() throws Exception {
152     util.shutdownMiniCluster();
153   }
154 
155   @Test
156   public void testMROnTable() throws Exception {
157     String tableName = "test-" + UUID.randomUUID();
158 
159     // Prepare the arguments required for the test.
160     String[] args = new String[] {
161         "-D" + ImportTsv.MAPPER_CONF_KEY
162             + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
163         "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
164         "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName };
165     String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
166     util.createTable(TableName.valueOf(tableName), FAMILY);
167     doMROnTableTest(util, FAMILY, data, args, 1);
168     util.deleteTable(tableName);
169   }
170 
171   @Test
172   public void testMROnTableWithDeletes() throws Exception {
173     TableName tableName = TableName.valueOf("test-" + UUID.randomUUID());
174 
175     // Prepare the arguments required for the test.
176     String[] args = new String[] {
177         "-D" + ImportTsv.MAPPER_CONF_KEY + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
178         "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
179         "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
180     String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
181     util.createTable(tableName, FAMILY);
182     doMROnTableTest(util, FAMILY, data, args, 1);
183     issueDeleteAndVerifyData(tableName);
184     util.deleteTable(tableName);
185   }
186 
187   private void issueDeleteAndVerifyData(TableName tableName) throws IOException {
188     LOG.debug("Validating table after delete.");
189     Table table = new HTable(conf, tableName);
190     boolean verified = false;
191     long pause = conf.getLong("hbase.client.pause", 5 * 1000);
192     int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
193     for (int i = 0; i < numRetries; i++) {
194       try {
195         Delete d = new Delete(Bytes.toBytes("KEY"));
196         d.deleteFamily(Bytes.toBytes(FAMILY));
197         d.setCellVisibility(new CellVisibility("private&secret"));
198         table.delete(d);
199 
200         Scan scan = new Scan();
201         // Scan entire family.
202         scan.addFamily(Bytes.toBytes(FAMILY));
203         scan.setAuthorizations(new Authorizations("secret", "private"));
204         ResultScanner resScanner = table.getScanner(scan);
205         Result[] next = resScanner.next(5);
206         assertEquals(0, next.length);
207         verified = true;
208         break;
209       } catch (NullPointerException e) {
210         // If here, a cell was empty. Presume its because updates came in
211         // after the scanner had been opened. Wait a while and retry.
212       }
213       try {
214         Thread.sleep(pause);
215       } catch (InterruptedException e) {
216         // continue
217       }
218     }
219     table.close();
220     assertTrue(verified);
221   }
222 
223   @Test
224   public void testMROnTableWithBulkload() throws Exception {
225     String tableName = "test-" + UUID.randomUUID();
226     Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName), "hfiles");
227     // Prepare the arguments required for the test.
228     String[] args = new String[] {
229         "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
230         "-D" + ImportTsv.COLUMNS_CONF_KEY
231             + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
232         "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName };
233     String data = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";
234     util.createTable(TableName.valueOf(tableName), FAMILY);
235     doMROnTableTest(util, FAMILY, data, args, 1);
236     util.deleteTable(tableName);
237   }
238 
239   @Test
240   public void testBulkOutputWithTsvImporterTextMapper() throws Exception {
241     String table = "test-" + UUID.randomUUID();
242     String FAMILY = "FAM";
243     Path bulkOutputPath = new Path(util.getDataTestDirOnTestFS(table),"hfiles");
244     // Prepare the arguments required for the test.
245     String[] args =
246         new String[] {
247             "-D" + ImportTsv.MAPPER_CONF_KEY
248                 + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
249             "-D" + ImportTsv.COLUMNS_CONF_KEY
250                 + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
251             "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b",
252             "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + bulkOutputPath.toString(), table
253             };
254     String data = "KEY\u001bVALUE4\u001bVALUE8\u001bsecret&private\n";
255     doMROnTableTest(util, FAMILY, data, args, 4);
256     util.deleteTable(table);
257   }
258 
259   @Test
260   public void testMRWithOutputFormat() throws Exception {
261     String tableName = "test-" + UUID.randomUUID();
262     Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName), "hfiles");
263     // Prepare the arguments required for the test.
264     String[] args = new String[] {
265         "-D" + ImportTsv.MAPPER_CONF_KEY
266             + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
267         "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
268         "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
269         "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName };
270     String data = "KEY\u001bVALUE4\u001bVALUE8\u001bsecret&private\n";
271     util.createTable(TableName.valueOf(tableName), FAMILY);
272     doMROnTableTest(util, FAMILY, data, args, 1);
273     util.deleteTable(tableName);
274   }
275 
276   @Test
277   public void testBulkOutputWithInvalidLabels() throws Exception {
278     TableName tableName = TableName.valueOf("test-" + UUID.randomUUID());
279     Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
280     // Prepare the arguments required for the test.
281     String[] args =
282         new String[] { "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
283             "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
284             "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
285 
286     // 2 Data rows, one with valid label and one with invalid label
287     String data =
288         "KEY\u001bVALUE1\u001bVALUE2\u001bprivate\nKEY1\u001bVALUE1\u001bVALUE2\u001binvalid\n";
289     util.createTable(tableName, FAMILY);
290     doMROnTableTest(util, FAMILY, data, args, 1, 2);
291     util.deleteTable(tableName);
292   }
293 
294   @Test
295   public void testBulkOutputWithTsvImporterTextMapperWithInvalidLabels() throws Exception {
296     TableName tableName = TableName.valueOf("test-" + UUID.randomUUID());
297     Path hfiles = new Path(util.getDataTestDirOnTestFS(tableName.getNameAsString()), "hfiles");
298     // Prepare the arguments required for the test.
299     String[] args =
300         new String[] {
301             "-D" + ImportTsv.MAPPER_CONF_KEY
302                 + "=org.apache.hadoop.hbase.mapreduce.TsvImporterTextMapper",
303             "-D" + ImportTsv.BULK_OUTPUT_CONF_KEY + "=" + hfiles.toString(),
304             "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
305             "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", tableName.getNameAsString() };
306 
307     // 2 Data rows, one with valid label and one with invalid label
308     String data =
309         "KEY\u001bVALUE1\u001bVALUE2\u001bprivate\nKEY1\u001bVALUE1\u001bVALUE2\u001binvalid\n";
310     util.createTable(tableName, FAMILY);
311     doMROnTableTest(util, FAMILY, data, args, 1, 2);
312     util.deleteTable(tableName);
313   }
314 
315   protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
316       String[] args, int valueMultiplier) throws Exception {
317     return doMROnTableTest(util, family, data, args, valueMultiplier, -1);
318   }
319 
320 
321   /**
322    * Run an ImportTsv job and perform basic validation on the results. Returns
323    * the ImportTsv <code>Tool</code> instance so that other tests can inspect it
324    * for further validation as necessary. This method is static to insure
325    * non-reliance on instance's util/conf facilities.
326    *
327    * @param args
328    *          Any arguments to pass BEFORE inputFile path is appended.
329    * @param expectedKVCount Expected KV count. pass -1 to skip the kvcount check
330    *
331    * @return The Tool instance used to run the test.
332    */
333   protected static Tool doMROnTableTest(HBaseTestingUtility util, String family, String data,
334       String[] args, int valueMultiplier, int expectedKVCount) throws Exception {
335     TableName table = TableName.valueOf(args[args.length - 1]);
336     Configuration conf = new Configuration(util.getConfiguration());
337 
338     // populate input file
339     FileSystem fs = FileSystem.get(conf);
340     Path inputPath = fs.makeQualified(new Path(util
341         .getDataTestDirOnTestFS(table.getNameAsString()), "input.dat"));
342     FSDataOutputStream op = fs.create(inputPath, true);
343     if (data == null) {
344       data = "KEY\u001bVALUE1\u001bVALUE2\n";
345     }
346     op.write(Bytes.toBytes(data));
347     op.close();
348     LOG.debug(String.format("Wrote test data to file: %s", inputPath));
349 
350     if (conf.getBoolean(FORCE_COMBINER_CONF, true)) {
351       LOG.debug("Forcing combiner.");
352       conf.setInt("mapreduce.map.combine.minspills", 1);
353     }
354 
355     // run the import
356     List<String> argv = new ArrayList<String>(Arrays.asList(args));
357     argv.add(inputPath.toString());
358     Tool tool = new ImportTsv();
359     LOG.debug("Running ImportTsv with arguments: " + argv);
360     assertEquals(0, ToolRunner.run(conf, tool, argv.toArray(args)));
361 
362     // Perform basic validation. If the input args did not include
363     // ImportTsv.BULK_OUTPUT_CONF_KEY then validate data in the table.
364     // Otherwise, validate presence of hfiles.
365     boolean createdHFiles = false;
366     String outputPath = null;
367     for (String arg : argv) {
368       if (arg.contains(ImportTsv.BULK_OUTPUT_CONF_KEY)) {
369         createdHFiles = true;
370         // split '-Dfoo=bar' on '=' and keep 'bar'
371         outputPath = arg.split("=")[1];
372         break;
373       }
374     }
375     LOG.debug("validating the table " + createdHFiles);
376     if (createdHFiles)
377       validateHFiles(fs, outputPath, family, expectedKVCount);
378     else
379       validateTable(conf, table, family, valueMultiplier);
380 
381     if (conf.getBoolean(DELETE_AFTER_LOAD_CONF, true)) {
382       LOG.debug("Deleting test subdirectory");
383       util.cleanupDataTestDirOnTestFS(table.getNameAsString());
384     }
385     return tool;
386   }
387 
388   /**
389    * Confirm ImportTsv via HFiles on fs.
390    */
391   private static void validateHFiles(FileSystem fs, String outputPath, String family,
392       int expectedKVCount) throws IOException {
393 
394     // validate number and content of output columns
395     LOG.debug("Validating HFiles.");
396     Set<String> configFamilies = new HashSet<String>();
397     configFamilies.add(family);
398     Set<String> foundFamilies = new HashSet<String>();
399     int actualKVCount = 0;
400     for (FileStatus cfStatus : fs.listStatus(new Path(outputPath), new OutputFilesFilter())) {
401       LOG.debug("The output path has files");
402       String[] elements = cfStatus.getPath().toString().split(Path.SEPARATOR);
403       String cf = elements[elements.length - 1];
404       foundFamilies.add(cf);
405       assertTrue(String.format(
406           "HFile ouput contains a column family (%s) not present in input families (%s)", cf,
407           configFamilies), configFamilies.contains(cf));
408       for (FileStatus hfile : fs.listStatus(cfStatus.getPath())) {
409         assertTrue(String.format("HFile %s appears to contain no data.", hfile.getPath()),
410             hfile.getLen() > 0);
411         if (expectedKVCount > -1) {
412           actualKVCount += getKVCountFromHfile(fs, hfile.getPath());
413         }
414       }
415     }
416     if (expectedKVCount > -1) {
417       assertTrue(String.format(
418         "KV count in output hfile=<%d> doesn't match with expected KV count=<%d>", actualKVCount,
419         expectedKVCount), actualKVCount == expectedKVCount);
420     }
421   }
422 
423   /**
424    * Method returns the total KVs in given hfile
425    * @param fs File System
426    * @param p HFile path
427    * @return KV count in the given hfile
428    * @throws IOException
429    */
430   private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
431     Configuration conf = util.getConfiguration();
432     HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), conf);
433     reader.loadFileInfo();
434     HFileScanner scanner = reader.getScanner(false, false);
435     scanner.seekTo();
436     int count = 0;
437     do {
438       count++;
439     } while (scanner.next());
440     reader.close();
441     return count;
442   }
443 
444   /**
445    * Confirm ImportTsv via data in online table.
446    */
447   private static void validateTable(Configuration conf, TableName tableName, String family,
448       int valueMultiplier) throws IOException {
449 
450     LOG.debug("Validating table.");
451     Table table = new HTable(conf, tableName);
452     boolean verified = false;
453     long pause = conf.getLong("hbase.client.pause", 5 * 1000);
454     int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
455     for (int i = 0; i < numRetries; i++) {
456       try {
457         Scan scan = new Scan();
458         // Scan entire family.
459         scan.addFamily(Bytes.toBytes(family));
460         scan.setAuthorizations(new Authorizations("secret","private"));
461         ResultScanner resScanner = table.getScanner(scan);
462         Result[] next = resScanner.next(5);
463         assertEquals(1, next.length);
464         for (Result res : resScanner) {
465           LOG.debug("Getting results " + res.size());
466           assertTrue(res.size() == 2);
467           List<Cell> kvs = res.listCells();
468           assertTrue(CellUtil.matchingRow(kvs.get(0), Bytes.toBytes("KEY")));
469           assertTrue(CellUtil.matchingRow(kvs.get(1), Bytes.toBytes("KEY")));
470           assertTrue(CellUtil.matchingValue(kvs.get(0), Bytes.toBytes("VALUE" + valueMultiplier)));
471           assertTrue(CellUtil.matchingValue(kvs.get(1),
472               Bytes.toBytes("VALUE" + 2 * valueMultiplier)));
473           // Only one result set is expected, so let it loop.
474         }
475         verified = true;
476         break;
477       } catch (NullPointerException e) {
478         // If here, a cell was empty. Presume its because updates came in
479         // after the scanner had been opened. Wait a while and retry.
480       }
481       try {
482         Thread.sleep(pause);
483       } catch (InterruptedException e) {
484         // continue
485       }
486     }
487     table.close();
488     assertTrue(verified);
489   }
490 
491 }