View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.util;
20  
21  import com.google.common.collect.Lists;
22  
23  import java.io.FileNotFoundException;
24  import java.io.IOException;
25  import java.lang.reflect.InvocationTargetException;
26  import java.lang.reflect.Method;
27  import java.net.URI;
28  import java.net.URISyntaxException;
29  import java.util.List;
30  import java.util.Locale;
31  import java.util.Map;
32  import java.util.concurrent.ConcurrentHashMap;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.HadoopIllegalArgumentException;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.fs.FSDataOutputStream;
39  import org.apache.hadoop.fs.FileStatus;
40  import org.apache.hadoop.fs.FileSystem;
41  import org.apache.hadoop.fs.LocatedFileStatus;
42  import org.apache.hadoop.fs.Path;
43  import org.apache.hadoop.fs.PathFilter;
44  import org.apache.hadoop.fs.RemoteIterator;
45  import org.apache.hadoop.fs.permission.FsPermission;
46  import org.apache.hadoop.hbase.HConstants;
47  import org.apache.hadoop.hbase.TableName;
48  import org.apache.hadoop.hbase.classification.InterfaceAudience;
49  import org.apache.hadoop.ipc.RemoteException;
50  
51  /**
52   * Utility methods for interacting with the underlying file system.
53   */
54  @InterfaceAudience.Private
55  public abstract class CommonFSUtils {
56    private static final Log LOG = LogFactory.getLog(CommonFSUtils.class);
57  
58    /** Parameter name for HBase WAL directory */
59    public static final String HBASE_WAL_DIR = "hbase.wal.dir";
60  
61    /** Parameter to disable stream capability enforcement checks */
62    public static final String UNSAFE_STREAM_CAPABILITY_ENFORCE =
63      "hbase.unsafe.stream.capability.enforce";
64  
65    /** Full access permissions (starting point for a umask) */
66    public static final String FULL_RWX_PERMISSIONS = "777";
67  
68    protected CommonFSUtils() {
69      super();
70    }
71  
72    /**
73     * Compare of path component. Does not consider schema; i.e. if schemas
74     * different but <code>path</code> starts with <code>rootPath</code>,
75     * then the function returns true
76     * @param rootPath value to check for
77     * @param path subject to check
78     * @return True if <code>path</code> starts with <code>rootPath</code>
79     */
80    public static boolean isStartingWithPath(final Path rootPath, final String path) {
81      String uriRootPath = rootPath.toUri().getPath();
82      String tailUriPath = (new Path(path)).toUri().getPath();
83      return tailUriPath.startsWith(uriRootPath);
84    }
85  
86    /**
87     * Compare path component of the Path URI; e.g. if hdfs://a/b/c and /a/b/c, it will compare the
88     * '/a/b/c' part. Does not consider schema; i.e. if schemas different but path or subpath matches,
89     * the two will equate.
90     * @param pathToSearch Path we will be trying to match against.
91     * @param pathTail what to match
92     * @return True if <code>pathTail</code> is tail on the path of <code>pathToSearch</code>
93     */
94    public static boolean isMatchingTail(final Path pathToSearch, String pathTail) {
95      return isMatchingTail(pathToSearch, new Path(pathTail));
96    }
97  
98    /**
99     * Compare path component of the Path URI; e.g. if hdfs://a/b/c and /a/b/c, it will compare the
100    * '/a/b/c' part. If you passed in 'hdfs://a/b/c and b/c, it would return true.  Does not consider
101    * schema; i.e. if schemas different but path or subpath matches, the two will equate.
102    * @param pathToSearch Path we will be trying to match agains against
103    * @param pathTail what to match
104    * @return True if <code>pathTail</code> is tail on the path of <code>pathToSearch</code>
105    */
106   public static boolean isMatchingTail(final Path pathToSearch, final Path pathTail) {
107     if (pathToSearch.depth() != pathTail.depth()) {
108       return false;
109     }
110     Path tailPath = pathTail;
111     String tailName;
112     Path toSearch = pathToSearch;
113     String toSearchName;
114     boolean result = false;
115     do {
116       tailName = tailPath.getName();
117       if (tailName == null || tailName.length() <= 0) {
118         result = true;
119         break;
120       }
121       toSearchName = toSearch.getName();
122       if (toSearchName == null || toSearchName.length() <= 0) {
123         break;
124       }
125       // Move up a parent on each path for next go around.  Path doesn't let us go off the end.
126       tailPath = tailPath.getParent();
127       toSearch = toSearch.getParent();
128     } while(tailName.equals(toSearchName));
129     return result;
130   }
131 
132   /**
133    * Delete if exists.
134    * @param fs filesystem object
135    * @param dir directory to delete
136    * @return True if deleted <code>dir</code>
137    * @throws IOException e
138    */
139   public static boolean deleteDirectory(final FileSystem fs, final Path dir) throws IOException {
140     return fs.exists(dir) && fs.delete(dir, true);
141   }
142 
143   /**
144    * Return the number of bytes that large input files should be optimally
145    * be split into to minimize i/o time.
146    *
147    * @param fs filesystem object
148    * @return the default block size for the path's filesystem
149    */
150   public static long getDefaultBlockSize(final FileSystem fs, final Path path) {
151     return fs.getDefaultBlockSize(path);
152   }
153 
154   /*
155    * Get the default replication.
156    *
157    * @param fs filesystem object
158    * @param f path of file
159    * @return default replication for the path's filesystem
160    */
161   public static short getDefaultReplication(final FileSystem fs, final Path path) {
162     return fs.getDefaultReplication(path);
163   }
164 
165   /**
166    * Returns the default buffer size to use during writes.
167    *
168    * The size of the buffer should probably be a multiple of hardware
169    * page size (4096 on Intel x86), and it determines how much data is
170    * buffered during read and write operations.
171    *
172    * @param fs filesystem object
173    * @return default buffer size to use during writes
174    */
175   public static int getDefaultBufferSize(final FileSystem fs) {
176     return fs.getConf().getInt("io.file.buffer.size", 4096);
177   }
178 
179   /**
180    * Create the specified file on the filesystem. By default, this will:
181    * <ol>
182    * <li>apply the umask in the configuration (if it is enabled)</li>
183    * <li>use the fs configured buffer size (or 4096 if not set)</li>
184    * <li>use the default replication</li>
185    * <li>use the default block size</li>
186    * <li>not track progress</li>
187    * </ol>
188    *
189    * @param fs {@link FileSystem} on which to write the file
190    * @param path {@link Path} to the file to write
191    * @param perm intial permissions
192    * @param overwrite Whether or not the created file should be overwritten.
193    * @return output stream to the created file
194    * @throws IOException if the file cannot be created
195    */
196   public static FSDataOutputStream create(FileSystem fs, Path path,
197       FsPermission perm, boolean overwrite) throws IOException {
198     if (LOG.isTraceEnabled()) {
199       LOG.trace("Creating file=" + path + " with permission=" + perm + ", overwrite=" + overwrite);
200     }
201     return fs.create(path, perm, overwrite, getDefaultBufferSize(fs),
202         getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null);
203   }
204 
205   /**
206    * Get the file permissions specified in the configuration, if they are
207    * enabled.
208    *
209    * @param fs filesystem that the file will be created on.
210    * @param conf configuration to read for determining if permissions are
211    *          enabled and which to use
212    * @param permssionConfKey property key in the configuration to use when
213    *          finding the permission
214    * @return the permission to use when creating a new file on the fs. If
215    *         special permissions are not specified in the configuration, then
216    *         the default permissions on the the fs will be returned.
217    */
218   public static FsPermission getFilePermissions(final FileSystem fs,
219       final Configuration conf, final String permssionConfKey) {
220     boolean enablePermissions = conf.getBoolean(
221         HConstants.ENABLE_DATA_FILE_UMASK, false);
222 
223     if (enablePermissions) {
224       try {
225         FsPermission perm = new FsPermission(FULL_RWX_PERMISSIONS);
226         // make sure that we have a mask, if not, go default.
227         String mask = conf.get(permssionConfKey);
228         if (mask == null) {
229           return FsPermission.getFileDefault();
230         }
231         // appy the umask
232         FsPermission umask = new FsPermission(mask);
233         return perm.applyUMask(umask);
234       } catch (IllegalArgumentException e) {
235         LOG.warn(
236             "Incorrect umask attempted to be created: "
237                 + conf.get(permssionConfKey)
238                 + ", using default file permissions.", e);
239         return FsPermission.getFileDefault();
240       }
241     }
242     return FsPermission.getFileDefault();
243   }
244 
245   /**
246    * Verifies root directory path is a valid URI with a scheme
247    *
248    * @param root root directory path
249    * @return Passed <code>root</code> argument.
250    * @throws IOException if not a valid URI with a scheme
251    */
252   public static Path validateRootPath(Path root) throws IOException {
253     try {
254       URI rootURI = new URI(root.toString());
255       String scheme = rootURI.getScheme();
256       if (scheme == null) {
257         throw new IOException("Root directory does not have a scheme");
258       }
259       return root;
260     } catch (URISyntaxException e) {
261       throw new IOException("Root directory path is not a valid " +
262         "URI -- check your " + HConstants.HBASE_DIR + " configuration", e);
263     }
264   }
265 
266   /**
267    * Checks for the presence of the WAL log root path (using the provided conf object) in the given
268    * path. If it exists, this method removes it and returns the String representation of remaining
269    * relative path.
270    * @param path must not be null
271    * @param conf must not be null
272    * @return String representation of the remaining relative path
273    * @throws IOException from underlying filesystem
274    */
275   public static String removeWALRootPath(Path path, final Configuration conf) throws IOException {
276     Path root = getWALRootDir(conf);
277     String pathStr = path.toString();
278     // check that the path is absolute... it has the root path in it.
279     if (!pathStr.startsWith(root.toString())) {
280       return pathStr;
281     }
282     // if not, return as it is.
283     return pathStr.substring(root.toString().length() + 1);// remove the "/" too.
284   }
285 
286   /**
287    * Return the 'path' component of a Path.  In Hadoop, Path is an URI.  This
288    * method returns the 'path' component of a Path's URI: e.g. If a Path is
289    * <code>hdfs://example.org:9000/hbase_trunk/TestTable/compaction.dir</code>,
290    * this method returns <code>/hbase_trunk/TestTable/compaction.dir</code>.
291    * This method is useful if you want to print out a Path without qualifying
292    * Filesystem instance.
293    * @param p Filesystem Path whose 'path' component we are to return.
294    * @return Path portion of the Filesystem
295    */
296   public static String getPath(Path p) {
297     return p.toUri().getPath();
298   }
299 
300   /**
301    * @param c configuration
302    * @return {@link Path} to hbase root directory from
303    *     configuration as a qualified Path.
304    * @throws IOException e
305    */
306   public static Path getRootDir(final Configuration c) throws IOException {
307     Path p = new Path(c.get(HConstants.HBASE_DIR));
308     FileSystem fs = p.getFileSystem(c);
309     return p.makeQualified(fs);
310   }
311 
  /**
   * Overwrites the configured hbase root directory (the {@link HConstants#HBASE_DIR} key).
   * @param c configuration to update
   * @param root new root directory path
   */
  public static void setRootDir(final Configuration c, final Path root) {
    c.set(HConstants.HBASE_DIR, root.toString());
  }
315 
  /**
   * Sets the default filesystem ("fs.defaultFS") in the given configuration.
   * @param c configuration to update
   * @param root path whose location becomes the default filesystem
   */
  public static void setFsDefault(final Configuration c, final Path root) {
    c.set("fs.defaultFS", root.toString());    // for hadoop 0.21+
  }
319 
  /**
   * @param c configuration
   * @return the {@link FileSystem} that owns the configured hbase root directory
   * @throws IOException if the root dir or its filesystem cannot be resolved
   */
  public static FileSystem getRootDirFileSystem(final Configuration c) throws IOException {
    Path p = getRootDir(c);
    return p.getFileSystem(c);
  }
324 
325   /**
326    * @param c configuration
327    * @return {@link Path} to hbase log root directory: e.g. {@value HBASE_WAL_DIR} from
328    *     configuration as a qualified Path. Defaults to HBase root dir.
329    * @throws IOException e
330    */
331   public static Path getWALRootDir(final Configuration c) throws IOException {
332     Path p = new Path(c.get(HBASE_WAL_DIR, c.get(HConstants.HBASE_DIR)));
333     if (!isValidWALRootDir(p, c)) {
334       return getRootDir(c);
335     }
336     FileSystem fs = p.getFileSystem(c);
337     return p.makeQualified(fs);
338   }
339 
  /**
   * Overwrites the configured WAL root directory ({@value HBASE_WAL_DIR}).
   * @param c configuration to update
   * @param root new WAL root directory
   */
  public static void setWALRootDir(final Configuration c, final Path root) {
    c.set(HBASE_WAL_DIR, root.toString());
  }
343 
344   public static FileSystem getWALFileSystem(final Configuration c) throws IOException {
345     Path p = getWALRootDir(c);
346     FileSystem fs = p.getFileSystem(c);
347     // hadoop-core does fs caching, so need to propagate this if set
348     String enforceStreamCapability = c.get(UNSAFE_STREAM_CAPABILITY_ENFORCE);
349     if (enforceStreamCapability != null) {
350       fs.getConf().set(UNSAFE_STREAM_CAPABILITY_ENFORCE, enforceStreamCapability);
351     }
352     return fs;
353   }
354 
355   private static boolean isValidWALRootDir(Path walDir, final Configuration c) throws IOException {
356     Path rootDir = getRootDir(c);
357     if (walDir != rootDir) {
358       if (walDir.toString().startsWith(rootDir.toString() + "/")) {
359         throw new IllegalStateException("Illegal WAL directory specified. " +
360             "WAL directories are not permitted to be under the root directory if set.");
361       }
362     }
363     return true;
364   }
365 
366   /**
367    * Returns the {@link org.apache.hadoop.fs.Path} object representing the table directory under
368    * path rootdir
369    *
370    * @param rootdir qualified path of HBase root directory
371    * @param tableName name of table
372    * @return {@link org.apache.hadoop.fs.Path} for table
373    */
374   public static Path getTableDir(Path rootdir, final TableName tableName) {
375     return new Path(getNamespaceDir(rootdir, tableName.getNamespaceAsString()),
376         tableName.getQualifierAsString());
377   }
378 
379   /**
380    * Returns the {@link org.apache.hadoop.fs.Path} object representing the region
381    * directory under path rootdir
382    *
383    * @param rootdir    qualified path of HBase root directory
384    * @param tableName  name of table
385    * @param regionName The encoded region name
386    * @return {@link org.apache.hadoop.fs.Path} for region
387    */
388   public static Path getRegionDir(Path rootdir, TableName tableName, String regionName) {
389     return new Path(getTableDir(rootdir, tableName), regionName);
390   }
391 
  /**
   * Returns the table directory (namespace/qualifier layout) rooted at the WAL root dir.
   * @param c configuration, used to resolve the WAL root directory
   * @param tableName table to locate
   * @return {@link org.apache.hadoop.fs.Path} of the table under the WAL root
   * @throws IOException if the WAL root dir cannot be resolved
   */
  public static Path getWALTableDir(Configuration c, TableName tableName) throws IOException {
    return new Path(getNamespaceDir(getWALRootDir(c), tableName.getNamespaceAsString()),
        tableName.getQualifierAsString());
  }
396 
397   /**
398    * Returns the {@link org.apache.hadoop.hbase.TableName} object representing
399    * the table directory under
400    * path rootdir
401    *
402    * @param tablePath path of table
403    * @return {@link org.apache.hadoop.fs.Path} for table
404    */
405   public static TableName getTableName(Path tablePath) {
406     return TableName.valueOf(tablePath.getParent().getName(), tablePath.getName());
407   }
408 
409   /**
410    * Returns the {@link org.apache.hadoop.fs.Path} object representing
411    * the namespace directory under path rootdir
412    *
413    * @param rootdir qualified path of HBase root directory
414    * @param namespace namespace name
415    * @return {@link org.apache.hadoop.fs.Path} for table
416    */
417   public static Path getNamespaceDir(Path rootdir, final String namespace) {
418     return new Path(rootdir, new Path(HConstants.BASE_NAMESPACE_DIR,
419         new Path(namespace)));
420   }
421 
422   /**
423    * Sets storage policy for given path according to config setting.
424    * If the passed path is a directory, we'll set the storage policy for all files
425    * created in the future in said directory. Note that this change in storage
426    * policy takes place at the FileSystem level; it will persist beyond this RS's lifecycle.
427    * If we're running on a FileSystem implementation that doesn't support the given storage policy
428    * (or storage policies at all), then we'll issue a log message and continue.
429    *
430    * See http://hadoop.apache.org/docs/r2.6.0/hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html
431    *
432    * @param fs We only do anything it implements a setStoragePolicy method
433    * @param conf used to look up storage policy with given key; not modified.
434    * @param path the Path whose storage policy is to be set
435    * @param policyKey Key to use pulling a policy from Configuration:
436    *   e.g. HConstants.WAL_STORAGE_POLICY (hbase.wal.storage.policy).
437    * @param defaultPolicy if the configured policy is equal to this policy name, we will skip
438    *   telling the FileSystem to set a storage policy.
439    */
440   public static void setStoragePolicy(final FileSystem fs, final Configuration conf,
441       final Path path, final String policyKey, final String defaultPolicy) {
442     String storagePolicy = conf.get(policyKey, defaultPolicy).toUpperCase(Locale.ROOT);
443     if (storagePolicy.equals(defaultPolicy)) {
444       if (LOG.isTraceEnabled()) {
445         LOG.trace("default policy of " + defaultPolicy + " requested, exiting early.");
446       }
447       return;
448     }
449     setStoragePolicy(fs, path, storagePolicy);
450   }
451 
  // Tracks, per FileSystem instance, whether a storage-policy failure was already logged at
  // WARN so that repeat failures against the same fs only log at DEBUG.
  private static final Map<FileSystem, Boolean> warningMap = new ConcurrentHashMap<>();
453 
454   /**
455    * Sets storage policy for given path.
456    * <p>
457    * If the passed path is a directory, we'll set the storage policy for all files
458    * created in the future in said directory. Note that this change in storage
459    * policy takes place at the HDFS level; it will persist beyond this RS's lifecycle.
460    * If we're running on a version of HDFS that doesn't support the given storage policy
461    * (or storage policies at all), then we'll issue a log message and continue.
462    * See http://hadoop.apache.org/docs/r2.6.0/hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html
463    * for possible list e.g 'COLD', 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'.
464    *
465    * @param fs We only do anything if an instance of DistributedFileSystem
466    * @param path the Path whose storage policy is to be set
467    * @param storagePolicy Policy to set on <code>path</code>
468    */
469   public static void setStoragePolicy(final FileSystem fs, final Path path,
470       final String storagePolicy) {
471     try {
472       setStoragePolicy(fs, path, storagePolicy, false);
473     } catch (IOException e) {
474       // should never arrive here
475       LOG.warn("We have chosen not to throw exception but some unexpectedly thrown out", e);
476     }
477   }
478 
479   static void setStoragePolicy(final FileSystem fs, final Path path, final String storagePolicy,
480       boolean throwException) throws IOException {
481     if (storagePolicy == null) {
482       if (LOG.isTraceEnabled()) {
483         LOG.trace("We were passed a null storagePolicy, exiting early.");
484       }
485       return;
486     }
487     String trimmedStoragePolicy = storagePolicy.trim();
488     if (trimmedStoragePolicy.isEmpty()) {
489       if (LOG.isTraceEnabled()) {
490         LOG.trace("We were passed an empty storagePolicy, exiting early.");
491       }
492       return;
493     } else {
494       trimmedStoragePolicy = trimmedStoragePolicy.toUpperCase(Locale.ROOT);
495     }
496     if (trimmedStoragePolicy.equals(HConstants.DEFER_TO_HDFS_STORAGE_POLICY)) {
497       if (LOG.isTraceEnabled()) {
498         LOG.trace(
499           "We were passed the defer-to-hdfs policy " + trimmedStoragePolicy + ", exiting early.");
500       }
501       return;
502     }
503     try {
504       invokeSetStoragePolicy(fs, path, trimmedStoragePolicy);
505     } catch (IOException e) {
506       if (!warningMap.containsKey(fs)) {
507         warningMap.put(fs, true);
508         LOG.warn("Failed to invoke set storage policy API on FS; presuming it doesn't "
509             + "support setStoragePolicy. Unable to set storagePolicy=" + trimmedStoragePolicy
510             + " on path=" + path);
511       } else if (LOG.isDebugEnabled()) {
512         LOG.debug("Failed to invoke set storage policy API on FS; presuming it doesn't "
513             + "support setStoragePolicy. Unable to set storagePolicy=" + trimmedStoragePolicy
514             + " on path=" + path);
515       }
516       if (throwException) {
517         throw e;
518       }
519     }
520   }
521 
522   /*
523    * All args have been checked and are good. Run the setStoragePolicy invocation.
524    */
525   private static void invokeSetStoragePolicy(final FileSystem fs, final Path path,
526       final String storagePolicy) throws IOException {
527     Exception toThrow = null;
528 
529     try {
530       fs.setStoragePolicy(path, storagePolicy);
531 
532       if (LOG.isDebugEnabled()) {
533         LOG.debug("Set storagePolicy=" + storagePolicy + " for path=" + path);
534       }
535     } catch (Exception e) {
536       toThrow = e;
537       // This swallows FNFE, should we be throwing it? seems more likely to indicate dev
538       // misuse than a runtime problem with HDFS.
539       if (!warningMap.containsKey(fs)) {
540         warningMap.put(fs, true);
541         LOG.warn("Unable to set storagePolicy=" + storagePolicy + " for path=" + path, e);
542       } else if (LOG.isDebugEnabled()) {
543         LOG.debug("Unable to set storagePolicy=" + storagePolicy + " for path=" + path, e);
544       }
545 
546       // check for lack of HDFS-7228
547       if (e instanceof RemoteException &&
548           HadoopIllegalArgumentException.class.getName().equals(
549             ((RemoteException)e).getClassName())) {
550         if (LOG.isDebugEnabled()) {
551           LOG.debug("Given storage policy, '" +storagePolicy +"', was rejected and probably " +
552             "isn't a valid policy for the version of Hadoop you're running. I.e. if you're " +
553             "trying to use SSD related policies then you're likely missing HDFS-7228. For " +
554             "more information see the 'ArchivalStorage' docs for your Hadoop release.");
555         }
556       }
557     }
558 
559     if (toThrow != null) {
560       throw new IOException(toThrow);
561     }
562   }
563 
564   /**
565    * @param conf must not be null
566    * @return True if this filesystem whose scheme is 'hdfs'.
567    * @throws IOException from underlying FileSystem
568    */
569   public static boolean isHDFS(final Configuration conf) throws IOException {
570     FileSystem fs = FileSystem.get(conf);
571     String scheme = fs.getUri().getScheme();
572     return scheme.equalsIgnoreCase("hdfs");
573   }
574 
575   /**
576    * Checks if the given path is the one with 'recovered.edits' dir.
577    * @param path must not be null
578    * @return True if we recovered edits
579    */
580   public static boolean isRecoveredEdits(Path path) {
581     return path.toString().contains(HConstants.RECOVERED_EDITS_DIR);
582   }
583 
584   /**
585    * @param conf must not be null
586    * @return Returns the filesystem of the hbase rootdir.
587    * @throws IOException from underlying FileSystem
588    */
589   public static FileSystem getCurrentFileSystem(Configuration conf) throws IOException {
590     return getRootDir(conf).getFileSystem(conf);
591   }
592 
593   /**
594    * Calls fs.listStatus() and treats FileNotFoundException as non-fatal
595    * This accommodates differences between hadoop versions, where hadoop 1
596    * does not throw a FileNotFoundException, and return an empty FileStatus[]
597    * while Hadoop 2 will throw FileNotFoundException.
598    *
599    * Where possible, prefer FSUtils#listStatusWithStatusFilter(FileSystem,
600    * Path, FileStatusFilter) instead.
601    *
602    * @param fs file system
603    * @param dir directory
604    * @param filter path filter
605    * @return null if dir is empty or doesn't exist, otherwise FileStatus array
606    */
607   public static FileStatus[] listStatus(final FileSystem fs,
608       final Path dir, final PathFilter filter) throws IOException {
609     FileStatus [] status = null;
610     try {
611       status = filter == null ? fs.listStatus(dir) : fs.listStatus(dir, filter);
612     } catch (FileNotFoundException fnfe) {
613       // if directory doesn't exist, return null
614       if (LOG.isTraceEnabled()) {
615         LOG.trace(dir + " doesn't exist");
616       }
617     }
618     if (status == null || status.length < 1) {
619       return null;
620     }
621     return status;
622   }
623 
624   /**
625    * Calls fs.listStatus() and treats FileNotFoundException as non-fatal
626    * This would accommodates differences between hadoop versions
627    *
628    * @param fs file system
629    * @param dir directory
630    * @return null if dir is empty or doesn't exist, otherwise FileStatus array
631    */
632   public static FileStatus[] listStatus(final FileSystem fs, final Path dir) throws IOException {
633     return listStatus(fs, dir, null);
634   }
635 
636   /**
637    * Calls fs.listFiles() to get FileStatus and BlockLocations together for reducing rpc call
638    *
639    * @param fs file system
640    * @param dir directory
641    * @return LocatedFileStatus list
642    */
643   public static List<LocatedFileStatus> listLocatedStatus(final FileSystem fs,
644       final Path dir) throws IOException {
645     List<LocatedFileStatus> status = null;
646     try {
647       RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator = fs
648           .listFiles(dir, false);
649       while (locatedFileStatusRemoteIterator.hasNext()) {
650         if (status == null) {
651           status = Lists.newArrayList();
652         }
653         status.add(locatedFileStatusRemoteIterator.next());
654       }
655     } catch (FileNotFoundException fnfe) {
656       // if directory doesn't exist, return null
657       if (LOG.isTraceEnabled()) {
658         LOG.trace(dir + " doesn't exist");
659       }
660     }
661     return status;
662   }
663 
664   /**
665    * Calls fs.delete() and returns the value returned by the fs.delete()
666    *
667    * @param fs must not be null
668    * @param path must not be null
669    * @param recursive delete tree rooted at path
670    * @return the value returned by the fs.delete()
671    * @throws IOException from underlying FileSystem
672    */
673   public static boolean delete(final FileSystem fs, final Path path, final boolean recursive)
674       throws IOException {
675     return fs.delete(path, recursive);
676   }
677 
678   /**
679    * Calls fs.exists(). Checks if the specified path exists
680    *
681    * @param fs must not be null
682    * @param path must not be null
683    * @return the value returned by fs.exists()
684    * @throws IOException from underlying FileSystem
685    */
686   public static boolean isExists(final FileSystem fs, final Path path) throws IOException {
687     return fs.exists(path);
688   }
689 
690   /**
691    * Log the current state of the filesystem from a certain root directory
692    * @param fs filesystem to investigate
693    * @param root root file/directory to start logging from
694    * @param log log to output information
695    * @throws IOException if an unexpected exception occurs
696    */
697   public static void logFileSystemState(final FileSystem fs, final Path root, Log log)
698       throws IOException {
699     log.debug("Current file system:");
700     logFSTree(log, fs, root, "|-");
701   }
702 
703   /**
704    * Recursive helper to log the state of the FS
705    *
706    * @see #logFileSystemState(FileSystem, Path, Log)
707    */
708   private static void logFSTree(Log log, final FileSystem fs, final Path root, String prefix)
709       throws IOException {
710     FileStatus[] files = listStatus(fs, root, null);
711     if (files == null) {
712       return;
713     }
714 
715     for (FileStatus file : files) {
716       if (file.isDirectory()) {
717         log.debug(prefix + file.getPath().getName() + "/");
718         logFSTree(log, fs, file.getPath(), prefix + "---");
719       } else {
720         log.debug(prefix + file.getPath().getName());
721       }
722     }
723   }
724 
725   public static boolean renameAndSetModifyTime(final FileSystem fs, final Path src, final Path dest)
726       throws IOException {
727     // set the modify time for TimeToLive Cleaner
728     fs.setTimes(src, EnvironmentEdgeManager.currentTime(), -1);
729     return fs.rename(src, dest);
730   }
731 
732   /**
733    * Check if short circuit read buffer size is set and if not, set it to hbase value.
734    * @param conf must not be null
735    */
736   public static void checkShortCircuitReadBufferSize(final Configuration conf) {
737     final int defaultSize = HConstants.DEFAULT_BLOCKSIZE * 2;
738     final int notSet = -1;
739     // DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY is only defined in h2
740     final String dfsKey = "dfs.client.read.shortcircuit.buffer.size";
741     int size = conf.getInt(dfsKey, notSet);
742     // If a size is set, return -- we will use it.
743     if (size != notSet) {
744       return;
745     }
746     // But short circuit buffer size is normally not set.  Put in place the hbase wanted size.
747     int hbaseSize = conf.getInt("hbase." + dfsKey, defaultSize);
748     conf.setIfUnset(dfsKey, Integer.toString(hbaseSize));
749   }
750 
  // Holder singleton idiom. JVM spec ensures this will be run at most once per Classloader, and
  // not until we attempt to reference it.
  private static class StreamCapabilities {
    // True when org.apache.hadoop.fs.StreamCapabilities was found on the classpath.
    public static final boolean PRESENT;
    // Reflected StreamCapabilities class; null when absent.
    public static final Class<?> CLASS;
    // Reflected hasCapability(String) method; null when absent.
    public static final Method METHOD;
    static {
      boolean tmp = false;
      Class<?> clazz = null;
      Method method = null;
      try {
        clazz = Class.forName("org.apache.hadoop.fs.StreamCapabilities");
        method = clazz.getMethod("hasCapability", String.class);
        tmp = true;
      } catch(ClassNotFoundException|NoSuchMethodException|SecurityException exception) {
        LOG.warn("Your Hadoop installation does not include the StreamCapabilities class from " +
                 "HDFS-11644, so we will skip checking if any FSDataOutputStreams actually " +
                 "support hflush/hsync. If you are running on top of HDFS this probably just " +
                 "means you have an older version and this can be ignored. If you are running on " +
                 "top of an alternate FileSystem implementation you should manually verify that " +
                 "hflush and hsync are implemented; otherwise you risk data loss and hard to " +
                 "diagnose errors when our assumptions are violated.");
        LOG.debug("The first request to check for StreamCapabilities came from this stacktrace.",
            exception);
      } finally {
        // finally guarantees the final fields are assigned exactly once on every path.
        PRESENT = tmp;
        CLASS = clazz;
        METHOD = method;
      }
    }
  }
782 
783   /**
784    * If our FileSystem version includes the StreamCapabilities class, check if
785    * the given stream has a particular capability.
786    * @param stream capabilities are per-stream instance, so check this one specifically. must not be
787    *        null
788    * @param capability what to look for, per Hadoop Common's FileSystem docs
789    * @return true if there are no StreamCapabilities. false if there are, but this stream doesn't
790    *         implement it. return result of asking the stream otherwise.
791    */
792   public static boolean hasCapability(FSDataOutputStream stream, String capability) {
793     // be consistent whether or not StreamCapabilities is present
794     if (stream == null) {
795       throw new NullPointerException("stream parameter must not be null.");
796     }
797     // If o.a.h.fs.StreamCapabilities doesn't exist, assume everyone does everything
798     // otherwise old versions of Hadoop will break.
799     boolean result = true;
800     if (StreamCapabilities.PRESENT) {
801       // if StreamCapabilities is present, but the stream doesn't implement it
802       // or we run into a problem invoking the method,
803       // we treat that as equivalent to not declaring anything
804       result = false;
805       if (StreamCapabilities.CLASS.isAssignableFrom(stream.getClass())) {
806         try {
807           result = ((Boolean)StreamCapabilities.METHOD.invoke(stream, capability)).booleanValue();
808         } catch (IllegalAccessException|IllegalArgumentException|InvocationTargetException
809             exception) {
810           LOG.warn("Your Hadoop installation's StreamCapabilities implementation doesn't match " +
811               "our understanding of how it's supposed to work. Please file a JIRA and include " +
812               "the following stack trace. In the mean time we're interpreting this behavior " +
813               "difference as a lack of capability support, which will probably cause a failure.",
814               exception);
815         }
816       }
817     }
818     return result;
819   }
820 
821   /**
822    * Helper exception for those cases where the place where we need to check a stream capability
823    * is not where we have the needed context to explain the impact and mitigation for a lack.
824    */
825   public static class StreamLacksCapabilityException extends IOException {
826     private static final long serialVersionUID = 1L;
827     public StreamLacksCapabilityException(String message, Throwable cause) {
828       super(message, cause);
829     }
830     public StreamLacksCapabilityException(String message) {
831       super(message);
832     }
833   }
834 }