/*
 * Copyright The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20  
21  package org.apache.hadoop.hbase.fs;
22  
23  import java.io.Closeable;
24  import java.io.IOException;
25  import java.lang.reflect.Field;
26  import java.lang.reflect.InvocationHandler;
27  import java.lang.reflect.InvocationTargetException;
28  import java.lang.reflect.Method;
29  import java.lang.reflect.Modifier;
30  import java.lang.reflect.Proxy;
31  import java.lang.reflect.UndeclaredThrowableException;
32  import java.net.URI;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FSDataOutputStream;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.FilterFileSystem;
40  import org.apache.hadoop.fs.LocalFileSystem;
41  import org.apache.hadoop.fs.Path;
42  import org.apache.hadoop.hbase.ServerName;
43  import org.apache.hadoop.hbase.util.FSUtils;
44  import org.apache.hadoop.hbase.util.ReflectionUtils;
45  import org.apache.hadoop.hbase.wal.DefaultWALProvider;
46  import org.apache.hadoop.hdfs.DFSClient;
47  import org.apache.hadoop.hdfs.DistributedFileSystem;
48  import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
49  import org.apache.hadoop.hdfs.protocol.ClientProtocol;
50  import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
51  import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
52  import org.apache.hadoop.hdfs.protocol.LocatedBlock;
53  import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
54  import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
55  import org.apache.hadoop.ipc.RPC;
56  import org.apache.hadoop.util.Progressable;
57  
58  /**
59   * An encapsulation for the FileSystem object that hbase uses to access
60   * data. This class allows the flexibility of using
61   * separate filesystem objects for reading and writing hfiles and wals.
62   */
63  public class HFileSystem extends FilterFileSystem {
64    public static final Log LOG = LogFactory.getLog(HFileSystem.class);
65  
66    private final FileSystem noChecksumFs;   // read hfile data from storage
67    private final boolean useHBaseChecksum;
68    private static volatile byte unspecifiedStoragePolicyId = Byte.MIN_VALUE;
69  
70    /**
71     * Create a FileSystem object for HBase regionservers.
72     * @param conf The configuration to be used for the filesystem
73     * @param useHBaseChecksum if true, then use
74     *        checksum verfication in hbase, otherwise
75     *        delegate checksum verification to the FileSystem.
76     */
77    public HFileSystem(Configuration conf, boolean useHBaseChecksum)
78      throws IOException {
79  
80      // Create the default filesystem with checksum verification switched on.
81      // By default, any operation to this FilterFileSystem occurs on
82      // the underlying filesystem that has checksums switched on.
83      this.fs = FileSystem.get(conf);
84      this.useHBaseChecksum = useHBaseChecksum;
85  
86      fs.initialize(getDefaultUri(conf), conf);
87      
88      // disable checksum verification for local fileSystem, see HBASE-11218
89      if (fs instanceof LocalFileSystem) {
90        fs.setWriteChecksum(false);
91        fs.setVerifyChecksum(false);
92      }
93  
94      addLocationsOrderInterceptor(conf);
95  
96      // If hbase checksum verification is switched on, then create a new
97      // filesystem object that has cksum verification turned off.
98      // We will avoid verifying checksums in the fs client, instead do it
99      // inside of hbase.
100     // If this is the local file system hadoop has a bug where seeks
101     // do not go to the correct location if setVerifyChecksum(false) is called.
102     // This manifests itself in that incorrect data is read and HFileBlocks won't be able to read
103     // their header magic numbers. See HBASE-5885
104     if (useHBaseChecksum && !(fs instanceof LocalFileSystem)) {
105       conf = new Configuration(conf);
106       conf.setBoolean("dfs.client.read.shortcircuit.skip.checksum", true);
107       this.noChecksumFs = maybeWrapFileSystem(newInstanceFileSystem(conf), conf);
108       this.noChecksumFs.setVerifyChecksum(false);
109     } else {
110       this.noChecksumFs = maybeWrapFileSystem(fs, conf);
111     }
112 
113     this.fs = maybeWrapFileSystem(this.fs, conf);
114   }
115 
116   /**
117    * Wrap a FileSystem object within a HFileSystem. The noChecksumFs and
118    * writefs are both set to be the same specified fs. 
119    * Do not verify hbase-checksums while reading data from filesystem.
120    * @param fs Set the noChecksumFs and writeFs to this specified filesystem.
121    */
122   public HFileSystem(FileSystem fs) {
123     this.fs = fs;
124     this.noChecksumFs = fs;
125     this.useHBaseChecksum = false;
126   }
127 
128   /**
129    * Returns the filesystem that is specially setup for 
130    * doing reads from storage. This object avoids doing 
131    * checksum verifications for reads.
132    * @return The FileSystem object that can be used to read data
133    *         from files.
134    */
135   public FileSystem getNoChecksumFs() {
136     return noChecksumFs;
137   }
138 
139   /**
140    * Returns the underlying filesystem
141    * @return The underlying FileSystem for this FilterFileSystem object.
142    */
143   public FileSystem getBackingFs() throws IOException {
144     return fs;
145   }
146 
147   /**
148    * Are we verifying checksums in HBase?
149    * @return True, if hbase is configured to verify checksums,
150    *         otherwise false.
151    */
152   public boolean useHBaseChecksum() {
153     return useHBaseChecksum;
154   }
155 
156   /**
157    * Close this filesystem object
158    */
159   @Override
160   public void close() throws IOException {
161     super.close();
162     if (this.noChecksumFs != fs) {
163       this.noChecksumFs.close();
164     }
165   }
166 
167  /**
168    * Returns a brand new instance of the FileSystem. It does not use
169    * the FileSystem.Cache. In newer versions of HDFS, we can directly
170    * invoke FileSystem.newInstance(Configuration).
171    * 
172    * @param conf Configuration
173    * @return A new instance of the filesystem
174    */
175   private static FileSystem newInstanceFileSystem(Configuration conf)
176     throws IOException {
177     URI uri = FileSystem.getDefaultUri(conf);
178     FileSystem fs = null;
179     Class<?> clazz = conf.getClass("fs." + uri.getScheme() + ".impl", null);
180     if (clazz != null) {
181       // This will be true for Hadoop 1.0, or 0.20.
182       fs = (FileSystem) org.apache.hadoop.util.ReflectionUtils.newInstance(clazz, conf);
183       fs.initialize(uri, conf);
184     } else {
185       // For Hadoop 2.0, we have to go through FileSystem for the filesystem
186       // implementation to be loaded by the service loader in case it has not
187       // been loaded yet.
188       Configuration clone = new Configuration(conf);
189       clone.setBoolean("fs." + uri.getScheme() + ".impl.disable.cache", true);
190       fs = FileSystem.get(uri, clone);
191     }
192     if (fs == null) {
193       throw new IOException("No FileSystem for scheme: " + uri.getScheme());
194     }
195 
196     return fs;
197   }
198 
199   /**
200    * Returns an instance of Filesystem wrapped into the class specified in
201    * hbase.fs.wrapper property, if one is set in the configuration, returns
202    * unmodified FS instance passed in as an argument otherwise.
203    * @param base Filesystem instance to wrap
204    * @param conf Configuration
205    * @return wrapped instance of FS, or the same instance if no wrapping configured.
206    */
207   private FileSystem maybeWrapFileSystem(FileSystem base, Configuration conf) {
208     try {
209       Class<?> clazz = conf.getClass("hbase.fs.wrapper", null);
210       if (clazz != null) {
211         return (FileSystem) clazz.getConstructor(FileSystem.class, Configuration.class)
212           .newInstance(base, conf);
213       }
214     } catch (Exception e) {
215       LOG.error("Failed to wrap filesystem: " + e);
216     }
217     return base;
218   }
219 
220   public static boolean addLocationsOrderInterceptor(Configuration conf) throws IOException {
221     return addLocationsOrderInterceptor(conf, new ReorderWALBlocks());
222   }
223 
224   /**
225    * Add an interceptor on the calls to the namenode#getBlockLocations from the DFSClient
226    * linked to this FileSystem. See HBASE-6435 for the background.
227    * <p/>
228    * There should be no reason, except testing, to create a specific ReorderBlocks.
229    *
230    * @return true if the interceptor was added, false otherwise.
231    */
232   static boolean addLocationsOrderInterceptor(Configuration conf, final ReorderBlocks lrb) {
233     if (!conf.getBoolean("hbase.filesystem.reorder.blocks", true)) {  // activated by default
234       LOG.debug("addLocationsOrderInterceptor configured to false");
235       return false;
236     }
237 
238     FileSystem fs;
239     try {
240       fs = FileSystem.get(conf);
241     } catch (IOException e) {
242       LOG.warn("Can't get the file system from the conf.", e);
243       return false;
244     }
245 
246     if (!(fs instanceof DistributedFileSystem)) {
247       LOG.debug("The file system is not a DistributedFileSystem. " +
248           "Skipping on block location reordering");
249       return false;
250     }
251 
252     DistributedFileSystem dfs = (DistributedFileSystem) fs;
253     DFSClient dfsc = dfs.getClient();
254     if (dfsc == null) {
255       LOG.warn("The DistributedFileSystem does not contain a DFSClient. Can't add the location " +
256           "block reordering interceptor. Continuing, but this is unexpected."
257       );
258       return false;
259     }
260 
261     try {
262       Field nf = DFSClient.class.getDeclaredField("namenode");
263       nf.setAccessible(true);
264       Field modifiersField = Field.class.getDeclaredField("modifiers");
265       modifiersField.setAccessible(true);
266       modifiersField.setInt(nf, nf.getModifiers() & ~Modifier.FINAL);
267 
268       ClientProtocol namenode = (ClientProtocol) nf.get(dfsc);
269       if (namenode == null) {
270         LOG.warn("The DFSClient is not linked to a namenode. Can't add the location block" +
271             " reordering interceptor. Continuing, but this is unexpected."
272         );
273         return false;
274       }
275 
276       ClientProtocol cp1 = createReorderingProxy(namenode, lrb, conf);
277       nf.set(dfsc, cp1);
278       LOG.info("Added intercepting call to namenode#getBlockLocations so can do block reordering" +
279         " using class " + lrb.getClass().getName());
280     } catch (NoSuchFieldException e) {
281       LOG.warn("Can't modify the DFSClient#namenode field to add the location reorder.", e);
282       return false;
283     } catch (IllegalAccessException e) {
284       LOG.warn("Can't modify the DFSClient#namenode field to add the location reorder.", e);
285       return false;
286     }
287 
288     return true;
289   }
290 
291   private static ClientProtocol createReorderingProxy(final ClientProtocol cp,
292       final ReorderBlocks lrb, final Configuration conf) {
293     return (ClientProtocol) Proxy.newProxyInstance
294         (cp.getClass().getClassLoader(),
295             new Class[]{ClientProtocol.class, Closeable.class},
296             new InvocationHandler() {
297               @Override
298               public Object invoke(Object proxy, Method method,
299                                    Object[] args) throws Throwable {
300                 try {
301                   if ((args == null || args.length == 0)
302                       && "close".equals(method.getName())) {
303                     RPC.stopProxy(cp);
304                     return null;
305                   } else {
306                     Object res = method.invoke(cp, args);
307                     if (res != null && args != null && args.length == 3
308                         && "getBlockLocations".equals(method.getName())
309                         && res instanceof LocatedBlocks
310                         && args[0] instanceof String
311                         && args[0] != null) {
312                       lrb.reorderBlocks(conf, (LocatedBlocks) res, (String) args[0]);
313                     }
314                     return res;
315                   }
316                 } catch  (InvocationTargetException ite) {
317                   // We will have this for all the exception, checked on not, sent
318                   //  by any layer, including the functional exception
319                   Throwable cause = ite.getCause();
320                   if (cause == null){
321                     throw new RuntimeException(
322                       "Proxy invocation failed and getCause is null", ite);
323                   }
324                   if (cause instanceof UndeclaredThrowableException) {
325                     Throwable causeCause = cause.getCause();
326                     if (causeCause == null) {
327                       throw new RuntimeException("UndeclaredThrowableException had null cause!");
328                     }
329                     cause = cause.getCause();
330                   }
331                   throw cause;
332                 }
333               }
334             });
335   }
336 
337   /**
338    * Interface to implement to add a specific reordering logic in hdfs.
339    */
340   interface ReorderBlocks {
341     /**
342      *
343      * @param conf - the conf to use
344      * @param lbs - the LocatedBlocks to reorder
345      * @param src - the file name currently read
346      * @throws IOException - if something went wrong
347      */
348     void reorderBlocks(Configuration conf, LocatedBlocks lbs, String src) throws IOException;
349   }
350 
351   /**
352    * We're putting at lowest priority the wal files blocks that are on the same datanode
353    * as the original regionserver which created these files. This because we fear that the
354    * datanode is actually dead, so if we use it it will timeout.
355    */
356   static class ReorderWALBlocks implements ReorderBlocks {
357     @Override
358     public void reorderBlocks(Configuration conf, LocatedBlocks lbs, String src)
359         throws IOException {
360 
361       ServerName sn = DefaultWALProvider.getServerNameFromWALDirectoryName(conf, src);
362       if (sn == null) {
363         // It's not an WAL
364         return;
365       }
366 
367       // Ok, so it's an WAL
368       String hostName = sn.getHostname();
369       if (LOG.isTraceEnabled()) {
370         LOG.trace(src +
371             " is an WAL file, so reordering blocks, last hostname will be:" + hostName);
372       }
373 
374       // Just check for all blocks
375       for (LocatedBlock lb : lbs.getLocatedBlocks()) {
376         DatanodeInfo[] dnis = lb.getLocations();
377         if (dnis != null && dnis.length > 1) {
378           boolean found = false;
379           for (int i = 0; i < dnis.length - 1 && !found; i++) {
380             if (hostName.equals(dnis[i].getHostName())) {
381               // advance the other locations by one and put this one at the last place.
382               DatanodeInfo toLast = dnis[i];
383               System.arraycopy(dnis, i + 1, dnis, i, dnis.length - i - 1);
384               dnis[dnis.length - 1] = toLast;
385               found = true;
386             }
387           }
388         }
389       }
390     }
391   }
392 
393   /**
394    * Create a new HFileSystem object, similar to FileSystem.get().
395    * This returns a filesystem object that avoids checksum
396    * verification in the filesystem for hfileblock-reads.
397    * For these blocks, checksum verification is done by HBase.
398    */
399   static public FileSystem get(Configuration conf) throws IOException {
400     return new HFileSystem(conf, true);
401   }
402 
403   /**
404    * Wrap a LocalFileSystem within a HFileSystem.
405    */
406   static public FileSystem getLocalFs(Configuration conf) throws IOException {
407     return new HFileSystem(FileSystem.getLocal(conf));
408   }
409 
410   /**
411    * The org.apache.hadoop.fs.FilterFileSystem does not yet support 
412    * createNonRecursive. This is a hadoop bug and when it is fixed in Hadoop,
413    * this definition will go away.
414    */
415   @SuppressWarnings("deprecation")
416   public FSDataOutputStream createNonRecursive(Path f,
417       boolean overwrite,
418       int bufferSize, short replication, long blockSize,
419       Progressable progress) throws IOException {
420     return fs.createNonRecursive(f, overwrite, bufferSize, replication,
421                                  blockSize, progress);
422   }
423 
424   /**
425    * Set the source path (directory/file) to the specified storage policy.
426    * @param path The source path (directory/file).
427    * @param policyName The name of the storage policy: 'HOT', 'COLD', etc.
428    * See see hadoop 2.6+ org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g
429    * 'COLD', 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'.
430    */
431   public void setStoragePolicy(Path path, String policyName) {
432     FSUtils.setStoragePolicy(this.fs, path, policyName);
433   }
434 
435   /**
436    * Get the storage policy of the source path (directory/file).
437    * @param path The source path (directory/file).
438    * @return Storage policy name, or {@code null} if not using {@link DistributedFileSystem} or
439    *         exception thrown when trying to get policy
440    */
441   public String getStoragePolicyName(Path path) {
442     try {
443       Object blockStoragePolicySpi =
444           ReflectionUtils.invokeMethod(this.fs, "getStoragePolicy", path);
445       return (String) ReflectionUtils.invokeMethod(blockStoragePolicySpi, "getName");
446     } catch (Exception e) {
447       // Maybe fail because of using old HDFS version, try the old way
448       if (LOG.isTraceEnabled()) {
449         LOG.trace("Failed to get policy directly", e);
450       }
451       return getStoragePolicyForOldHDFSVersion(path);
452     }
453   }
454 
455   /**
456    * Before Hadoop 2.8.0, there's no getStoragePolicy method for FileSystem interface, and we need
457    * to keep compatible with it. See HADOOP-12161 for more details.
458    * @param path Path to get storage policy against
459    * @return the storage policy name
460    */
461   private String getStoragePolicyForOldHDFSVersion(Path path) {
462     try {
463       if (this.fs instanceof DistributedFileSystem) {
464         DistributedFileSystem dfs = (DistributedFileSystem) this.fs;
465         HdfsFileStatus status = dfs.getClient().getFileInfo(path.toUri().getPath());
466         if (null != status) {
467           if (unspecifiedStoragePolicyId < 0) {
468             // Get the unspecified id field through reflection to avoid compilation error.
469             // In later version BlockStoragePolicySuite#ID_UNSPECIFIED is moved to
470             // HdfsConstants#BLOCK_STORAGE_POLICY_ID_UNSPECIFIED
471             Field idUnspecified = BlockStoragePolicySuite.class.getField("ID_UNSPECIFIED");
472             unspecifiedStoragePolicyId = idUnspecified.getByte(BlockStoragePolicySuite.class);
473           }
474           byte storagePolicyId = status.getStoragePolicy();
475           if (storagePolicyId != unspecifiedStoragePolicyId) {
476             BlockStoragePolicy[] policies = dfs.getStoragePolicies();
477             for (BlockStoragePolicy policy : policies) {
478               if (policy.getId() == storagePolicyId) {
479                 return policy.getName();
480               }
481             }
482           }
483         }
484       }
485     } catch (Throwable e) {
486       LOG.warn("failed to get block storage policy of [" + path + "]", e);
487     }
488 
489     return null;
490   }
491 }