View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.io;
20  
21  import java.util.ArrayList;
22  import java.util.Arrays;
23  import java.util.Collection;
24  import java.io.IOException;
25  import java.io.InputStream;
26  import java.io.FileNotFoundException;
27  import java.util.List;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.hbase.classification.InterfaceAudience;
32  import org.apache.hadoop.fs.CanUnbuffer;
33  import org.apache.hadoop.fs.FSDataInputStream;
34  import org.apache.hadoop.fs.FileSystem;
35  import org.apache.hadoop.fs.FileStatus;
36  import org.apache.hadoop.fs.Path;
37  import org.apache.hadoop.fs.PositionedReadable;
38  import org.apache.hadoop.fs.Seekable;
39  import org.apache.hadoop.hbase.util.FSUtils;
40  import org.apache.hadoop.ipc.RemoteException;
41  
42  /**
43   * The FileLink is a sort of hardlink, that allows access to a file given a set of locations.
44   *
45   * <p><b>The Problem:</b>
46   * <ul>
47   *  <li>
48   *    HDFS doesn't support hardlinks, which makes it impossible to reference
49   *    the same data blocks using different names.
50   *  </li>
51   *  <li>
52   *    HBase stores files in one location (e.g. table/region/family/) and when a file is not
53   *    needed anymore (e.g. compaction, region deletion, ...) moves it to an archive directory.
54   *  </li>
55   * </ul>
56   * If we want to create a reference to a file, we need to remember that it can be in its
57   * original location or in the archive folder.
58   * The FileLink class tries to abstract this concept and given a set of locations
59   * it is able to switch between them making this operation transparent for the user.
60   * {@link HFileLink} is a more concrete implementation of the {@code FileLink}.
61   *
62   * <p><b>Back-references:</b>
63   * To help the {@link org.apache.hadoop.hbase.master.cleaner.CleanerChore} to keep track of
64   * the links to a particular file, during the {@code FileLink} creation, a new file is placed
65   * inside a back-reference directory. There's one back-reference directory for each file that
66   * has links, and in the directory there's one file per link.
67   *
68   * <p>HFileLink Example
69   * <ul>
70   *  <li>
71   *      /hbase/table/region-x/cf/file-k
72   *      (Original File)
73   *  </li>
74   *  <li>
75   *      /hbase/table-cloned/region-y/cf/file-k.region-x.table
76   *     (HFileLink to the original file)
77   *  </li>
78   *  <li>
79   *      /hbase/table-2nd-cloned/region-z/cf/file-k.region-x.table
80   *      (HFileLink to the original file)
81   *  </li>
82   *  <li>
83   *      /hbase/.archive/table/region-x/.links-file-k/region-y.table-cloned
84   *      (Back-reference to the link in table-cloned)
85   *  </li>
86   *  <li>
87   *      /hbase/.archive/table/region-x/.links-file-k/region-z.table-2nd-cloned
88   *      (Back-reference to the link in table-2nd-cloned)
89   *  </li>
90   * </ul>
91   */
92  @InterfaceAudience.Private
93  public class FileLink {
94    private static final Log LOG = LogFactory.getLog(FileLink.class);
95  
96    /** Define the Back-reference directory name prefix: .links-&lt;hfile&gt;/ */
97    public static final String BACK_REFERENCES_DIRECTORY_PREFIX = ".links-";
98  
99    /**
100    * FileLink InputStream that handles the switch between the original path
101    * and the alternative locations, when the file is moved.
102    */
103   private static class FileLinkInputStream extends InputStream
104       implements Seekable, PositionedReadable, CanUnbuffer {
105     private FSDataInputStream in = null;
106     private Path currentPath = null;
107     private long pos = 0;
108 
109     private final FileLink fileLink;
110     private final int bufferSize;
111     private final FileSystem fs;
112 
113     public FileLinkInputStream(final FileSystem fs, final FileLink fileLink)
114         throws IOException {
115       this(fs, fileLink, FSUtils.getDefaultBufferSize(fs));
116     }
117 
118     public FileLinkInputStream(final FileSystem fs, final FileLink fileLink, int bufferSize)
119         throws IOException {
120       this.bufferSize = bufferSize;
121       this.fileLink = fileLink;
122       this.fs = fs;
123 
124       this.in = tryOpen();
125     }
126 
127     @Override
128     public int read() throws IOException {
129       int res;
130       try {
131         res = in.read();
132       } catch (FileNotFoundException e) {
133         res = tryOpen().read();
134       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
135         res = tryOpen().read();
136       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
137         res = tryOpen().read();
138       }
139       if (res > 0) pos += 1;
140       return res;
141     }
142 
143     @Override
144     public int read(byte[] b) throws IOException {
145        return read(b, 0, b.length);
146     }
147 
148     @Override
149     public int read(byte[] b, int off, int len) throws IOException {
150       int n;
151       try {
152         n = in.read(b, off, len);
153       } catch (FileNotFoundException e) {
154         n = tryOpen().read(b, off, len);
155       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
156         n = tryOpen().read(b, off, len);
157       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
158         n = tryOpen().read(b, off, len);
159       }
160       if (n > 0) pos += n;
161       assert(in.getPos() == pos);
162       return n;
163     }
164 
165     @Override
166     public int read(long position, byte[] buffer, int offset, int length) throws IOException {
167       int n;
168       try {
169         n = in.read(position, buffer, offset, length);
170       } catch (FileNotFoundException e) {
171         n = tryOpen().read(position, buffer, offset, length);
172       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
173         n = tryOpen().read(position, buffer, offset, length);
174       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
175         n = tryOpen().read(position, buffer, offset, length);
176       }
177       return n;
178     }
179 
180     @Override
181     public void readFully(long position, byte[] buffer) throws IOException {
182       readFully(position, buffer, 0, buffer.length);
183     }
184 
185     @Override
186     public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
187       try {
188         in.readFully(position, buffer, offset, length);
189       } catch (FileNotFoundException e) {
190         tryOpen().readFully(position, buffer, offset, length);
191       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
192         tryOpen().readFully(position, buffer, offset, length);
193       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
194         tryOpen().readFully(position, buffer, offset, length);
195       }
196     }
197 
198     @Override
199     public long skip(long n) throws IOException {
200       long skipped;
201 
202       try {
203         skipped = in.skip(n);
204       } catch (FileNotFoundException e) {
205         skipped = tryOpen().skip(n);
206       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
207         skipped = tryOpen().skip(n);
208       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
209         skipped = tryOpen().skip(n);
210       }
211 
212       if (skipped > 0) pos += skipped;
213       return skipped;
214     }
215 
216     @Override
217     public int available() throws IOException {
218       try {
219         return in.available();
220       } catch (FileNotFoundException e) {
221         return tryOpen().available();
222       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
223         return tryOpen().available();
224       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
225         return tryOpen().available();
226       }
227     }
228 
229     @Override
230     public void seek(long pos) throws IOException {
231       try {
232         in.seek(pos);
233       } catch (FileNotFoundException e) {
234         tryOpen().seek(pos);
235       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
236         tryOpen().seek(pos);
237       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
238         tryOpen().seek(pos);
239       }
240       this.pos = pos;
241     }
242 
243     @Override
244     public long getPos() throws IOException {
245       return pos;
246     }
247 
248     @Override
249     public boolean seekToNewSource(long targetPos) throws IOException {
250       boolean res;
251       try {
252         res = in.seekToNewSource(targetPos);
253       } catch (FileNotFoundException e) {
254         res = tryOpen().seekToNewSource(targetPos);
255       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
256         res = tryOpen().seekToNewSource(targetPos);
257       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
258         res = tryOpen().seekToNewSource(targetPos);
259       }
260       if (res) pos = targetPos;
261       return res;
262     }
263 
264     @Override
265     public void close() throws IOException {
266       in.close();
267     }
268 
269     @Override
270     public synchronized void mark(int readlimit) {
271     }
272 
273     @Override
274     public synchronized void reset() throws IOException {
275       throw new IOException("mark/reset not supported");
276     }
277 
278     @Override
279     public boolean markSupported() {
280       return false;
281     }
282 
283     @Override
284     public void unbuffer() {
285       if (in == null) {
286         return;
287       }
288       in.unbuffer();
289     }
290 
291     /**
292      * Try to open the file from one of the available locations.
293      *
294      * @return FSDataInputStream stream of the opened file link
295      * @throws IOException on unexpected error, or file not found.
296      */
297     private FSDataInputStream tryOpen() throws IOException {
298       for (Path path: fileLink.getLocations()) {
299         if (path.equals(currentPath)) continue;
300         try {
301           in = fs.open(path, bufferSize);
302           if (pos != 0) in.seek(pos);
303           assert(in.getPos() == pos) : "Link unable to seek to the right position=" + pos;
304           if (LOG.isTraceEnabled()) {
305             if (currentPath == null) {
306               LOG.debug("link open path=" + path);
307             } else {
308               LOG.trace("link switch from path=" + currentPath + " to path=" + path);
309             }
310           }
311           currentPath = path;
312           return(in);
313         } catch (FileNotFoundException e) {
314           // Try another file location
315         } catch (RemoteException re) {
316           IOException ioe = re.unwrapRemoteException(FileNotFoundException.class);
317           if (!(ioe instanceof FileNotFoundException)) throw re;
318         }
319       }
320       throw new FileNotFoundException(this.fileLink.toString());
321     }
322   }
323 
324   private Path[] locations = null;
325 
326   protected FileLink() {
327     this.locations = null;
328   }
329 
330   /**
331    * @param originPath Original location of the file to link
332    * @param alternativePaths Alternative locations to look for the linked file
333    */
334   public FileLink(Path originPath, Path... alternativePaths) {
335     setLocations(originPath, alternativePaths);
336   }
337 
338   /**
339    * @param locations locations to look for the linked file
340    */
341   public FileLink(final Collection<Path> locations) {
342     this.locations = locations.toArray(new Path[locations.size()]);
343   }
344 
345   /**
346    * @return the locations to look for the linked file.
347    */
348   public Path[] getLocations() {
349     return locations;
350   }
351 
352   @Override
353   public String toString() {
354     StringBuilder str = new StringBuilder(getClass().getSimpleName());
355     str.append(" locations=[");
356     for (int i = 0; i < locations.length; ++i) {
357       if (i > 0) str.append(", ");
358       str.append(locations[i].toString());
359     }
360     str.append("]");
361     return str.toString();
362   }
363 
364   /**
365    * @return true if the file pointed by the link exists
366    */
367   public boolean exists(final FileSystem fs) throws IOException {
368     for (int i = 0; i < locations.length; ++i) {
369       if (fs.exists(locations[i])) {
370         return true;
371       }
372     }
373     return false;
374   }
375 
376   /**
377    * @return the path of the first available link.
378    */
379   public Path getAvailablePath(FileSystem fs) throws IOException {
380     for (int i = 0; i < locations.length; ++i) {
381       if (fs.exists(locations[i])) {
382         return locations[i];
383       }
384     }
385     throw new FileNotFoundException(toString());
386   }
387 
388   /**
389    * Get the FileStatus of the referenced file.
390    *
391    * @param fs {@link FileSystem} on which to get the file status
392    * @return InputStream for the hfile link.
393    * @throws IOException on unexpected error.
394    */
395   public FileStatus getFileStatus(FileSystem fs) throws IOException {
396     for (int i = 0; i < locations.length; ++i) {
397       try {
398         return fs.getFileStatus(locations[i]);
399       } catch (FileNotFoundException e) {
400         // Try another file location
401       }
402     }
403     throw new FileNotFoundException(toString());
404   }
405 
406   /**
407    * Open the FileLink for read.
408    * <p>
409    * It uses a wrapper of FSDataInputStream that is agnostic to the location
410    * of the file, even if the file switches between locations.
411    *
412    * @param fs {@link FileSystem} on which to open the FileLink
413    * @return InputStream for reading the file link.
414    * @throws IOException on unexpected error.
415    */
416   public FSDataInputStream open(final FileSystem fs) throws IOException {
417     return new FSDataInputStream(new FileLinkInputStream(fs, this));
418   }
419 
420   /**
421    * Open the FileLink for read.
422    * <p>
423    * It uses a wrapper of FSDataInputStream that is agnostic to the location
424    * of the file, even if the file switches between locations.
425    *
426    * @param fs {@link FileSystem} on which to open the FileLink
427    * @param bufferSize the size of the buffer to be used.
428    * @return InputStream for reading the file link.
429    * @throws IOException on unexpected error.
430    */
431   public FSDataInputStream open(final FileSystem fs, int bufferSize) throws IOException {
432     return new FSDataInputStream(new FileLinkInputStream(fs, this, bufferSize));
433   }
434 
435   /**
436    * NOTE: This method must be used only in the constructor!
437    * It creates a List with the specified locations for the link.
438    */
439   protected void setLocations(Path originPath, Path... alternativePaths) {
440     assert this.locations == null : "Link locations already set";
441 
442     List<Path> paths = new ArrayList<Path>(alternativePaths.length +1);
443     if (originPath != null) {
444       paths.add(originPath);
445     }
446 
447     for (int i = 0; i < alternativePaths.length; i++) {
448       if (alternativePaths[i] != null) {
449         paths.add(alternativePaths[i]);
450       }
451     }
452     this.locations = paths.toArray(new Path[0]);
453   }
454 
455   /**
456    * Get the directory to store the link back references
457    *
458    * <p>To simplify the reference count process, during the FileLink creation
459    * a back-reference is added to the back-reference directory of the specified file.
460    *
461    * @param storeDir Root directory for the link reference folder
462    * @param fileName File Name with links
463    * @return Path for the link back references.
464    */
465   public static Path getBackReferencesDir(final Path storeDir, final String fileName) {
466     return new Path(storeDir, BACK_REFERENCES_DIRECTORY_PREFIX + fileName);
467   }
468 
469   /**
470    * Get the referenced file name from the reference link directory path.
471    *
472    * @param dirPath Link references directory path
473    * @return Name of the file referenced
474    */
475   public static String getBackReferenceFileName(final Path dirPath) {
476     return dirPath.getName().substring(BACK_REFERENCES_DIRECTORY_PREFIX.length());
477   }
478 
479   /**
480    * Checks if the specified directory path is a back reference links folder.
481    *
482    * @param dirPath Directory path to verify
483    * @return True if the specified directory is a link references folder
484    */
485   public static boolean isBackReferencesDir(final Path dirPath) {
486     if (dirPath == null) return false;
487     return dirPath.getName().startsWith(BACK_REFERENCES_DIRECTORY_PREFIX);
488   }
489 
490   @Override
491   public boolean equals(Object obj) {
492     if (obj == null) {
493       return false;
494     }
495     // Assumes that the ordering of locations between objects are the same. This is true for the
496     // current subclasses already (HFileLink, WALLink). Otherwise, we may have to sort the locations
497     // or keep them presorted
498     if (this.getClass().equals(obj.getClass())) {
499       return Arrays.equals(this.locations, ((FileLink) obj).locations);
500     }
501 
502     return false;
503   }
504 
505   @Override
506   public int hashCode() {
507     return Arrays.hashCode(locations);
508   }
509 }
510