View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.util.regex.Matcher;
25  import java.util.regex.Pattern;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileStatus;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
35  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
36  import org.apache.hadoop.hbase.io.HFileLink;
37  import org.apache.hadoop.hbase.io.HalfStoreFileReader;
38  import org.apache.hadoop.hbase.io.Reference;
39  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
40  import org.apache.hadoop.hbase.util.FSUtils;
41  
42  /**
43   * Describe a StoreFile (hfile, reference, link)
44   */
45  @InterfaceAudience.Private
46  public class StoreFileInfo {
47    private static final Log LOG = LogFactory.getLog(StoreFileInfo.class);
48  
49    /**
50     * A non-capture group, for hfiles, so that this can be embedded.
51     * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
52     */
53    public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:_SeqId_[0-9]+_)?";
54  
55    /** Regex that will work for hfiles */
56    private static final Pattern HFILE_NAME_PATTERN =
57      Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
58  
59    /**
60     * Regex that will work for straight reference names (<hfile>.<parentEncRegion>)
61     * and hfilelink reference names (<table>=<region>-<hfile>.<parentEncRegion>)
62     * If reference, then the regex has more than just one group.
63     * Group 1, hfile/hfilelink pattern, is this file's id.
64     * Group 2 '(.+)' is the reference's parent region name.
65     */
66    private static final Pattern REF_NAME_PATTERN =
67      Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
68        HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
69  
70    // Configuration
71    private Configuration conf;
72  
73    // FileSystem handle
74    private final FileSystem fs;
75  
76    // HDFS blocks distribution information
77    private HDFSBlocksDistribution hdfsBlocksDistribution = null;
78  
79    // If this storefile references another, this is the reference instance.
80    private final Reference reference;
81  
82    // If this storefile is a link to another, this is the link instance.
83    private final HFileLink link;
84  
85    private final Path initialPath;
86  
87    private RegionCoprocessorHost coprocessorHost;
88  
89    // timestamp on when the file was created, is 0 and ignored for reference or link files
90    private long createdTimestamp;
91  
92    /**
93     * Create a Store File Info
94     * @param conf the {@link Configuration} to use
95     * @param fs The current file system to use.
96     * @param initialPath The {@link Path} of the file
97     */
98    public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath)
99        throws IOException {
100     assert fs != null;
101     assert initialPath != null;
102     assert conf != null;
103 
104     this.fs = fs;
105     this.conf = conf;
106     this.initialPath = initialPath;
107     Path p = initialPath;
108     if (HFileLink.isHFileLink(p)) {
109       // HFileLink
110       this.reference = null;
111       this.link = HFileLink.buildFromHFileLinkPattern(conf, p);
112       if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
113     } else if (isReference(p)) {
114       this.reference = Reference.read(fs, p);
115       Path referencePath = getReferredToFile(p);
116       if (HFileLink.isHFileLink(referencePath)) {
117         // HFileLink Reference
118         this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath);
119       } else {
120         // Reference
121         this.link = null;
122       }
123       if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
124               " reference to " + referencePath);
125     } else if (isHFile(p)) {
126       // HFile
127       this.createdTimestamp = fs.getFileStatus(initialPath).getModificationTime();
128       this.reference = null;
129       this.link = null;
130     } else {
131       throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
132     }
133   }
134 
135   /**
136    * Create a Store File Info
137    * @param conf the {@link Configuration} to use
138    * @param fs The current file system to use.
139    * @param fileStatus The {@link FileStatus} of the file
140    */
141   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
142       throws IOException {
143     this(conf, fs, fileStatus.getPath());
144   }
145 
146   /**
147    * Create a Store File Info from an HFileLink
148    * @param conf The {@link Configuration} to use
149    * @param fs The current file system to use
150    * @param fileStatus The {@link FileStatus} of the file
151    */
152   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
153       final HFileLink link) {
154     this.fs = fs;
155     this.conf = conf;
156     // initialPath can be null only if we get a link.
157     this.initialPath = (fileStatus == null) ? null : fileStatus.getPath();
158       // HFileLink
159     this.reference = null;
160     this.link = link;
161   }
162 
163   /**
164    * Create a Store File Info from an HFileLink
165    * @param conf The {@link Configuration} to use
166    * @param fs The current file system to use
167    * @param fileStatus The {@link FileStatus} of the file
168    * @param reference The reference instance
169    */
170   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
171       final Reference reference) {
172     this.fs = fs;
173     this.conf = conf;
174     this.initialPath = fileStatus.getPath();
175     this.createdTimestamp = fileStatus.getModificationTime();
176     this.reference = reference;
177     this.link = null;
178   }
179 
180   /**
181    * Create a Store File Info from an HFileLink and a Reference
182    * @param conf The {@link Configuration} to use
183    * @param fs The current file system to use
184    * @param fileStatus The {@link FileStatus} of the file
185    * @param reference The reference instance
186    * @param link The link instance
187    */
188   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
189       final Reference reference, final HFileLink link) {
190     this.fs = fs;
191     this.conf = conf;
192     this.initialPath = fileStatus.getPath();
193     this.createdTimestamp = fileStatus.getModificationTime();
194     this.reference = reference;
195     this.link = link;
196   }
197 
198   /**
199    * Sets the region coprocessor env.
200    * @param coprocessorHost
201    */
202   public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
203     this.coprocessorHost = coprocessorHost;
204   }
205 
206   /*
207    * @return the Reference object associated to this StoreFileInfo.
208    *         null if the StoreFile is not a reference.
209    */
210   public Reference getReference() {
211     return this.reference;
212   }
213 
214   /** @return True if the store file is a Reference */
215   public boolean isReference() {
216     return this.reference != null;
217   }
218 
219   /** @return True if the store file is a top Reference */
220   public boolean isTopReference() {
221     return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
222   }
223 
224   /** @return True if the store file is a link */
225   public boolean isLink() {
226     return this.link != null && this.reference == null;
227   }
228 
229   /** @return the HDFS block distribution */
230   public HDFSBlocksDistribution getHDFSBlockDistribution() {
231     return this.hdfsBlocksDistribution;
232   }
233 
234   /**
235    * Open a Reader for the StoreFile
236    * @param fs The current file system to use.
237    * @param cacheConf The cache configuration and block cache reference.
238    * @return The StoreFile.Reader for the file
239    */
240   public StoreFile.Reader open(final FileSystem fs,
241       final CacheConfig cacheConf, final boolean canUseDropBehind) throws IOException {
242     FSDataInputStreamWrapper in;
243     FileStatus status;
244 
245     final boolean doDropBehind = canUseDropBehind && cacheConf.shouldDropBehindCompaction();
246     if (this.link != null) {
247       // HFileLink
248       in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind);
249       status = this.link.getFileStatus(fs);
250     } else if (this.reference != null) {
251       // HFile Reference
252       Path referencePath = getReferredToFile(this.getPath());
253       try {
254         in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind);
255       } catch (FileNotFoundException fnfe) {
256         // Intercept the exception so can insert more info about the Reference; otherwise
257         // exception just complains about some random file -- operator doesn't realize it
258         // other end of a Reference
259         FileNotFoundException newFnfe = new FileNotFoundException(toString());
260         newFnfe.initCause(fnfe);
261         throw newFnfe;
262       }
263       status = fs.getFileStatus(referencePath);
264     } else {
265       in = new FSDataInputStreamWrapper(fs, this.getPath(),
266           doDropBehind);
267       status = fs.getFileStatus(initialPath);
268     }
269     long length = status.getLen();
270     hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
271 
272     StoreFile.Reader reader = null;
273     if (this.coprocessorHost != null) {
274       reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length,
275         cacheConf, reference);
276     }
277     if (reader == null) {
278       if (this.reference != null) {
279         reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference,
280           conf);
281       } else {
282         reader = new StoreFile.Reader(fs, status.getPath(), in, length, cacheConf, conf);
283       }
284     }
285     if (this.coprocessorHost != null) {
286       reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length,
287         cacheConf, reference, reader);
288     }
289     return reader;
290   }
291 
292   /**
293    * Compute the HDFS Block Distribution for this StoreFile
294    */
295   public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
296       throws IOException {
297 
298     // guard against the case where we get the FileStatus from link, but by the time we
299     // call compute the file is moved again
300     if (this.link != null) {
301       FileNotFoundException exToThrow = null;
302       for (int i = 0; i < this.link.getLocations().length; i++) {
303         try {
304           return computeHDFSBlocksDistributionInternal(fs);
305         } catch (FileNotFoundException ex) {
306           // try the other location
307           exToThrow = ex;
308         }
309       }
310       throw exToThrow;
311     } else {
312       return computeHDFSBlocksDistributionInternal(fs);
313     }
314   }
315 
316   private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
317       throws IOException {
318     FileStatus status = getReferencedFileStatus(fs);
319     if (this.reference != null) {
320       return computeRefFileHDFSBlockDistribution(fs, reference, status);
321     } else {
322       return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
323     }
324   }
325 
326   /**
327    * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
328    * @param fs The current file system to use.
329    * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
330    */
331   public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
332     FileStatus status;
333     if (this.reference != null) {
334       if (this.link != null) {
335         FileNotFoundException exToThrow = null;
336         for (int i = 0; i < this.link.getLocations().length; i++) {
337           // HFileLink Reference
338           try {
339             return link.getFileStatus(fs);
340           } catch (FileNotFoundException ex) {
341             // try the other location
342             exToThrow = ex;
343           }
344         }
345         throw exToThrow;
346       } else {
347         // HFile Reference
348         Path referencePath = getReferredToFile(this.getPath());
349         status = fs.getFileStatus(referencePath);
350       }
351     } else {
352       if (this.link != null) {
353         FileNotFoundException exToThrow = null;
354         for (int i = 0; i < this.link.getLocations().length; i++) {
355           // HFileLink
356           try {
357             return link.getFileStatus(fs);
358           } catch (FileNotFoundException ex) {
359             // try the other location
360             exToThrow = ex;
361           }
362         }
363         throw exToThrow;
364       } else {
365         status = fs.getFileStatus(initialPath);
366       }
367     }
368     return status;
369   }
370 
371   /** @return The {@link Path} of the file */
372   public Path getPath() {
373     return initialPath;
374   }
375 
376   /** @return The {@link FileStatus} of the file */
377   public FileStatus getFileStatus() throws IOException {
378     return getReferencedFileStatus(fs);
379   }
380 
381   /** @return Get the modification time of the file. */
382   public long getModificationTime() throws IOException {
383     return getFileStatus().getModificationTime();
384   }
385 
386   @Override
387   public String toString() {
388     return this.getPath() +
389       (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
390   }
391 
392   /**
393    * @param path Path to check.
394    * @return True if the path has format of a HFile.
395    */
396   public static boolean isHFile(final Path path) {
397     return isHFile(path.getName());
398   }
399 
400   public static boolean isHFile(final String fileName) {
401     Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
402     return m.matches() && m.groupCount() > 0;
403   }
404 
405   /**
406    * @param path Path to check.
407    * @return True if the path has format of a HStoreFile reference.
408    */
409   public static boolean isReference(final Path path) {
410     return isReference(path.getName());
411   }
412 
413   /**
414    * @param name file name to check.
415    * @return True if the path has format of a HStoreFile reference.
416    */
417   public static boolean isReference(final String name) {
418     Matcher m = REF_NAME_PATTERN.matcher(name);
419     return m.matches() && m.groupCount() > 1;
420   }
421 
422   /**
423    * @return timestamp when this file was created (as returned by filesystem)
424    */
425   public long getCreatedTimestamp() {
426     return createdTimestamp;
427   }
428 
429   /*
430    * Return path to the file referred to by a Reference.  Presumes a directory
431    * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
432    * @param p Path to a Reference file.
433    * @return Calculated path to parent region file.
434    * @throws IllegalArgumentException when path regex fails to match.
435    */
436   public static Path getReferredToFile(final Path p) {
437     Matcher m = REF_NAME_PATTERN.matcher(p.getName());
438     if (m == null || !m.matches()) {
439       LOG.warn("Failed match of store file name " + p.toString());
440       throw new IllegalArgumentException("Failed match of store file name " +
441           p.toString());
442     }
443 
444     // Other region name is suffix on the passed Reference file name
445     String otherRegion = m.group(2);
446     // Tabledir is up two directories from where Reference was written.
447     Path tableDir = p.getParent().getParent().getParent();
448     String nameStrippedOfSuffix = m.group(1);
449     if (LOG.isDebugEnabled()) {
450       LOG.debug("reference '" + p + "' to region=" + otherRegion
451         + " hfile=" + nameStrippedOfSuffix);
452     }
453 
454     // Build up new path with the referenced region in place of our current
455     // region in the reference path.  Also strip regionname suffix from name.
456     return new Path(new Path(new Path(tableDir, otherRegion),
457       p.getParent().getName()), nameStrippedOfSuffix);
458   }
459 
460   /**
461    * Validate the store file name.
462    * @param fileName name of the file to validate
463    * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
464    */
465   public static boolean validateStoreFileName(final String fileName) {
466     if (HFileLink.isHFileLink(fileName) || isReference(fileName))
467       return(true);
468     return !fileName.contains("-");
469   }
470 
471   /**
472    * Return if the specified file is a valid store file or not.
473    * @param fileStatus The {@link FileStatus} of the file
474    * @return <tt>true</tt> if the file is valid
475    */
476   public static boolean isValid(final FileStatus fileStatus)
477       throws IOException {
478     final Path p = fileStatus.getPath();
479 
480     if (fileStatus.isDirectory())
481       return false;
482 
483     // Check for empty hfile. Should never be the case but can happen
484     // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
485     // NOTE: that the HFileLink is just a name, so it's an empty file.
486     if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
487       LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
488       return false;
489     }
490 
491     return validateStoreFileName(p.getName());
492   }
493 
494   /**
495    * helper function to compute HDFS blocks distribution of a given reference
496    * file.For reference file, we don't compute the exact value. We use some
497    * estimate instead given it might be good enough. we assume bottom part
498    * takes the first half of reference file, top part takes the second half
499    * of the reference file. This is just estimate, given
500    * midkey ofregion != midkey of HFile, also the number and size of keys vary.
501    * If this estimate isn't good enough, we can improve it later.
502    * @param fs  The FileSystem
503    * @param reference  The reference
504    * @param status  The reference FileStatus
505    * @return HDFS blocks distribution
506    */
507   private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
508       final FileSystem fs, final Reference reference, final FileStatus status)
509       throws IOException {
510     if (status == null) {
511       return null;
512     }
513 
514     long start = 0;
515     long length = 0;
516 
517     if (Reference.isTopFileRegion(reference.getFileRegion())) {
518       start = status.getLen()/2;
519       length = status.getLen() - status.getLen()/2;
520     } else {
521       start = 0;
522       length = status.getLen()/2;
523     }
524     return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
525   }
526 
527   @Override
528   public int hashCode() {
529     final int prime = 31;
530     int result = 1;
531     result = prime * result + ((initialPath == null) ? 0 : initialPath.hashCode());
532     result = prime * result + ((link == null) ? 0 : link.hashCode());
533     result = prime * result + ((reference == null) ? 0 : reference.hashCode());
534     return result;
535   }
536 
537   @Override
538   public boolean equals(Object obj) {
539     if (this == obj) {
540       return true;
541     }
542     if (obj == null) {
543       return false;
544     }
545     if (getClass() != obj.getClass()) {
546       return false;
547     }
548     StoreFileInfo other = (StoreFileInfo) obj;
549     if (initialPath == null) {
550       if (other.initialPath != null) {
551         return false;
552       }
553     } else if (!initialPath.equals(other.initialPath)) {
554       return false;
555     }
556     if (link == null) {
557       if (other.link != null) {
558         return false;
559       }
560     } else if (!link.equals(other.link)) {
561       return false;
562     }
563     if (reference == null) {
564       if (other.reference != null) {
565         return false;
566       }
567     } else if (!reference.equals(other.reference)) {
568       return false;
569     }
570     return true;
571   }
572 
573 }