View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.io;
20  
21  import java.io.IOException;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.fs.FileSystem;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.hbase.TableName;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
35  import org.apache.hadoop.hbase.util.FSUtils;
36  import org.apache.hadoop.hbase.util.HFileArchiveUtil;
37  import org.apache.hadoop.hbase.util.Pair;
38  
/**
 * HFileLink describes a link to an hfile.
 *
 * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive).
 * HFileLink allows access to the referenced hfile regardless of where it is currently located.
 *
 * <p>Searches for hfiles in the following order and locations:
 * <ul>
 *  <li>/hbase/table/region/cf/hfile</li>
 *  <li>/hbase/.archive/table/region/cf/hfile</li>
 * </ul>
 *
 * The link first checks the original path; if the file is not present there,
 * it falls back to the archived path.
 */
54  @InterfaceAudience.Private
55  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_DOESNT_OVERRIDE_EQUALS",
56    justification="To be fixed but warning suppressed for now")
57  public class HFileLink extends FileLink {
58    private static final Log LOG = LogFactory.getLog(HFileLink.class);
59  
/**
 * A non-capture group, for HFileLink, so that this can be embedded.
 * The HFileLink describes a link to an hfile in a different table/region
 * and the name is in the form: table=region-hfile
 * (optionally prefixed by the namespace: namespace=table=region-hfile).
 * <p>
 * Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name.
 * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
 * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
 * and the bulk loaded (_SeqId_[0-9]+_) hfiles.
 *
 * <p>Here is an example name: /hbase/test/0123/cf/testtb=4567-abcd where 'testtb' is the table
 * name, '4567' is the region name and 'abcd' is the file name.
 */
73    public static final String LINK_NAME_REGEX =
74      String.format("(?:(?:%s=)?)%s=%s-%s",
75        TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
76        HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);
77  
78    /** Define the HFile Link name parser in the form of: table=region-hfile */
79    //made package private for testing
80    static final Pattern LINK_NAME_PATTERN =
81      Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
82        TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
83        HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));
84  
85    /**
86     * The pattern should be used for hfile and reference links
87     * that can be found in /hbase/table/region/family/
88     */
89    private static final Pattern REF_OR_HFILE_LINK_PATTERN =
90      Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$",
91        TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
92        HRegionInfo.ENCODED_REGION_NAME_REGEX));
93  
94    private final Path archivePath;
95    private final Path originPath;
96    private final Path tempPath;
97  
/**
 * Builds a link from the three candidate locations of the referenced hfile.
 *
 * @param originPath Path of the hfile in its original location
 * @param tempPath Path of the hfile in the temporary location
 * @param archivePath Path of the hfile in the archive location
 */
public HFileLink(final Path originPath, final Path tempPath,
                 final Path archivePath) {
  this.originPath = originPath;
  this.archivePath = archivePath;
  this.tempPath = tempPath;

  // Hand the candidate locations over in the order: origin, temp, archive.
  setLocations(originPath, tempPath, archivePath);
}
109 
110   /**
111    * @param conf {@link Configuration} from which to extract specific archive locations
112    * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile)
113    * @throws IOException on unexpected error.
114    */
115   public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern)
116           throws IOException {
117     return buildFromHFileLinkPattern(FSUtils.getRootDir(conf),
118             HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern);
119   }
120 
121   /**
122    * @param rootDir Path to the root directory where hbase files are stored
123    * @param archiveDir Path to the hbase archive directory
124    * @param hFileLinkPattern The path of the HFile Link.
125    */
126   public final static HFileLink buildFromHFileLinkPattern(final Path rootDir,
127                                                           final Path archiveDir,
128                                                           final Path hFileLinkPattern) {
129     Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern);
130     Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
131     Path originPath = new Path(rootDir, hfilePath);
132     Path archivePath = new Path(archiveDir, hfilePath);
133     return new HFileLink(originPath, tempPath, archivePath);
134   }
135 
136   /**
137    * Create an HFileLink relative path for the table/region/family/hfile location
138    * @param table Table name
139    * @param region Region Name
140    * @param family Family Name
141    * @param hfile HFile Name
142    * @return the relative Path to open the specified table/region/family/hfile link
143    */
144   public static Path createPath(final TableName table, final String region,
145                                 final String family, final String hfile) {
146     if (HFileLink.isHFileLink(hfile)) {
147       return new Path(family, hfile);
148     }
149     return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
150   }
151 
152   /**
153    * Create an HFileLink instance from table/region/family/hfile location
154    * @param conf {@link Configuration} from which to extract specific archive locations
155    * @param table Table name
156    * @param region Region Name
157    * @param family Family Name
158    * @param hfile HFile Name
159    * @return Link to the file with the specified table/region/family/hfile location
160    * @throws IOException on unexpected error.
161    */
162   public static HFileLink build(final Configuration conf, final TableName table,
163                                  final String region, final String family, final String hfile)
164           throws IOException {
165     return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile));
166   }
167 
168   /**
169    * @return the origin path of the hfile.
170    */
171   public Path getOriginPath() {
172     return this.originPath;
173   }
174 
175   /**
176    * @return the path of the archived hfile.
177    */
178   public Path getArchivePath() {
179     return this.archivePath;
180   }
181 
182   /**
183    * @param path Path to check.
184    * @return True if the path is a HFileLink.
185    */
186   public static boolean isHFileLink(final Path path) {
187     return isHFileLink(path.getName());
188   }
189 
190 
191   /**
192    * @param fileName File name to check.
193    * @return True if the path is a HFileLink.
194    */
195   public static boolean isHFileLink(String fileName) {
196     Matcher m = LINK_NAME_PATTERN.matcher(fileName);
197     if (!m.matches()) return false;
198     return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
199   }
200 
201   /**
202    * Convert a HFileLink path to a table relative path.
203    * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd
204    *      becomes: /hbase/testtb/4567/cf/abcd
205    *
206    * @param path HFileLink path
207    * @return Relative table path
208    * @throws IOException on unexpected error.
209    */
210   private static Path getHFileLinkPatternRelativePath(final Path path) {
211     // table=region-hfile
212     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
213     if (!m.matches()) {
214       throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!");
215     }
216 
217     // Convert the HFileLink name into a real table/region/cf/hfile path.
218     TableName tableName = TableName.valueOf(m.group(1), m.group(2));
219     String regionName = m.group(3);
220     String hfileName = m.group(4);
221     String familyName = path.getParent().getName();
222     Path tableDir = FSUtils.getTableDir(new Path("./"), tableName);
223     return new Path(tableDir, new Path(regionName, new Path(familyName,
224         hfileName)));
225   }
226 
227   /**
228    * Get the HFile name of the referenced link
229    *
230    * @param fileName HFileLink file name
231    * @return the name of the referenced HFile
232    */
233   public static String getReferencedHFileName(final String fileName) {
234     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
235     if (!m.matches()) {
236       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
237     }
238     return(m.group(4));
239   }
240 
241   /**
242    * Get the Region name of the referenced link
243    *
244    * @param fileName HFileLink file name
245    * @return the name of the referenced Region
246    */
247   public static String getReferencedRegionName(final String fileName) {
248     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
249     if (!m.matches()) {
250       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
251     }
252     return(m.group(3));
253   }
254 
255   /**
256    * Get the Table name of the referenced link
257    *
258    * @param fileName HFileLink file name
259    * @return the name of the referenced Table
260    */
261   public static TableName getReferencedTableName(final String fileName) {
262     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
263     if (!m.matches()) {
264       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
265     }
266     return(TableName.valueOf(m.group(1), m.group(2)));
267   }
268 
269   /**
270    * Create a new HFileLink name
271    *
272    * @param hfileRegionInfo - Linked HFile Region Info
273    * @param hfileName - Linked HFile name
274    * @return file name of the HFile Link
275    */
276   public static String createHFileLinkName(final HRegionInfo hfileRegionInfo,
277       final String hfileName) {
278     return createHFileLinkName(hfileRegionInfo.getTable(),
279             hfileRegionInfo.getEncodedName(), hfileName);
280   }
281 
282   /**
283    * Create a new HFileLink name
284    *
285    * @param tableName - Linked HFile table name
286    * @param regionName - Linked HFile region name
287    * @param hfileName - Linked HFile name
288    * @return file name of the HFile Link
289    */
290   public static String createHFileLinkName(final TableName tableName,
291       final String regionName, final String hfileName) {
292     String s = String.format("%s=%s-%s",
293         tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='),
294         regionName, hfileName);
295     return s;
296   }
297 
298   /**
299    * Create a new HFileLink
300    *
301    * <p>It also adds a back-reference to the hfile back-reference directory
302    * to simplify the reference-count and the cleaning process.
303    *
304    * @param conf {@link Configuration} to read for the archive directory name
305    * @param fs {@link FileSystem} on which to write the HFileLink
306    * @param dstFamilyPath - Destination path (table/region/cf/)
307    * @param hfileRegionInfo - Linked HFile Region Info
308    * @param hfileName - Linked HFile name
309    * @return true if the file is created, otherwise the file exists.
310    * @throws IOException on file or parent directory creation failure
311    */
312   public static boolean create(final Configuration conf, final FileSystem fs,
313       final Path dstFamilyPath, final HRegionInfo hfileRegionInfo,
314       final String hfileName) throws IOException {
315     return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true);
316   }
317 
318   /**
319    * Create a new HFileLink
320    *
321    * <p>It also adds a back-reference to the hfile back-reference directory
322    * to simplify the reference-count and the cleaning process.
323    *
324    * @param conf {@link Configuration} to read for the archive directory name
325    * @param fs {@link FileSystem} on which to write the HFileLink
326    * @param dstFamilyPath - Destination path (table/region/cf/)
327    * @param hfileRegionInfo - Linked HFile Region Info
328    * @param hfileName - Linked HFile name
329    * @param createBackRef - Whether back reference should be created. Defaults to true.
330    * @return true if the file is created, otherwise the file exists.
331    * @throws IOException on file or parent directory creation failure
332    */
333   public static boolean create(final Configuration conf, final FileSystem fs,
334       final Path dstFamilyPath, final HRegionInfo hfileRegionInfo,
335       final String hfileName, final boolean createBackRef) throws IOException {
336     TableName linkedTable = hfileRegionInfo.getTable();
337     String linkedRegion = hfileRegionInfo.getEncodedName();
338     return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef);
339   }
340 
341   /**
342    * Create a new HFileLink
343    *
344    * <p>It also adds a back-reference to the hfile back-reference directory
345    * to simplify the reference-count and the cleaning process.
346    *
347    * @param conf {@link Configuration} to read for the archive directory name
348    * @param fs {@link FileSystem} on which to write the HFileLink
349    * @param dstFamilyPath - Destination path (table/region/cf/)
350    * @param linkedTable - Linked Table Name
351    * @param linkedRegion - Linked Region Name
352    * @param hfileName - Linked HFile name
353    * @return true if the file is created, otherwise the file exists.
354    * @throws IOException on file or parent directory creation failure
355    */
356   public static boolean create(final Configuration conf, final FileSystem fs,
357       final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
358       final String hfileName) throws IOException {
359     return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true);
360   }
361 
362   /**
363    * Create a new HFileLink
364    *
365    * <p>It also adds a back-reference to the hfile back-reference directory
366    * to simplify the reference-count and the cleaning process.
367    *
368    * @param conf {@link Configuration} to read for the archive directory name
369    * @param fs {@link FileSystem} on which to write the HFileLink
370    * @param dstFamilyPath - Destination path (table/region/cf/)
371    * @param linkedTable - Linked Table Name
372    * @param linkedRegion - Linked Region Name
373    * @param hfileName - Linked HFile name
374    * @param createBackRef - Whether back reference should be created. Defaults to true.
375    * @return true if the file is created, otherwise the file exists.
376    * @throws IOException on file or parent directory creation failure
377    */
378   public static boolean create(final Configuration conf, final FileSystem fs,
379       final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
380       final String hfileName, final boolean createBackRef) throws IOException {
381     String familyName = dstFamilyPath.getName();
382     String regionName = dstFamilyPath.getParent().getName();
383     String tableName = FSUtils.getTableName(dstFamilyPath.getParent().getParent())
384         .getNameAsString();
385 
386     String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
387     String refName = createBackReferenceName(tableName, regionName);
388 
389     // Make sure the destination directory exists
390     fs.mkdirs(dstFamilyPath);
391 
392     // Make sure the FileLink reference directory exists
393     Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf,
394           linkedTable, linkedRegion, familyName);
395     Path backRefPath = null;
396     if (createBackRef) {
397       Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
398       fs.mkdirs(backRefssDir);
399 
400       // Create the reference for the link
401       backRefPath = new Path(backRefssDir, refName);
402       fs.createNewFile(backRefPath);
403     }
404     try {
405       // Create the link
406       return fs.createNewFile(new Path(dstFamilyPath, name));
407     } catch (IOException e) {
408       LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
409       // Revert the reference if the link creation failed
410       if (createBackRef) {
411         fs.delete(backRefPath, false);
412       }
413       throw e;
414     }
415   }
416 
417   /**
418    * Create a new HFileLink starting from a hfileLink name
419    *
420    * <p>It also adds a back-reference to the hfile back-reference directory
421    * to simplify the reference-count and the cleaning process.
422    *
423    * @param conf {@link Configuration} to read for the archive directory name
424    * @param fs {@link FileSystem} on which to write the HFileLink
425    * @param dstFamilyPath - Destination path (table/region/cf/)
426    * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
427    * @return true if the file is created, otherwise the file exists.
428    * @throws IOException on file or parent directory creation failure
429    */
430   public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
431       final Path dstFamilyPath, final String hfileLinkName)
432           throws IOException {
433     return createFromHFileLink(conf, fs, dstFamilyPath, hfileLinkName, true);
434   }
435 
436   /**
437    * Create a new HFileLink starting from a hfileLink name
438    *
439    * <p>It also adds a back-reference to the hfile back-reference directory
440    * to simplify the reference-count and the cleaning process.
441    *
442    * @param conf {@link Configuration} to read for the archive directory name
443    * @param fs {@link FileSystem} on which to write the HFileLink
444    * @param dstFamilyPath - Destination path (table/region/cf/)
445    * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
446    * @param createBackRef - Whether back reference should be created. Defaults to true.
447    * @return true if the file is created, otherwise the file exists.
448    * @throws IOException on file or parent directory creation failure
449    */
450   public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
451       final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef)
452           throws IOException {
453     Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
454     if (!m.matches()) {
455       throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
456     }
457     return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)),
458         m.group(3), m.group(4), createBackRef);
459   }
460 
461   /**
462    * Create the back reference name
463    */
464   //package-private for testing
465   static String createBackReferenceName(final String tableNameStr,
466                                         final String regionName) {
467 
468     return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
469   }
470 
471   /**
472    * Get the full path of the HFile referenced by the back reference
473    *
474    * @param rootDir root hbase directory
475    * @param linkRefPath Link Back Reference path
476    * @return full path of the referenced hfile
477    */
478   public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
479     Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
480     TableName linkTableName = p.getFirst();
481     String linkRegionName = p.getSecond();
482 
483     String hfileName = getBackReferenceFileName(linkRefPath.getParent());
484     Path familyPath = linkRefPath.getParent().getParent();
485     Path regionPath = familyPath.getParent();
486     Path tablePath = regionPath.getParent();
487 
488     String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
489             regionPath.getName(), hfileName);
490     Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
491     Path regionDir = new Path(linkTableDir, linkRegionName);
492     return new Path(new Path(regionDir, familyPath.getName()), linkName);
493   }
494 
495   static Pair<TableName, String> parseBackReferenceName(String name) {
496     int separatorIndex = name.indexOf('.');
497     String linkRegionName = name.substring(0, separatorIndex);
498     String tableSubstr = name.substring(separatorIndex + 1)
499         .replace('=', TableName.NAMESPACE_DELIM);
500     TableName linkTableName = TableName.valueOf(tableSubstr);
501     return new Pair<TableName, String>(linkTableName, linkRegionName);
502   }
503 
504   /**
505    * Get the full path of the HFile referenced by the back reference
506    *
507    * @param conf {@link Configuration} to read for the archive directory name
508    * @param linkRefPath Link Back Reference path
509    * @return full path of the referenced hfile
510    * @throws IOException on unexpected error.
511    */
512   public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
513       throws IOException {
514     return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath);
515   }
516 
517 }