View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import com.google.common.primitives.Ints;
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.Comparator;
24  import java.util.List;
25  import java.util.Map;
26  import java.util.TreeMap;
27  import java.util.concurrent.ConcurrentHashMap;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  import org.apache.commons.lang.NotImplementedException;
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.DeprecatedTableDescriptor;
34  import org.apache.hadoop.hbase.classification.InterfaceAudience;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.fs.FSDataInputStream;
37  import org.apache.hadoop.fs.FSDataOutputStream;
38  import org.apache.hadoop.fs.FileStatus;
39  import org.apache.hadoop.fs.FileSystem;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.fs.PathFilter;
42  import org.apache.hadoop.hbase.TableName;
43  import org.apache.hadoop.hbase.exceptions.DeserializationException;
44  import org.apache.hadoop.hbase.HConstants;
45  import org.apache.hadoop.hbase.HTableDescriptor;
46  import org.apache.hadoop.hbase.TableDescriptors;
47  import org.apache.hadoop.hbase.TableInfoMissingException;
48  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
49  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table;
50  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
51  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
52  import org.apache.zookeeper.KeeperException;
53  
54  /**
55   * Implementation of {@link TableDescriptors} that reads descriptors from the
56   * passed filesystem.  It expects descriptors to be in a file in the
57   * {@link #TABLEINFO_DIR} subdir of the table's directory in FS.  Can be read-only
58   *  -- i.e. does not modify the filesystem or can be read and write.
59   *
60   * <p>Also has utility for keeping up the table descriptors tableinfo file.
61   * The table schema file is kept in the {@link #TABLEINFO_DIR} subdir
62   * of the table directory in the filesystem.
63   * It has a {@link #TABLEINFO_FILE_PREFIX} and then a suffix that is the
64   * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>.  This sequenceid
65   * is always increasing.  It starts at zero.  The table schema file with the
66   * highest sequenceid has the most recent schema edit. Usually there is one file
67   * only, the most recent but there may be short periods where there are more
68   * than one file. Old files are eventually cleaned.  Presumption is that there
69   * will not be lots of concurrent clients making table schema edits.  If so,
70   * the below needs a bit of a reworking and perhaps some supporting api in hdfs.
71   */
72  @InterfaceAudience.Private
73  public class FSTableDescriptors implements TableDescriptors {
74    private static final Log LOG = LogFactory.getLog(FSTableDescriptors.class);
75    private final FileSystem fs;
76    private final Path rootdir;
77    private final boolean fsreadonly;
78    private volatile boolean usecache;
79    private volatile boolean fsvisited;
80  
81    long cachehits = 0;
82    long invocations = 0;
83  
84    /** The file name prefix used to store HTD in HDFS  */
85    static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
86    static final String TABLEINFO_DIR = ".tabledesc";
87    static final String TMP_DIR = ".tmp";
88  
89    // This cache does not age out the old stuff.  Thinking is that the amount
90    // of data we keep up in here is so small, no need to do occasional purge.
91    // TODO.
92    private final Map<TableName, HTableDescriptor> cache =
93      new ConcurrentHashMap<TableName, HTableDescriptor>();
94  
95    /**
96     * Table descriptor for <code>hbase:meta</code> catalog table
97     */
98     private final HTableDescriptor metaTableDescriptor;
99  
/**
 * Builds an instance from configuration alone: the filesystem and hbase root
 * directory are both resolved from {@code conf} through {@link FSUtils}.
 *
 * @param conf configuration used to locate the filesystem and root directory
 * @throws IOException if resolving the filesystem or root directory fails
 */
public FSTableDescriptors(final Configuration conf) throws IOException {
  this(conf, FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf));
}
108 
/**
 * Builds an instance over an explicit filesystem and root directory, with
 * filesystem writes permitted and the descriptor cache switched on.
 *
 * @param conf configuration handed through to the full constructor
 * @param fs filesystem holding the table directories
 * @param rootdir hbase root directory on {@code fs}
 * @throws IOException propagated from the full constructor
 */
public FSTableDescriptors(final Configuration conf, final FileSystem fs, final Path rootdir)
    throws IOException {
  this(conf, fs, rootdir, /* fsreadonly */ false, /* usecache */ true);
}
113 
/**
 * Full constructor; the other constructors delegate here.
 *
 * @param conf configuration used to build the <code>hbase:meta</code> descriptor
 * @param fs filesystem holding the table directories
 * @param rootdir hbase root directory on {@code fs}
 * @param fsreadonly true if this instance must not modify the filesystem;
 *          mutating operations then throw instead of writing or deleting
 * @param usecache true to serve descriptors from the in-memory cache when possible
 * @throws IOException if the meta table descriptor cannot be built
 */
public FSTableDescriptors(final Configuration conf, final FileSystem fs,
    final Path rootdir, final boolean fsreadonly, final boolean usecache) throws IOException {
  this.usecache = usecache;
  this.fsreadonly = fsreadonly;
  this.rootdir = rootdir;
  this.fs = fs;
  this.metaTableDescriptor = HTableDescriptor.metaTableDescriptor(conf);
}
127 
128   @Override
129   public void setCacheOn() throws IOException {
130     this.cache.clear();
131     this.usecache = true;
132   }
133 
134   @Override
135   public void setCacheOff() throws IOException {
136     this.usecache = false;
137     this.cache.clear();
138   }
139 
140   public boolean isUsecache() {
141     return this.usecache;
142   }
143 
144   /**
145    * Get the current table descriptor for the given table, or null if none exists.
146    *
147    * Uses a local cache of the descriptor but still checks the filesystem on each call
148    * to see if a newer file has been created since the cached one was read.
149    */
150   @Override
151   public HTableDescriptor get(final TableName tablename)
152   throws IOException {
153     invocations++;
154     if (TableName.META_TABLE_NAME.equals(tablename)) {
155       cachehits++;
156       return metaTableDescriptor;
157     }
158     // hbase:meta is already handled. If some one tries to get the descriptor for
159     // .logs, .oldlogs or .corrupt throw an exception.
160     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tablename.getNameAsString())) {
161        throw new IOException("No descriptor found for non table = " + tablename);
162     }
163 
164     if (usecache) {
165       // Look in cache of descriptors.
166       HTableDescriptor cachedtdm = this.cache.get(tablename);
167       if (cachedtdm != null) {
168         cachehits++;
169         return cachedtdm;
170       }
171     }
172     HTableDescriptor tdmt = null;
173     try {
174       tdmt = getTableDescriptorFromFs(fs, rootdir, tablename, !fsreadonly);
175     } catch (NullPointerException e) {
176       LOG.debug("Exception during readTableDecriptor. Current table name = "
177           + tablename, e);
178     } catch (TableInfoMissingException e) {
179       // ignore. This is regular operation
180     } catch (IOException ioe) {
181       LOG.debug("Exception during readTableDecriptor. Current table name = "
182           + tablename, ioe);
183     }
184     // last HTD written wins
185     if (usecache && tdmt != null) {
186       this.cache.put(tablename, tdmt);
187     }
188 
189     return tdmt;
190   }
191 
192   /**
193    * Returns a map from table name to table descriptor for all tables.
194    */
195   @Override
196   public Map<String, HTableDescriptor> getAll()
197   throws IOException {
198     Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
199 
200     if (fsvisited && usecache) {
201       for (Map.Entry<TableName, HTableDescriptor> entry: this.cache.entrySet()) {
202         htds.put(entry.getKey().toString(), entry.getValue());
203       }
204       // add hbase:meta to the response
205       htds.put(HTableDescriptor.META_TABLEDESC.getTableName().getNameAsString(),
206         HTableDescriptor.META_TABLEDESC);
207     } else {
208       LOG.debug("Fetching table descriptors from the filesystem.");
209       boolean allvisited = true;
210       for (Path d : FSUtils.getTableDirs(fs, rootdir)) {
211         HTableDescriptor htd = null;
212         try {
213           htd = get(FSUtils.getTableName(d));
214         } catch (FileNotFoundException fnfe) {
215           // inability of retrieving one HTD shouldn't stop getting the remaining
216           LOG.warn("Trouble retrieving htd", fnfe);
217         }
218         if (htd == null) {
219           allvisited = false;
220           continue;
221         } else {
222           htds.put(htd.getTableName().getNameAsString(), htd);
223         }
224         fsvisited = allvisited;
225       }
226     }
227     return htds;
228   }
229 
230   /* (non-Javadoc)
231    * @see org.apache.hadoop.hbase.TableDescriptors#getTableDescriptors(org.apache.hadoop.fs.FileSystem, org.apache.hadoop.fs.Path)
232    */
233   @Override
234   public Map<String, HTableDescriptor> getByNamespace(String name)
235   throws IOException {
236     Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
237     List<Path> tableDirs =
238         FSUtils.getLocalTableDirs(fs, FSUtils.getNamespaceDir(rootdir, name));
239     for (Path d: tableDirs) {
240       HTableDescriptor htd = null;
241       try {
242         htd = get(FSUtils.getTableName(d));
243       } catch (FileNotFoundException fnfe) {
244         // inability of retrieving one HTD shouldn't stop getting the remaining
245         LOG.warn("Trouble retrieving htd", fnfe);
246       }
247       if (htd == null) continue;
248       htds.put(FSUtils.getTableName(d).getNameAsString(), htd);
249     }
250     return htds;
251   }
252 
253   /**
254    * Adds (or updates) the table descriptor to the FileSystem
255    * and updates the local cache with it.
256    */
257   @Override
258   public void add(HTableDescriptor htd) throws IOException {
259     if (fsreadonly) {
260       throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
261     }
262     if (TableName.META_TABLE_NAME.equals(htd.getTableName())) {
263       throw new NotImplementedException();
264     }
265     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(htd.getTableName().getNameAsString())) {
266       throw new NotImplementedException(
267         "Cannot add a table descriptor for a reserved subdirectory name: " + htd.getNameAsString());
268     }
269     updateTableDescriptor(htd);
270   }
271 
272   /**
273    * Removes the table descriptor from the local cache and returns it.
274    * If not in read only mode, it also deletes the entire table directory(!)
275    * from the FileSystem.
276    */
277   @Override
278   public HTableDescriptor remove(final TableName tablename)
279   throws IOException {
280     if (fsreadonly) {
281       throw new NotImplementedException("Cannot remove a table descriptor - in read only mode");
282     }
283     Path tabledir = getTableDir(tablename);
284     if (this.fs.exists(tabledir)) {
285       if (!this.fs.delete(tabledir, true)) {
286         throw new IOException("Failed delete of " + tabledir.toString());
287       }
288     }
289     HTableDescriptor descriptor = this.cache.remove(tablename);
290     if (descriptor == null) {
291       return null;
292     } else {
293       return descriptor;
294     }
295   }
296 
297   /**
298    * Checks if a current table info file exists for the given table
299    *
300    * @param tableName name of table
301    * @return true if exists
302    * @throws IOException
303    */
304   public boolean isTableInfoExists(TableName tableName) throws IOException {
305     return getTableInfoPath(tableName) != null;
306   }
307 
308   /**
309    * Find the most current table info file for the given table in the hbase root directory.
310    * @return The file status of the current table info file or null if it does not exist
311    */
312   private FileStatus getTableInfoPath(final TableName tableName) throws IOException {
313     Path tableDir = getTableDir(tableName);
314     return getTableInfoPath(tableDir);
315   }
316 
317   private FileStatus getTableInfoPath(Path tableDir)
318   throws IOException {
319     return getTableInfoPath(fs, tableDir, !fsreadonly);
320   }
321 
322   /**
323    * Find the most current table info file for the table located in the given table directory.
324    *
325    * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
326    * files and takes the 'current' one - meaning the one with the highest sequence number if present
327    * or no sequence number at all if none exist (for backward compatibility from before there
328    * were sequence numbers).
329    *
330    * @return The file status of the current table info file or null if it does not exist
331    * @throws IOException
332    */
333   public static FileStatus getTableInfoPath(FileSystem fs, Path tableDir)
334   throws IOException {
335     return getTableInfoPath(fs, tableDir, false);
336   }
337 
338   /**
339    * Find the most current table info file for the table in the given table directory.
340    *
341    * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
342    * files and takes the 'current' one - meaning the one with the highest sequence number if
343    * present or no sequence number at all if none exist (for backward compatibility from before
344    * there were sequence numbers).
345    * If there are multiple table info files found and removeOldFiles is true it also deletes the
346    * older files.
347    *
348    * @return The file status of the current table info file or null if none exist
349    * @throws IOException
350    */
351   private static FileStatus getTableInfoPath(FileSystem fs, Path tableDir, boolean removeOldFiles)
352   throws IOException {
353     Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
354     return getCurrentTableInfoStatus(fs, tableInfoDir, removeOldFiles);
355   }
356 
357   /**
358    * Find the most current table info file in the given directory
359    *
360    * Looks within the given directory for any table info files
361    * and takes the 'current' one - meaning the one with the highest sequence number if present
362    * or no sequence number at all if none exist (for backward compatibility from before there
363    * were sequence numbers).
364    * If there are multiple possible files found
365    * and the we're not in read only mode it also deletes the older files.
366    *
367    * @return The file status of the current table info file or null if it does not exist
368    * @throws IOException
369    */
370   // only visible for FSTableDescriptorMigrationToSubdir, can be removed with that
371   static FileStatus getCurrentTableInfoStatus(FileSystem fs, Path dir, boolean removeOldFiles)
372   throws IOException {
373     FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
374     if (status == null || status.length < 1) return null;
375     FileStatus mostCurrent = null;
376     for (FileStatus file : status) {
377       if (mostCurrent == null || TABLEINFO_FILESTATUS_COMPARATOR.compare(file, mostCurrent) < 0) {
378         mostCurrent = file;
379       }
380     }
381     if (removeOldFiles && status.length > 1) {
382       // Clean away old versions
383       for (FileStatus file : status) {
384         Path path = file.getPath();
385         if (!file.equals(mostCurrent)) {
386           if (!fs.delete(file.getPath(), false)) {
387             LOG.warn("Failed cleanup of " + path);
388           } else {
389             LOG.debug("Cleaned up old tableinfo file " + path);
390           }
391         }
392       }
393     }
394     return mostCurrent;
395   }
396 
397   /**
398    * Compare {@link FileStatus} instances by {@link Path#getName()}. Returns in
399    * reverse order.
400    */
401   static final Comparator<FileStatus> TABLEINFO_FILESTATUS_COMPARATOR =
402   new Comparator<FileStatus>() {
403     @Override
404     public int compare(FileStatus left, FileStatus right) {
405       return right.compareTo(left);
406     }};
407 
408   /**
409    * Return the table directory in HDFS
410    */
411   Path getTableDir(final TableName tableName) {
412     return FSUtils.getTableDir(rootdir, tableName);
413   }
414 
415   private static final PathFilter TABLEINFO_PATHFILTER = new PathFilter() {
416     @Override
417     public boolean accept(Path p) {
418       // Accept any file that starts with TABLEINFO_NAME
419       return p.getName().startsWith(TABLEINFO_FILE_PREFIX);
420     }};
421 
422   /**
423    * Width of the sequenceid that is a suffix on a tableinfo file.
424    */
425   static final int WIDTH_OF_SEQUENCE_ID = 10;
426 
427   /*
428    * @param number Number to use as suffix.
429    * @return Returns zero-prefixed decimal version of passed
430    * number (Does absolute in case number is negative).
431    */
432   private static String formatTableInfoSequenceId(final int number) {
433     byte [] b = new byte[WIDTH_OF_SEQUENCE_ID];
434     int d = Math.abs(number);
435     for (int i = b.length - 1; i >= 0; i--) {
436       b[i] = (byte)((d % 10) + '0');
437       d /= 10;
438     }
439     return Bytes.toString(b);
440   }
441 
442   /**
443    * Regex to eat up sequenceid suffix on a .tableinfo file.
444    * Use regex because may encounter oldstyle .tableinfos where there is no
445    * sequenceid on the end.
446    */
447   private static final Pattern TABLEINFO_FILE_REGEX =
448     Pattern.compile(TABLEINFO_FILE_PREFIX + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$");
449 
450   /**
451    * @param p Path to a <code>.tableinfo</code> file.
452    * @return The current editid or 0 if none found.
453    */
454   static int getTableInfoSequenceId(final Path p) {
455     if (p == null) return 0;
456     Matcher m = TABLEINFO_FILE_REGEX.matcher(p.getName());
457     if (!m.matches()) throw new IllegalArgumentException(p.toString());
458     String suffix = m.group(2);
459     if (suffix == null || suffix.length() <= 0) return 0;
460     return Integer.parseInt(m.group(2));
461   }
462 
463   /**
464    * @param sequenceid
465    * @return Name of tableinfo file.
466    */
467   static String getTableInfoFileName(final int sequenceid) {
468     return TABLEINFO_FILE_PREFIX + "." + formatTableInfoSequenceId(sequenceid);
469   }
470 
471   /**
472    * Returns the latest table descriptor for the given table directly from the file system
473    * if it exists, bypassing the local cache.
474    * Returns null if it's not found.
475    */
476   public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs,
477     Path hbaseRootDir, TableName tableName) throws IOException {
478     Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
479     return getTableDescriptorFromFs(fs, tableDir);
480   }
481 
482   /**
483    * Returns the latest table descriptor for the table located at the given directory
484    * directly from the file system if it exists.
485    * @throws TableInfoMissingException if there is no descriptor
486    */
487   public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs,
488     Path hbaseRootDir, TableName tableName, boolean rewritePb) throws IOException {
489     Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
490     return getTableDescriptorFromFs(fs, tableDir, rewritePb);
491   }
492   /**
493    * Returns the latest table descriptor for the table located at the given directory
494    * directly from the file system if it exists.
495    * @throws TableInfoMissingException if there is no descriptor
496    */
497   public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir)
498     throws IOException {
499     return getTableDescriptorFromFs(fs, tableDir, false);
500   }
501 
502   /**
503    * Returns the latest table descriptor for the table located at the given directory
504    * directly from the file system if it exists.
505    * @throws TableInfoMissingException if there is no descriptor
506    */
507   public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir,
508     boolean rewritePb) throws IOException {
509     FileStatus status = getTableInfoPath(fs, tableDir, false);
510     if (status == null) {
511       throw new TableInfoMissingException("No table descriptor file under " + tableDir);
512     }
513     return readTableDescriptor(fs, status, rewritePb).first;
514   }
515 
516   /**
517    * Reads the HTableDescriptor from FS. This handles any deprecated TableDescriptor objects from
518    * HBase 1.7.0's faulty serialization and rewrites them on fs. Returns the corresponding
519    * table's State so that caller can populate it back in ZK if needed.
520    */
521   private static Pair<HTableDescriptor, Table> readTableDescriptor(FileSystem fs, FileStatus status,
522       boolean rewritePb) throws IOException {
523     int len = Ints.checkedCast(status.getLen());
524     byte [] content = new byte[len];
525     FSDataInputStream fsDataInputStream = fs.open(status.getPath());
526     try {
527       fsDataInputStream.readFully(content);
528     } finally {
529       fsDataInputStream.close();
530     }
531     HTableDescriptor htd = null;
532     // From deprecated TableDescriptor, if any. Null otherwise.
533     Table tableState = null;
534     try {
535       htd = HTableDescriptor.parseFrom(content);
536     } catch (DeserializationException e) {
537       // we have old HTableDescriptor here
538       try {
539         DeprecatedTableDescriptor dtd = DeprecatedTableDescriptor.parseFrom(content);
540         htd = dtd.getHTableDescriptor();
541         tableState = dtd.getTableState();
542         LOG.warn("Found incompatible table descriptor from 1.7.0 version: "
543           + dtd.getHTableDescriptor().getTableName() + " state: " + tableState.getState().name());
544         if (rewritePb) {
545           LOG.warn("converting to new format for table " + htd.getTableName());
546           rewriteTableDescriptor(fs, status, htd);
547           rewritePb = false; // already rewritten
548         }
549       } catch (DeserializationException e1) {
550         throw new IOException("content=" + Bytes.toShort(content), e1);
551       }
552     }
553     if (rewritePb && !ProtobufUtil.isPBMagicPrefix(content)) {
554       // Convert the file over to be pb before leaving here.
555       rewriteTableDescriptor(fs, status, htd);
556     }
557     return new Pair<>(htd, tableState);
558   }
559 
560   private static void rewriteTableDescriptor(final FileSystem fs, final FileStatus status,
561     final HTableDescriptor td) throws IOException {
562     Path tableInfoDir = status.getPath().getParent();
563     Path tableDir = tableInfoDir.getParent();
564     writeTableDescriptor(fs, td, tableDir, status);
565   }
566 
567   /**
568    * Update table descriptor on the file system
569    * @throws IOException Thrown if failed update.
570    * @throws NotImplementedException if in read only mode
571    */
572   Path updateTableDescriptor(HTableDescriptor htd) throws IOException {
573     if (fsreadonly) {
574       throw new NotImplementedException("Cannot update a table descriptor - in read only mode");
575     }
576     Path tableDir = getTableDir(htd.getTableName());
577     Path p = writeTableDescriptor(fs, htd, tableDir, getTableInfoPath(tableDir));
578     if (p == null) throw new IOException("Failed update");
579     LOG.info("Updated tableinfo=" + p);
580     if (usecache) {
581       this.cache.put(htd.getTableName(), htd);
582     }
583     return p;
584   }
585 
586   /**
587    * Deletes all the table descriptor files from the file system.
588    * Used in unit tests only.
589    * @throws NotImplementedException if in read only mode
590    */
591   public void deleteTableDescriptorIfExists(TableName tableName) throws IOException {
592     if (fsreadonly) {
593       throw new NotImplementedException("Cannot delete a table descriptor - in read only mode");
594     }
595 
596     Path tableDir = getTableDir(tableName);
597     Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
598     deleteTableDescriptorFiles(fs, tableInfoDir, Integer.MAX_VALUE);
599   }
600 
601   /**
602    * Deletes files matching the table info file pattern within the given directory
603    * whose sequenceId is at most the given max sequenceId.
604    */
605   private static void deleteTableDescriptorFiles(FileSystem fs, Path dir, int maxSequenceId)
606   throws IOException {
607     FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
608     for (FileStatus file : status) {
609       Path path = file.getPath();
610       int sequenceId = getTableInfoSequenceId(path);
611       if (sequenceId <= maxSequenceId) {
612         boolean success = FSUtils.delete(fs, path, false);
613         if (success) {
614           LOG.debug("Deleted table descriptor at " + path);
615         } else {
616           LOG.error("Failed to delete descriptor at " + path);
617         }
618       }
619     }
620   }
621 
622   /**
623    * Attempts to write a new table descriptor to the given table's directory.
624    * It first writes it to the .tmp dir then uses an atomic rename to move it into place.
625    * It begins at the currentSequenceId + 1 and tries 10 times to find a new sequence number
626    * not already in use.
627    * Removes the current descriptor file if passed in.
628    *
629    * @return Descriptor file or null if we failed write.
630    */
631   private static Path writeTableDescriptor(final FileSystem fs,
632     final HTableDescriptor htd, final Path tableDir,
633     final FileStatus currentDescriptorFile)
634   throws IOException {
635     // Get temporary dir into which we'll first write a file to avoid half-written file phenomenon.
636     // This directory is never removed to avoid removing it out from under a concurrent writer.
637     Path tmpTableDir = new Path(tableDir, TMP_DIR);
638     Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
639 
640     // What is current sequenceid?  We read the current sequenceid from
641     // the current file.  After we read it, another thread could come in and
642     // compete with us writing out next version of file.  The below retries
643     // should help in this case some but its hard to do guarantees in face of
644     // concurrent schema edits.
645     int currentSequenceId = currentDescriptorFile == null ? 0 :
646       getTableInfoSequenceId(currentDescriptorFile.getPath());
647     int newSequenceId = currentSequenceId;
648 
649     // Put arbitrary upperbound on how often we retry
650     int retries = 10;
651     int retrymax = currentSequenceId + retries;
652     Path tableInfoDirPath = null;
653     do {
654       newSequenceId += 1;
655       String filename = getTableInfoFileName(newSequenceId);
656       Path tempPath = new Path(tmpTableDir, filename);
657       if (fs.exists(tempPath)) {
658         LOG.debug(tempPath + " exists; retrying up to " + retries + " times");
659         continue;
660       }
661       tableInfoDirPath = new Path(tableInfoDir, filename);
662       try {
663         writeHTD(fs, tempPath, htd);
664         fs.mkdirs(tableInfoDirPath.getParent());
665         if (!fs.rename(tempPath, tableInfoDirPath)) {
666           throw new IOException("Failed rename of " + tempPath + " to " + tableInfoDirPath);
667         }
668         LOG.debug("Wrote descriptor into: " + tableInfoDirPath);
669       } catch (IOException ioe) {
670         // Presume clash of names or something; go around again.
671         LOG.debug("Failed write and/or rename; retrying", ioe);
672         if (!FSUtils.deleteDirectory(fs, tempPath)) {
673           LOG.warn("Failed cleanup of " + tempPath);
674         }
675         tableInfoDirPath = null;
676         continue;
677       }
678       break;
679     } while (newSequenceId < retrymax);
680     if (tableInfoDirPath != null) {
681       // if we succeeded, remove old table info files.
682       deleteTableDescriptorFiles(fs, tableInfoDir, newSequenceId - 1);
683     }
684     return tableInfoDirPath;
685   }
686 
687   private static void writeHTD(final FileSystem fs, final Path p, final HTableDescriptor htd)
688   throws IOException {
689     FSDataOutputStream out = fs.create(p, false);
690     try {
691       // We used to write this file out as a serialized HTD Writable followed by two '\n's and then
692       // the toString version of HTD.  Now we just write out the pb serialization.
693       out.write(htd.toByteArray());
694     } finally {
695       out.close();
696     }
697   }
698 
699   /**
700    * Create new HTableDescriptor in HDFS. Happens when we are creating table.
701    * Used by tests.
702    * @return True if we successfully created file.
703    */
704   public boolean createTableDescriptor(HTableDescriptor htd) throws IOException {
705     return createTableDescriptor(htd, false);
706   }
707 
708   /**
709    * Create new HTableDescriptor in HDFS. Happens when we are creating table. If
710    * forceCreation is true then even if previous table descriptor is present it
711    * will be overwritten
712    *
713    * @return True if we successfully created file.
714    */
715   public boolean createTableDescriptor(HTableDescriptor htd, boolean forceCreation)
716   throws IOException {
717     Path tableDir = getTableDir(htd.getTableName());
718     return createTableDescriptorForTableDirectory(tableDir, htd, forceCreation);
719   }
720 
721   /**
722    * Create a new HTableDescriptor in HDFS in the specified table directory. Happens when we create
723    * a new table or snapshot a table.
724    * @param tableDir table directory under which we should write the file
725    * @param htd description of the table to write
726    * @param forceCreation if <tt>true</tt>,then even if previous table descriptor is present it will
727    *          be overwritten
728    * @return <tt>true</tt> if the we successfully created the file, <tt>false</tt> if the file
729    *         already exists and we weren't forcing the descriptor creation.
730    * @throws IOException if a filesystem error occurs
731    */
732   public boolean createTableDescriptorForTableDirectory(Path tableDir,
733       HTableDescriptor htd, boolean forceCreation) throws IOException {
734     if (fsreadonly) {
735       throw new NotImplementedException("Cannot create a table descriptor - in read only mode");
736     }
737     FileStatus status = getTableInfoPath(fs, tableDir);
738     if (status != null) {
739       LOG.debug("Current tableInfoPath = " + status.getPath());
740       if (!forceCreation) {
741         if (fs.exists(status.getPath()) && status.getLen() > 0) {
742           if (readTableDescriptor(fs, status, false).first.equals(htd)) {
743             LOG.debug("TableInfo already exists.. Skipping creation");
744             return false;
745           }
746         }
747       }
748     }
749     Path p = writeTableDescriptor(fs, htd, tableDir, status);
750     return p != null;
751   }
752 
753   /**
754    * Reads all the table descriptors fs and populates any missing TableStates. Should be called once
755    * at HMaster bootstrap before calling any other FSDescriptors methods as they can potentially
756    * overwrite the descriptors states. Not thread safe.
757    */
758   public void repairHBase170TableDescriptors(final ZooKeeperWatcher zkw)
759       throws IOException, KeeperException {
760     LOG.info("Attempting to repair HBase 1.7.0 tables, if any.");
761     for (Path tableDir : FSUtils.getTableDirs(fs, rootdir)) {
762       FileStatus status = getTableInfoPath(fs, tableDir, false);
763       if (status == null) {
764         LOG.warn("No table descriptor file under " + tableDir);
765         continue;
766       }
767       // Read and rewrite the table descriptors from FS, if any.
768       Pair<HTableDescriptor, Table> result = readTableDescriptor(fs, status, true);
769       if (result.second == null) {
770         // No deprecated TableDescriptor
771         continue;
772       }
773       TableName tableName = result.first.getTableName();
774       Table tableState = result.second;
775       LOG.warn("Rewriting ZK Table state for table " + tableName);
776       // Tricky to plumb TSM here, so instead assume ZK based TSM as default and overwrite table
777       // state Znodes.
778       String znode = ZKUtil.joinZNode(zkw.tableZNode, tableName.getNameAsString());
779       if (ZKUtil.checkExists(zkw, znode) != -1) {
780         LOG.warn("Table state znode already exists for table: " + tableName + ". Ignoring.");
781         continue;
782       }
783       ZKUtil.createAndFailSilent(zkw, znode);
784       byte [] data = ProtobufUtil.prependPBMagic(tableState.toByteArray());
785       ZKUtil.setData(zkw, znode, data);
786       LOG.info("Repaired ZK table state for table: " + tableName);
787     }
788   }
789 }
790