View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  
21  package org.apache.hadoop.hbase.wal;
22  
23  import java.io.IOException;
24  import java.util.Arrays;
25  import java.io.InterruptedIOException;
26  import java.util.Collections;
27  import java.util.List;
28  import java.util.concurrent.atomic.AtomicReference;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.hbase.classification.InterfaceAudience;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FSDataInputStream;
35  import org.apache.hadoop.fs.FileSystem;
36  import org.apache.hadoop.fs.Path;
37  import org.apache.hadoop.hbase.wal.WAL.Reader;
38  import org.apache.hadoop.hbase.wal.WALProvider.Writer;
39  import org.apache.hadoop.hbase.util.CancelableProgressable;
40  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
41  import org.apache.hadoop.hbase.util.LeaseNotRecoveredException;
42  
43  // imports for things that haven't moved from regionserver.wal yet.
44  import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL;
45  import org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader;
46  import org.apache.hadoop.hbase.regionserver.wal.SequenceFileLogReader;
47  import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
48  
49  /**
50   * Entry point for users of the Write Ahead Log.
51   * Acts as the shim between internal use and the particular WALProvider we use to handle wal
52   * requests.
53   *
54   * Configure which provider gets used with the configuration setting "hbase.wal.provider". Available
55   * implementations:
56   * <ul>
57   *   <li><em>defaultProvider</em> : whatever provider is standard for the hbase version. Currently
58   *                                  "filesystem"</li>
59   *   <li><em>filesystem</em> : a provider that will run on top of an implementation of the Hadoop
60   *                             FileSystem interface, normally HDFS.</li>
61   *   <li><em>multiwal</em> : a provider that will use multiple "filesystem" wal instances per region
62   *                           server.</li>
63   * </ul>
64   *
 * Alternatively, you may provide a custom implementation of {@link WALProvider} by class name.
66   */
67  @InterfaceAudience.Private
68  public class WALFactory {
69  
70    private static final Log LOG = LogFactory.getLog(WALFactory.class);
71  
72    /**
73     * Maps between configuration names for providers and implementation classes.
74     */
75    static enum Providers {
76      defaultProvider(DefaultWALProvider.class),
77      filesystem(DefaultWALProvider.class),
78      multiwal(RegionGroupingProvider.class);
79  
80      final Class<? extends WALProvider> clazz;
81  
82      Providers(Class<? extends WALProvider> clazz) {
83        this.clazz = clazz;
84      }
85    }
86  
  /** Configuration key naming the provider used for regular (non-meta) wals. */
  public static final String WAL_PROVIDER = "hbase.wal.provider";
  /** Provider name used when {@link #WAL_PROVIDER} is not set. */
  static final String DEFAULT_WAL_PROVIDER = Providers.defaultProvider.name();

  /** Configuration key naming the provider used for the meta wal. */
  static final String META_WAL_PROVIDER = "hbase.wal.meta_provider";
  /** Provider name used when {@link #META_WAL_PROVIDER} is not set. */
  static final String DEFAULT_META_WAL_PROVIDER = Providers.defaultProvider.name();

  /** Identifier given at construction; the singleton instance uses {@code SINGLETON_ID}. */
  final String factoryId;
  /** Provider for regular wals; null for the reader/writer-only singleton instance. */
  final WALProvider provider;
  // The meta updates are written to a different wal. If this
  // regionserver holds meta regions, then this ref will be non-null.
  // lazily initialized; most RegionServers don't deal with META
  final AtomicReference<WALProvider> metaProvider = new AtomicReference<WALProvider>();

  /**
   * Configuration-specified WAL Reader used when a custom reader is requested.
   * Read from "hbase.regionserver.hlog.reader.impl" by the constructors.
   */
  private final Class<? extends DefaultWALProvider.Reader> logReaderClass;

  /**
   * How long to attempt opening in-recovery wals, in milliseconds.
   * Read from "hbase.hlog.open.timeout" by the constructors.
   */
  private final int timeoutMillis;

  /** Configuration retained for later reader/writer and provider construction. */
  private final Configuration conf;
111 
112   // Used for the singleton WALFactory, see below.
113   private WALFactory(Configuration conf) {
114     // this code is duplicated here so we can keep our members final.
115     // until we've moved reader/writer construction down into providers, this initialization must
116     // happen prior to provider initialization, in case they need to instantiate a reader/writer.
117     timeoutMillis = conf.getInt("hbase.hlog.open.timeout", 300000);
118     /* TODO Both of these are probably specific to the fs wal provider */
119     logReaderClass = conf.getClass("hbase.regionserver.hlog.reader.impl", ProtobufLogReader.class,
120         DefaultWALProvider.Reader.class);
121     this.conf = conf;
122     // end required early initialization
123 
124     // this instance can't create wals, just reader/writers.
125     provider = null;
126     factoryId = SINGLETON_ID;
127   }
128 
129   Class<? extends WALProvider> getProviderClass(String key, String defaultValue) {
130     try {
131       return Providers.valueOf(conf.get(key, defaultValue)).clazz;
132     } catch (IllegalArgumentException exception) {
133       // Fall back to them specifying a class name
134       // Note that the passed default class shouldn't actually be used, since the above only fails
135       // when there is a config value present.
136       return conf.getClass(key, DefaultWALProvider.class, WALProvider.class);
137     }
138   }
139 
140   WALProvider createProvider(Class<? extends WALProvider> clazz,
141       List<WALActionsListener> listeners, String providerId) throws IOException {
142     LOG.info("Instantiating WALProvider of type " + clazz);
143     try {
144       final WALProvider result = clazz.getDeclaredConstructor().newInstance();
145       result.init(this, conf, listeners, providerId);
146       return result;
147     } catch (Exception e) {
148       LOG.error("couldn't set up WALProvider, the configured class is " + clazz);
149       LOG.debug("Exception details for failure to load WALProvider.", e);
150       throw new IOException("couldn't set up WALProvider", e);
151     }
152   }
153 
154   /**
155    * instantiate a provider from a config property.
156    * requires conf to have already been set (as well as anything the provider might need to read).
157    */
158   WALProvider getProvider(final String key, final String defaultValue,
159       final List<WALActionsListener> listeners, final String providerId) throws IOException {
160     Class<? extends WALProvider> clazz = getProviderClass(key, defaultValue);
161     return createProvider(clazz, listeners, providerId);
162   }
163 
164   /**
165    * @param conf must not be null, will keep a reference to read params in later reader/writer
166    *     instances.
167    * @param listeners may be null. will be given to all created wals (and not meta-wals)
168    * @param factoryId a unique identifier for this factory. used i.e. by filesystem implementations
169    *     to make a directory
170    */
171   public WALFactory(final Configuration conf, final List<WALActionsListener> listeners,
172       final String factoryId) throws IOException {
173     // until we've moved reader/writer construction down into providers, this initialization must
174     // happen prior to provider initialization, in case they need to instantiate a reader/writer.
175     timeoutMillis = conf.getInt("hbase.hlog.open.timeout", 300000);
176     /* TODO Both of these are probably specific to the fs wal provider */
177     logReaderClass = conf.getClass("hbase.regionserver.hlog.reader.impl", ProtobufLogReader.class,
178         DefaultWALProvider.Reader.class);
179     this.conf = conf;
180     this.factoryId = factoryId;
181     // end required early initialization
182     if (conf.getBoolean("hbase.regionserver.hlog.enabled", true)) {
183       provider = getProvider(WAL_PROVIDER, DEFAULT_WAL_PROVIDER, listeners, null);
184     } else {
185       // special handling of existing configuration behavior.
186       LOG.warn("Running with WAL disabled.");
187       provider = new DisabledWALProvider();
188       provider.init(this, conf, null, factoryId);
189     }
190   }
191 
192   /**
193    * Shutdown all WALs and clean up any underlying storage.
194    * Use only when you will not need to replay and edits that have gone to any wals from this
195    * factory.
196    */
197   public void close() throws IOException {
198     final WALProvider metaProvider = this.metaProvider.get();
199     if (null != metaProvider) {
200       metaProvider.close();
201     }
202     // close is called on a WALFactory with null provider in the case of contention handling
203     // within the getInstance method.
204     if (null != provider) {
205       provider.close();
206     }
207   }
208 
209   /**
210    * Tell the underlying WAL providers to shut down, but do not clean up underlying storage.
211    * If you are not ending cleanly and will need to replay edits from this factory's wals,
212    * use this method if you can as it will try to leave things as tidy as possible.
213    */
214   public void shutdown() throws IOException {
215     IOException exception = null;
216     final WALProvider metaProvider = this.metaProvider.get();
217     if (null != metaProvider) {
218       try {
219         metaProvider.shutdown();
220       } catch(IOException ioe) {
221         exception = ioe;
222       }
223     }
224     provider.shutdown();
225     if (null != exception) {
226       throw exception;
227     }
228   }
229 
230   public List<WAL> getWALs() throws IOException {
231     return provider.getWALs();
232   }
233 
234   /**
235    * @param identifier may not be null, contents will not be altered
236    * @param namespace could be null, and will use default namespace if null
237    */
238   public WAL getWAL(final byte[] identifier, final byte[] namespace) throws IOException {
239     return provider.getWAL(identifier, namespace);
240   }
241 
242   /**
243    * @param identifier may not be null, contents will not be altered
244    */
245   public WAL getMetaWAL(final byte[] identifier) throws IOException {
246     WALProvider metaProvider = this.metaProvider.get();
247     if (null == metaProvider) {
248       final WALProvider temp = getProvider(META_WAL_PROVIDER, DEFAULT_META_WAL_PROVIDER,
249           Collections.<WALActionsListener>singletonList(new MetricsWAL()),
250           DefaultWALProvider.META_WAL_PROVIDER_ID);
251       if (this.metaProvider.compareAndSet(null, temp)) {
252         metaProvider = temp;
253       } else {
254         // reference must now be to a provider created in another thread.
255         temp.close();
256         metaProvider = this.metaProvider.get();
257       }
258     }
259     return metaProvider.getWAL(identifier, null);
260   }
261 
262   public Reader createReader(final FileSystem fs, final Path path) throws IOException {
263     return createReader(fs, path, (CancelableProgressable)null);
264   }
265 
266   /**
267    * Create a reader for the WAL. If you are reading from a file that's being written to and need
268    * to reopen it multiple times, use {@link WAL.Reader#reset()} instead of this method
269    * then just seek back to the last known good position.
270    * @return A WAL reader.  Close when done with it.
271    * @throws IOException
272    */
273   public Reader createReader(final FileSystem fs, final Path path,
274       CancelableProgressable reporter) throws IOException {
275     return createReader(fs, path, reporter, true);
276   }
277 
278   public Reader createReader(final FileSystem fs, final Path path,
279       CancelableProgressable reporter, boolean allowCustom)
280       throws IOException {
281     Class<? extends DefaultWALProvider.Reader> lrClass =
282         allowCustom ? logReaderClass : ProtobufLogReader.class;
283 
284     try {
285       // A wal file could be under recovery, so it may take several
286       // tries to get it open. Instead of claiming it is corrupted, retry
287       // to open it up to 5 minutes by default.
288       long startWaiting = EnvironmentEdgeManager.currentTime();
289       long openTimeout = timeoutMillis + startWaiting;
290       int nbAttempt = 0;
291       FSDataInputStream stream = null;
292       DefaultWALProvider.Reader reader = null;
293       while (true) {
294         try {
295           if (lrClass != ProtobufLogReader.class) {
296             // User is overriding the WAL reader, let them.
297             reader = lrClass.getDeclaredConstructor().newInstance();
298             reader.init(fs, path, conf, null);
299             return reader;
300           } else {
301             stream = fs.open(path);
302             // Note that zero-length file will fail to read PB magic, and attempt to create
303             // a non-PB reader and fail the same way existing code expects it to. If we get
304             // rid of the old reader entirely, we need to handle 0-size files differently from
305             // merely non-PB files.
306             byte[] magic = new byte[ProtobufLogReader.PB_WAL_MAGIC.length];
307             boolean isPbWal = (stream.read(magic) == magic.length)
308                 && Arrays.equals(magic, ProtobufLogReader.PB_WAL_MAGIC);
309             reader =
310                 isPbWal ? new ProtobufLogReader() : new SequenceFileLogReader();
311             reader.init(fs, path, conf, stream);
312             return reader;
313           }
314         } catch (Exception e) {
315           if (stream != null) {
316             try {
317               stream.close();
318             } catch (IOException exception) {
319               LOG.warn("Could not close DefaultWALProvider.Reader" + exception.getMessage());
320               LOG.debug("exception details", exception);
321             }
322           }
323           if (reader != null) {
324             try {
325               reader.close();
326             } catch (IOException exception) {
327               LOG.warn("Could not close FSDataInputStream" + exception.getMessage());
328               LOG.debug("exception details", exception);
329             }
330           }
331           if (e instanceof IOException) {
332             String msg = e.getMessage();
333             if (msg != null && (msg.contains("Cannot obtain block length")
334                 || msg.contains("Could not obtain the last block")
335                 || msg.matches("Blocklist for [^ ]* has changed.*"))) {
336               if (++nbAttempt == 1) {
337                 LOG.warn("Lease should have recovered. This is not expected. Will retry", e);
338               }
339               if (reporter != null && !reporter.progress()) {
340                 throw new InterruptedIOException("Operation is cancelled");
341               }
342               if (nbAttempt > 2 && openTimeout < EnvironmentEdgeManager.currentTime()) {
343                 LOG.error("Can't open after " + nbAttempt + " attempts and "
344                     + (EnvironmentEdgeManager.currentTime() - startWaiting)
345                     + "ms " + " for " + path);
346               } else {
347                 try {
348                   Thread.sleep(nbAttempt < 3 ? 500 : 1000);
349                   continue; // retry
350                 } catch (InterruptedException ie) {
351                   InterruptedIOException iioe = new InterruptedIOException();
352                   iioe.initCause(ie);
353                   throw iioe;
354                 }
355               }
356               throw new LeaseNotRecoveredException(e);
357             } else {
358               throw e;
359             }
360           }
361 
362           // Rethrow the original exception if we are not retrying due to HDFS-isms.
363           throw e;
364         }
365       }
366     } catch (IOException ie) {
367       throw ie;
368     } catch (Exception e) {
369       throw new IOException("Cannot get log reader", e);
370     }
371   }
372 
373   /**
374    * Create a writer for the WAL.
375    * should be package-private. public only for tests and
376    * {@link org.apache.hadoop.hbase.regionserver.wal.Compressor}
377    * @return A WAL writer.  Close when done with it.
378    * @throws IOException
379    */
380   public Writer createWALWriter(final FileSystem fs, final Path path) throws IOException {
381     return DefaultWALProvider.createWriter(conf, fs, path, false);
382   }
383 
384   /**
385    * should be package-private, visible for recovery testing.
386    * @return an overwritable writer for recovered edits. caller should close.
387    */
388   public Writer createRecoveredEditsWriter(final FileSystem fs, final Path path)
389       throws IOException {
390     return DefaultWALProvider.createWriter(conf, fs, path, true);
391   }
392 
  // The static methods below are currently used where it's impractical to
  // untangle the reliance on state in the filesystem. They rely on a singleton
  // WALFactory that just provides Readers / Writers.
  // For now, the first Configuration object wins. Practically this just impacts the
  // reader/writer class.
  // Holds the singleton; stays empty until getInstance installs one.
  private static final AtomicReference<WALFactory> singleton = new AtomicReference<WALFactory>();
  // factoryId assigned to the singleton instance by the private constructor.
  private static final String SINGLETON_ID = WALFactory.class.getName();
399   
400   // public only for FSHLog and UpgradeTo96
401   public static WALFactory getInstance(Configuration configuration) {
402     WALFactory factory = singleton.get();
403     if (null == factory) {
404       WALFactory temp = new WALFactory(configuration);
405       if (singleton.compareAndSet(null, temp)) {
406         factory = temp;
407       } else {
408         // someone else beat us to initializing
409         try {
410           temp.close();
411         } catch (IOException exception) {
412           LOG.debug("failed to close temporary singleton. ignoring.", exception);
413         }
414         factory = singleton.get();
415       }
416     }
417     return factory;
418   }
419 
420   /**
421    * Create a reader for the given path, accept custom reader classes from conf.
422    * If you already have a WALFactory, you should favor the instance method.
423    * @return a WAL Reader, caller must close.
424    */
425   public static Reader createReader(final FileSystem fs, final Path path,
426       final Configuration configuration) throws IOException {
427     return getInstance(configuration).createReader(fs, path);
428   }
429 
430   /**
431    * Create a reader for the given path, accept custom reader classes from conf.
432    * If you already have a WALFactory, you should favor the instance method.
433    * @return a WAL Reader, caller must close.
434    */
435   static Reader createReader(final FileSystem fs, final Path path,
436       final Configuration configuration, final CancelableProgressable reporter) throws IOException {
437     return getInstance(configuration).createReader(fs, path, reporter);
438   }
439 
440   /**
441    * Create a reader for the given path, ignore custom reader classes from conf.
442    * If you already have a WALFactory, you should favor the instance method.
443    * only public pending move of {@link org.apache.hadoop.hbase.regionserver.wal.Compressor}
444    * @return a WAL Reader, caller must close.
445    */
446   public static Reader createReaderIgnoreCustomClass(final FileSystem fs, final Path path,
447       final Configuration configuration) throws IOException {
448     return getInstance(configuration).createReader(fs, path, null, false);
449   }
450 
451   /**
452    * If you already have a WALFactory, you should favor the instance method.
453    * @return a Writer that will overwrite files. Caller must close.
454    */
455   static Writer createRecoveredEditsWriter(final FileSystem fs, final Path path,
456       final Configuration configuration)
457       throws IOException {
458     return DefaultWALProvider.createWriter(configuration, fs, path, true);
459   }
460 
461   /**
462    * If you already have a WALFactory, you should favor the instance method.
463    * @return a writer that won't overwrite files. Caller must close.
464    */
465   public static Writer createWALWriter(final FileSystem fs, final Path path,
466       final Configuration configuration)
467       throws IOException {
468     return DefaultWALProvider.createWriter(configuration, fs, path, false);
469   }
470 
471   public final WALProvider getWALProvider() {
472     return this.provider;
473   }
474 
475   public final WALProvider getMetaWALProvider() {
476     return this.metaProvider.get();
477   }
478 }