View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations under
15   * the License.
16   */
17  package org.apache.hadoop.hbase.io.compress;
18  
19  import java.io.BufferedInputStream;
20  import java.io.BufferedOutputStream;
21  import java.io.FilterOutputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.io.OutputStream;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.classification.InterfaceAudience;
29  import org.apache.hadoop.hbase.classification.InterfaceStability;
30  import org.apache.hadoop.conf.Configurable;
31  import org.apache.hadoop.conf.Configuration;
32  import org.apache.hadoop.io.IOUtils;
33  import org.apache.hadoop.io.compress.CodecPool;
34  import org.apache.hadoop.io.compress.CompressionCodec;
35  import org.apache.hadoop.io.compress.CompressionInputStream;
36  import org.apache.hadoop.io.compress.CompressionOutputStream;
37  import org.apache.hadoop.io.compress.Compressor;
38  import org.apache.hadoop.io.compress.Decompressor;
39  import org.apache.hadoop.io.compress.DefaultCodec;
40  import org.apache.hadoop.io.compress.DoNotPool;
41  import org.apache.hadoop.io.compress.GzipCodec;
42  import org.apache.hadoop.util.ReflectionUtils;
43  
44  /**
45   * Compression related stuff.
46   * Copied from hadoop-3315 tfile.
47   */
48  @InterfaceAudience.Private
49  public final class Compression {
50    private static final Log LOG = LogFactory.getLog(Compression.class);
51  
52    /**
53     * Prevent the instantiation of class.
54     */
55    private Compression() {
56      super();
57    }
58  
59    static class FinishOnFlushCompressionStream extends FilterOutputStream {
60      public FinishOnFlushCompressionStream(CompressionOutputStream cout) {
61        super(cout);
62      }
63  
64      @Override
65      public void write(byte b[], int off, int len) throws IOException {
66        out.write(b, off, len);
67      }
68  
69      @Override
70      public void flush() throws IOException {
71        CompressionOutputStream cout = (CompressionOutputStream) out;
72        cout.finish();
73        cout.flush();
74        cout.resetState();
75      }
76    }
77  
78    /**
79     * Returns the classloader to load the Codec class from.
80     */
81    private static ClassLoader getClassLoaderForCodec() {
82      ClassLoader cl = Thread.currentThread().getContextClassLoader();
83      if (cl == null) {
84        cl = Compression.class.getClassLoader();
85      }
86      if (cl == null) {
87        cl = ClassLoader.getSystemClassLoader();
88      }
89      if (cl == null) {
90        throw new RuntimeException("A ClassLoader to load the Codec could not be determined");
91      }
92      return cl;
93    }
94  
95    /**
96     * Compression algorithms. The ordinal of these cannot change or else you
97     * risk breaking all existing HFiles out there.  Even the ones that are
98     * not compressed! (They use the NONE algorithm)
99     */
100   @edu.umd.cs.findbugs.annotations.SuppressWarnings(
101       value="SE_TRANSIENT_FIELD_NOT_RESTORED",
102       justification="We are not serializing so doesn't apply (not sure why transient though)")
103   @InterfaceAudience.Public
104   @InterfaceStability.Evolving
105   public static enum Algorithm {
106     LZO("lzo") {
107       // Use base type to avoid compile-time dependencies.
108       private volatile transient CompressionCodec lzoCodec;
109       private final transient Object lock = new Object();
110 
111       @Override
112       CompressionCodec getCodec(Configuration conf) {
113         if (lzoCodec == null) {
114           synchronized (lock) {
115             if (lzoCodec == null) {
116               lzoCodec = buildCodec(conf);
117             }
118           }
119         }
120         return lzoCodec;
121       }
122 
123       private CompressionCodec buildCodec(Configuration conf) {
124         try {
125           Class<?> externalCodec =
126               getClassLoaderForCodec().loadClass("com.hadoop.compression.lzo.LzoCodec");
127           return (CompressionCodec) ReflectionUtils.newInstance(externalCodec,
128               new Configuration(conf));
129         } catch (ClassNotFoundException e) {
130           throw new RuntimeException(e);
131         }
132       }
133     },
134     GZ("gz") {
135       private volatile transient GzipCodec codec;
136       private final transient Object lock = new Object();
137 
138       @Override
139       DefaultCodec getCodec(Configuration conf) {
140         if (codec == null) {
141           synchronized (lock) {
142             if (codec == null) {
143               codec = buildCodec(conf);
144             }
145           }
146         }
147 
148         return codec;
149       }
150 
151       private GzipCodec buildCodec(Configuration conf) {
152         GzipCodec gzcodec = new ReusableStreamGzipCodec();
153         gzcodec.setConf(new Configuration(conf));
154         return gzcodec;
155       }
156     },
157 
158     NONE("none") {
159       @Override
160       DefaultCodec getCodec(Configuration conf) {
161         return null;
162       }
163 
164       @Override
165       public synchronized InputStream createDecompressionStream(
166           InputStream downStream, Decompressor decompressor,
167           int downStreamBufferSize) throws IOException {
168         if (downStreamBufferSize > 0) {
169           return new BufferedInputStream(downStream, downStreamBufferSize);
170         }
171         return downStream;
172       }
173 
174       @Override
175       public synchronized OutputStream createCompressionStream(
176           OutputStream downStream, Compressor compressor,
177           int downStreamBufferSize) throws IOException {
178         if (downStreamBufferSize > 0) {
179           return new BufferedOutputStream(downStream, downStreamBufferSize);
180         }
181 
182         return downStream;
183       }
184     },
185     SNAPPY("snappy") {
186       // Use base type to avoid compile-time dependencies.
187       private volatile transient CompressionCodec snappyCodec;
188       private final transient Object lock = new Object();
189 
190       @Override
191       CompressionCodec getCodec(Configuration conf) {
192         if (snappyCodec == null) {
193           synchronized (lock) {
194             if (snappyCodec == null) {
195               snappyCodec = buildCodec(conf);
196             }
197           }
198         }
199         return snappyCodec;
200       }
201 
202       private CompressionCodec buildCodec(Configuration conf) {
203         try {
204           Class<?> externalCodec =
205               getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.SnappyCodec");
206           return (CompressionCodec) ReflectionUtils.newInstance(externalCodec, conf);
207         } catch (ClassNotFoundException e) {
208           throw new RuntimeException(e);
209         }
210       }
211     },
212     LZ4("lz4") {
213       // Use base type to avoid compile-time dependencies.
214       private volatile transient CompressionCodec lz4Codec;
215       private final transient Object lock = new Object();
216 
217       @Override
218       CompressionCodec getCodec(Configuration conf) {
219         if (lz4Codec == null) {
220           synchronized (lock) {
221             if (lz4Codec == null) {
222               lz4Codec = buildCodec(conf);
223             }
224           }
225         }
226         return lz4Codec;
227       }
228 
229       private CompressionCodec buildCodec(Configuration conf) {
230         try {
231           Class<?> externalCodec =
232               getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.Lz4Codec");
233           return (CompressionCodec) ReflectionUtils.newInstance(externalCodec, conf);
234         } catch (ClassNotFoundException e) {
235           throw new RuntimeException(e);
236         }
237       }
238     },
239     BZIP2("bzip2") {
240       // Use base type to avoid compile-time dependencies.
241       private volatile transient CompressionCodec bzipCodec;
242       private final transient Object lock = new Object();
243 
244       @Override
245       CompressionCodec getCodec(Configuration conf) {
246         if (bzipCodec == null) {
247           synchronized (lock) {
248             if (bzipCodec == null) {
249               bzipCodec = buildCodec(conf);
250             }
251           }
252         }
253         return bzipCodec;
254       }
255 
256       private CompressionCodec buildCodec(Configuration conf) {
257         try {
258           Class<?> externalCodec =
259               getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.BZip2Codec");
260           return (CompressionCodec) ReflectionUtils.newInstance(externalCodec, conf);
261         } catch (ClassNotFoundException e) {
262           throw new RuntimeException(e);
263         }
264       }
265     },
266     ZSTD("zstd") {
267       // Use base type to avoid compile-time dependencies.
268       private volatile transient CompressionCodec zStandardCodec;
269       private final transient Object lock = new Object();
270 
271       @Override
272       CompressionCodec getCodec(Configuration conf) {
273         if (zStandardCodec == null) {
274           synchronized (lock) {
275             if (zStandardCodec == null) {
276               zStandardCodec = buildCodec(conf);
277             }
278           }
279         }
280         return zStandardCodec;
281       }
282 
283       private CompressionCodec buildCodec(Configuration conf) {
284         try {
285           Class<?> externalCodec =
286               getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.ZStandardCodec");
287           return (CompressionCodec) ReflectionUtils.newInstance(externalCodec, conf);
288         } catch (ClassNotFoundException e) {
289           throw new RuntimeException(e);
290         }
291       }
292     };
293 
294     private final transient Configuration conf; // FindBugs: SE_BAD_FIELD so just made it transient
295     private final String compressName;
296     /** data input buffer size to absorb small reads from application. */
297     private static final int DATA_IBUF_SIZE = 1 * 1024;
298     /** data output buffer size to absorb small writes from application. */
299     private static final int DATA_OBUF_SIZE = 4 * 1024;
300 
301     Algorithm(String name) {
302       this.conf = new Configuration();
303       this.conf.setBoolean("io.native.lib.available", true);
304       this.compressName = name;
305     }
306 
307     abstract CompressionCodec getCodec(Configuration conf);
308 
309     public InputStream createDecompressionStream(
310         InputStream downStream, Decompressor decompressor,
311         int downStreamBufferSize) throws IOException {
312       CompressionCodec codec = getCodec(conf);
313       // Set the internal buffer size to read from down stream.
314       if (downStreamBufferSize > 0) {
315         ((Configurable)codec).getConf().setInt("io.file.buffer.size",
316             downStreamBufferSize);
317       }
318       CompressionInputStream cis =
319           codec.createInputStream(downStream, decompressor);
320       BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
321       return bis2;
322 
323     }
324 
325     public OutputStream createCompressionStream(
326         OutputStream downStream, Compressor compressor, int downStreamBufferSize)
327         throws IOException {
328       OutputStream bos1 = null;
329       if (downStreamBufferSize > 0) {
330         bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
331       }
332       else {
333         bos1 = downStream;
334       }
335       CompressionOutputStream cos =
336           createPlainCompressionStream(bos1, compressor);
337       BufferedOutputStream bos2 =
338           new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
339               DATA_OBUF_SIZE);
340       return bos2;
341     }
342 
343     /**
344      * Creates a compression stream without any additional wrapping into
345      * buffering streams.
346      */
347     public CompressionOutputStream createPlainCompressionStream(
348         OutputStream downStream, Compressor compressor) throws IOException {
349       CompressionCodec codec = getCodec(conf);
350       ((Configurable)codec).getConf().setInt("io.file.buffer.size", 32 * 1024);
351       return codec.createOutputStream(downStream, compressor);
352     }
353 
354     public Compressor getCompressor() {
355       CompressionCodec codec = getCodec(conf);
356       if (codec != null) {
357         Compressor compressor = CodecPool.getCompressor(codec);
358         if (LOG.isTraceEnabled()) LOG.trace("Retrieved compressor " + compressor + " from pool.");
359         if (compressor != null) {
360           if (compressor.finished()) {
361             // Somebody returns the compressor to CodecPool but is still using it.
362             LOG.warn("Compressor obtained from CodecPool is already finished()");
363           }
364           compressor.reset();
365         }
366         return compressor;
367       }
368       return null;
369     }
370 
371     public void returnCompressor(Compressor compressor) {
372       if (compressor != null) {
373         if (LOG.isTraceEnabled()) LOG.trace("Returning compressor " + compressor + " to pool.");
374         CodecPool.returnCompressor(compressor);
375       }
376     }
377 
378     public Decompressor getDecompressor() {
379       CompressionCodec codec = getCodec(conf);
380       if (codec != null) {
381         Decompressor decompressor = CodecPool.getDecompressor(codec);
382         if (LOG.isTraceEnabled()) LOG.trace("Retrieved decompressor " + decompressor + " from pool.");
383         if (decompressor != null) {
384           if (decompressor.finished()) {
385             // Somebody returns the decompressor to CodecPool but is still using it.
386             LOG.warn("Deompressor obtained from CodecPool is already finished()");
387           }
388           decompressor.reset();
389         }
390         return decompressor;
391       }
392 
393       return null;
394     }
395 
396     public void returnDecompressor(Decompressor decompressor) {
397       if (decompressor != null) {
398         if (LOG.isTraceEnabled()) LOG.trace("Returning decompressor " + decompressor + " to pool.");
399         CodecPool.returnDecompressor(decompressor);
400         if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) {
401           if (LOG.isTraceEnabled()) LOG.trace("Ending decompressor " + decompressor);
402           decompressor.end();
403         }
404       }
405     }
406 
407     public String getName() {
408       return compressName;
409     }
410   }
411 
412   public static Algorithm getCompressionAlgorithmByName(String compressName) {
413     Algorithm[] algos = Algorithm.class.getEnumConstants();
414 
415     for (Algorithm a : algos) {
416       if (a.getName().equals(compressName)) {
417         return a;
418       }
419     }
420 
421     throw new IllegalArgumentException("Unsupported compression algorithm name: " + compressName);
422   }
423 
424   /**
425    * Get names of supported compression algorithms.
426    *
427    * @return Array of strings, each represents a supported compression
428    * algorithm. Currently, the following compression algorithms are supported.
429    */
430   public static String[] getSupportedAlgorithms() {
431     Algorithm[] algos = Algorithm.class.getEnumConstants();
432 
433     String[] ret = new String[algos.length];
434     int i = 0;
435     for (Algorithm a : algos) {
436       ret[i++] = a.getName();
437     }
438 
439     return ret;
440   }
441 
442   /**
443    * Decompresses data from the given stream using the configured compression
444    * algorithm. It will throw an exception if the dest buffer does not have
445    * enough space to hold the decompressed data.
446    *
447    * @param dest
448    *          the output bytes buffer
449    * @param destOffset
450    *          start writing position of the output buffer
451    * @param bufferedBoundedStream
452    *          a stream to read compressed data from, bounded to the exact amount
453    *          of compressed data
454    * @param compressedSize
455    *          compressed data size, header not included
456    * @param uncompressedSize
457    *          uncompressed data size, header not included
458    * @param compressAlgo
459    *          compression algorithm used
460    * @throws IOException
461    */
462   public static void decompress(byte[] dest, int destOffset,
463       InputStream bufferedBoundedStream, int compressedSize,
464       int uncompressedSize, Compression.Algorithm compressAlgo)
465       throws IOException {
466 
467     if (dest.length - destOffset < uncompressedSize) {
468       throw new IllegalArgumentException(
469           "Output buffer does not have enough space to hold "
470               + uncompressedSize + " decompressed bytes, available: "
471               + (dest.length - destOffset));
472     }
473 
474     Decompressor decompressor = null;
475     try {
476       decompressor = compressAlgo.getDecompressor();
477       InputStream is = compressAlgo.createDecompressionStream(
478           bufferedBoundedStream, decompressor, 0);
479 
480       IOUtils.readFully(is, dest, destOffset, uncompressedSize);
481       is.close();
482     } finally {
483       if (decompressor != null) {
484         compressAlgo.returnDecompressor(decompressor);
485       }
486     }
487   }
488 }