/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 package org.apache.hadoop.hbase.io.compress;
18
19 import java.io.BufferedInputStream;
20 import java.io.BufferedOutputStream;
21 import java.io.FilterOutputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.OutputStream;
25
26 import org.apache.commons.logging.Log;
27 import org.apache.commons.logging.LogFactory;
28 import org.apache.hadoop.hbase.classification.InterfaceAudience;
29 import org.apache.hadoop.hbase.classification.InterfaceStability;
30 import org.apache.hadoop.conf.Configurable;
31 import org.apache.hadoop.conf.Configuration;
32 import org.apache.hadoop.io.IOUtils;
33 import org.apache.hadoop.io.compress.CodecPool;
34 import org.apache.hadoop.io.compress.CompressionCodec;
35 import org.apache.hadoop.io.compress.CompressionInputStream;
36 import org.apache.hadoop.io.compress.CompressionOutputStream;
37 import org.apache.hadoop.io.compress.Compressor;
38 import org.apache.hadoop.io.compress.Decompressor;
39 import org.apache.hadoop.io.compress.DefaultCodec;
40 import org.apache.hadoop.io.compress.DoNotPool;
41 import org.apache.hadoop.io.compress.GzipCodec;
42 import org.apache.hadoop.util.ReflectionUtils;
43
44
45
46
47
48 @InterfaceAudience.Private
49 public final class Compression {
50 private static final Log LOG = LogFactory.getLog(Compression.class);
51
52
53
54
55 private Compression() {
56 super();
57 }
58
59 static class FinishOnFlushCompressionStream extends FilterOutputStream {
60 public FinishOnFlushCompressionStream(CompressionOutputStream cout) {
61 super(cout);
62 }
63
64 @Override
65 public void write(byte b[], int off, int len) throws IOException {
66 out.write(b, off, len);
67 }
68
69 @Override
70 public void flush() throws IOException {
71 CompressionOutputStream cout = (CompressionOutputStream) out;
72 cout.finish();
73 cout.flush();
74 cout.resetState();
75 }
76 }
77
78
79
80
81 private static ClassLoader getClassLoaderForCodec() {
82 ClassLoader cl = Thread.currentThread().getContextClassLoader();
83 if (cl == null) {
84 cl = Compression.class.getClassLoader();
85 }
86 if (cl == null) {
87 cl = ClassLoader.getSystemClassLoader();
88 }
89 if (cl == null) {
90 throw new RuntimeException("A ClassLoader to load the Codec could not be determined");
91 }
92 return cl;
93 }
94
95
96
97
98
99
100 @edu.umd.cs.findbugs.annotations.SuppressWarnings(
101 value="SE_TRANSIENT_FIELD_NOT_RESTORED",
102 justification="We are not serializing so doesn't apply (not sure why transient though)")
103 @InterfaceAudience.Public
104 @InterfaceStability.Evolving
105 public static enum Algorithm {
106 LZO("lzo") {
107
108 private volatile transient CompressionCodec lzoCodec;
109 private final transient Object lock = new Object();
110
111 @Override
112 CompressionCodec getCodec(Configuration conf) {
113 if (lzoCodec == null) {
114 synchronized (lock) {
115 if (lzoCodec == null) {
116 lzoCodec = buildCodec(conf);
117 }
118 }
119 }
120 return lzoCodec;
121 }
122
123 private CompressionCodec buildCodec(Configuration conf) {
124 try {
125 Class<?> externalCodec =
126 getClassLoaderForCodec().loadClass("com.hadoop.compression.lzo.LzoCodec");
127 return (CompressionCodec) ReflectionUtils.newInstance(externalCodec,
128 new Configuration(conf));
129 } catch (ClassNotFoundException e) {
130 throw new RuntimeException(e);
131 }
132 }
133 },
134 GZ("gz") {
135 private volatile transient GzipCodec codec;
136 private final transient Object lock = new Object();
137
138 @Override
139 DefaultCodec getCodec(Configuration conf) {
140 if (codec == null) {
141 synchronized (lock) {
142 if (codec == null) {
143 codec = buildCodec(conf);
144 }
145 }
146 }
147
148 return codec;
149 }
150
151 private GzipCodec buildCodec(Configuration conf) {
152 GzipCodec gzcodec = new ReusableStreamGzipCodec();
153 gzcodec.setConf(new Configuration(conf));
154 return gzcodec;
155 }
156 },
157
158 NONE("none") {
159 @Override
160 DefaultCodec getCodec(Configuration conf) {
161 return null;
162 }
163
164 @Override
165 public synchronized InputStream createDecompressionStream(
166 InputStream downStream, Decompressor decompressor,
167 int downStreamBufferSize) throws IOException {
168 if (downStreamBufferSize > 0) {
169 return new BufferedInputStream(downStream, downStreamBufferSize);
170 }
171 return downStream;
172 }
173
174 @Override
175 public synchronized OutputStream createCompressionStream(
176 OutputStream downStream, Compressor compressor,
177 int downStreamBufferSize) throws IOException {
178 if (downStreamBufferSize > 0) {
179 return new BufferedOutputStream(downStream, downStreamBufferSize);
180 }
181
182 return downStream;
183 }
184 },
185 SNAPPY("snappy") {
186
187 private volatile transient CompressionCodec snappyCodec;
188 private final transient Object lock = new Object();
189
190 @Override
191 CompressionCodec getCodec(Configuration conf) {
192 if (snappyCodec == null) {
193 synchronized (lock) {
194 if (snappyCodec == null) {
195 snappyCodec = buildCodec(conf);
196 }
197 }
198 }
199 return snappyCodec;
200 }
201
202 private CompressionCodec buildCodec(Configuration conf) {
203 try {
204 Class<?> externalCodec =
205 getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.SnappyCodec");
206 return (CompressionCodec) ReflectionUtils.newInstance(externalCodec, conf);
207 } catch (ClassNotFoundException e) {
208 throw new RuntimeException(e);
209 }
210 }
211 },
212 LZ4("lz4") {
213
214 private volatile transient CompressionCodec lz4Codec;
215 private final transient Object lock = new Object();
216
217 @Override
218 CompressionCodec getCodec(Configuration conf) {
219 if (lz4Codec == null) {
220 synchronized (lock) {
221 if (lz4Codec == null) {
222 lz4Codec = buildCodec(conf);
223 }
224 }
225 }
226 return lz4Codec;
227 }
228
229 private CompressionCodec buildCodec(Configuration conf) {
230 try {
231 Class<?> externalCodec =
232 getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.Lz4Codec");
233 return (CompressionCodec) ReflectionUtils.newInstance(externalCodec, conf);
234 } catch (ClassNotFoundException e) {
235 throw new RuntimeException(e);
236 }
237 }
238 },
239 BZIP2("bzip2") {
240
241 private volatile transient CompressionCodec bzipCodec;
242 private final transient Object lock = new Object();
243
244 @Override
245 CompressionCodec getCodec(Configuration conf) {
246 if (bzipCodec == null) {
247 synchronized (lock) {
248 if (bzipCodec == null) {
249 bzipCodec = buildCodec(conf);
250 }
251 }
252 }
253 return bzipCodec;
254 }
255
256 private CompressionCodec buildCodec(Configuration conf) {
257 try {
258 Class<?> externalCodec =
259 getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.BZip2Codec");
260 return (CompressionCodec) ReflectionUtils.newInstance(externalCodec, conf);
261 } catch (ClassNotFoundException e) {
262 throw new RuntimeException(e);
263 }
264 }
265 },
266 ZSTD("zstd") {
267
268 private volatile transient CompressionCodec zStandardCodec;
269 private final transient Object lock = new Object();
270
271 @Override
272 CompressionCodec getCodec(Configuration conf) {
273 if (zStandardCodec == null) {
274 synchronized (lock) {
275 if (zStandardCodec == null) {
276 zStandardCodec = buildCodec(conf);
277 }
278 }
279 }
280 return zStandardCodec;
281 }
282
283 private CompressionCodec buildCodec(Configuration conf) {
284 try {
285 Class<?> externalCodec =
286 getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.ZStandardCodec");
287 return (CompressionCodec) ReflectionUtils.newInstance(externalCodec, conf);
288 } catch (ClassNotFoundException e) {
289 throw new RuntimeException(e);
290 }
291 }
292 };
293
294 private final transient Configuration conf;
295 private final String compressName;
296
297 private static final int DATA_IBUF_SIZE = 1 * 1024;
298
299 private static final int DATA_OBUF_SIZE = 4 * 1024;
300
301 Algorithm(String name) {
302 this.conf = new Configuration();
303 this.conf.setBoolean("io.native.lib.available", true);
304 this.compressName = name;
305 }
306
307 abstract CompressionCodec getCodec(Configuration conf);
308
309 public InputStream createDecompressionStream(
310 InputStream downStream, Decompressor decompressor,
311 int downStreamBufferSize) throws IOException {
312 CompressionCodec codec = getCodec(conf);
313
314 if (downStreamBufferSize > 0) {
315 ((Configurable)codec).getConf().setInt("io.file.buffer.size",
316 downStreamBufferSize);
317 }
318 CompressionInputStream cis =
319 codec.createInputStream(downStream, decompressor);
320 BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
321 return bis2;
322
323 }
324
325 public OutputStream createCompressionStream(
326 OutputStream downStream, Compressor compressor, int downStreamBufferSize)
327 throws IOException {
328 OutputStream bos1 = null;
329 if (downStreamBufferSize > 0) {
330 bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
331 }
332 else {
333 bos1 = downStream;
334 }
335 CompressionOutputStream cos =
336 createPlainCompressionStream(bos1, compressor);
337 BufferedOutputStream bos2 =
338 new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
339 DATA_OBUF_SIZE);
340 return bos2;
341 }
342
343
344
345
346
347 public CompressionOutputStream createPlainCompressionStream(
348 OutputStream downStream, Compressor compressor) throws IOException {
349 CompressionCodec codec = getCodec(conf);
350 ((Configurable)codec).getConf().setInt("io.file.buffer.size", 32 * 1024);
351 return codec.createOutputStream(downStream, compressor);
352 }
353
354 public Compressor getCompressor() {
355 CompressionCodec codec = getCodec(conf);
356 if (codec != null) {
357 Compressor compressor = CodecPool.getCompressor(codec);
358 if (LOG.isTraceEnabled()) LOG.trace("Retrieved compressor " + compressor + " from pool.");
359 if (compressor != null) {
360 if (compressor.finished()) {
361
362 LOG.warn("Compressor obtained from CodecPool is already finished()");
363 }
364 compressor.reset();
365 }
366 return compressor;
367 }
368 return null;
369 }
370
371 public void returnCompressor(Compressor compressor) {
372 if (compressor != null) {
373 if (LOG.isTraceEnabled()) LOG.trace("Returning compressor " + compressor + " to pool.");
374 CodecPool.returnCompressor(compressor);
375 }
376 }
377
378 public Decompressor getDecompressor() {
379 CompressionCodec codec = getCodec(conf);
380 if (codec != null) {
381 Decompressor decompressor = CodecPool.getDecompressor(codec);
382 if (LOG.isTraceEnabled()) LOG.trace("Retrieved decompressor " + decompressor + " from pool.");
383 if (decompressor != null) {
384 if (decompressor.finished()) {
385
386 LOG.warn("Deompressor obtained from CodecPool is already finished()");
387 }
388 decompressor.reset();
389 }
390 return decompressor;
391 }
392
393 return null;
394 }
395
396 public void returnDecompressor(Decompressor decompressor) {
397 if (decompressor != null) {
398 if (LOG.isTraceEnabled()) LOG.trace("Returning decompressor " + decompressor + " to pool.");
399 CodecPool.returnDecompressor(decompressor);
400 if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) {
401 if (LOG.isTraceEnabled()) LOG.trace("Ending decompressor " + decompressor);
402 decompressor.end();
403 }
404 }
405 }
406
407 public String getName() {
408 return compressName;
409 }
410 }
411
412 public static Algorithm getCompressionAlgorithmByName(String compressName) {
413 Algorithm[] algos = Algorithm.class.getEnumConstants();
414
415 for (Algorithm a : algos) {
416 if (a.getName().equals(compressName)) {
417 return a;
418 }
419 }
420
421 throw new IllegalArgumentException("Unsupported compression algorithm name: " + compressName);
422 }
423
424
425
426
427
428
429
430 public static String[] getSupportedAlgorithms() {
431 Algorithm[] algos = Algorithm.class.getEnumConstants();
432
433 String[] ret = new String[algos.length];
434 int i = 0;
435 for (Algorithm a : algos) {
436 ret[i++] = a.getName();
437 }
438
439 return ret;
440 }
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462 public static void decompress(byte[] dest, int destOffset,
463 InputStream bufferedBoundedStream, int compressedSize,
464 int uncompressedSize, Compression.Algorithm compressAlgo)
465 throws IOException {
466
467 if (dest.length - destOffset < uncompressedSize) {
468 throw new IllegalArgumentException(
469 "Output buffer does not have enough space to hold "
470 + uncompressedSize + " decompressed bytes, available: "
471 + (dest.length - destOffset));
472 }
473
474 Decompressor decompressor = null;
475 try {
476 decompressor = compressAlgo.getDecompressor();
477 InputStream is = compressAlgo.createDecompressionStream(
478 bufferedBoundedStream, decompressor, 0);
479
480 IOUtils.readFully(is, dest, destOffset, uncompressedSize);
481 is.close();
482 } finally {
483 if (decompressor != null) {
484 compressAlgo.returnDecompressor(decompressor);
485 }
486 }
487 }
488 }