View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations under
15   * the License.
16   */
17  package org.apache.hadoop.hbase.io.hfile;
18  
19  import java.io.BufferedInputStream;
20  import java.io.BufferedOutputStream;
21  import java.io.FilterOutputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.io.OutputStream;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.conf.Configurable;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.io.compress.CodecPool;
31  import org.apache.hadoop.io.compress.CompressionCodec;
32  import org.apache.hadoop.io.compress.CompressionInputStream;
33  import org.apache.hadoop.io.compress.CompressionOutputStream;
34  import org.apache.hadoop.io.compress.Compressor;
35  import org.apache.hadoop.io.compress.Decompressor;
36  import org.apache.hadoop.io.compress.GzipCodec;
37  import org.apache.hadoop.io.compress.DefaultCodec;
38  import org.apache.hadoop.util.ReflectionUtils;
39  
/**
 * Compression-related utilities: the set of compression algorithms supported
 * for HFiles, plus helpers for obtaining pooled (de)compressors and building
 * buffered compression/decompression streams.
 * Copied from hadoop-3315 tfile.
 */
44  public final class Compression {
45    static final Log LOG = LogFactory.getLog(Compression.class);
46  
47    /**
48     * Prevent the instantiation of class.
49     */
50    private Compression() {
51      super();
52    }
53  
54    static class FinishOnFlushCompressionStream extends FilterOutputStream {
55      public FinishOnFlushCompressionStream(CompressionOutputStream cout) {
56        super(cout);
57      }
58  
59      @Override
60      public void write(byte b[], int off, int len) throws IOException {
61        out.write(b, off, len);
62      }
63  
64      @Override
65      public void flush() throws IOException {
66        CompressionOutputStream cout = (CompressionOutputStream) out;
67        cout.finish();
68        cout.flush();
69        cout.resetState();
70      }
71    }
72  
73    /**
74     * Returns the classloader to load the Codec class from.
75     * @return
76     */
77    private static ClassLoader getClassLoaderForCodec() {
78      ClassLoader cl = Thread.currentThread().getContextClassLoader();
79      if (cl == null) {
80        cl = Compression.class.getClassLoader();
81      }
82      if (cl == null) {
83        cl = ClassLoader.getSystemClassLoader();
84      }
85      if (cl == null) {
86        throw new RuntimeException("A ClassLoader to load the Codec could not be determined");
87      }
88      return cl;
89    }
90  
91    /**
92     * Compression algorithms. The ordinal of these cannot change or else you
93     * risk breaking all existing HFiles out there.  Even the ones that are
94     * not compressed! (They use the NONE algorithm)
95     */
96    public static enum Algorithm {
97      LZO("lzo") {
98        // Use base type to avoid compile-time dependencies.
99        private transient CompressionCodec lzoCodec;
100 
101       @Override
102       CompressionCodec getCodec(Configuration conf) {
103         if (lzoCodec == null) {
104           try {
105             Class<?> externalCodec =
106                 getClassLoaderForCodec().loadClass("com.hadoop.compression.lzo.LzoCodec");
107             lzoCodec = (CompressionCodec) ReflectionUtils.newInstance(externalCodec, 
108                 new Configuration(conf));
109           } catch (ClassNotFoundException e) {
110             throw new RuntimeException(e);
111           }
112         }
113         return lzoCodec;
114       }
115     },
116     GZ("gz") {
117       private transient GzipCodec codec;
118 
119       @Override
120       DefaultCodec getCodec(Configuration conf) {
121         if (codec == null) {
122           codec = new GzipCodec();
123           codec.setConf(new Configuration(conf));
124         }
125 
126         return codec;
127       }
128     },
129 
130     NONE("none") {
131       @Override
132       DefaultCodec getCodec(Configuration conf) {
133         return null;
134       }
135 
136       @Override
137       public synchronized InputStream createDecompressionStream(
138           InputStream downStream, Decompressor decompressor,
139           int downStreamBufferSize) throws IOException {
140         if (downStreamBufferSize > 0) {
141           return new BufferedInputStream(downStream, downStreamBufferSize);
142         }
143         // else {
144           // Make sure we bypass FSInputChecker buffer.
145         // return new BufferedInputStream(downStream, 1024);
146         // }
147         // }
148         return downStream;
149       }
150 
151       @Override
152       public synchronized OutputStream createCompressionStream(
153           OutputStream downStream, Compressor compressor,
154           int downStreamBufferSize) throws IOException {
155         if (downStreamBufferSize > 0) {
156           return new BufferedOutputStream(downStream, downStreamBufferSize);
157         }
158 
159         return downStream;
160       }
161     },
162     SNAPPY("snappy") {
163         // Use base type to avoid compile-time dependencies.
164         private transient CompressionCodec snappyCodec;
165 
166         @Override
167         CompressionCodec getCodec(Configuration conf) {
168           if (snappyCodec == null) {
169             try {
170               Class<?> externalCodec =
171                   getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.SnappyCodec");
172               snappyCodec = (CompressionCodec) ReflectionUtils.newInstance(externalCodec, 
173                   conf);
174             } catch (ClassNotFoundException e) {
175               throw new RuntimeException(e);
176             }
177           }
178           return snappyCodec;
179         }
180     };
181 
182     private final Configuration conf;
183     private final String compressName;
184   // data input buffer size to absorb small reads from application.
185     private static final int DATA_IBUF_SIZE = 1 * 1024;
186   // data output buffer size to absorb small writes from application.
187     private static final int DATA_OBUF_SIZE = 4 * 1024;
188 
189     Algorithm(String name) {
190       this.conf = new Configuration();
191       this.conf.setBoolean("hadoop.native.lib", true);
192       this.compressName = name;
193     }
194 
195     abstract CompressionCodec getCodec(Configuration conf);
196 
197     public InputStream createDecompressionStream(
198         InputStream downStream, Decompressor decompressor,
199         int downStreamBufferSize) throws IOException {
200       CompressionCodec codec = getCodec(conf);
201       // Set the internal buffer size to read from down stream.
202       if (downStreamBufferSize > 0) {
203         ((Configurable)codec).getConf().setInt("io.file.buffer.size",
204             downStreamBufferSize);
205       }
206       CompressionInputStream cis =
207           codec.createInputStream(downStream, decompressor);
208       BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
209       return bis2;
210 
211     }
212 
213     public OutputStream createCompressionStream(
214         OutputStream downStream, Compressor compressor, int downStreamBufferSize)
215         throws IOException {
216       CompressionCodec codec = getCodec(conf);
217       OutputStream bos1 = null;
218       if (downStreamBufferSize > 0) {
219         bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
220       }
221       else {
222         bos1 = downStream;
223       }
224       ((Configurable)codec).getConf().setInt("io.file.buffer.size", 32 * 1024);
225       CompressionOutputStream cos =
226           codec.createOutputStream(bos1, compressor);
227       BufferedOutputStream bos2 =
228           new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
229               DATA_OBUF_SIZE);
230       return bos2;
231     }
232 
233     public Compressor getCompressor() {
234       CompressionCodec codec = getCodec(conf);
235       if (codec != null) {
236         Compressor compressor = CodecPool.getCompressor(codec);
237         if (compressor != null) {
238           if (compressor.finished()) {
239             // Somebody returns the compressor to CodecPool but is still using
240             // it.
241             LOG
242                 .warn("Compressor obtained from CodecPool is already finished()");
243             // throw new AssertionError(
244             // "Compressor obtained from CodecPool is already finished()");
245           }
246           compressor.reset();
247         }
248         return compressor;
249       }
250       return null;
251     }
252 
253     public void returnCompressor(Compressor compressor) {
254       if (compressor != null) {
255         CodecPool.returnCompressor(compressor);
256       }
257     }
258 
259     public Decompressor getDecompressor() {
260       CompressionCodec codec = getCodec(conf);
261       if (codec != null) {
262         Decompressor decompressor = CodecPool.getDecompressor(codec);
263         if (decompressor != null) {
264           if (decompressor.finished()) {
265             // Somebody returns the decompressor to CodecPool but is still using
266             // it.
267             LOG
268                 .warn("Deompressor obtained from CodecPool is already finished()");
269             // throw new AssertionError(
270             // "Decompressor obtained from CodecPool is already finished()");
271           }
272           decompressor.reset();
273         }
274         return decompressor;
275       }
276 
277       return null;
278     }
279 
280     public void returnDecompressor(Decompressor decompressor) {
281       if (decompressor != null) {
282         CodecPool.returnDecompressor(decompressor);
283       }
284     }
285 
286     public String getName() {
287       return compressName;
288     }
289   }
290 
291   public static Algorithm getCompressionAlgorithmByName(String compressName) {
292     Algorithm[] algos = Algorithm.class.getEnumConstants();
293 
294     for (Algorithm a : algos) {
295       if (a.getName().equals(compressName)) {
296         return a;
297       }
298     }
299 
300     throw new IllegalArgumentException(
301         "Unsupported compression algorithm name: " + compressName);
302   }
303 
304   static String[] getSupportedAlgorithms() {
305     Algorithm[] algos = Algorithm.class.getEnumConstants();
306 
307     String[] ret = new String[algos.length];
308     int i = 0;
309     for (Algorithm a : algos) {
310       ret[i++] = a.getName();
311     }
312 
313     return ret;
314   }
315 }