/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the
 * License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

/**
 * Compression utilities for HFile: the supported {@link Algorithm}s plus
 * helpers for building compression/decompression streams and for checking
 * pooled compressors and decompressors in and out.
 */
public final class Compression {
  static final Log LOG = LogFactory.getLog(Compression.class);

  /**
   * Prevent instantiation of this utility class.
   */
  private Compression() {
    super();
  }

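  /**
   * A FilterOutputStream whose flush() finishes the current compression
   * block: it calls finish(), flush(), and resetState() on the wrapped
   * CompressionOutputStream, so the bytes written so far form a complete
   * compressed block and the stream remains usable for further writes.
   */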
  static class FinishOnFlushCompressionStream extends FilterOutputStream {
    public FinishOnFlushCompressionStream(CompressionOutputStream cout) {
      super(cout);
    }

    @Override
    public void write(byte[] b, int off, int len) throws IOException {
      out.write(b, off, len);
    }

    @Override
    public void flush() throws IOException {
      CompressionOutputStream cout = (CompressionOutputStream) out;
      cout.finish();
      cout.flush();
      cout.resetState();
    }
  }

  /**
   * Returns the classloader to load the Codec class from: the thread context
   * classloader if set, else this class's loader, else the system loader.
   */
  private static ClassLoader getClassLoaderForCodec() {
    ClassLoader cl = Thread.currentThread().getContextClassLoader();
    if (cl == null) {
      cl = Compression.class.getClassLoader();
    }
    if (cl == null) {
      cl = ClassLoader.getSystemClassLoader();
    }
    if (cl == null) {
      throw new RuntimeException("A ClassLoader to load the Codec could not be determined");
    }
    return cl;
  }

  /**
   * Compression algorithms. The ordinal of these constants is persisted in
   * HFiles, so it must not change.
   */
  public static enum Algorithm {
    LZO("lzo") {
      // Volatile so the lazily created codec is published safely across
      // threads; the codec class is loaded reflectively because LZO is an
      // optional, external dependency.
      private transient volatile CompressionCodec lzoCodec;

      @Override
      CompressionCodec getCodec(Configuration conf) {
        if (lzoCodec == null) {
          try {
            Class<?> externalCodec =
                getClassLoaderForCodec().loadClass("com.hadoop.compression.lzo.LzoCodec");
            lzoCodec = (CompressionCodec) ReflectionUtils.newInstance(externalCodec,
                new Configuration(conf));
          } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
          }
        }
        return lzoCodec;
      }
    },

    GZ("gz") {
      private transient volatile GzipCodec codec;

      @Override
      DefaultCodec getCodec(Configuration conf) {
        if (codec == null) {
          GzipCodec gzip = new GzipCodec();
          gzip.setConf(new Configuration(conf));
          codec = gzip;
        }
        return codec;
      }
    },

    NONE("none") {
      @Override
      DefaultCodec getCodec(Configuration conf) {
        return null;
      }

      @Override
      public synchronized InputStream createDecompressionStream(
          InputStream downStream, Decompressor decompressor,
          int downStreamBufferSize) throws IOException {
        if (downStreamBufferSize > 0) {
          return new BufferedInputStream(downStream, downStreamBufferSize);
        }
        // No codec and no buffering requested: hand back the underlying
        // stream unchanged.
        return downStream;
      }

      @Override
      public synchronized OutputStream createCompressionStream(
          OutputStream downStream, Compressor compressor,
          int downStreamBufferSize) throws IOException {
        if (downStreamBufferSize > 0) {
          return new BufferedOutputStream(downStream, downStreamBufferSize);
        }
        return downStream;
      }
    },

    SNAPPY("snappy") {
      private transient volatile CompressionCodec snappyCodec;

      @Override
      CompressionCodec getCodec(Configuration conf) {
        if (snappyCodec == null) {
          try {
            Class<?> externalCodec =
                getClassLoaderForCodec().loadClass("org.apache.hadoop.io.compress.SnappyCodec");
            snappyCodec = (CompressionCodec) ReflectionUtils.newInstance(externalCodec,
                conf);
          } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
          }
        }
        return snappyCodec;
      }
    };

    private final Configuration conf;
    private final String compressName;

    // data input buffer size to absorb small reads from application.
    private static final int DATA_IBUF_SIZE = 1 * 1024;
    // data output buffer size to absorb small writes from application.
    private static final int DATA_OBUF_SIZE = 4 * 1024;

    Algorithm(String name) {
      this.conf = new Configuration();
      this.conf.setBoolean("hadoop.native.lib", true);
      this.compressName = name;
    }

    abstract CompressionCodec getCodec(Configuration conf);

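    /**
     * Creates a stream that decompresses data read from {@code downStream}.
     * If {@code downStreamBufferSize} is positive it is set as the codec's
     * "io.file.buffer.size"; the result is additionally wrapped in a
     * DATA_IBUF_SIZE BufferedInputStream to absorb small reads.
     */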
    public InputStream createDecompressionStream(
        InputStream downStream, Decompressor decompressor,
        int downStreamBufferSize) throws IOException {
      CompressionCodec codec = getCodec(conf);
      if (downStreamBufferSize > 0) {
        ((Configurable) codec).getConf().setInt("io.file.buffer.size",
            downStreamBufferSize);
      }
      CompressionInputStream cis =
          codec.createInputStream(downStream, decompressor);
      return new BufferedInputStream(cis, DATA_IBUF_SIZE);
    }

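    /**
     * Creates a stream that compresses data written to it before passing it
     * on to {@code downStream}. flush() on the returned stream finishes the
     * current compressed block; see FinishOnFlushCompressionStream.
     */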
    public OutputStream createCompressionStream(
        OutputStream downStream, Compressor compressor, int downStreamBufferSize)
        throws IOException {
      CompressionCodec codec = getCodec(conf);
      OutputStream bos1 = null;
      if (downStreamBufferSize > 0) {
        bos1 = new BufferedOutputStream(downStream, downStreamBufferSize);
      } else {
        bos1 = downStream;
      }
      ((Configurable) codec).getConf().setInt("io.file.buffer.size", 32 * 1024);
      CompressionOutputStream cos =
          codec.createOutputStream(bos1, compressor);
      return new BufferedOutputStream(new FinishOnFlushCompressionStream(cos),
          DATA_OBUF_SIZE);
    }

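    /**
     * Obtains a Compressor for this algorithm from the CodecPool, resetting
     * it before handing it out. Returns null for algorithms with no codec
     * (i.e. NONE).
     */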
    public Compressor getCompressor() {
      CompressionCodec codec = getCodec(conf);
      if (codec != null) {
        Compressor compressor = CodecPool.getCompressor(codec);
        if (compressor != null) {
          if (compressor.finished()) {
            // Somebody returned a compressor to the pool while it was still
            // in use; warn, then reset below so it is safe to reuse.
            LOG.warn("Compressor obtained from CodecPool is already finished()");
          }
          compressor.reset();
        }
        return compressor;
      }
      return null;
    }

    public void returnCompressor(Compressor compressor) {
      if (compressor != null) {
        CodecPool.returnCompressor(compressor);
      }
    }

    public Decompressor getDecompressor() {
      CompressionCodec codec = getCodec(conf);
      if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        if (decompressor != null) {
          if (decompressor.finished()) {
            // Somebody returned a decompressor to the pool while it was still
            // in use; warn, then reset below so it is safe to reuse.
            LOG.warn("Decompressor obtained from CodecPool is already finished()");
          }
          decompressor.reset();
        }
        return decompressor;
      }
      return null;
    }

    public void returnDecompressor(Decompressor decompressor) {
      if (decompressor != null) {
        CodecPool.returnDecompressor(decompressor);
      }
    }

    public String getName() {
      return compressName;
    }
  }

  public static Algorithm getCompressionAlgorithmByName(String compressName) {
    Algorithm[] algos = Algorithm.class.getEnumConstants();

    for (Algorithm a : algos) {
      if (a.getName().equals(compressName)) {
        return a;
      }
    }

    throw new IllegalArgumentException(
        "Unsupported compression algorithm name: " + compressName);
  }

  static String[] getSupportedAlgorithms() {
    Algorithm[] algos = Algorithm.class.getEnumConstants();

    String[] ret = new String[algos.length];
    int i = 0;
    for (Algorithm a : algos) {
      ret[i++] = a.getName();
    }

    return ret;
  }
}
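
/*
 * A minimal usage sketch (not part of the original class): round-trips a
 * byte array through the "gz" algorithm using the stream and pool helpers
 * above. The class name and main() harness are illustrative assumptions; it
 * assumes a runtime where the gzip codec is usable.
 */
class CompressionUsageSketch {
  public static void main(String[] args) throws IOException {
    Compression.Algorithm algo = Compression.getCompressionAlgorithmByName("gz");
    byte[] data = "hello hfile compression".getBytes();

    // Compress: flush() finishes the compressed block (see
    // FinishOnFlushCompressionStream), so the output is complete afterwards.
    java.io.ByteArrayOutputStream compressed = new java.io.ByteArrayOutputStream();
    Compressor compressor = algo.getCompressor();
    try {
      OutputStream os = algo.createCompressionStream(compressed, compressor, 0);
      os.write(data);
      os.flush();
    } finally {
      algo.returnCompressor(compressor);
    }

    // Decompress the block back into a buffer and print it.
    Decompressor decompressor = algo.getDecompressor();
    try {
      InputStream is = algo.createDecompressionStream(
          new java.io.ByteArrayInputStream(compressed.toByteArray()),
          decompressor, 0);
      byte[] buf = new byte[data.length];
      int total = 0, n;
      while (total < buf.length
          && (n = is.read(buf, total, buf.length - total)) > 0) {
        total += n;
      }
      System.out.println(new String(buf, 0, total));
    } finally {
      algo.returnDecompressor(decompressor);
    }
  }
}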