View Javadoc

1   /*
2    * Copyright 2011 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.io.hfile;
21  
22  import java.io.ByteArrayInputStream;
23  import java.io.ByteArrayOutputStream;
24  import java.io.DataInputStream;
25  import java.io.DataOutput;
26  import java.io.DataOutputStream;
27  import java.io.IOException;
28  import java.nio.ByteBuffer;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.fs.FSDataInputStream;
33  import org.apache.hadoop.hbase.util.Bytes;
34  import org.apache.hadoop.io.RawComparator;
35  
36  import static org.apache.hadoop.hbase.io.hfile.HFile.MIN_FORMAT_VERSION;
37  import static org.apache.hadoop.hbase.io.hfile.HFile.MAX_FORMAT_VERSION;
38  
39  import com.google.common.io.NullOutputStream;
40  
41  /**
42   * The {@link HFile} has a fixed trailer which contains offsets to other
43   * variable parts of the file. Also includes basic metadata on this file. The
44   * trailer size is fixed within a given {@link HFile} format version only, but
45   * we always store the version number as the last four-byte integer of the file.
46   */
47  public class FixedFileTrailer {
48  
49    private static final Log LOG = LogFactory.getLog(FixedFileTrailer.class);
50  
51    /**
52     * We store the comparator class name as a fixed-length field in the trailer.
53     */
54    private static final int MAX_COMPARATOR_NAME_LENGTH = 128;
55  
56    /**
57     * Offset to the fileinfo data, a small block of vitals. Necessary in v1 but
58     * only potentially useful for pretty-printing in v2.
59     */
60    private long fileInfoOffset;
61  
62    /**
63     * In version 1, the offset to the data block index. Starting from version 2,
64     * the meaning of this field is the offset to the section of the file that
65     * should be loaded at the time the file is being opened, and as of the time
66     * of writing, this happens to be the offset of the file info section.
67     */
68    private long loadOnOpenDataOffset;
69  
70    /** The number of entries in the root data index. */
71    private int dataIndexCount;
72  
73    /** Total uncompressed size of all blocks of the data index */
74    private long uncompressedDataIndexSize;
75  
76    /** The number of entries in the meta index */
77    private int metaIndexCount;
78  
79    /** The total uncompressed size of keys/values stored in the file. */
80    private long totalUncompressedBytes;
81  
82    /**
83     * The number of key/value pairs in the file. This field was int in version 1,
84     * but is now long.
85     */
86    private long entryCount;
87  
88    /** The compression codec used for all blocks. */
89    private Compression.Algorithm compressionCodec = Compression.Algorithm.NONE;
90  
91    /**
92     * The number of levels in the potentially multi-level data index. Used from
93     * version 2 onwards.
94     */
95    private int numDataIndexLevels;
96  
97    /** The offset of the first data block. */
98    private long firstDataBlockOffset;
99  
100   /**
101    * It is guaranteed that no key/value data blocks start after this offset in
102    * the file.
103    */
104   private long lastDataBlockOffset;
105 
106   /** Raw key comparator class name in version 2 */
107   private String comparatorClassName = RawComparator.class.getName();
108 
109   /** The {@link HFile} format version. */
110   private final int version;
111 
112   FixedFileTrailer(int version) {
113     this.version = version;
114     HFile.checkFormatVersion(version);
115   }
116 
117   private static int[] computeTrailerSizeByVersion() {
118     int versionToSize[] = new int[HFile.MAX_FORMAT_VERSION + 1];
119     for (int version = MIN_FORMAT_VERSION;
120          version <= MAX_FORMAT_VERSION;
121          ++version) {
122       FixedFileTrailer fft = new FixedFileTrailer(version);
123       DataOutputStream dos = new DataOutputStream(new NullOutputStream());
124       try {
125         fft.serialize(dos);
126       } catch (IOException ex) {
127         // The above has no reason to fail.
128         throw new RuntimeException(ex);
129       }
130       versionToSize[version] = dos.size();
131     }
132     return versionToSize;
133   }
134 
135   private static int getMaxTrailerSize() {
136     int maxSize = 0;
137     for (int version = MIN_FORMAT_VERSION;
138          version <= MAX_FORMAT_VERSION;
139          ++version)
140       maxSize = Math.max(getTrailerSize(version), maxSize);
141     return maxSize;
142   }
143 
144   private static final int TRAILER_SIZE[] = computeTrailerSizeByVersion();
145   private static final int MAX_TRAILER_SIZE = getMaxTrailerSize();
146 
147   static int getTrailerSize(int version) {
148     return TRAILER_SIZE[version];
149   }
150 
151   public int getTrailerSize() {
152     return getTrailerSize(version);
153   }
154 
155   /**
156    * Write the trailer to a data stream. We support writing version 1 for
157    * testing and for determining version 1 trailer size. It is also easy to see
158    * what fields changed in version 2.
159    *
160    * @param outputStream
161    * @throws IOException
162    */
163   void serialize(DataOutputStream outputStream) throws IOException {
164     HFile.checkFormatVersion(version);
165 
166     ByteArrayOutputStream baos = new ByteArrayOutputStream();
167     DataOutput baosDos = new DataOutputStream(baos);
168 
169     BlockType.TRAILER.write(baosDos);
170     baosDos.writeLong(fileInfoOffset);
171     baosDos.writeLong(loadOnOpenDataOffset);
172     baosDos.writeInt(dataIndexCount);
173 
174     if (version == 1) {
175       // This used to be metaIndexOffset, but it was not used in version 1.
176       baosDos.writeLong(0);
177     } else {
178       baosDos.writeLong(uncompressedDataIndexSize);
179     }
180 
181     baosDos.writeInt(metaIndexCount);
182     baosDos.writeLong(totalUncompressedBytes);
183     if (version == 1) {
184       baosDos.writeInt((int) Math.min(Integer.MAX_VALUE, entryCount));
185     } else {
186       // This field is long from version 2 onwards.
187       baosDos.writeLong(entryCount);
188     }
189     baosDos.writeInt(compressionCodec.ordinal());
190 
191     if (version > 1) {
192       baosDos.writeInt(numDataIndexLevels);
193       baosDos.writeLong(firstDataBlockOffset);
194       baosDos.writeLong(lastDataBlockOffset);
195       Bytes.writeStringFixedSize(baosDos, comparatorClassName,
196           MAX_COMPARATOR_NAME_LENGTH);
197     }
198     baosDos.writeInt(version);
199 
200     outputStream.write(baos.toByteArray());
201   }
202 
203   /**
204    * Deserialize the fixed file trailer from the given stream. The version needs
205    * to already be specified. Make sure this is consistent with
206    * {@link #serialize(DataOutputStream)}.
207    *
208    * @param inputStream
209    * @param version
210    * @throws IOException
211    */
212   void deserialize(DataInputStream inputStream) throws IOException {
213     HFile.checkFormatVersion(version);
214 
215     BlockType.TRAILER.readAndCheck(inputStream);
216 
217     fileInfoOffset = inputStream.readLong();
218     loadOnOpenDataOffset = inputStream.readLong();
219     dataIndexCount = inputStream.readInt();
220 
221     if (version == 1) {
222       inputStream.readLong(); // Read and skip metaIndexOffset.
223     } else {
224       uncompressedDataIndexSize = inputStream.readLong();
225     }
226     metaIndexCount = inputStream.readInt();
227 
228     totalUncompressedBytes = inputStream.readLong();
229     entryCount = version == 1 ? inputStream.readInt() : inputStream.readLong();
230     compressionCodec = Compression.Algorithm.values()[inputStream.readInt()];
231     if (version > 1) {
232       numDataIndexLevels = inputStream.readInt();
233       firstDataBlockOffset = inputStream.readLong();
234       lastDataBlockOffset = inputStream.readLong();
235       comparatorClassName =
236           Bytes.readStringFixedSize(inputStream, MAX_COMPARATOR_NAME_LENGTH);
237     }
238 
239     expectVersion(inputStream.readInt());
240   }
241 
242   private void append(StringBuilder sb, String s) {
243     if (sb.length() > 0)
244       sb.append(", ");
245     sb.append(s);
246   }
247 
248   @Override
249   public String toString() {
250     StringBuilder sb = new StringBuilder();
251     append(sb, "fileinfoOffset=" + fileInfoOffset);
252     append(sb, "loadOnOpenDataOffset=" + loadOnOpenDataOffset);
253     append(sb, "dataIndexCount=" + dataIndexCount);
254     append(sb, "metaIndexCount=" + metaIndexCount);
255     append(sb, "totalUncomressedBytes=" + totalUncompressedBytes);
256     append(sb, "entryCount=" + entryCount);
257     append(sb, "compressionCodec=" + compressionCodec);
258     if (version == 2) {
259       append(sb, "uncompressedDataIndexSize=" + uncompressedDataIndexSize);
260       append(sb, "numDataIndexLevels=" + numDataIndexLevels);
261       append(sb, "firstDataBlockOffset=" + firstDataBlockOffset);
262       append(sb, "lastDataBlockOffset=" + lastDataBlockOffset);
263       append(sb, "comparatorClassName=" + comparatorClassName);
264     }
265     append(sb, "version=" + version);
266 
267     return sb.toString();
268   }
269 
270   /**
271    * Reads a file trailer from the given file.
272    *
273    * @param istream the input stream with the ability to seek. Does not have to
274    *          be buffered, as only one read operation is made.
275    * @param fileSize the file size. Can be obtained using
276    *          {@link org.apache.hadoop.fs.FileSystem#getFileStatus(
277    *          org.apache.hadoop.fs.Path)}.
278    * @return the fixed file trailer read
279    * @throws IOException if failed to read from the underlying stream, or the
280    *           trailer is corrupted, or the version of the trailer is
281    *           unsupported
282    */
283   public static FixedFileTrailer readFromStream(FSDataInputStream istream,
284       long fileSize) throws IOException {
285     int bufferSize = MAX_TRAILER_SIZE;
286     long seekPoint = fileSize - bufferSize;
287     if (seekPoint < 0) {
288       // It is hard to imagine such a small HFile.
289       seekPoint = 0;
290       bufferSize = (int) fileSize;
291     }
292 
293     istream.seek(seekPoint);
294     ByteBuffer buf = ByteBuffer.allocate(bufferSize);
295     istream.readFully(buf.array(), buf.arrayOffset(),
296         buf.arrayOffset() + buf.limit());
297 
298     // Read the version from the last int of the file.
299     buf.position(buf.limit() - Bytes.SIZEOF_INT);
300     int version = buf.getInt();
301 
302     try {
303       HFile.checkFormatVersion(version);
304     } catch (IllegalArgumentException iae) {
305       // In this context, an invalid version might indicate a corrupt HFile.
306       throw new IOException(iae);
307     }
308 
309     int trailerSize = getTrailerSize(version);
310 
311     FixedFileTrailer fft = new FixedFileTrailer(version);
312     fft.deserialize(new DataInputStream(new ByteArrayInputStream(buf.array(),
313         buf.arrayOffset() + bufferSize - trailerSize, trailerSize)));
314     return fft;
315   }
316 
317   public void expectVersion(int expected) {
318     if (version != expected) {
319       throw new IllegalArgumentException("Invalid HFile version: " + version
320           + " (expected: " + expected + ")");
321     }
322   }
323 
324   public void expectAtLeastVersion(int lowerBound) {
325     if (version < lowerBound) {
326       throw new IllegalArgumentException("Invalid HFile version: " + version
327           + " (expected: " + lowerBound + " or higher).");
328     }
329   }
330 
331   public long getFileInfoOffset() {
332     return fileInfoOffset;
333   }
334 
335   public void setFileInfoOffset(long fileInfoOffset) {
336     this.fileInfoOffset = fileInfoOffset;
337   }
338 
339   public long getLoadOnOpenDataOffset() {
340     return loadOnOpenDataOffset;
341   }
342 
343   public void setLoadOnOpenOffset(long loadOnOpenDataOffset) {
344     this.loadOnOpenDataOffset = loadOnOpenDataOffset;
345   }
346 
347   public int getDataIndexCount() {
348     return dataIndexCount;
349   }
350 
351   public void setDataIndexCount(int dataIndexCount) {
352     this.dataIndexCount = dataIndexCount;
353   }
354 
355   public int getMetaIndexCount() {
356     return metaIndexCount;
357   }
358 
359   public void setMetaIndexCount(int metaIndexCount) {
360     this.metaIndexCount = metaIndexCount;
361   }
362 
363   public long getTotalUncompressedBytes() {
364     return totalUncompressedBytes;
365   }
366 
367   public void setTotalUncompressedBytes(long totalUncompressedBytes) {
368     this.totalUncompressedBytes = totalUncompressedBytes;
369   }
370 
371   public long getEntryCount() {
372     return entryCount;
373   }
374 
375   public void setEntryCount(long newEntryCount) {
376     if (version == 1) {
377       int intEntryCount = (int) Math.min(Integer.MAX_VALUE, newEntryCount);
378       if (intEntryCount != newEntryCount) {
379         LOG.info("Warning: entry count is " + newEntryCount + " but writing "
380             + intEntryCount + " into the version " + version + " trailer");
381       }
382       entryCount = intEntryCount;
383       return;
384     }
385     entryCount = newEntryCount;
386   }
387 
388   public Compression.Algorithm getCompressionCodec() {
389     return compressionCodec;
390   }
391 
392   public void setCompressionCodec(Compression.Algorithm compressionCodec) {
393     this.compressionCodec = compressionCodec;
394   }
395 
396   public int getNumDataIndexLevels() {
397     expectAtLeastVersion(2);
398     return numDataIndexLevels;
399   }
400 
401   public void setNumDataIndexLevels(int numDataIndexLevels) {
402     expectAtLeastVersion(2);
403     this.numDataIndexLevels = numDataIndexLevels;
404   }
405 
406   public long getLastDataBlockOffset() {
407     expectAtLeastVersion(2);
408     return lastDataBlockOffset;
409   }
410 
411   public void setLastDataBlockOffset(long lastDataBlockOffset) {
412     expectAtLeastVersion(2);
413     this.lastDataBlockOffset = lastDataBlockOffset;
414   }
415 
416   public long getFirstDataBlockOffset() {
417     expectAtLeastVersion(2);
418     return firstDataBlockOffset;
419   }
420 
421   public void setFirstDataBlockOffset(long firstDataBlockOffset) {
422     expectAtLeastVersion(2);
423     this.firstDataBlockOffset = firstDataBlockOffset;
424   }
425 
426   public int getVersion() {
427     return version;
428   }
429 
430   @SuppressWarnings("rawtypes")
431   public void setComparatorClass(Class<? extends RawComparator> klass) {
432     expectAtLeastVersion(2);
433     comparatorClassName = klass.getName();
434   }
435 
436   @SuppressWarnings("unchecked")
437   private static Class<? extends RawComparator<byte[]>> getComparatorClass(
438       String comparatorClassName) throws IOException {
439     try {
440       return (Class<? extends RawComparator<byte[]>>)
441           Class.forName(comparatorClassName);
442     } catch (ClassNotFoundException ex) {
443       throw new IOException(ex);
444     }
445   }
446 
447   public static RawComparator<byte[]> createComparator(
448       String comparatorClassName) throws IOException {
449     try {
450       return getComparatorClass(comparatorClassName).newInstance();
451     } catch (InstantiationException e) {
452       throw new IOException(e);
453     } catch (IllegalAccessException e) {
454       throw new IOException(e);
455     }
456   }
457 
458   RawComparator<byte[]> createComparator() throws IOException {
459     expectAtLeastVersion(2);
460     return createComparator(comparatorClassName);
461   }
462 
463   public long getUncompressedDataIndexSize() {
464     if (version == 1)
465       return 0;
466     return uncompressedDataIndexSize;
467   }
468 
469   public void setUncompressedDataIndexSize(
470       long uncompressedDataIndexSize) {
471     expectAtLeastVersion(2);
472     this.uncompressedDataIndexSize = uncompressedDataIndexSize;
473   }
474 
475 }