/*
 * Copyright 2011 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.*;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.io.compress.Compressor;

import static org.apache.hadoop.hbase.io.hfile.Compression.Algorithm.*;
import org.junit.Before;
import org.junit.Test;

public class TestHFileBlock {
  // Change this value to true to enable more detailed logging.
  private static final boolean detailedLogging = false;
  private static final boolean[] BOOLEAN_VALUES = new boolean[] { false, true };

  private static final Log LOG = LogFactory.getLog(TestHFileBlock.class);

  static final Compression.Algorithm[] COMPRESSION_ALGORITHMS = {
      NONE, GZ };

  // In case we need to temporarily switch some test cases to just test gzip.
  static final Compression.Algorithm[] GZIP_ONLY = { GZ };

  private static final int NUM_TEST_BLOCKS = 1000;

  private static final int NUM_READER_THREADS = 26;

  private static final HBaseTestingUtility TEST_UTIL =
    new HBaseTestingUtility();
  private FileSystem fs;
  private int uncompressedSizeV1;

  @Before
  public void setUp() throws IOException {
    fs = FileSystem.get(TEST_UTIL.getConfiguration());
  }

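  /** Writes 1000 ints (4000 bytes) of highly repetitive, easily compressible data. */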
  public void writeTestBlockContents(DataOutputStream dos) throws IOException {
    // This compresses really well.
    for (int i = 0; i < 1000; ++i)
      dos.writeInt(i / 100);
  }

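  /**
   * Creates a version 1 (pre-HFile-v2) meta block compressed with the given
   * algorithm and records its uncompressed size in {@link #uncompressedSizeV1}.
   */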
  public byte[] createTestV1Block(Compression.Algorithm algo)
      throws IOException {
    Compressor compressor = algo.getCompressor();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    OutputStream os = algo.createCompressionStream(baos, compressor, 0);
    DataOutputStream dos = new DataOutputStream(os);
    BlockType.META.write(dos); // Let's make this a meta block.
    writeTestBlockContents(dos);
    uncompressedSizeV1 = dos.size();
    dos.flush();
    algo.returnCompressor(compressor);
    return baos.toByteArray();
  }

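  /**
   * Creates a version 2 data block using {@link HFileBlock.Writer} and returns
   * its complete on-disk representation (header plus data).
   */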
  private byte[] createTestV2Block(Compression.Algorithm algo)
      throws IOException {
    final BlockType blockType = BlockType.DATA;
    HFileBlock.Writer hbw = new HFileBlock.Writer(algo);
    DataOutputStream dos = hbw.startWriting(blockType, false);
    writeTestBlockContents(dos);
    byte[] headerAndData = hbw.getHeaderAndData();
    assertEquals(1000 * 4, hbw.getUncompressedSizeWithoutHeader());
    hbw.releaseCompressor();
    return headerAndData;
  }

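  /**
   * Returns a printable representation of a test block, with the gzip "OS"
   * header field normalized so that the result is identical on all platforms.
   */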
  public String createTestBlockStr(Compression.Algorithm algo)
      throws IOException {
    byte[] testV2Block = createTestV2Block(algo);
    int osOffset = HFileBlock.HEADER_SIZE + 9;
    if (osOffset < testV2Block.length) {
      // Force-set the "OS" field of the gzip header to 3 (Unix) to avoid
      // variations across operating systems.
      // See http://www.gzip.org/zlib/rfc-gzip.html for gzip format.
      testV2Block[osOffset] = 3;
    }
    return Bytes.toStringBinary(testV2Block);
  }

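  /** With no compression, the on-disk size is the 4000-byte payload plus the header. */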
  @Test
  public void testNoCompression() throws IOException {
    assertEquals(4000 + HFileBlock.HEADER_SIZE, createTestV2Block(NONE).length);
  }

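  /** Verifies the exact on-disk byte layout of a gzip-compressed version 2 data block. */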
  @Test
  public void testGzipCompression() throws IOException {
    assertEquals(
        "DATABLK*\\x00\\x00\\x00:\\x00\\x00\\x0F\\xA0\\xFF\\xFF\\xFF\\xFF"
            + "\\xFF\\xFF\\xFF\\xFF"
            // gzip-compressed block: http://www.gzip.org/zlib/rfc-gzip.html
            + "\\x1F\\x8B"  // gzip magic signature
            + "\\x08"  // Compression method: 8 = "deflate"
            + "\\x00"  // Flags
            + "\\x00\\x00\\x00\\x00"  // mtime
            + "\\x00"  // XFL (extra flags)
            // OS (0 = FAT filesystems, 3 = Unix). However, this field
            // sometimes gets set to 0 on Linux and Mac, so we reset it to 3.
            + "\\x03"
            + "\\xED\\xC3\\xC1\\x11\\x00 \\x08\\xC00DD\\xDD\\x7Fa"
            + "\\xD6\\xE8\\xA3\\xB9K\\x84`\\x96Q\\xD3\\xA8\\xDB\\xA8e\\xD4c"
            + "\\xD46\\xEA5\\xEA3\\xEA7\\xE7\\x00LI\\s\\xA0\\x0F\\x00\\x00",
        createTestBlockStr(GZ));
  }

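  /**
   * Writes 50 identical version 1 blocks to a file and reads them back
   * sequentially with {@link HFileBlock.FSReaderV1}, with and without pread.
   */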
  @Test
  public void testReaderV1() throws IOException {
    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
      for (boolean pread : new boolean[] { false, true }) {
        byte[] block = createTestV1Block(algo);
        Path path = new Path(TEST_UTIL.getDataTestDir(),
          "blocks_v1_" + algo);
        LOG.info("Creating temporary file at " + path);
        FSDataOutputStream os = fs.create(path);
        int totalSize = 0;
        int numBlocks = 50;
        for (int i = 0; i < numBlocks; ++i) {
          os.write(block);
          totalSize += block.length;
        }
        os.close();

        FSDataInputStream is = fs.open(path);
        HFileBlock.FSReader hbr = new HFileBlock.FSReaderV1(is, algo,
            totalSize);
        HFileBlock b;
        int numBlocksRead = 0;
        long pos = 0;
        while (pos < totalSize) {
          b = hbr.readBlockData(pos, block.length, uncompressedSizeV1, pread);
          b.sanityCheck();
          pos += block.length;
          numBlocksRead++;
        }
        assertEquals(numBlocks, numBlocksRead);
        is.close();
      }
    }
  }

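  /**
   * Writes two version 2 data blocks and reads the first one back with
   * {@link HFileBlock.FSReaderV2}, checking its sizes as well as the exception
   * raised when the caller supplies a wrong on-disk size.
   */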
  @Test
  public void testReaderV2() throws IOException {
    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
      for (boolean pread : new boolean[] { false, true }) {
        Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_"
            + algo);
        FSDataOutputStream os = fs.create(path);
        HFileBlock.Writer hbw = new HFileBlock.Writer(algo);
        long totalSize = 0;
        for (int blockId = 0; blockId < 2; ++blockId) {
          DataOutputStream dos = hbw.startWriting(BlockType.DATA, false);
          for (int i = 0; i < 1234; ++i)
            dos.writeInt(i);
          hbw.writeHeaderAndData(os);
          totalSize += hbw.getOnDiskSizeWithHeader();
        }
        os.close();

        FSDataInputStream is = fs.open(path);
        HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, algo,
            totalSize);
        HFileBlock b = hbr.readBlockData(0, -1, -1, pread);
        is.close();

        b.sanityCheck();
        assertEquals(4936, b.getUncompressedSizeWithoutHeader());
        assertEquals(algo == GZ ? 2173 : 4936, b.getOnDiskSizeWithoutHeader());
        String blockStr = b.toString();

        if (algo == GZ) {
          is = fs.open(path);
          hbr = new HFileBlock.FSReaderV2(is, algo, totalSize);
          b = hbr.readBlockData(0, 2173 + HFileBlock.HEADER_SIZE, -1, pread);
          assertEquals(blockStr, b.toString());
          int wrongCompressedSize = 2172;
          try {
            b = hbr.readBlockData(0, wrongCompressedSize
                + HFileBlock.HEADER_SIZE, -1, pread);
            fail("Exception expected");
          } catch (IOException ex) {
            String expectedPrefix = "On-disk size without header provided is "
                + wrongCompressedSize + ", but block header contains "
                + b.getOnDiskSizeWithoutHeader() + ".";
            assertTrue("Invalid exception message: '" + ex.getMessage()
                + "'.\nMessage is expected to start with: '" + expectedPrefix
                + "'", ex.getMessage().startsWith(expectedPrefix));
          }
          is.close();
        }
      }
    }
  }

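  /**
   * Writes a sequence of random blocks, then reads each one back twice (with
   * and without the known on-disk size) and verifies offsets, previous-block
   * offsets, block types and, in cache-on-write mode, the uncompressed bytes.
   */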
  @Test
  public void testPreviousOffset() throws IOException {
    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
      for (boolean pread : BOOLEAN_VALUES) {
        for (boolean cacheOnWrite : BOOLEAN_VALUES) {
          Random rand = defaultRandom();
          LOG.info("Compression algorithm: " + algo + ", pread=" + pread
              + ", cacheOnWrite=" + cacheOnWrite);
          Path path = new Path(TEST_UTIL.getDataTestDir(), "prev_offset");
          List<Long> expectedOffsets = new ArrayList<Long>();
          List<Long> expectedPrevOffsets = new ArrayList<Long>();
          List<BlockType> expectedTypes = new ArrayList<BlockType>();
          List<ByteBuffer> expectedContents = cacheOnWrite
              ? new ArrayList<ByteBuffer>() : null;
          long totalSize = writeBlocks(rand, algo, path, expectedOffsets,
              expectedPrevOffsets, expectedTypes, expectedContents);

          FSDataInputStream is = fs.open(path);
          HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, algo,
              totalSize);
          long curOffset = 0;
          for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
            if (!pread) {
              assertEquals(is.getPos(), curOffset + (i == 0 ? 0 :
                  HFileBlock.HEADER_SIZE));
            }

            assertEquals(expectedOffsets.get(i).longValue(), curOffset);
            if (detailedLogging) {
              LOG.info("Reading block #" + i + " at offset " + curOffset);
            }
            HFileBlock b = hbr.readBlockData(curOffset, -1, -1, pread);
            if (detailedLogging) {
              LOG.info("Block #" + i + ": " + b);
            }
            assertEquals("Invalid block #" + i + "'s type:",
                expectedTypes.get(i), b.getBlockType());
            assertEquals("Invalid previous block offset for block " + i
                + " of " + "type " + b.getBlockType() + ":",
                (long) expectedPrevOffsets.get(i), b.getPrevBlockOffset());
            b.sanityCheck();
            assertEquals(curOffset, b.getOffset());

            // Now re-load this block knowing the on-disk size. This tests a
            // different branch in the loader.
            HFileBlock b2 = hbr.readBlockData(curOffset,
                b.getOnDiskSizeWithHeader(), -1, pread);
            b2.sanityCheck();

            assertEquals(b.getBlockType(), b2.getBlockType());
            assertEquals(b.getOnDiskSizeWithoutHeader(),
                b2.getOnDiskSizeWithoutHeader());
            assertEquals(b.getOnDiskSizeWithHeader(),
                b2.getOnDiskSizeWithHeader());
            assertEquals(b.getUncompressedSizeWithoutHeader(),
                b2.getUncompressedSizeWithoutHeader());
            assertEquals(b.getPrevBlockOffset(), b2.getPrevBlockOffset());
            assertEquals(curOffset, b2.getOffset());

            curOffset += b.getOnDiskSizeWithHeader();

            if (cacheOnWrite) {
              // In the cache-on-write mode we store uncompressed bytes so we
              // can compare them to what was read by the block reader.

              ByteBuffer bufRead = b.getBufferWithHeader();
              ByteBuffer bufExpected = expectedContents.get(i);
              boolean bytesAreCorrect = Bytes.compareTo(bufRead.array(),
                  bufRead.arrayOffset(), bufRead.limit(),
                  bufExpected.array(), bufExpected.arrayOffset(),
                  bufExpected.limit()) == 0;
              String wrongBytesMsg = "";

              if (!bytesAreCorrect) {
                // Optimization: only construct an error message in case we
                // will need it.
                wrongBytesMsg = "Expected bytes in block #" + i + " (algo="
                    + algo + ", pread=" + pread + "):\n";
                wrongBytesMsg += Bytes.toStringBinary(bufExpected.array(),
                    bufExpected.arrayOffset(), Math.min(32,
                        bufExpected.limit()))
                    + ", actual:\n"
                    + Bytes.toStringBinary(bufRead.array(),
                        bufRead.arrayOffset(), Math.min(32, bufRead.limit()));
              }

              assertTrue(wrongBytesMsg, bytesAreCorrect);
            }
          }

          assertEquals(curOffset, fs.getFileStatus(path).getLen());
          is.close();
        }
      }
    }
  }

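  /** Returns a random number generator with a fixed seed, so test runs are repeatable. */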
  private Random defaultRandom() {
    return new Random(189237);
  }

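  /**
   * A reader task used by {@link #testConcurrentReading()}: for about ten
   * seconds it reads randomly chosen blocks, with random pread and
   * on-disk-size settings, and validates everything it reads.
   */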
  private class BlockReaderThread implements Callable<Boolean> {
    private final String clientId;
    private final HFileBlock.FSReader hbr;
    private final List<Long> offsets;
    private final List<BlockType> types;
    private final long fileSize;

    public BlockReaderThread(String clientId,
        HFileBlock.FSReader hbr, List<Long> offsets, List<BlockType> types,
        long fileSize) {
      this.clientId = clientId;
      this.offsets = offsets;
      this.hbr = hbr;
      this.types = types;
      this.fileSize = fileSize;
    }

    @Override
    public Boolean call() throws Exception {
      Random rand = new Random(clientId.hashCode());
      long endTime = System.currentTimeMillis() + 10000;
      int numBlocksRead = 0;
      int numPositionalRead = 0;
      int numWithOnDiskSize = 0;
      while (System.currentTimeMillis() < endTime) {
        int blockId = rand.nextInt(NUM_TEST_BLOCKS);
        long offset = offsets.get(blockId);
        boolean pread = rand.nextBoolean();
        boolean withOnDiskSize = rand.nextBoolean();
        long expectedSize =
          (blockId == NUM_TEST_BLOCKS - 1 ? fileSize
              : offsets.get(blockId + 1)) - offset;

        HFileBlock b;
        try {
          long onDiskSizeArg = withOnDiskSize ? expectedSize : -1;
          b = hbr.readBlockData(offset, onDiskSizeArg, -1, pread);
        } catch (IOException ex) {
          LOG.error("Error in client " + clientId + " trying to read block at "
              + offset + ", pread=" + pread + ", withOnDiskSize=" +
              withOnDiskSize, ex);
          return false;
        }

        assertEquals(types.get(blockId), b.getBlockType());
        assertEquals(expectedSize, b.getOnDiskSizeWithHeader());
        assertEquals(offset, b.getOffset());

        ++numBlocksRead;
        if (pread)
          ++numPositionalRead;
        if (withOnDiskSize)
          ++numWithOnDiskSize;
      }
      LOG.info("Client " + clientId + " successfully read " + numBlocksRead +
        " blocks (with pread: " + numPositionalRead + ", with onDiskSize " +
        "specified: " + numWithOnDiskSize + ")");

      return true;
    }

  }

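  /**
   * Starts {@link #NUM_READER_THREADS} {@link BlockReaderThread} workers
   * against a shared reader and file and asserts that all of them succeed.
   */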
  @Test
  public void testConcurrentReading() throws Exception {
    for (Compression.Algorithm compressAlgo : COMPRESSION_ALGORITHMS) {
      Path path =
          new Path(TEST_UTIL.getDataTestDir(), "concurrent_reading");
      Random rand = defaultRandom();
      List<Long> offsets = new ArrayList<Long>();
      List<BlockType> types = new ArrayList<BlockType>();
      writeBlocks(rand, compressAlgo, path, offsets, null, types, null);
      FSDataInputStream is = fs.open(path);
      long fileSize = fs.getFileStatus(path).getLen();
      HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, compressAlgo,
          fileSize);

      ExecutorService exec = Executors.newFixedThreadPool(NUM_READER_THREADS);
      ExecutorCompletionService<Boolean> ecs =
          new ExecutorCompletionService<Boolean>(exec);

      for (int i = 0; i < NUM_READER_THREADS; ++i) {
        ecs.submit(new BlockReaderThread("reader_" + (char) ('A' + i), hbr,
            offsets, types, fileSize));
      }

      for (int i = 0; i < NUM_READER_THREADS; ++i) {
        Future<Boolean> result = ecs.take();
        assertTrue(result.get());
        if (detailedLogging) {
          LOG.info(String.valueOf(i + 1)
            + " reader threads finished successfully (algo=" + compressAlgo
            + ")");
        }
      }

      // Shut down the pool so the reader threads do not outlive the test.
      exec.shutdown();
      is.close();
    }
  }

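  /**
   * Writes {@link #NUM_TEST_BLOCKS} blocks of random types and sizes to the
   * given path, optionally recording the expected offsets, previous-block
   * offsets, block types and uncompressed contents for later verification.
   * Returns the total number of bytes written.
   */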
  private long writeBlocks(Random rand, Compression.Algorithm compressAlgo,
      Path path, List<Long> expectedOffsets, List<Long> expectedPrevOffsets,
      List<BlockType> expectedTypes, List<ByteBuffer> expectedContents
  ) throws IOException {
    boolean cacheOnWrite = expectedContents != null;
    FSDataOutputStream os = fs.create(path);
    HFileBlock.Writer hbw = new HFileBlock.Writer(compressAlgo);
    Map<BlockType, Long> prevOffsetByType = new HashMap<BlockType, Long>();
    long totalSize = 0;
    for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
      int blockTypeOrdinal = rand.nextInt(BlockType.values().length);
      BlockType bt = BlockType.values()[blockTypeOrdinal];
      DataOutputStream dos = hbw.startWriting(bt, cacheOnWrite);
      for (int j = 0; j < rand.nextInt(500); ++j) {
        // This might compress well.
        dos.writeShort(i + 1);
        dos.writeInt(j + 1);
      }

      if (expectedOffsets != null)
        expectedOffsets.add(os.getPos());

      if (expectedPrevOffsets != null) {
        Long prevOffset = prevOffsetByType.get(bt);
        expectedPrevOffsets.add(prevOffset != null ? prevOffset : -1);
        prevOffsetByType.put(bt, os.getPos());
      }

      expectedTypes.add(bt);

      hbw.writeHeaderAndData(os);
      totalSize += hbw.getOnDiskSizeWithHeader();

      if (cacheOnWrite)
        expectedContents.add(hbw.getUncompressedBufferWithHeader());

      if (detailedLogging) {
        LOG.info("Writing block #" + i + " of type " + bt
            + ", uncompressed size " + hbw.getUncompressedSizeWithoutHeader()
            + " at offset " + os.getPos());
      }
    }
    os.close();
    LOG.info("Created a temporary file at " + path + ", "
        + fs.getFileStatus(path).getLen() + " bytes, compression=" +
        compressAlgo);
    return totalSize;
  }

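  /** Checks that {@link HFileBlock#heapSize()} matches the estimate based on {@link ClassSize}. */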
  @Test
  public void testBlockHeapSize() {
    // We have seen multiple possible values for this estimate of the heap size
    // of a ByteBuffer, presumably depending on the JDK version.
    assertTrue(HFileBlock.BYTE_BUFFER_HEAP_SIZE == 64 ||
               HFileBlock.BYTE_BUFFER_HEAP_SIZE == 80);

    for (int size : new int[] { 100, 256, 12345 }) {
      byte[] byteArr = new byte[HFileBlock.HEADER_SIZE + size];
      ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
      HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf,
          true, -1);
      long expected = ClassSize.align(ClassSize.estimateBase(HFileBlock.class,
          true)
          + ClassSize.estimateBase(buf.getClass(), true)
          + HFileBlock.HEADER_SIZE + size);
      assertEquals(expected, block.heapSize());
    }
  }

}