package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.*;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.io.compress.Compressor;

import static org.apache.hadoop.hbase.io.hfile.Compression.Algorithm.*;
import org.junit.Before;
import org.junit.Test;

public class TestHFileBlock {

  private static final boolean detailedLogging = false;
  private static final boolean[] BOOLEAN_VALUES = new boolean[] { false, true };

  private static final Log LOG = LogFactory.getLog(TestHFileBlock.class);

  static final Compression.Algorithm[] COMPRESSION_ALGORITHMS = {
      NONE, GZ };

  static final Compression.Algorithm[] GZIP_ONLY = { GZ };

  private static final int NUM_TEST_BLOCKS = 1000;

  private static final int NUM_READER_THREADS = 26;

  private static final HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();
  private FileSystem fs;
  private int uncompressedSizeV1;

  @Before
  public void setUp() throws IOException {
    fs = FileSystem.get(TEST_UTIL.getConfiguration());
  }

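  /**
   * Fills a block with a simple test payload: a thousand ints where each
   * distinct value repeats a hundred times, so the data compresses very well.
   */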
  public void writeTestBlockContents(DataOutputStream dos) throws IOException {
    for (int i = 0; i < 1000; ++i)
      dos.writeInt(i / 100);
  }

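  /**
   * Creates a "version 1" block: the META block magic record followed by the
   * test payload, written through the given compression algorithm. Also records
   * the uncompressed size for use by {@link #testReaderV1()}.
   */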
  public byte[] createTestV1Block(Compression.Algorithm algo)
      throws IOException {
    Compressor compressor = algo.getCompressor();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    OutputStream os = algo.createCompressionStream(baos, compressor, 0);
    DataOutputStream dos = new DataOutputStream(os);
    BlockType.META.write(dos);
    writeTestBlockContents(dos);
    uncompressedSizeV1 = dos.size();
    dos.flush();
    algo.returnCompressor(compressor);
    return baos.toByteArray();
  }

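  /**
   * Creates a "version 2" data block through {@link HFileBlock.Writer} using the
   * given compression algorithm and returns the serialized header plus data.
   */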
  private byte[] createTestV2Block(Compression.Algorithm algo)
      throws IOException {
    final BlockType blockType = BlockType.DATA;
    HFileBlock.Writer hbw = new HFileBlock.Writer(algo);
    DataOutputStream dos = hbw.startWriting(blockType, false);
    writeTestBlockContents(dos);
    byte[] headerAndData = hbw.getHeaderAndData();
    assertEquals(1000 * 4, hbw.getUncompressedSizeWithoutHeader());
    hbw.releaseCompressor();
    return headerAndData;
  }

  public String createTestBlockStr(Compression.Algorithm algo)
      throws IOException {
    byte[] testV2Block = createTestV2Block(algo);
    int osOffset = HFileBlock.HEADER_SIZE + 9;
    if (osOffset < testV2Block.length) {
      // Normalize the "OS" byte of the gzip header (the tenth byte of the gzip
      // stream, which starts right after the HFile block header) to 3 ("Unix"),
      // so that the expected string in testGzipCompression() does not depend on
      // the platform the test runs on.
      testV2Block[osOffset] = 3;
    }
    return Bytes.toStringBinary(testV2Block);
  }

  @Test
  public void testNoCompression() throws IOException {
    assertEquals(4000 + HFileBlock.HEADER_SIZE, createTestV2Block(NONE).length);
  }

  @Test
  public void testGzipCompression() throws IOException {
    assertEquals(
        // Block header: 8-byte magic, on-disk size without header (0x3A = 58),
        // uncompressed size without header (0x0FA0 = 4000), previous block
        // offset (-1).
        "DATABLK*\\x00\\x00\\x00:\\x00\\x00\\x0F\\xA0\\xFF\\xFF\\xFF\\xFF"
        + "\\xFF\\xFF\\xFF\\xFF"
        // The gzip stream starts here.
        + "\\x1F\\x8B"  // gzip magic signature
        + "\\x08"  // compression method: 8 = "deflate"
        + "\\x00"  // flags
        + "\\x00\\x00\\x00\\x00"  // modification time
        + "\\x00"  // extra flags
        + "\\x03"  // operating system, normalized to 3 ("Unix") by createTestBlockStr
        + "\\xED\\xC3\\xC1\\x11\\x00 \\x08\\xC00DD\\xDD\\x7Fa"
        + "\\xD6\\xE8\\xA3\\xB9K\\x84`\\x96Q\\xD3\\xA8\\xDB\\xA8e\\xD4c"
        + "\\xD46\\xEA5\\xEA3\\xEA7\\xE7\\x00LI\\s\\xA0\\x0F\\x00\\x00",
        createTestBlockStr(GZ));
  }

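  /**
   * Writes a sequence of identical version-1 blocks to a file and verifies that
   * {@link HFileBlock.FSReaderV1} reads them all back, both with and without
   * positional reads.
   */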
  @Test
  public void testReaderV1() throws IOException {
    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
      for (boolean pread : new boolean[] { false, true }) {
        byte[] block = createTestV1Block(algo);
        Path path = new Path(TEST_UTIL.getDataTestDir(),
            "blocks_v1_" + algo);
        LOG.info("Creating temporary file at " + path);
        FSDataOutputStream os = fs.create(path);
        int totalSize = 0;
        int numBlocks = 50;
        for (int i = 0; i < numBlocks; ++i) {
          os.write(block);
          totalSize += block.length;
        }
        os.close();

        FSDataInputStream is = fs.open(path);
        HFileBlock.FSReader hbr = new HFileBlock.FSReaderV1(is, algo,
            totalSize);
        HFileBlock b;
        int numBlocksRead = 0;
        long pos = 0;
        while (pos < totalSize) {
          b = hbr.readBlockData(pos, block.length, uncompressedSizeV1, pread);
          b.sanityCheck();
          pos += block.length;
          numBlocksRead++;
        }
        assertEquals(numBlocks, numBlocksRead);
        is.close();
      }
    }
  }

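  /**
   * Writes two version-2 data blocks and verifies that {@link HFileBlock.FSReaderV2}
   * reads the first one back correctly when the on-disk size is unknown. For the
   * GZIP case it also re-reads the block with an explicit on-disk size and checks
   * that a wrong on-disk size is rejected with an informative exception.
   */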
  @Test
  public void testReaderV2() throws IOException {
    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
      for (boolean pread : new boolean[] { false, true }) {
        Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_"
            + algo);
        FSDataOutputStream os = fs.create(path);
        HFileBlock.Writer hbw = new HFileBlock.Writer(algo);
        long totalSize = 0;
        for (int blockId = 0; blockId < 2; ++blockId) {
          DataOutputStream dos = hbw.startWriting(BlockType.DATA, false);
          for (int i = 0; i < 1234; ++i)
            dos.writeInt(i);
          hbw.writeHeaderAndData(os);
          totalSize += hbw.getOnDiskSizeWithHeader();
        }
        os.close();

        FSDataInputStream is = fs.open(path);
        HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, algo,
            totalSize);
        HFileBlock b = hbr.readBlockData(0, -1, -1, pread);
        is.close();

        b.sanityCheck();
        assertEquals(4936, b.getUncompressedSizeWithoutHeader());
        assertEquals(algo == GZ ? 2173 : 4936, b.getOnDiskSizeWithoutHeader());
        String blockStr = b.toString();

        if (algo == GZ) {
          is = fs.open(path);
          hbr = new HFileBlock.FSReaderV2(is, algo, totalSize);
          b = hbr.readBlockData(0, 2173 + HFileBlock.HEADER_SIZE, -1, pread);
          assertEquals(blockStr, b.toString());
          int wrongCompressedSize = 2172;
          try {
            b = hbr.readBlockData(0, wrongCompressedSize
                + HFileBlock.HEADER_SIZE, -1, pread);
            fail("Exception expected");
          } catch (IOException ex) {
            String expectedPrefix = "On-disk size without header provided is "
                + wrongCompressedSize + ", but block header contains "
                + b.getOnDiskSizeWithoutHeader() + ".";
            assertTrue("Invalid exception message: '" + ex.getMessage()
                + "'.\nMessage is expected to start with: '" + expectedPrefix
                + "'", ex.getMessage().startsWith(expectedPrefix));
          }
          is.close();
        }
      }
    }
  }

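  /**
   * Writes {@link #NUM_TEST_BLOCKS} blocks of random types and verifies, for every
   * combination of compression, pread and cache-on-write, that each block reads
   * back with the expected offset, previous-block offset, type and, when contents
   * were recorded at write time, the expected bytes.
   */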
  @Test
  public void testPreviousOffset() throws IOException {
    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
      for (boolean pread : BOOLEAN_VALUES) {
        for (boolean cacheOnWrite : BOOLEAN_VALUES) {
          Random rand = defaultRandom();
          LOG.info("Compression algorithm: " + algo + ", pread=" + pread);
          Path path = new Path(TEST_UTIL.getDataTestDir(), "prev_offset");
          List<Long> expectedOffsets = new ArrayList<Long>();
          List<Long> expectedPrevOffsets = new ArrayList<Long>();
          List<BlockType> expectedTypes = new ArrayList<BlockType>();
          List<ByteBuffer> expectedContents = cacheOnWrite
              ? new ArrayList<ByteBuffer>() : null;
          long totalSize = writeBlocks(rand, algo, path, expectedOffsets,
              expectedPrevOffsets, expectedTypes, expectedContents);

          FSDataInputStream is = fs.open(path);
          HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, algo,
              totalSize);
          long curOffset = 0;
          for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
            if (!pread) {
              assertEquals(is.getPos(), curOffset + (i == 0 ? 0 :
                  HFileBlock.HEADER_SIZE));
            }

            assertEquals(expectedOffsets.get(i).longValue(), curOffset);
            if (detailedLogging) {
              LOG.info("Reading block #" + i + " at offset " + curOffset);
            }
            HFileBlock b = hbr.readBlockData(curOffset, -1, -1, pread);
            if (detailedLogging) {
              LOG.info("Block #" + i + ": " + b);
            }
            assertEquals("Invalid block #" + i + "'s type:",
                expectedTypes.get(i), b.getBlockType());
            assertEquals("Invalid previous block offset for block " + i
                + " of type " + b.getBlockType() + ":",
                (long) expectedPrevOffsets.get(i), b.getPrevBlockOffset());
            b.sanityCheck();
            assertEquals(curOffset, b.getOffset());

            // Read the same block again, this time passing the exact on-disk
            // size we just learned, and verify that both reads agree on every
            // header field.
            HFileBlock b2 = hbr.readBlockData(curOffset,
                b.getOnDiskSizeWithHeader(), -1, pread);
            b2.sanityCheck();

            assertEquals(b.getBlockType(), b2.getBlockType());
            assertEquals(b.getOnDiskSizeWithoutHeader(),
                b2.getOnDiskSizeWithoutHeader());
            assertEquals(b.getOnDiskSizeWithHeader(),
                b2.getOnDiskSizeWithHeader());
            assertEquals(b.getUncompressedSizeWithoutHeader(),
                b2.getUncompressedSizeWithoutHeader());
            assertEquals(b.getPrevBlockOffset(), b2.getPrevBlockOffset());
            assertEquals(curOffset, b2.getOffset());

            curOffset += b.getOnDiskSizeWithHeader();

            if (cacheOnWrite) {
              // When simulating cache-on-write the writer recorded each block's
              // uncompressed contents, so compare them to what we just read back.
              ByteBuffer bufRead = b.getBufferWithHeader();
              ByteBuffer bufExpected = expectedContents.get(i);
              boolean bytesAreCorrect = Bytes.compareTo(bufRead.array(),
                  bufRead.arrayOffset(), bufRead.limit(),
                  bufExpected.array(), bufExpected.arrayOffset(),
                  bufExpected.limit()) == 0;
              String wrongBytesMsg = "";

              if (!bytesAreCorrect) {
                // Only build the failure message when the comparison failed;
                // show at most 32 bytes of each buffer.
                wrongBytesMsg = "Expected bytes in block #" + i + " (algo="
                    + algo + ", pread=" + pread + "):\n";
                wrongBytesMsg += Bytes.toStringBinary(bufExpected.array(),
                    bufExpected.arrayOffset(), Math.min(32,
                        bufExpected.limit()))
                    + ", actual:\n"
                    + Bytes.toStringBinary(bufRead.array(),
                        bufRead.arrayOffset(), Math.min(32, bufRead.limit()));
              }

              assertTrue(wrongBytesMsg, bytesAreCorrect);
            }
          }

          assertEquals(curOffset, fs.getFileStatus(path).getLen());
          is.close();
        }
      }
    }
  }

  private Random defaultRandom() {
    return new Random(189237);
  }

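  /**
   * A callable that reads blocks at random offsets from a shared reader for ten
   * seconds, randomly varying pread and whether the on-disk size is passed in,
   * and verifies the type, size and offset of every block it reads.
   */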
  private class BlockReaderThread implements Callable<Boolean> {
    private final String clientId;
    private final HFileBlock.FSReader hbr;
    private final List<Long> offsets;
    private final List<BlockType> types;
    private final long fileSize;

    public BlockReaderThread(String clientId,
        HFileBlock.FSReader hbr, List<Long> offsets, List<BlockType> types,
        long fileSize) {
      this.clientId = clientId;
      this.offsets = offsets;
      this.hbr = hbr;
      this.types = types;
      this.fileSize = fileSize;
    }

    @Override
    public Boolean call() throws Exception {
      Random rand = new Random(clientId.hashCode());
      long endTime = System.currentTimeMillis() + 10000;
      int numBlocksRead = 0;
      int numPositionalRead = 0;
      int numWithOnDiskSize = 0;
      while (System.currentTimeMillis() < endTime) {
        int blockId = rand.nextInt(NUM_TEST_BLOCKS);
        long offset = offsets.get(blockId);
        boolean pread = rand.nextBoolean();
        boolean withOnDiskSize = rand.nextBoolean();
        long expectedSize =
            (blockId == NUM_TEST_BLOCKS - 1 ? fileSize
                : offsets.get(blockId + 1)) - offset;

        HFileBlock b;
        try {
          long onDiskSizeArg = withOnDiskSize ? expectedSize : -1;
          b = hbr.readBlockData(offset, onDiskSizeArg, -1, pread);
        } catch (IOException ex) {
          LOG.error("Error in client " + clientId + " trying to read block at "
              + offset + ", pread=" + pread + ", withOnDiskSize=" +
              withOnDiskSize, ex);
          return false;
        }

        assertEquals(types.get(blockId), b.getBlockType());
        assertEquals(expectedSize, b.getOnDiskSizeWithHeader());
        assertEquals(offset, b.getOffset());

        ++numBlocksRead;
        if (pread)
          ++numPositionalRead;
        if (withOnDiskSize)
          ++numWithOnDiskSize;
      }
      LOG.info("Client " + clientId + " successfully read " + numBlocksRead +
          " blocks (with pread: " + numPositionalRead + ", with onDiskSize " +
          "specified: " + numWithOnDiskSize + ")");

      return true;
    }

  }

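  /**
   * Spawns {@link #NUM_READER_THREADS} {@link BlockReaderThread} instances against
   * a single shared reader and asserts that every one of them completes successfully.
   */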
  @Test
  public void testConcurrentReading() throws Exception {
    for (Compression.Algorithm compressAlgo : COMPRESSION_ALGORITHMS) {
      Path path =
          new Path(TEST_UTIL.getDataTestDir(), "concurrent_reading");
      Random rand = defaultRandom();
      List<Long> offsets = new ArrayList<Long>();
      List<BlockType> types = new ArrayList<BlockType>();
      writeBlocks(rand, compressAlgo, path, offsets, null, types, null);
      FSDataInputStream is = fs.open(path);
      long fileSize = fs.getFileStatus(path).getLen();
      HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, compressAlgo,
          fileSize);

      Executor exec = Executors.newFixedThreadPool(NUM_READER_THREADS);
      ExecutorCompletionService<Boolean> ecs =
          new ExecutorCompletionService<Boolean>(exec);

      for (int i = 0; i < NUM_READER_THREADS; ++i) {
        ecs.submit(new BlockReaderThread("reader_" + (char) ('A' + i), hbr,
            offsets, types, fileSize));
      }

      for (int i = 0; i < NUM_READER_THREADS; ++i) {
        Future<Boolean> result = ecs.take();
        assertTrue(result.get());
        if (detailedLogging) {
          LOG.info(String.valueOf(i + 1)
              + " reader threads finished successfully (algo=" + compressAlgo
              + ")");
        }
      }

      is.close();
    }
  }

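  /**
   * Writes {@link #NUM_TEST_BLOCKS} blocks of random types and sizes to the given
   * path, recording in the non-null lists each block's offset, the previous offset
   * of a block of the same type, the block type and, when cache-on-write is
   * simulated, the uncompressed contents. Returns the total on-disk size written.
   */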
  private long writeBlocks(Random rand, Compression.Algorithm compressAlgo,
      Path path, List<Long> expectedOffsets, List<Long> expectedPrevOffsets,
      List<BlockType> expectedTypes, List<ByteBuffer> expectedContents
      ) throws IOException {
    boolean cacheOnWrite = expectedContents != null;
    FSDataOutputStream os = fs.create(path);
    HFileBlock.Writer hbw = new HFileBlock.Writer(compressAlgo);
    Map<BlockType, Long> prevOffsetByType = new HashMap<BlockType, Long>();
    long totalSize = 0;
    for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
      int blockTypeOrdinal = rand.nextInt(BlockType.values().length);
      BlockType bt = BlockType.values()[blockTypeOrdinal];
      DataOutputStream dos = hbw.startWriting(bt, cacheOnWrite);
      for (int j = 0; j < rand.nextInt(500); ++j) {
        // A short that is constant within the block, followed by a varying int.
        dos.writeShort(i + 1);
        dos.writeInt(j + 1);
      }

      if (expectedOffsets != null)
        expectedOffsets.add(os.getPos());

      if (expectedPrevOffsets != null) {
        Long prevOffset = prevOffsetByType.get(bt);
        expectedPrevOffsets.add(prevOffset != null ? prevOffset : -1);
        prevOffsetByType.put(bt, os.getPos());
      }

      expectedTypes.add(bt);

      hbw.writeHeaderAndData(os);
      totalSize += hbw.getOnDiskSizeWithHeader();

      if (cacheOnWrite)
        expectedContents.add(hbw.getUncompressedBufferWithHeader());

      if (detailedLogging) {
        LOG.info("Writing block #" + i + " of type " + bt
            + ", uncompressed size " + hbw.getUncompressedSizeWithoutHeader()
            + " at offset " + os.getPos());
      }
    }
    os.close();
    LOG.info("Created a temporary file at " + path + ", "
        + fs.getFileStatus(path).getLen() + " bytes, compression=" +
        compressAlgo);
    return totalSize;
  }

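  /**
   * Checks that {@link HFileBlock#heapSize()} agrees with the sizes estimated by
   * {@link ClassSize} for a few representative buffer sizes.
   */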
  @Test
  public void testBlockHeapSize() {
    // ClassSize-based estimates vary between JVMs (e.g. 32-bit vs. 64-bit
    // reference sizes), so accept either of the two known values.
    assertTrue(HFileBlock.BYTE_BUFFER_HEAP_SIZE == 64 ||
        HFileBlock.BYTE_BUFFER_HEAP_SIZE == 80);

    for (int size : new int[] { 100, 256, 12345 }) {
      byte[] byteArr = new byte[HFileBlock.HEADER_SIZE + size];
      ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
      HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf,
          true, -1);
      long expected = ClassSize.align(ClassSize.estimateBase(HFileBlock.class,
          true)
          + ClassSize.estimateBase(buf.getClass(), true)
          + HFileBlock.HEADER_SIZE + size);
      assertEquals(expected, block.heapSize());
    }
  }

}