1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.io.hfile;
21
22 import java.io.ByteArrayInputStream;
23 import java.io.DataInput;
24 import java.io.DataInputStream;
25 import java.io.IOException;
26 import java.nio.ByteBuffer;
27 import java.util.ArrayList;
28 import java.util.List;
29
30 import org.apache.commons.logging.Log;
31 import org.apache.commons.logging.LogFactory;
32 import org.apache.hadoop.fs.FSDataInputStream;
33 import org.apache.hadoop.fs.Path;
34 import org.apache.hadoop.hbase.KeyValue;
35 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
36 import org.apache.hadoop.hbase.util.Bytes;
37 import org.apache.hadoop.hbase.util.IdLock;
38 import org.apache.hadoop.io.WritableUtils;
39
40
41
42
43 public class HFileReaderV2 extends AbstractHFileReader {
44
45 private static final Log LOG = LogFactory.getLog(HFileReaderV2.class);
46
47
48
49
50
51 private static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
52
53 private boolean includesMemstoreTS = false;
54
55 private boolean shouldIncludeMemstoreTS() {
56 return includesMemstoreTS;
57 }
58
59
60
61
62
63
64
65 private IdLock offsetLock = new IdLock();
66
67
68
69
70
71 private List<HFileBlock> loadOnOpenBlocks = new ArrayList<HFileBlock>();
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86 public HFileReaderV2(Path path, FixedFileTrailer trailer,
87 final FSDataInputStream fsdis, final long size,
88 final boolean closeIStream, final CacheConfig cacheConf)
89 throws IOException {
90 super(path, trailer, fsdis, size, closeIStream, cacheConf);
91
92 trailer.expectVersion(2);
93 fsBlockReader = new HFileBlock.FSReaderV2(fsdis, compressAlgo,
94 fileSize);
95
96
97 comparator = trailer.createComparator();
98 dataBlockIndexReader = new HFileBlockIndex.BlockIndexReader(comparator,
99 trailer.getNumDataIndexLevels(), this);
100 metaBlockIndexReader = new HFileBlockIndex.BlockIndexReader(
101 Bytes.BYTES_RAWCOMPARATOR, 1);
102
103
104
105 HFileBlock.BlockIterator blockIter = fsBlockReader.blockRange(
106 trailer.getLoadOnOpenDataOffset(),
107 fileSize - trailer.getTrailerSize());
108
109
110
111 dataBlockIndexReader.readMultiLevelIndexRoot(
112 blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
113 trailer.getDataIndexCount());
114
115
116 metaBlockIndexReader.readRootIndex(
117 blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
118 trailer.getMetaIndexCount());
119
120
121 fileInfo = new FileInfo();
122 fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
123 lastKey = fileInfo.get(FileInfo.LASTKEY);
124 avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN));
125 avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN));
126 byte [] keyValueFormatVersion = fileInfo.get(HFileWriterV2.KEY_VALUE_VERSION);
127 includesMemstoreTS = (keyValueFormatVersion != null &&
128 Bytes.toInt(keyValueFormatVersion) == HFileWriterV2.KEY_VALUE_VER_WITH_MEMSTORE);
129
130
131 HFileBlock b;
132 while ((b = blockIter.nextBlock()) != null) {
133 loadOnOpenBlocks.add(b);
134 }
135 }
136
137
138
139
140
141
142
143
144
145
146
147
148
149 @Override
150 public HFileScanner getScanner(boolean cacheBlocks, final boolean pread,
151 final boolean isCompaction) {
152 return new ScannerV2(this, cacheBlocks, pread, isCompaction);
153 }
154
155
156
157
158
159
160
161 @Override
162 public ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock)
163 throws IOException {
164 if (trailer.getMetaIndexCount() == 0) {
165 return null;
166 }
167 if (metaBlockIndexReader == null) {
168 throw new IOException("Meta index not loaded");
169 }
170
171 byte[] mbname = Bytes.toBytes(metaBlockName);
172 int block = metaBlockIndexReader.rootBlockContainingKey(mbname, 0,
173 mbname.length);
174 if (block == -1)
175 return null;
176 long blockSize = metaBlockIndexReader.getRootBlockDataSize(block);
177 long startTimeNs = System.nanoTime();
178
179
180
181
182 synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
183 metaLoads.incrementAndGet();
184
185
186 long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
187 String cacheKey = HFile.getBlockCacheKey(name, metaBlockOffset);
188
189 cacheBlock &= cacheConf.shouldCacheDataOnRead();
190 if (cacheConf.isBlockCacheEnabled()) {
191 HFileBlock cachedBlock =
192 (HFileBlock) cacheConf.getBlockCache().getBlock(cacheKey, cacheBlock);
193 if (cachedBlock != null) {
194
195
196 cacheHits.incrementAndGet();
197 return cachedBlock.getBufferWithoutHeader();
198 }
199
200 }
201
202 HFileBlock metaBlock = fsBlockReader.readBlockData(metaBlockOffset,
203 blockSize, -1, true);
204
205 HFile.readTimeNano.addAndGet(System.nanoTime() - startTimeNs);
206 HFile.readOps.incrementAndGet();
207
208
209 if (cacheBlock) {
210 cacheConf.getBlockCache().cacheBlock(cacheKey, metaBlock,
211 cacheConf.isInMemory());
212 }
213
214 return metaBlock.getBufferWithoutHeader();
215 }
216 }
217
218
219
220
221
222
223
224
225
226
227
228
229
230 @Override
231 public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize,
232 boolean cacheBlock, boolean pread, final boolean isCompaction)
233 throws IOException {
234 if (dataBlockIndexReader == null) {
235 throw new IOException("Block index not loaded");
236 }
237 if (dataBlockOffset < 0
238 || dataBlockOffset >= trailer.getLoadOnOpenDataOffset()) {
239 throw new IOException("Requested block is out of range: "
240 + dataBlockOffset + ", lastDataBlockOffset: "
241 + trailer.getLastDataBlockOffset());
242 }
243
244
245
246
247
248
249 String cacheKey = HFile.getBlockCacheKey(name, dataBlockOffset);
250 IdLock.Entry lockEntry = offsetLock.getLockEntry(dataBlockOffset);
251 try {
252 blockLoads.incrementAndGet();
253
254
255 cacheBlock &= cacheConf.shouldCacheDataOnRead();
256 if (cacheConf.isBlockCacheEnabled()) {
257 HFileBlock cachedBlock =
258 (HFileBlock) cacheConf.getBlockCache().getBlock(cacheKey, cacheBlock);
259 if (cachedBlock != null) {
260 cacheHits.incrementAndGet();
261
262 return cachedBlock;
263 }
264
265 }
266
267
268 long startTimeNs = System.nanoTime();
269 HFileBlock dataBlock = fsBlockReader.readBlockData(dataBlockOffset,
270 onDiskBlockSize, -1, pread);
271
272 HFile.readTimeNano.addAndGet(System.nanoTime() - startTimeNs);
273 HFile.readOps.incrementAndGet();
274
275
276 if (cacheBlock) {
277 cacheConf.getBlockCache().cacheBlock(cacheKey, dataBlock,
278 cacheConf.isInMemory());
279 }
280
281 return dataBlock;
282 } finally {
283 offsetLock.releaseLockEntry(lockEntry);
284 }
285 }
286
287
288
289
290
291
292 @Override
293 public byte[] getLastKey() {
294 return dataBlockIndexReader.isEmpty() ? null : lastKey;
295 }
296
297
298
299
300
301
302 @Override
303 public byte[] midkey() throws IOException {
304 return dataBlockIndexReader.midkey();
305 }
306
307 @Override
308 public void close() throws IOException {
309 close(cacheConf.shouldEvictOnClose());
310 }
311
312 public void close(boolean evictOnClose) throws IOException {
313 if (evictOnClose && cacheConf.isBlockCacheEnabled()) {
314 int numEvicted = cacheConf.getBlockCache().evictBlocksByPrefix(name
315 + HFile.CACHE_KEY_SEPARATOR);
316 if (LOG.isTraceEnabled()) {
317 LOG.trace("On close, file=" + name + " evicted=" + numEvicted
318 + " block(s)");
319 }
320 }
321 if (closeIStream && istream != null) {
322 istream.close();
323 istream = null;
324 }
325 }
326
327
328
329
330 protected static class ScannerV2 extends AbstractHFileReader.Scanner {
331 private HFileBlock block;
332 private HFileReaderV2 reader;
333
334 public ScannerV2(HFileReaderV2 r, boolean cacheBlocks,
335 final boolean pread, final boolean isCompaction) {
336 super(cacheBlocks, pread, isCompaction);
337 this.reader = r;
338 }
339
340 @Override
341 public HFileReaderV2 getReader() {
342 return reader;
343 }
344
345 @Override
346 public KeyValue getKeyValue() {
347 if (!isSeeked())
348 return null;
349
350 KeyValue ret = new KeyValue(blockBuffer.array(), blockBuffer.arrayOffset()
351 + blockBuffer.position());
352 if (this.reader.shouldIncludeMemstoreTS()) {
353 ret.setMemstoreTS(currMemstoreTS);
354 }
355 return ret;
356 }
357
358 @Override
359 public ByteBuffer getKey() {
360 assertSeeked();
361 return ByteBuffer.wrap(
362 blockBuffer.array(),
363 blockBuffer.arrayOffset() + blockBuffer.position()
364 + KEY_VALUE_LEN_SIZE, currKeyLen).slice();
365 }
366
367 @Override
368 public ByteBuffer getValue() {
369 assertSeeked();
370 return ByteBuffer.wrap(
371 blockBuffer.array(),
372 blockBuffer.arrayOffset() + blockBuffer.position()
373 + KEY_VALUE_LEN_SIZE + currKeyLen, currValueLen).slice();
374 }
375
376 private void setNonSeekedState() {
377 block = null;
378 blockBuffer = null;
379 currKeyLen = 0;
380 currValueLen = 0;
381 currMemstoreTS = 0;
382 currMemstoreTSLen = 0;
383 }
384
385
386
387
388
389
390
391
392 @Override
393 public boolean next() throws IOException {
394 assertSeeked();
395
396 try {
397 blockBuffer.position(blockBuffer.position() + KEY_VALUE_LEN_SIZE
398 + currKeyLen + currValueLen + currMemstoreTSLen);
399 } catch (IllegalArgumentException e) {
400 LOG.error("Current pos = " + blockBuffer.position()
401 + "; currKeyLen = " + currKeyLen + "; currValLen = "
402 + currValueLen + "; block limit = " + blockBuffer.limit()
403 + "; HFile name = " + reader.getName()
404 + "; currBlock currBlockOffset = " + block.getOffset());
405 throw e;
406 }
407
408 if (blockBuffer.remaining() <= 0) {
409 long lastDataBlockOffset =
410 reader.getTrailer().getLastDataBlockOffset();
411
412 if (block.getOffset() >= lastDataBlockOffset) {
413 setNonSeekedState();
414 return false;
415 }
416
417
418 HFileBlock nextBlock = readNextDataBlock();
419 if (nextBlock == null) {
420 setNonSeekedState();
421 return false;
422 }
423
424 updateCurrBlock(nextBlock);
425 return true;
426 }
427
428
429 readKeyValueLen();
430 return true;
431 }
432
433
434
435
436
437
438
439
440 private HFileBlock readNextDataBlock() throws IOException {
441 long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
442 if (block == null)
443 return null;
444
445 HFileBlock curBlock = block;
446
447 do {
448 if (curBlock.getOffset() >= lastDataBlockOffset)
449 return null;
450
451 if (curBlock.getOffset() < 0) {
452 throw new IOException("Invalid block file offset: " + block);
453 }
454 curBlock = reader.readBlock(curBlock.getOffset()
455 + curBlock.getOnDiskSizeWithHeader(),
456 curBlock.getNextBlockOnDiskSizeWithHeader(), cacheBlocks, pread,
457 isCompaction);
458 } while (!curBlock.getBlockType().equals(BlockType.DATA));
459
460 return curBlock;
461 }
462
463
464
465
466
467
468
469
470 @Override
471 public boolean seekTo() throws IOException {
472 if (reader == null) {
473 return false;
474 }
475
476 if (reader.getTrailer().getEntryCount() == 0) {
477
478 return false;
479 }
480
481 long firstDataBlockOffset =
482 reader.getTrailer().getFirstDataBlockOffset();
483 if (block != null && block.getOffset() == firstDataBlockOffset) {
484 blockBuffer.rewind();
485 readKeyValueLen();
486 return true;
487 }
488
489 block = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
490 isCompaction);
491 if (block.getOffset() < 0) {
492 throw new IOException("Invalid block offset: " + block.getOffset());
493 }
494 updateCurrBlock(block);
495 return true;
496 }
497
498 @Override
499 public int seekTo(byte[] key) throws IOException {
500 return seekTo(key, 0, key.length);
501 }
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517 private int seekTo(byte[] key, int offset, int length, boolean rewind)
518 throws IOException {
519 HFileBlockIndex.BlockIndexReader indexReader =
520 reader.getDataBlockIndexReader();
521 HFileBlock seekToBlock = indexReader.seekToDataBlock(key, offset, length,
522 block, cacheBlocks, pread, isCompaction);
523
524 if (seekToBlock == null) {
525
526 return -1;
527 }
528 return loadBlockAndSeekToKey(seekToBlock, rewind, key, offset, length,
529 false);
530 }
531
532 @Override
533 public int seekTo(byte[] key, int offset, int length) throws IOException {
534
535
536 return seekTo(key, offset, length, true);
537 }
538
539 @Override
540 public int reseekTo(byte[] key) throws IOException {
541 return reseekTo(key, 0, key.length);
542 }
543
544 @Override
545 public int reseekTo(byte[] key, int offset, int length) throws IOException {
546 if (isSeeked()) {
547 ByteBuffer bb = getKey();
548 int compared = reader.getComparator().compare(key, offset,
549 length, bb.array(), bb.arrayOffset(), bb.limit());
550 if (compared < 1) {
551
552
553 return compared;
554 }
555 }
556
557
558
559 return seekTo(key, offset, length, false);
560 }
561
562 private int loadBlockAndSeekToKey(HFileBlock seekToBlock, boolean rewind,
563 byte[] key, int offset, int length, boolean seekBefore)
564 throws IOException {
565 if (block == null || block.getOffset() != seekToBlock.getOffset()) {
566 updateCurrBlock(seekToBlock);
567 } else if (rewind) {
568 blockBuffer.rewind();
569 }
570 return blockSeek(key, offset, length, seekBefore);
571 }
572
573
574
575
576
577
578
579 private void updateCurrBlock(HFileBlock newBlock) {
580 block = newBlock;
581 blockBuffer = block.getBufferWithoutHeader();
582 readKeyValueLen();
583 blockFetches++;
584 }
585
586 private final void readKeyValueLen() {
587 blockBuffer.mark();
588 currKeyLen = blockBuffer.getInt();
589 currValueLen = blockBuffer.getInt();
590 blockBuffer.reset();
591 if (this.reader.shouldIncludeMemstoreTS()) {
592 try {
593 int memstoreTSOffset = blockBuffer.arrayOffset() + blockBuffer.position()
594 + KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen;
595 currMemstoreTS = Bytes.readVLong(blockBuffer.array(), memstoreTSOffset);
596 currMemstoreTSLen = WritableUtils.getVIntSize(currMemstoreTS);
597 } catch (Exception e) {
598 throw new RuntimeException("Error reading memstoreTS. " + e);
599 }
600 }
601
602 if (currKeyLen < 0 || currValueLen < 0
603 || currKeyLen > blockBuffer.limit()
604 || currValueLen > blockBuffer.limit()) {
605 throw new IllegalStateException("Invalid currKeyLen " + currKeyLen
606 + " or currValueLen " + currValueLen + ". Block offset: "
607 + block.getOffset() + ", block length: " + blockBuffer.limit()
608 + ", position: " + blockBuffer.position() + " (without header).");
609 }
610 }
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625 private int blockSeek(byte[] key, int offset, int length,
626 boolean seekBefore) {
627 int klen, vlen;
628 long memstoreTS = 0;
629 int memstoreTSLen = 0;
630 int lastKeyValueSize = -1;
631 do {
632 blockBuffer.mark();
633 klen = blockBuffer.getInt();
634 vlen = blockBuffer.getInt();
635 blockBuffer.reset();
636 if (this.reader.shouldIncludeMemstoreTS()) {
637 try {
638 int memstoreTSOffset = blockBuffer.arrayOffset() + blockBuffer.position()
639 + KEY_VALUE_LEN_SIZE + klen + vlen;
640 memstoreTS = Bytes.readVLong(blockBuffer.array(), memstoreTSOffset);
641 memstoreTSLen = WritableUtils.getVIntSize(memstoreTS);
642 } catch (Exception e) {
643 throw new RuntimeException("Error reading memstoreTS. " + e);
644 }
645 }
646
647 int keyOffset = blockBuffer.arrayOffset() + blockBuffer.position()
648 + KEY_VALUE_LEN_SIZE;
649 int comp = reader.getComparator().compare(key, offset, length,
650 blockBuffer.array(), keyOffset, klen);
651
652 if (comp == 0) {
653 if (seekBefore) {
654 if (lastKeyValueSize < 0) {
655 throw new IllegalStateException("blockSeek with seekBefore "
656 + "at the first key of the block: key="
657 + Bytes.toStringBinary(key) + ", blockOffset="
658 + block.getOffset() + ", onDiskSize="
659 + block.getOnDiskSizeWithHeader());
660 }
661 blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
662 readKeyValueLen();
663 return 1;
664 }
665 currKeyLen = klen;
666 currValueLen = vlen;
667 if (this.reader.shouldIncludeMemstoreTS()) {
668 currMemstoreTS = memstoreTS;
669 currMemstoreTSLen = memstoreTSLen;
670 }
671 return 0;
672 }
673
674 if (comp < 0) {
675 if (lastKeyValueSize > 0)
676 blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
677 readKeyValueLen();
678 return 1;
679 }
680
681
682 lastKeyValueSize = klen + vlen + memstoreTSLen + KEY_VALUE_LEN_SIZE;
683 blockBuffer.position(blockBuffer.position() + lastKeyValueSize);
684 } while (blockBuffer.remaining() > 0);
685
686
687
688
689 blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
690 readKeyValueLen();
691 return 1;
692 }
693
694 @Override
695 public boolean seekBefore(byte[] key) throws IOException {
696 return seekBefore(key, 0, key.length);
697 }
698
699 private ByteBuffer getFirstKeyInBlock(HFileBlock curBlock) {
700 ByteBuffer buffer = curBlock.getBufferWithoutHeader();
701
702 buffer.rewind();
703 int klen = buffer.getInt();
704 buffer.getInt();
705 ByteBuffer keyBuff = buffer.slice();
706 keyBuff.limit(klen);
707 keyBuff.rewind();
708 return keyBuff;
709 }
710
711 @Override
712 public boolean seekBefore(byte[] key, int offset, int length)
713 throws IOException {
714 HFileBlock seekToBlock =
715 reader.getDataBlockIndexReader().seekToDataBlock(key, offset,
716 length, block, cacheBlocks, pread, isCompaction);
717 if (seekToBlock == null) {
718 return false;
719 }
720 ByteBuffer firstKey = getFirstKeyInBlock(seekToBlock);
721 if (reader.getComparator().compare(firstKey.array(),
722 firstKey.arrayOffset(), firstKey.limit(), key, offset, length) == 0)
723 {
724 long previousBlockOffset = seekToBlock.getPrevBlockOffset();
725
726 if (previousBlockOffset == -1) {
727
728 return false;
729 }
730
731
732
733
734 seekToBlock = reader.readBlock(previousBlockOffset,
735 seekToBlock.getOffset() - previousBlockOffset, cacheBlocks,
736 pread, isCompaction);
737
738
739
740 }
741 loadBlockAndSeekToKey(seekToBlock, true, key, offset, length, true);
742 return true;
743 }
744
745 @Override
746 public String getKeyString() {
747 return Bytes.toStringBinary(blockBuffer.array(),
748 blockBuffer.arrayOffset() + blockBuffer.position()
749 + KEY_VALUE_LEN_SIZE, currKeyLen);
750 }
751
752 @Override
753 public String getValueString() {
754 return Bytes.toString(blockBuffer.array(), blockBuffer.arrayOffset()
755 + blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen,
756 currValueLen);
757 }
758
759 }
760
761
762
763
764
765 @Override
766 public DataInput getBloomFilterMetadata() throws IOException {
767 for (HFileBlock b : loadOnOpenBlocks)
768 if (b.getBlockType() == BlockType.BLOOM_META)
769 return b.getByteStream();
770 return null;
771 }
772
773 @Override
774 public boolean isFileInfoLoaded() {
775 return true;
776 }
777
778 }