View Javadoc

1   /*
2    * Copyright 2011 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.util;
22  
23  import java.io.DataInput;
24  import java.io.IOException;
25  import java.nio.ByteBuffer;
26  
27  import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
28  import org.apache.hadoop.hbase.io.hfile.HFile;
29  import org.apache.hadoop.hbase.io.hfile.HFileBlock;
30  import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;
31  import org.apache.hadoop.io.RawComparator;
32  
33  /**
34   * A Bloom filter implementation built on top of {@link ByteBloomFilter},
35   * encapsulating a set of fixed-size Bloom filters written out at the time of
36   * {@link org.apache.hadoop.hbase.io.hfile.HFile} generation into the data
37   * block stream, and loaded on demand at query time. This class only provides
38   * reading capabilities.
39   */
40  public class CompoundBloomFilter extends CompoundBloomFilterBase
41      implements BloomFilter {
42  
43    /** Used to load chunks on demand */
44    private HFile.Reader reader;
45  
46    private HFileBlockIndex.BlockIndexReader index;
47  
48    private int hashCount;
49    private Hash hash;
50  
51    private long[] numQueriesPerChunk;
52    private long[] numPositivesPerChunk;
53  
54    /**
55     * De-serialization for compound Bloom filter metadata. Must be consistent
56     * with what {@link CompoundBloomFilterWriter} does.
57     *
58     * @param meta serialized Bloom filter metadata without any magic blocks
59     * @throws IOException
60     */
61    public CompoundBloomFilter(DataInput meta, HFile.Reader reader)
62        throws IOException {
63      this.reader = reader;
64  
65      totalByteSize = meta.readLong();
66      hashCount = meta.readInt();
67      hashType = meta.readInt();
68      totalKeyCount = meta.readLong();
69      totalMaxKeys = meta.readLong();
70      numChunks = meta.readInt();
71      comparator = FixedFileTrailer.createComparator(
72          Bytes.toString(Bytes.readByteArray(meta)));
73  
74      hash = Hash.getInstance(hashType);
75      if (hash == null) {
76        throw new IllegalArgumentException("Invalid hash type: " + hashType);
77      }
78  
79      index = new HFileBlockIndex.BlockIndexReader(comparator, 1);
80      index.readRootIndex(meta, numChunks);
81    }
82  
83    @Override
84    public boolean contains(byte[] key, int keyOffset, int keyLength,
85        ByteBuffer bloom) {
86      // We try to store the result in this variable so we can update stats for
87      // testing, but when an error happens, we log a message and return.
88      boolean result;
89  
90      int block = index.rootBlockContainingKey(key, keyOffset, keyLength);
91      if (block < 0) {
92        result = false; // This key is not in the file.
93      } else {
94        HFileBlock bloomBlock;
95        try {
96          // We cache the block and use a positional read.
97          bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
98              index.getRootBlockDataSize(block), true, true, false);
99        } catch (IOException ex) {
100         // The Bloom filter is broken, turn it off.
101         throw new IllegalArgumentException(
102             "Failed to load Bloom block for key "
103                 + Bytes.toStringBinary(key, keyOffset, keyLength), ex);
104       }
105 
106       ByteBuffer bloomBuf = bloomBlock.getBufferReadOnly();
107       result = ByteBloomFilter.contains(key, keyOffset, keyLength,
108           bloomBuf.array(), bloomBuf.arrayOffset() + HFileBlock.HEADER_SIZE,
109           bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
110     }
111 
112     if (numQueriesPerChunk != null && block >= 0) {
113       // Update statistics. Only used in unit tests.
114       ++numQueriesPerChunk[block];
115       if (result)
116         ++numPositivesPerChunk[block];
117     }
118 
119     return result;
120   }
121 
122   public boolean supportsAutoLoading() {
123     return true;
124   }
125 
126   public int getNumChunks() {
127     return numChunks;
128   }
129 
130   @Override
131   public RawComparator<byte[]> getComparator() {
132     return comparator;
133   }
134 
135   public void enableTestingStats() {
136     numQueriesPerChunk = new long[numChunks];
137     numPositivesPerChunk = new long[numChunks];
138   }
139 
140   public String formatTestingStats() {
141     StringBuilder sb = new StringBuilder();
142     for (int i = 0; i < numChunks; ++i) {
143       sb.append("chunk #");
144       sb.append(i);
145       sb.append(": queries=");
146       sb.append(numQueriesPerChunk[i]);
147       sb.append(", positives=");
148       sb.append(numPositivesPerChunk[i]);
149       sb.append(", positiveRatio=");
150       sb.append(numPositivesPerChunk[i] * 1.0 / numQueriesPerChunk[i]);
151       sb.append(";\n");
152     }
153     return sb.toString();
154   }
155 
156   public long getNumQueriesForTesting(int chunk) {
157     return numQueriesPerChunk[chunk];
158   }
159 
160   public long getNumPositivesForTesting(int chunk) {
161     return numPositivesPerChunk[chunk];
162   }
163 
164   @Override
165   public String toString() {
166     StringBuilder sb = new StringBuilder();
167     sb.append(ByteBloomFilter.formatStats(this));
168     sb.append(ByteBloomFilter.STATS_RECORD_SEP + 
169         "Number of chunks: " + numChunks);
170     sb.append(ByteBloomFilter.STATS_RECORD_SEP + 
171         "Comparator: " + comparator.getClass().getSimpleName());
172     return sb.toString();
173   }
174 
175 }