/*
 * Copyright 2011 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util;

import java.io.DataInput;
import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;

/**
 * Handles Bloom filter initialization based on configuration and serialized
 * metadata in the reader and writer of {@link StoreFile}.
 */
public final class BloomFilterFactory {

  private static final Log LOG =
      LogFactory.getLog(BloomFilterFactory.class.getName());

  /** This class should not be instantiated. */
  private BloomFilterFactory() {}
  /**
   * Specifies the target false positive rate to use when sizing each
   * Bloom filter.
   */
  public static final String IO_STOREFILE_BLOOM_ERROR_RATE =
      "io.storefile.bloom.error.rate";

  /**
   * Maximum folding factor allowed. The Bloom filter will be shrunk by
   * a factor of up to 2 ** this value if it is oversized initially.
   */
  public static final String IO_STOREFILE_BLOOM_MAX_FOLD =
      "io.storefile.bloom.max.fold";

  /**
   * For default (single-block) Bloom filters this specifies the maximum number
   * of keys.
   */
  public static final String IO_STOREFILE_BLOOM_MAX_KEYS =
      "io.storefile.bloom.max.keys";

  /** Master switch to enable Bloom filters */
  public static final String IO_STOREFILE_BLOOM_ENABLED =
      "io.storefile.bloom.enabled";

  /**
   * Target Bloom block size. Bloom filter blocks of approximately this size
   * are interleaved with data blocks.
   */
  public static final String IO_STOREFILE_BLOOM_BLOCK_SIZE =
      "io.storefile.bloom.block.size";

  /** Maximum number of times a Bloom filter can be "folded" if oversized */
  private static final int MAX_ALLOWED_FOLD_FACTOR = 7;
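
  // For illustration only: these settings are plain Hadoop Configuration keys,
  // so a hypothetical tuning sketch (values chosen arbitrarily, not defaults
  // recommended by this class) could look like:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setBoolean(IO_STOREFILE_BLOOM_ENABLED, true);
  //   conf.setFloat(IO_STOREFILE_BLOOM_ERROR_RATE, 0.005f);
  //   conf.setInt(IO_STOREFILE_BLOOM_MAX_FOLD, 5);
  //   conf.setInt(IO_STOREFILE_BLOOM_BLOCK_SIZE, 64 * 1024);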

  /**
   * Instantiates the correct Bloom filter class based on the version provided
   * in the meta block data.
   *
   * @param meta the {@link DataInput} holding the Bloom filter's metadata,
   *          including version information
   * @param reader the {@link HFile} reader to use to lazily load Bloom filter
   *          blocks
   * @return an instance of the correct type of Bloom filter
   * @throws IllegalArgumentException if the Bloom filter format version in the
   *           metadata is not recognized
   * @throws IOException if the metadata cannot be read
   */
  public static BloomFilter createFromMeta(DataInput meta, HFile.Reader reader)
      throws IllegalArgumentException, IOException {
    int version = meta.readInt();
    switch (version) {
      case ByteBloomFilter.VERSION:
        // This is only possible in a version 1 HFile. Version 1 Bloom filters
        // always use a raw byte comparator, so no comparator needs to be
        // passed in here.
        return new ByteBloomFilter(meta);

      case CompoundBloomFilterBase.VERSION:
        return new CompoundBloomFilter(meta, reader);

      default:
        throw new IllegalArgumentException(
            "Bad bloom filter format version " + version);
    }
  }
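
  // A minimal usage sketch for the read path, assuming the Bloom meta block
  // has already been obtained from the HFile reader (the accessor name below
  // is an assumption, not something defined by this class):
  //
  //   DataInput bloomMeta = hfileReader.getBloomFilterMetadata();
  //   if (bloomMeta != null) {
  //     BloomFilter bloom = BloomFilterFactory.createFromMeta(bloomMeta, hfileReader);
  //   }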

  /**
   * @return true if Bloom filters are enabled in the given configuration
   */
  public static boolean isBloomEnabled(Configuration conf) {
    return conf.getBoolean(IO_STOREFILE_BLOOM_ENABLED, true);
  }

  /**
   * @return the target Bloom filter false positive rate from the given
   *         configuration, defaulting to 1%
   */
  public static float getErrorRate(Configuration conf) {
    return conf.getFloat(IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
  }

  /**
   * Creates a new Bloom filter at the time of
   * {@link org.apache.hadoop.hbase.regionserver.StoreFile} writing.
   *
   * @param conf the configuration to read Bloom filter settings from
   * @param cacheConf the cache configuration, used to decide whether Bloom
   *        filter blocks should be cached on write
   * @param bloomType the type of Bloom filter to create (ROW or ROWCOL)
   * @param maxKeys an estimate of the number of keys we expect to insert.
   *        Irrelevant if compound Bloom filters are enabled.
   * @param writer the HFile writer
   * @return the new Bloom filter, or null if Bloom filters are disabled
   *         or one could not be created
   */
  public static BloomFilterWriter createBloomAtWrite(Configuration conf,
      CacheConfig cacheConf, BloomType bloomType, int maxKeys,
      HFile.Writer writer) {
    if (conf == null || !isBloomEnabled(conf)) {
      if (LOG.isTraceEnabled()) {
        LOG.trace("Bloom filters are disabled by configuration for "
            + writer.getPath()
            + (conf == null ? " (configuration is null)" : ""));
      }
      return null;
    } else if (bloomType == BloomType.NONE) {
      if (LOG.isTraceEnabled()) {
        LOG.trace("Bloom filter is turned off for the column family");
      }
      return null;
    }

    float err = getErrorRate(conf);

    // In case of row/column Bloom filter lookups, each lookup is an OR of two
    // separate lookups. Therefore, if each lookup's false positive rate is p,
    // the resulting false positive rate is err = 1 - (1 - p)^2, and
    // p = 1 - sqrt(1 - err).
    if (bloomType == BloomType.ROWCOL) {
      err = (float) (1 - Math.sqrt(1 - err));
    }
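
    // Worked example (illustrative): with the default err = 0.01, the adjusted
    // per-lookup rate is p = 1 - sqrt(0.99) ~= 0.005, so each of the two
    // lookups is allocated roughly half of the overall error budget.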

    int maxFold = conf.getInt(IO_STOREFILE_BLOOM_MAX_FOLD,
        MAX_ALLOWED_FOLD_FACTOR);

    if (HFile.getFormatVersion(conf) > HFile.MIN_FORMAT_VERSION) {
      // In case of compound Bloom filters we ignore the maxKeys hint.
      CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(
          getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold,
          cacheConf.shouldCacheBloomsOnWrite(), bloomType == BloomType.ROWCOL
              ? KeyValue.KEY_COMPARATOR : Bytes.BYTES_RAWCOMPARATOR);
      writer.addInlineBlockWriter(bloomWriter);
      return bloomWriter;
    } else {
      // A single-block Bloom filter. Only used when testing HFile format
      // version 1.
      int tooBig = conf.getInt(IO_STOREFILE_BLOOM_MAX_KEYS,
          128 * 1000 * 1000);

      if (maxKeys <= 0) {
        LOG.warn("Invalid maximum number of keys specified: " + maxKeys
            + ", not using Bloom filter");
        return null;
      } else if (maxKeys < tooBig) {
        BloomFilterWriter bloom = new ByteBloomFilter(maxKeys, err,
            Hash.getHashType(conf), maxFold);
        bloom.allocBloom();
        return bloom;
      } else {
        LOG.debug("Skipping Bloom filter because the number of keys is "
            + "too large: " + maxKeys);
      }
    }
    return null;
  }
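
  // A minimal write-path sketch, assuming an already-open HFile.Writer and an
  // estimate of the key count (the local variable names below are hypothetical):
  //
  //   BloomFilterWriter bloomWriter = BloomFilterFactory.createBloomAtWrite(
  //       conf, cacheConf, BloomType.ROW, estimatedKeyCount, hfileWriter);
  //   if (bloomWriter != null) {
  //     bloomWriter.add(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength());
  //   }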

  /** @return the compound Bloom filter block size from the configuration */
  public static int getBloomBlockSize(Configuration conf) {
    return conf.getInt(IO_STOREFILE_BLOOM_BLOCK_SIZE, 128 * 1024);
  }
}