View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.filter;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.hbase.KeyValue;
26  import org.apache.hadoop.hbase.client.Scan;
27  import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
28  import org.apache.hadoop.hbase.io.HbaseObjectWritable;
29  import org.apache.hadoop.hbase.util.Bytes;
30  
31  import java.io.DataInput;
32  import java.io.DataOutput;
33  import java.io.IOException;
34  import java.util.Arrays;
35  import java.util.List;
36  import java.util.ArrayList;
37  
38  import com.google.common.base.Preconditions;
39  
40  /**
41   * This filter is used to filter cells based on value. It takes a {@link CompareFilter.CompareOp}
42   * operator (equal, greater, not equal, etc), and either a byte [] value or
43   * a WritableByteArrayComparable.
44   * <p>
45   * If we have a byte [] value then we just do a lexicographic compare. For
46   * example, if passed value is 'b' and cell has 'a' and the compare operator
47   * is LESS, then we will filter out this cell (return true).  If this is not
48   * sufficient (eg you want to deserialize a long and then compare it to a fixed
49   * long value), then you can pass in your own comparator instead.
50   * <p>
51   * You must also specify a family and qualifier.  Only the value of this column
52   * will be tested. When using this filter on a {@link Scan} with specified
53   * inputs, the column to be tested should also be added as input (otherwise
54   * the filter will regard the column as missing).
55   * <p>
56   * To prevent the entire row from being emitted if the column is not found
57   * on a row, use {@link #setFilterIfMissing}.
58   * Otherwise, if the column is found, the entire row will be emitted only if
59   * the value passes.  If the value fails, the row will be filtered out.
60   * <p>
61   * In order to test values of previous versions (timestamps), set
62   * {@link #setLatestVersionOnly} to false. The default is true, meaning that
63   * only the latest version's value is tested and all previous versions are ignored.
64   * <p>
65   * To filter based on the value of all scanned columns, use {@link ValueFilter}.
66   */
67  public class SingleColumnValueFilter extends FilterBase {
68    static final Log LOG = LogFactory.getLog(SingleColumnValueFilter.class);
69  
70    protected byte [] columnFamily;
71    protected byte [] columnQualifier;
72    private CompareOp compareOp;
73    private WritableByteArrayComparable comparator;
74    private boolean foundColumn = false;
75    private boolean matchedColumn = false;
76    private boolean filterIfMissing = false;
77    private boolean latestVersionOnly = true;
78  
79    /**
80     * Writable constructor, do not use.
81     */
82    public SingleColumnValueFilter() {
83    }
84  
85    /**
86     * Constructor for binary compare of the value of a single column.  If the
87     * column is found and the condition passes, all columns of the row will be
88     * emitted.  If the condition fails, the row will not be emitted.
89     * <p>
90     * Use the filterIfColumnMissing flag to set whether the rest of the columns
91     * in a row will be emitted if the specified column to check is not found in
92     * the row.
93     *
94     * @param family name of column family
95     * @param qualifier name of column qualifier
96     * @param compareOp operator
97     * @param value value to compare column values against
98     */
99    public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
100       final CompareOp compareOp, final byte[] value) {
101     this(family, qualifier, compareOp, new BinaryComparator(value));
102   }
103 
104   /**
105    * Constructor for binary compare of the value of a single column.  If the
106    * column is found and the condition passes, all columns of the row will be
107    * emitted.  If the condition fails, the row will not be emitted.
108    * <p>
109    * Use the filterIfColumnMissing flag to set whether the rest of the columns
110    * in a row will be emitted if the specified column to check is not found in
111    * the row.
112    *
113    * @param family name of column family
114    * @param qualifier name of column qualifier
115    * @param compareOp operator
116    * @param comparator Comparator to use.
117    */
118   public SingleColumnValueFilter(final byte [] family, final byte [] qualifier,
119       final CompareOp compareOp, final WritableByteArrayComparable comparator) {
120     this.columnFamily = family;
121     this.columnQualifier = qualifier;
122     this.compareOp = compareOp;
123     this.comparator = comparator;
124   }
125 
126   /**
127    * @return operator
128    */
129   public CompareOp getOperator() {
130     return compareOp;
131   }
132 
133   /**
134    * @return the comparator
135    */
136   public WritableByteArrayComparable getComparator() {
137     return comparator;
138   }
139 
140   /**
141    * @return the family
142    */
143   public byte[] getFamily() {
144     return columnFamily;
145   }
146 
147   /**
148    * @return the qualifier
149    */
150   public byte[] getQualifier() {
151     return columnQualifier;
152   }
153 
154   public ReturnCode filterKeyValue(KeyValue keyValue) {
155     // System.out.println("REMOVE KEY=" + keyValue.toString() + ", value=" + Bytes.toString(keyValue.getValue()));
156     if (this.matchedColumn) {
157       // We already found and matched the single column, all keys now pass
158       return ReturnCode.INCLUDE;
159     } else if (this.latestVersionOnly && this.foundColumn) {
160       // We found but did not match the single column, skip to next row
161       return ReturnCode.NEXT_ROW;
162     }
163     if (!keyValue.matchingColumn(this.columnFamily, this.columnQualifier)) {
164       return ReturnCode.INCLUDE;
165     }
166     foundColumn = true;
167     if (filterColumnValue(keyValue.getBuffer(),
168         keyValue.getValueOffset(), keyValue.getValueLength())) {
169       return this.latestVersionOnly? ReturnCode.NEXT_ROW: ReturnCode.INCLUDE;
170     }
171     this.matchedColumn = true;
172     return ReturnCode.INCLUDE;
173   }
174 
175   private boolean filterColumnValue(final byte [] data, final int offset,
176       final int length) {
177     // TODO: Can this filter take a rawcomparator so don't have to make this
178     // byte array copy?
179     int compareResult =
180       this.comparator.compareTo(Arrays.copyOfRange(data, offset, offset + length));
181     switch (this.compareOp) {
182     case LESS:
183       return compareResult <= 0;
184     case LESS_OR_EQUAL:
185       return compareResult < 0;
186     case EQUAL:
187       return compareResult != 0;
188     case NOT_EQUAL:
189       return compareResult == 0;
190     case GREATER_OR_EQUAL:
191       return compareResult > 0;
192     case GREATER:
193       return compareResult >= 0;
194     default:
195       throw new RuntimeException("Unknown Compare op " + compareOp.name());
196     }
197   }
198 
199   public boolean filterRow() {
200     // If column was found, return false if it was matched, true if it was not
201     // If column not found, return true if we filter if missing, false if not
202     return this.foundColumn? !this.matchedColumn: this.filterIfMissing;
203   }
204 
205   public void reset() {
206     foundColumn = false;
207     matchedColumn = false;
208   }
209 
210   /**
211    * Get whether entire row should be filtered if column is not found.
212    * @return true if row should be skipped if column not found, false if row
213    * should be let through anyways
214    */
215   public boolean getFilterIfMissing() {
216     return filterIfMissing;
217   }
218 
219   /**
220    * Set whether entire row should be filtered if column is not found.
221    * <p>
222    * If true, the entire row will be skipped if the column is not found.
223    * <p>
224    * If false, the row will pass if the column is not found.  This is default.
225    * @param filterIfMissing flag
226    */
227   public void setFilterIfMissing(boolean filterIfMissing) {
228     this.filterIfMissing = filterIfMissing;
229   }
230 
231   /**
232    * Get whether only the latest version of the column value should be compared.
233    * If true, the row will be returned if only the latest version of the column
234    * value matches. If false, the row will be returned if any version of the
235    * column value matches. The default is true.
236    * @return return value
237    */
238   public boolean getLatestVersionOnly() {
239     return latestVersionOnly;
240   }
241 
242   /**
243    * Set whether only the latest version of the column value should be compared.
244    * If true, the row will be returned if only the latest version of the column
245    * value matches. If false, the row will be returned if any version of the
246    * column value matches. The default is true.
247    * @param latestVersionOnly flag
248    */
249   public void setLatestVersionOnly(boolean latestVersionOnly) {
250     this.latestVersionOnly = latestVersionOnly;
251   }
252 
253   public static Filter createFilterFromArguments(ArrayList<byte []> filterArguments) {
254     Preconditions.checkArgument(filterArguments.size() == 4 || filterArguments.size() == 6,
255                                 "Expected 4 or 6 but got: %s", filterArguments.size());
256     byte [] family = ParseFilter.removeQuotesFromByteArray(filterArguments.get(0));
257     byte [] qualifier = ParseFilter.removeQuotesFromByteArray(filterArguments.get(1));
258     CompareOp compareOp = ParseFilter.createCompareOp(filterArguments.get(2));
259     WritableByteArrayComparable comparator = ParseFilter.createComparator(
260       ParseFilter.removeQuotesFromByteArray(filterArguments.get(3)));
261 
262     if (comparator instanceof RegexStringComparator ||
263         comparator instanceof SubstringComparator) {
264       if (compareOp != CompareOp.EQUAL &&
265           compareOp != CompareOp.NOT_EQUAL) {
266         throw new IllegalArgumentException ("A regexstring comparator and substring comparator " +
267                                             "can only be used with EQUAL and NOT_EQUAL");
268       }
269     }
270 
271     SingleColumnValueFilter filter = new SingleColumnValueFilter(family, qualifier,
272                                                                  compareOp, comparator);
273 
274     if (filterArguments.size() == 6) {
275       boolean filterIfMissing = ParseFilter.convertByteArrayToBoolean(filterArguments.get(4));
276       boolean latestVersionOnly = ParseFilter.convertByteArrayToBoolean(filterArguments.get(5));
277       filter.setFilterIfMissing(filterIfMissing);
278       filter.setLatestVersionOnly(latestVersionOnly);
279     }
280     return filter;
281   }
282 
283   public void readFields(final DataInput in) throws IOException {
284     this.columnFamily = Bytes.readByteArray(in);
285     if(this.columnFamily.length == 0) {
286       this.columnFamily = null;
287     }
288     this.columnQualifier = Bytes.readByteArray(in);
289     if(this.columnQualifier.length == 0) {
290       this.columnQualifier = null;
291     }
292     this.compareOp = CompareOp.valueOf(in.readUTF());
293     this.comparator =
294       (WritableByteArrayComparable)HbaseObjectWritable.readObject(in, null);
295     this.foundColumn = in.readBoolean();
296     this.matchedColumn = in.readBoolean();
297     this.filterIfMissing = in.readBoolean();
298     this.latestVersionOnly = in.readBoolean();
299   }
300 
301   public void write(final DataOutput out) throws IOException {
302     Bytes.writeByteArray(out, this.columnFamily);
303     Bytes.writeByteArray(out, this.columnQualifier);
304     out.writeUTF(compareOp.name());
305     HbaseObjectWritable.writeObject(out, comparator,
306         WritableByteArrayComparable.class, null);
307     out.writeBoolean(foundColumn);
308     out.writeBoolean(matchedColumn);
309     out.writeBoolean(filterIfMissing);
310     out.writeBoolean(latestVersionOnly);
311   }
312 }