/*
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.filter;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.io.Writable;

import java.util.List;

/**
 * Interface for row and column filters directly applied within the regionserver.
 * A filter can expect the following call sequence:
 * <ul>
 *   <li>{@link #reset()}</li>
 *   <li>{@link #filterAllRemaining()} -> true indicates the scan is over; false, keep going.</li>
 *   <li>{@link #filterRowKey(byte[],int,int)} -> true to drop this row;
 *   if false, we will also call</li>
 *   <li>{@link #filterKeyValue(KeyValue)} -> true to drop this key/value</li>
 *   <li>{@link #filterRow(List)} -> allows direct modification of the final list to be submitted</li>
 *   <li>{@link #filterRow()} -> last chance to drop the entire row based on the sequence of
 *   {@link #filterKeyValue(KeyValue)} calls, e.g. filter a row if it doesn't contain a specified column.
 *   </li>
 * </ul>
 *
 * Filter instances are created one per region/scan. This interface replaces
 * the old RowFilterInterface.
 *
 * When implementing your own filters, consider extending {@link FilterBase} to
 * reduce boilerplate.
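 *
 * For illustration, here is a minimal sketch of a custom filter built on
 * {@link FilterBase} (the class name and column family are hypothetical, and
 * {@link FilterBase} is assumed to supply pass-through defaults for the
 * methods not overridden here):
 * <pre>
 *   // Keep only KeyValues from the "info" column family.
 *   public class InfoFamilyFilter extends FilterBase {
 *     public ReturnCode filterKeyValue(KeyValue v) {
 *       if (Bytes.equals(v.getFamily(), Bytes.toBytes("info"))) {
 *         return ReturnCode.INCLUDE;
 *       }
 *       return ReturnCode.NEXT_COL;   // nothing else in this column can match
 *     }
 *     // Writable plumbing; this filter carries no state to serialize.
 *     public void write(DataOutput out) throws IOException {}
 *     public void readFields(DataInput in) throws IOException {}
 *   }
 * </pre>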
 *
 * @see FilterBase
 */
public interface Filter extends Writable {
  /**
   * Reset the state of the filter between rows.
   */
  public void reset();

  /**
   * Filters a row based on the row key. If this returns true, the entire
   * row will be excluded. If false, each KeyValue in the row will be
   * passed to {@link #filterKeyValue(KeyValue)} below.
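   *
   * For example, a hypothetical prefix filter (assuming a byte[] field named
   * <code>prefix</code>) might implement this as:
   * <pre>
   *   public boolean filterRowKey(byte[] buffer, int offset, int length) {
   *     if (length &lt; prefix.length) return true;   // too short, exclude
   *     return Bytes.compareTo(buffer, offset, prefix.length,
   *         prefix, 0, prefix.length) != 0;           // exclude on mismatch
   *   }
   * </pre>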
   *
   * @param buffer buffer containing row key
   * @param offset offset into buffer where row key starts
   * @param length length of the row key
   * @return true to remove the entire row, false to include the row (it may
   * still be excluded by later calls).
   */
  public boolean filterRowKey(byte [] buffer, int offset, int length);

  /**
   * If this returns true, the scan will terminate.
   *
   * @return true to end scan, false to continue.
   */
  public boolean filterAllRemaining();

  /**
   * A way to filter based on the column family, column qualifier and/or the
   * column value. The return code is described below. This allows filters to
   * filter only a certain number of columns, then terminate without matching
   * every remaining column.
   *
   * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should continue
   * to return <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called,
   * in case the caller calls for the next row anyway.
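   *
   * A hypothetical sketch of that contract (assuming a boolean field
   * <code>rowDone</code> that is cleared in {@link #reset()} and a helper
   * <code>isWantedColumn</code>):
   * <pre>
   *   public ReturnCode filterKeyValue(KeyValue v) {
   *     if (rowDone) return ReturnCode.NEXT_ROW;  // stay on NEXT_ROW until reset()
   *     if (isWantedColumn(v)) {
   *       rowDone = true;                         // done with this row
   *       return ReturnCode.INCLUDE;
   *     }
   *     return ReturnCode.SKIP;
   *   }
   * </pre>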
   *
   * @param v the KeyValue in question
   * @return code as described below
   * @see Filter.ReturnCode
   */
  public ReturnCode filterKeyValue(final KeyValue v);

  /**
   * Give the filter a chance to transform the passed KeyValue. If the KeyValue
   * is changed, a new KeyValue object must be returned; see
   * {@link org.apache.hadoop.hbase.KeyValue#shallowCopy()}.
   *
   * The transformed KeyValue is what is eventually returned to the
   * client. Most filters will return the passed KeyValue unchanged. See
   * {@link org.apache.hadoop.hbase.filter.KeyOnlyFilter#transform(KeyValue)}
   * for an example of a transformation.
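   *
   * For illustration, a transform in the spirit of KeyOnlyFilter might strip
   * the value so only the key is shipped back to the client (this sketch
   * assumes {@link org.apache.hadoop.hbase.KeyValue#createKeyOnly(boolean)}):
   * <pre>
   *   public KeyValue transform(KeyValue v) {
   *     return v.createKeyOnly(false);  // drop the value, keep the key
   *   }
   * </pre>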
   *
   * @param v the KeyValue in question
   * @return the changed KeyValue
   */
  public KeyValue transform(final KeyValue v);

  /**
   * Return codes for {@link #filterKeyValue(KeyValue)}.
   */
  public enum ReturnCode {
    /**
     * Include the KeyValue
     */
    INCLUDE,
    /**
     * Skip this KeyValue
     */
    SKIP,
    /**
     * Skip this column. Go to the next column in this row.
     */
    NEXT_COL,
    /**
     * Done with columns, skip to next row. Note that filterRow() will
     * still be called.
     */
    NEXT_ROW,
    /**
     * Seek to the next key, which is given as a hint by the filter.
     */
    SEEK_NEXT_USING_HINT,
  }

  /**
   * Chance to alter the list of KeyValues to be submitted.
   * Modifications to the list will carry through to what is submitted.
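   *
   * For example, a hypothetical filter could drop every KeyValue with an
   * empty value at this point:
   * <pre>
   *   public void filterRow(List&lt;KeyValue&gt; kvs) {
   *     Iterator&lt;KeyValue&gt; it = kvs.iterator();
   *     while (it.hasNext()) {
   *       if (it.next().getValueLength() == 0) {
   *         it.remove();   // strip empty values from the submitted row
   *       }
   *     }
   *   }
   * </pre>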
   * @param kvs the list of KeyValues to be filtered
   */
  public void filterRow(List<KeyValue> kvs);

  /**
   * @return True if this filter actively uses filterRow(List).
   * Primarily used to check for conflicts with scans (such as scans
   * that do not read a full row at a time).
   */
  public boolean hasFilterRow();

  /**
   * Last chance to veto the row based on previous {@link #filterKeyValue(KeyValue)}
   * calls. The filter needs to retain state and then return a particular value
   * from this call if it wishes to exclude a row when, for example, a certain
   * column is missing.
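   *
   * A hypothetical sketch (assuming a boolean field <code>foundColumn</code>
   * that is set in {@link #filterKeyValue(KeyValue)} and cleared in
   * {@link #reset()}):
   * <pre>
   *   public boolean filterRow() {
   *     return !foundColumn;   // exclude the row if the column never showed up
   *   }
   * </pre>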
   * @return true to exclude row, false to include row.
   */
  public boolean filterRow();

  /**
   * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should
   * also tell which key it must next seek to. After receiving the match code
   * SEEK_NEXT_USING_HINT, the QueryMatcher calls this function to find out
   * which key it must seek to next.
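   *
   * For example, a hypothetical filter that has already computed the row it
   * wants next (stored in a byte[] field <code>nextRow</code>) might return:
   * <pre>
   *   public KeyValue getNextKeyHint(KeyValue currentKV) {
   *     return KeyValue.createFirstOnRow(nextRow);  // first possible KV of that row
   *   }
   * </pre>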
   * @param currentKV the current KeyValue the scanner is positioned at
   * @return the KeyValue to seek to next, or null if the filter is not sure
   * which key to seek to next.
   */
  public KeyValue getNextKeyHint(final KeyValue currentKV);
}