1 /*
2 * Copyright 2009 The Apache Software Foundation
3 *
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20 package org.apache.hadoop.hbase.regionserver;
21
22 import java.util.ArrayList;
23 import java.util.List;
24 import java.util.NavigableSet;
25
26 import org.apache.hadoop.hbase.HConstants;
27 import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
28 import org.apache.hadoop.hbase.util.Bytes;
29
/**
 * This class is used for the tracking and enforcement of columns and numbers
 * of versions during the course of a Get or Scan operation, when explicit
 * column qualifiers have been asked for in the query.
 *
 * With a little magic (see {@link ScanQueryMatcher}), we can use this matcher
 * for both scans and gets. The main difference is that 'next' and 'done'
 * collapse for the scan case (since we see all columns in order), and we only
 * reset between rows.
 *
 * <p>
 * This class is utilized by {@link ScanQueryMatcher} through two methods:
 * <ul>
 * <li>{@link #checkColumn} is called when a Put satisfies all other
 * conditions of the query. This method returns a
 * {@link org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode}
 * to define what action should be taken.</li>
 * <li>{@link #update} is called at the end of every StoreFile or memstore.</li>
 * </ul>
 * <p>
 * This class is NOT thread-safe, as queries are never multi-threaded.
 */
49 public class ExplicitColumnTracker implements ColumnTracker {
50
51 private final int maxVersions;
52 private final int minVersions;
53
54 /**
55 * Contains the list of columns that the ExplicitColumnTracker is tracking.
56 * Each ColumnCount instance also tracks how many versions of the requested
57 * column have been returned.
58 */
59 private final List<ColumnCount> columns;
60 private final List<ColumnCount> columnsToReuse;
61 private int index;
62 private ColumnCount column;
63 /** Keeps track of the latest timestamp included for current column.
64 * Used to eliminate duplicates. */
65 private long latestTSOfCurrentColumn;
66 private long oldestStamp;
67
68 /**
69 * Default constructor.
70 * @param columns columns specified user in query
71 * @param minVersions minimum number of versions to keep
72 * @param maxVersions maximum versions to return per column
73 * @param ttl The timeToLive to enforce
74 */
75 public ExplicitColumnTracker(NavigableSet<byte[]> columns, int minVersions,
76 int maxVersions, long ttl) {
77 this.maxVersions = maxVersions;
78 this.minVersions = minVersions;
79 this.oldestStamp = System.currentTimeMillis() - ttl;
80 this.columns = new ArrayList<ColumnCount>(columns.size());
81 this.columnsToReuse = new ArrayList<ColumnCount>(columns.size());
82 for(byte [] column : columns) {
83 this.columnsToReuse.add(new ColumnCount(column));
84 }
85 reset();
86 }
87
88 /**
89 * Done when there are no more columns to match against.
90 */
91 public boolean done() {
92 return this.columns.size() == 0;
93 }
94
95 public ColumnCount getColumnHint() {
96 return this.column;
97 }
98
99 /**
100 * Checks against the parameters of the query and the columns which have
101 * already been processed by this query.
102 * @param bytes KeyValue buffer
103 * @param offset offset to the start of the qualifier
104 * @param length length of the qualifier
105 * @param timestamp timestamp of the key being checked
106 * @return MatchCode telling ScanQueryMatcher what action to take
107 */
108 public ScanQueryMatcher.MatchCode checkColumn(byte [] bytes, int offset,
109 int length, long timestamp, boolean ignoreCount) {
110 do {
111 // No more columns left, we are done with this query
112 if(this.columns.size() == 0) {
113 return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
114 }
115
116 // No more columns to match against, done with storefile
117 if(this.column == null) {
118 return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
119 }
120
121 // Compare specific column to current column
122 int ret = Bytes.compareTo(column.getBuffer(), column.getOffset(),
123 column.getLength(), bytes, offset, length);
124
125 // Column Matches. If it is not a duplicate key, increment the version count
126 // and include.
127 if(ret == 0) {
128 if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
129
130 //If column matches, check if it is a duplicate timestamp
131 if (sameAsPreviousTS(timestamp)) {
132 //If duplicate, skip this Key
133 return ScanQueryMatcher.MatchCode.SKIP;
134 }
135 int count = this.column.increment();
136 if(count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
137 // Done with versions for this column
138 // Note: because we are done with this column, and are removing
139 // it from columns, we don't do a ++this.index. The index stays
140 // the same but the columns have shifted within the array such
141 // that index now points to the next column we are interested in.
142 this.columns.remove(this.index);
143
144 resetTS();
145 if (this.columns.size() == this.index) {
146 // We have served all the requested columns.
147 this.column = null;
148 return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
149 } else {
150 // We are done with current column; advance to next column
151 // of interest.
152 this.column = this.columns.get(this.index);
153 return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
154 }
155 } else {
156 setTS(timestamp);
157 }
158 return ScanQueryMatcher.MatchCode.INCLUDE;
159 }
160
161 resetTS();
162
163 if (ret > 0) {
164 // The current KV is smaller than the column the ExplicitColumnTracker
165 // is interested in, so seek to that column of interest.
166 return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
167 }
168
169 // The current KV is bigger than the column the ExplicitColumnTracker
170 // is interested in. That means there is no more data for the column
171 // of interest. Advance the ExplicitColumnTracker state to next
172 // column of interest, and check again.
173 if (ret <= -1) {
174 if (++this.index >= this.columns.size()) {
175 // No more to match, do not include, done with this row.
176 return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
177 }
178 // This is the recursive case.
179 this.column = this.columns.get(this.index);
180 }
181 } while(true);
182 }
183
184 /**
185 * Called at the end of every StoreFile or memstore.
186 */
187 public void update() {
188 if(this.columns.size() != 0) {
189 this.index = 0;
190 this.column = this.columns.get(this.index);
191 } else {
192 this.index = -1;
193 this.column = null;
194 }
195 }
196
197 // Called between every row.
198 public void reset() {
199 buildColumnList();
200 this.index = 0;
201 this.column = this.columns.get(this.index);
202 resetTS();
203 }
204
205 private void resetTS() {
206 latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
207 }
208
209 private void setTS(long timestamp) {
210 latestTSOfCurrentColumn = timestamp;
211 }
212
213 private boolean sameAsPreviousTS(long timestamp) {
214 return timestamp == latestTSOfCurrentColumn;
215 }
216
217 private boolean isExpired(long timestamp) {
218 return timestamp < oldestStamp;
219 }
220
221 private void buildColumnList() {
222 this.columns.clear();
223 this.columns.addAll(this.columnsToReuse);
224 for(ColumnCount col : this.columns) {
225 col.setCount(0);
226 }
227 }
228
229 /**
230 * This method is used to inform the column tracker that we are done with
231 * this column. We may get this information from external filters or
232 * timestamp range and we then need to indicate this information to
233 * tracker. It is required only in case of ExplicitColumnTracker.
234 * @param bytes
235 * @param offset
236 * @param length
237 */
238 public void doneWithColumn(byte [] bytes, int offset, int length) {
239 while (this.column != null) {
240 int compare = Bytes.compareTo(column.getBuffer(), column.getOffset(),
241 column.getLength(), bytes, offset, length);
242 resetTS();
243 if (compare == 0) {
244 this.columns.remove(this.index);
245 if (this.columns.size() == this.index) {
246 // Will not hit any more columns in this storefile
247 this.column = null;
248 } else {
249 this.column = this.columns.get(this.index);
250 }
251 return;
252 } else if ( compare <= -1) {
253 if(++this.index != this.columns.size()) {
254 this.column = this.columns.get(this.index);
255 } else {
256 this.column = null;
257 }
258 } else {
259 return;
260 }
261 }
262 }
263 public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
264 int qualLength) {
265 doneWithColumn(bytes, offset,qualLength);
266
267 if (getColumnHint() == null) {
268 return MatchCode.SEEK_NEXT_ROW;
269 } else {
270 return MatchCode.SEEK_NEXT_COL;
271 }
272 }
273
274 public boolean isDone(long timestamp) {
275 return minVersions <=0 && isExpired(timestamp);
276 }
277 }