View Javadoc

1   /*
2    * Copyright 2009 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.util.ArrayList;
23  import java.util.List;
24  import java.util.NavigableSet;
25  
26  import org.apache.hadoop.hbase.HConstants;
27  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
28  import org.apache.hadoop.hbase.util.Bytes;
29  
30  /**
31   * This class is used for the tracking and enforcement of columns and numbers
32   * of versions during the course of a Get or Scan operation, when explicit
33   * column qualifiers have been asked for in the query.
34   *
35   * With a little magic (see {@link ScanQueryMatcher}), we can use this matcher
36   * for both scans and gets.  The main difference is 'next' and 'done' collapse
37   * for the scan case (since we see all columns in order), and we only reset
38   * between rows.
39   *
40   * <p>
41   * This class is utilized by {@link ScanQueryMatcher} through two methods:
42   * <ul><li>{@link #checkColumn} is called when a Put satisfies all other
43   * conditions of the query.  This method returns a {@link org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode} to define
44   * what action should be taken.
45   * <li>{@link #update} is called at the end of every StoreFile or memstore.
46   * <p>
47   * This class is NOT thread-safe as queries are never multi-threaded
48   */
49  public class ExplicitColumnTracker implements ColumnTracker {
50  
51    private final int maxVersions;
52    private final int minVersions;
53  
54   /**
55    * Contains the list of columns that the ExplicitColumnTracker is tracking.
56    * Each ColumnCount instance also tracks how many versions of the requested
57    * column have been returned.
58    */
59    private final List<ColumnCount> columns;
60    private final List<ColumnCount> columnsToReuse;
61    private int index;
62    private ColumnCount column;
63    /** Keeps track of the latest timestamp included for current column.
64     * Used to eliminate duplicates. */
65    private long latestTSOfCurrentColumn;
66    private long oldestStamp;
67  
68    /**
69     * Default constructor.
70     * @param columns columns specified user in query
71     * @param minVersions minimum number of versions to keep
72     * @param maxVersions maximum versions to return per column
73     * @param ttl The timeToLive to enforce
74     */
75    public ExplicitColumnTracker(NavigableSet<byte[]> columns, int minVersions,
76        int maxVersions, long ttl) {
77      this.maxVersions = maxVersions;
78      this.minVersions = minVersions;
79      this.oldestStamp = System.currentTimeMillis() - ttl;
80      this.columns = new ArrayList<ColumnCount>(columns.size());
81      this.columnsToReuse = new ArrayList<ColumnCount>(columns.size());
82      for(byte [] column : columns) {
83        this.columnsToReuse.add(new ColumnCount(column));
84      }
85      reset();
86    }
87  
88      /**
89     * Done when there are no more columns to match against.
90     */
91    public boolean done() {
92      return this.columns.size() == 0;
93    }
94  
95    public ColumnCount getColumnHint() {
96      return this.column;
97    }
98  
99    /**
100    * Checks against the parameters of the query and the columns which have
101    * already been processed by this query.
102    * @param bytes KeyValue buffer
103    * @param offset offset to the start of the qualifier
104    * @param length length of the qualifier
105    * @param timestamp timestamp of the key being checked
106    * @return MatchCode telling ScanQueryMatcher what action to take
107    */
108   public ScanQueryMatcher.MatchCode checkColumn(byte [] bytes, int offset,
109       int length, long timestamp, boolean ignoreCount) {
110     do {
111       // No more columns left, we are done with this query
112       if(this.columns.size() == 0) {
113         return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
114       }
115 
116       // No more columns to match against, done with storefile
117       if(this.column == null) {
118         return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
119       }
120 
121       // Compare specific column to current column
122       int ret = Bytes.compareTo(column.getBuffer(), column.getOffset(),
123           column.getLength(), bytes, offset, length);
124 
125       // Column Matches. If it is not a duplicate key, increment the version count
126       // and include.
127       if(ret == 0) {
128         if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
129 
130         //If column matches, check if it is a duplicate timestamp
131         if (sameAsPreviousTS(timestamp)) {
132           //If duplicate, skip this Key
133           return ScanQueryMatcher.MatchCode.SKIP;
134         }
135         int count = this.column.increment();
136         if(count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
137           // Done with versions for this column
138           // Note: because we are done with this column, and are removing
139           // it from columns, we don't do a ++this.index. The index stays
140           // the same but the columns have shifted within the array such
141           // that index now points to the next column we are interested in.
142           this.columns.remove(this.index);
143 
144           resetTS();
145           if (this.columns.size() == this.index) {
146             // We have served all the requested columns.
147             this.column = null;
148 	    return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
149           } else {
150 	    // We are done with current column; advance to next column
151 	    // of interest.
152             this.column = this.columns.get(this.index);
153 	    return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
154           }
155         } else {
156           setTS(timestamp);
157         }
158         return ScanQueryMatcher.MatchCode.INCLUDE;
159       }
160 
161       resetTS();
162 
163       if (ret > 0) {
164         // The current KV is smaller than the column the ExplicitColumnTracker
165         // is interested in, so seek to that column of interest.
166         return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
167       }
168 
169       // The current KV is bigger than the column the ExplicitColumnTracker
170       // is interested in. That means there is no more data for the column
171       // of interest. Advance the ExplicitColumnTracker state to next
172       // column of interest, and check again.
173       if (ret <= -1) {
174         if (++this.index >= this.columns.size()) {
175           // No more to match, do not include, done with this row.
176           return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
177         }
178         // This is the recursive case.
179         this.column = this.columns.get(this.index);
180       }
181     } while(true);
182   }
183 
184   /**
185    * Called at the end of every StoreFile or memstore.
186    */
187   public void update() {
188     if(this.columns.size() != 0) {
189       this.index = 0;
190       this.column = this.columns.get(this.index);
191     } else {
192       this.index = -1;
193       this.column = null;
194     }
195   }
196 
197   // Called between every row.
198   public void reset() {
199     buildColumnList();
200     this.index = 0;
201     this.column = this.columns.get(this.index);
202     resetTS();
203   }
204 
205   private void resetTS() {
206     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
207   }
208 
209   private void setTS(long timestamp) {
210     latestTSOfCurrentColumn = timestamp;
211   }
212 
213   private boolean sameAsPreviousTS(long timestamp) {
214     return timestamp == latestTSOfCurrentColumn;
215   }
216 
217   private boolean isExpired(long timestamp) {
218     return timestamp < oldestStamp;
219   }
220 
221   private void buildColumnList() {
222     this.columns.clear();
223     this.columns.addAll(this.columnsToReuse);
224     for(ColumnCount col : this.columns) {
225       col.setCount(0);
226     }
227   }
228 
229   /**
230    * This method is used to inform the column tracker that we are done with
231    * this column. We may get this information from external filters or
232    * timestamp range and we then need to indicate this information to
233    * tracker. It is required only in case of ExplicitColumnTracker.
234    * @param bytes
235    * @param offset
236    * @param length
237    */
238   public void doneWithColumn(byte [] bytes, int offset, int length) {
239     while (this.column != null) {
240       int compare = Bytes.compareTo(column.getBuffer(), column.getOffset(),
241           column.getLength(), bytes, offset, length);
242       resetTS();
243       if (compare == 0) {
244         this.columns.remove(this.index);
245         if (this.columns.size() == this.index) {
246           // Will not hit any more columns in this storefile
247           this.column = null;
248         } else {
249           this.column = this.columns.get(this.index);
250         }
251         return;
252       } else if ( compare <= -1) {
253         if(++this.index != this.columns.size()) {
254           this.column = this.columns.get(this.index);
255         } else {
256           this.column = null;
257         }
258       } else {
259         return;
260       }
261     }
262   }
263   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
264       int qualLength) {
265     doneWithColumn(bytes, offset,qualLength);
266 
267     if (getColumnHint() == null) {
268       return MatchCode.SEEK_NEXT_ROW;
269     } else {
270       return MatchCode.SEEK_NEXT_COL;
271     }
272   }
273 
274   public boolean isDone(long timestamp) {
275     return minVersions <=0 && isExpired(timestamp);
276   }
277 }