View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import org.apache.hadoop.hbase.HConstants;
24  import org.apache.hadoop.hbase.KeyValue;
25  import org.apache.hadoop.hbase.client.Scan;
26  import org.apache.hadoop.hbase.filter.Filter;
27  import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
28  import org.apache.hadoop.hbase.io.TimeRange;
29  import org.apache.hadoop.hbase.util.Bytes;
30  
31  import java.io.IOException;
32  import java.util.NavigableSet;
33  
34  /**
35   * A query matcher that is specifically designed for the scan case.
36   */
37  public class ScanQueryMatcher {
38    // Optimization so we can skip lots of compares when we decide to skip
39    // to the next row.
40    private boolean stickyNextRow;
41    private byte[] stopRow;
42  
43    protected TimeRange tr;
44  
45    protected Filter filter;
46  
47    /** Keeps track of deletes */
48    protected DeleteTracker deletes;
49    protected boolean retainDeletesInOutput;
50  
51    /** Keeps track of columns and versions */
52    protected ColumnTracker columns;
53  
54    /** Key to seek to in memstore and StoreFiles */
55    protected KeyValue startKey;
56  
57    /** Row comparator for the region this query is for */
58    KeyValue.KeyComparator rowComparator;
59  
60    /** Row the query is on */
61    protected byte [] row;
62    
63    /** 
64     * True if we are only interested in the given exact set of columns. In that
65     * case we can use Bloom filters to avoid unnecessary disk seeks.
66     */
67    private boolean exactColumnQuery;
68  
69    /**
70     * Constructs a ScanQueryMatcher for a Scan.
71     * @param scan
72     * @param family
73     * @param columns
74     * @param ttl
75     * @param rowComparator
76     */
77    public ScanQueryMatcher(Scan scan, byte [] family,
78        NavigableSet<byte[]> columns, long ttl,
79        KeyValue.KeyComparator rowComparator, int minVersions, int maxVersions,
80        boolean retainDeletesInOutput, long readPointToUse) {
81      this.tr = scan.getTimeRange();
82      this.rowComparator = rowComparator;
83      this.deletes =  new ScanDeleteTracker();
84      this.stopRow = scan.getStopRow();
85      this.startKey = KeyValue.createFirstOnRow(scan.getStartRow());
86      this.filter = scan.getFilter();
87      this.retainDeletesInOutput = retainDeletesInOutput;
88      this.maxReadPointToTrackVersions = readPointToUse;
89  
90      // Single branch to deal with two types of reads (columns vs all in family)
91      if (columns == null || columns.size() == 0) {
92        // use a specialized scan for wildcard column tracker.
93        this.columns = new ScanWildcardColumnTracker(minVersions, maxVersions, ttl);
94      } else {
95        // We can share the ExplicitColumnTracker, diff is we reset
96        // between rows, not between storefiles.
97        this.columns = new ExplicitColumnTracker(columns, minVersions, maxVersions,
98            ttl);
99        exactColumnQuery = true;
100     }
101   }
102 
103   public ScanQueryMatcher(Scan scan, byte [] family,
104       NavigableSet<byte[]> columns, long ttl,
105       KeyValue.KeyComparator rowComparator, int minVersions, int maxVersions) {
106       /* By default we will not include deletes */
107       /* deletes are included explicitly (for minor compaction) */
108       this(scan, family, columns, ttl, rowComparator, minVersions, maxVersions,
109           false, Long.MAX_VALUE /* max Readpoint to track versions */);
110   }
111   public ScanQueryMatcher(Scan scan, byte [] family,
112       NavigableSet<byte[]> columns, long ttl,
113       KeyValue.KeyComparator rowComparator, int maxVersions) {
114     this(scan, family, columns, ttl, rowComparator, 0, maxVersions);
115   }
116 
117   /** readPoint over which the KVs are unconditionally included */
118   protected long maxReadPointToTrackVersions;
119 
120   /**
121    * Determines if the caller should do one of several things:
122    * - seek/skip to the next row (MatchCode.SEEK_NEXT_ROW)
123    * - seek/skip to the next column (MatchCode.SEEK_NEXT_COL)
124    * - include the current KeyValue (MatchCode.INCLUDE)
125    * - ignore the current KeyValue (MatchCode.SKIP)
126    * - got to the next row (MatchCode.DONE)
127    *
128    * @param kv KeyValue to check
129    * @return The match code instance.
130    * @throws IOException in case there is an internal consistency problem
131    *      caused by a data corruption.
132    */
133   public MatchCode match(KeyValue kv) throws IOException {
134     if (filter != null && filter.filterAllRemaining()) {
135       return MatchCode.DONE_SCAN;
136     }
137 
138     byte [] bytes = kv.getBuffer();
139     int offset = kv.getOffset();
140     int initialOffset = offset;
141 
142     int keyLength = Bytes.toInt(bytes, offset, Bytes.SIZEOF_INT);
143     offset += KeyValue.ROW_OFFSET;
144 
145     short rowLength = Bytes.toShort(bytes, offset, Bytes.SIZEOF_SHORT);
146     offset += Bytes.SIZEOF_SHORT;
147 
148     int ret = this.rowComparator.compareRows(row, 0, row.length,
149         bytes, offset, rowLength);
150     if (ret <= -1) {
151       return MatchCode.DONE;
152     } else if (ret >= 1) {
153       // could optimize this, if necessary?
154       // Could also be called SEEK_TO_CURRENT_ROW, but this
155       // should be rare/never happens.
156       return MatchCode.SEEK_NEXT_ROW;
157     }
158 
159     // optimize case.
160     if (this.stickyNextRow)
161         return MatchCode.SEEK_NEXT_ROW;
162 
163     if (this.columns.done()) {
164       stickyNextRow = true;
165       return MatchCode.SEEK_NEXT_ROW;
166     }
167 
168     //Passing rowLength
169     offset += rowLength;
170 
171     //Skipping family
172     byte familyLength = bytes [offset];
173     offset += familyLength + 1;
174 
175     int qualLength = keyLength + KeyValue.ROW_OFFSET -
176       (offset - initialOffset) - KeyValue.TIMESTAMP_TYPE_SIZE;
177 
178     long timestamp = kv.getTimestamp();
179     // check for early out based on timestamp alone
180     if (columns.isDone(timestamp)) {
181         return columns.getNextRowOrNextColumn(bytes, offset, qualLength);
182     }
183 
184     byte type = kv.getType();
185     if (isDelete(type)) {
186       if (tr.withinOrAfterTimeRange(timestamp)) {
187         this.deletes.add(bytes, offset, qualLength, timestamp, type);
188         // Can't early out now, because DelFam come before any other keys
189       }
190       if (retainDeletesInOutput) {
191         return MatchCode.INCLUDE;
192       }
193       else {
194         return MatchCode.SKIP;
195       }
196     }
197 
198     if (!this.deletes.isEmpty() &&
199         deletes.isDeleted(bytes, offset, qualLength, timestamp)) {
200 
201       // May be able to optimize the SKIP here, if we matched
202       // due to a DelFam, we can skip to next row
203       // due to a DelCol, we can skip to next col
204       // But it requires more info out of isDelete().
205       // needful -> million column challenge.
206       return MatchCode.SKIP;
207     }
208 
209     int timestampComparison = tr.compare(timestamp);
210     if (timestampComparison >= 1) {
211       return MatchCode.SKIP;
212     } else if (timestampComparison <= -1) {
213       return columns.getNextRowOrNextColumn(bytes, offset, qualLength);
214     }
215 
216     /**
217      * Filters should be checked before checking column trackers. If we do
218      * otherwise, as was previously being done, ColumnTracker may increment its
219      * counter for even that KV which may be discarded later on by Filter. This
220      * would lead to incorrect results in certain cases.
221      */
222     if (filter != null) {
223       ReturnCode filterResponse = filter.filterKeyValue(kv);
224       if (filterResponse == ReturnCode.SKIP) {
225         return MatchCode.SKIP;
226       } else if (filterResponse == ReturnCode.NEXT_COL) {
227         return columns.getNextRowOrNextColumn(bytes, offset, qualLength);
228       } else if (filterResponse == ReturnCode.NEXT_ROW) {
229         stickyNextRow = true;
230         return MatchCode.SEEK_NEXT_ROW;
231       } else if (filterResponse == ReturnCode.SEEK_NEXT_USING_HINT) {
232         return MatchCode.SEEK_NEXT_USING_HINT;
233       }
234     }
235 
236     MatchCode colChecker = columns.checkColumn(bytes, offset, qualLength,
237         timestamp, kv.getMemstoreTS() > maxReadPointToTrackVersions);
238     /*
239      * According to current implementation, colChecker can only be
240      * SEEK_NEXT_COL, SEEK_NEXT_ROW, SKIP or INCLUDE. Therefore, always return
241      * the MatchCode. If it is SEEK_NEXT_ROW, also set stickyNextRow.
242      */
243     if (colChecker == MatchCode.SEEK_NEXT_ROW) {
244       stickyNextRow = true;
245     }
246     return colChecker;
247 
248   }
249 
250   public boolean moreRowsMayExistAfter(KeyValue kv) {
251     if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
252         rowComparator.compareRows(kv.getBuffer(),kv.getRowOffset(),
253             kv.getRowLength(), stopRow, 0, stopRow.length) >= 0) {
254       // KV >= STOPROW
255       // then NO there is nothing left.
256       return false;
257     } else {
258       return true;
259     }
260   }
261 
262   /**
263    * Set current row
264    * @param row
265    */
266   public void setRow(byte [] row) {
267     this.row = row;
268     reset();
269   }
270 
271   public void reset() {
272     this.deletes.reset();
273     this.columns.reset();
274 
275     stickyNextRow = false;
276   }
277 
278   // should be in KeyValue.
279   protected boolean isDelete(byte type) {
280     return (type != KeyValue.Type.Put.getCode());
281   }
282 
283   /**
284    *
285    * @return the start key
286    */
287   public KeyValue getStartKey() {
288     return this.startKey;
289   }
290 
291   /**
292    *
293    * @return the Filter
294    */
295   Filter getFilter() {
296     return this.filter;
297   }
298 
299   public KeyValue getNextKeyHint(KeyValue kv) {
300     if (filter == null) {
301       return null;
302     } else {
303       return filter.getNextKeyHint(kv);
304     }
305   }
306 
307   public KeyValue getKeyForNextColumn(KeyValue kv) {
308     ColumnCount nextColumn = columns.getColumnHint();
309     if (nextColumn == null) {
310       return KeyValue.createLastOnRow(
311           kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
312           kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
313           kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength());
314     } else {
315       return KeyValue.createFirstOnRow(
316           kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
317           kv.getBuffer(), kv.getFamilyOffset(), kv.getFamilyLength(),
318           nextColumn.getBuffer(), nextColumn.getOffset(), nextColumn.getLength());
319     }
320   }
321 
322   public KeyValue getKeyForNextRow(KeyValue kv) {
323     return KeyValue.createLastOnRow(
324         kv.getBuffer(), kv.getRowOffset(), kv.getRowLength(),
325         null, 0, 0,
326         null, 0, 0);
327   }
328 
329   public boolean isExactColumnQuery() {
330     return exactColumnQuery;
331   }
332 
333   /**
334    * {@link #match} return codes.  These instruct the scanner moving through
335    * memstores and StoreFiles what to do with the current KeyValue.
336    * <p>
337    * Additionally, this contains "early-out" language to tell the scanner to
338    * move on to the next File (memstore or Storefile), or to return immediately.
339    */
340   public static enum MatchCode {
341     /**
342      * Include KeyValue in the returned result
343      */
344     INCLUDE,
345 
346     /**
347      * Do not include KeyValue in the returned result
348      */
349     SKIP,
350 
351     /**
352      * Do not include, jump to next StoreFile or memstore (in time order)
353      */
354     NEXT,
355 
356     /**
357      * Do not include, return current result
358      */
359     DONE,
360 
361     /**
362      * These codes are used by the ScanQueryMatcher
363      */
364 
365     /**
366      * Done with the row, seek there.
367      */
368     SEEK_NEXT_ROW,
369     /**
370      * Done with column, seek to next.
371      */
372     SEEK_NEXT_COL,
373 
374     /**
375      * Done with scan, thanks to the row filter.
376      */
377     DONE_SCAN,
378 
379     /*
380      * Seek to next key which is given as hint.
381      */
382     SEEK_NEXT_USING_HINT,
383 
384     /**
385      * Include KeyValue and done with column, seek to next.
386      */
387     INCLUDE_AND_SEEK_NEXT_COL,
388 
389     /**
390      * Include KeyValue and done with row, seek to next.
391      */
392     INCLUDE_AND_SEEK_NEXT_ROW,
393   }
394 }