View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import org.apache.commons.lang.NotImplementedException;
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.KeyValue;
27  import org.apache.hadoop.hbase.client.Scan;
28  import org.apache.hadoop.hbase.filter.Filter;
29  
30  import java.io.IOException;
31  import java.util.ArrayList;
32  import java.util.LinkedList;
33  import java.util.List;
34  import java.util.NavigableSet;
35  
36  /**
37   * Scanner scans both the memstore and the HStore. Coalesce KeyValue stream
38   * into List<KeyValue> for a single row.
39   */
40  class StoreScanner implements KeyValueScanner, InternalScanner, ChangedReadersObserver {
41    static final Log LOG = LogFactory.getLog(StoreScanner.class);
42    private Store store;
43    private ScanQueryMatcher matcher;
44    private KeyValueHeap heap;
45    private boolean cacheBlocks;
46  
47    // Used to indicate that the scanner has closed (see HBASE-1107)
48    // Doesnt need to be volatile because it's always accessed via synchronized methods
49    private boolean closing = false;
50    private final boolean isGet;
51  
52    // if heap == null and lastTop != null, you need to reseek given the key below
53    private KeyValue lastTop = null;
54  
55    /**
56     * Opens a scanner across memstore, snapshot, and all StoreFiles.
57     *
58     * @param store who we scan
59     * @param scan the spec
60     * @param columns which columns we are scanning
61     * @throws IOException
62     */
63    StoreScanner(Store store, Scan scan, final NavigableSet<byte[]> columns)
64                                throws IOException {
65      this.store = store;
66      this.cacheBlocks = scan.getCacheBlocks();
67      matcher = new ScanQueryMatcher(scan, store.getFamily().getName(),
68          columns, store.ttl, store.comparator.getRawComparator(),
69          store.minVersions, store.versionsToReturn(scan.getMaxVersions()),
70          false, Long.MAX_VALUE);
71  
72      this.isGet = scan.isGetScan();
73      // pass columns = try to filter out unnecessary ScanFiles
74      List<KeyValueScanner> scanners = getScanners(scan, columns);
75  
76      // Seek all scanners to the start of the Row (or if the exact matching row
77      // key does not exist, then to the start of the next matching Row).
78      if (matcher.isExactColumnQuery()) {
79        for (KeyValueScanner scanner : scanners)
80          scanner.seekExactly(matcher.getStartKey(), false);
81      } else {
82        for (KeyValueScanner scanner : scanners)
83          scanner.seek(matcher.getStartKey());
84      }
85  
86      // Combine all seeked scanners with a heap
87      heap = new KeyValueHeap(scanners, store.comparator);
88  
89      this.store.addChangedReaderObserver(this);
90    }
91  
92    /**
93     * Used for major compactions.<p>
94     *
95     * Opens a scanner across specified StoreFiles.
96     * @param store who we scan
97     * @param scan the spec
98     * @param scanners ancilliary scanners
99     * @param smallestReadPoint the readPoint that we should use for tracking versions
100    * @param retainDeletesInOutput should we retain deletes after compaction?
101    */
102   StoreScanner(Store store, Scan scan, List<? extends KeyValueScanner> scanners,
103       boolean retainDeletesInOutput, long smallestReadPoint)
104   throws IOException {
105     this.store = store;
106     this.cacheBlocks = false;
107     this.isGet = false;
108     matcher = new ScanQueryMatcher(scan, store.getFamily().getName(),
109         null, store.ttl, store.comparator.getRawComparator(), store.minVersions,
110         store.versionsToReturn(scan.getMaxVersions()), retainDeletesInOutput, smallestReadPoint);
111 
112     // Seek all scanners to the initial key
113     for(KeyValueScanner scanner : scanners) {
114       scanner.seek(matcher.getStartKey());
115     }
116 
117     // Combine all seeked scanners with a heap
118     heap = new KeyValueHeap(scanners, store.comparator);
119   }
120 
121   // Constructor for testing.
122   StoreScanner(final Scan scan, final byte [] colFamily, final long ttl,
123       final KeyValue.KVComparator comparator,
124       final NavigableSet<byte[]> columns,
125       final List<KeyValueScanner> scanners)
126         throws IOException {
127     this.store = null;
128     this.isGet = false;
129     this.cacheBlocks = scan.getCacheBlocks();
130     this.matcher = new ScanQueryMatcher(scan, colFamily, columns, ttl,
131         comparator.getRawComparator(), 0, scan.getMaxVersions(), false,
132         Long.MAX_VALUE);
133 
134     // Seek all scanners to the initial key
135     for(KeyValueScanner scanner : scanners) {
136       scanner.seek(matcher.getStartKey());
137     }
138     heap = new KeyValueHeap(scanners, comparator);
139   }
140 
141   /*
142    * @return List of scanners ordered properly.
143    */
144   private List<KeyValueScanner> getScanners() throws IOException {
145     return this.store.getScanners(cacheBlocks, isGet, false);
146   }
147 
148   /*
149    * @return List of scanners to seek, possibly filtered by StoreFile.
150    */
151   private List<KeyValueScanner> getScanners(Scan scan,
152       final NavigableSet<byte[]> columns) throws IOException {
153     boolean memOnly;
154     boolean filesOnly;
155     if (scan instanceof InternalScan) {
156       InternalScan iscan = (InternalScan)scan;
157       memOnly = iscan.isCheckOnlyMemStore();
158       filesOnly = iscan.isCheckOnlyStoreFiles();
159     } else {
160       memOnly = false;
161       filesOnly = false;
162     }
163     List<KeyValueScanner> allStoreScanners =
164         this.store.getScanners(cacheBlocks, isGet, false);
165 
166     List<KeyValueScanner> scanners =
167         new ArrayList<KeyValueScanner>(allStoreScanners.size());
168 
169     // include only those scan files which pass all filters
170     for (KeyValueScanner kvs : allStoreScanners) {
171       if (kvs instanceof StoreFileScanner) {
172         if (memOnly == false
173             && ((StoreFileScanner) kvs).shouldSeek(scan, columns)) {
174           scanners.add(kvs);
175         }
176       } else {
177         // kvs is a MemStoreScanner
178         if (filesOnly == false && this.store.memstore.shouldSeek(scan)) {
179           scanners.add(kvs);
180         }
181       }
182     }
183 
184     return scanners;
185   }
186 
187   public synchronized KeyValue peek() {
188     if (this.heap == null) {
189       return this.lastTop;
190     }
191     return this.heap.peek();
192   }
193 
194   public KeyValue next() {
195     // throw runtime exception perhaps?
196     throw new RuntimeException("Never call StoreScanner.next()");
197   }
198 
199   public synchronized void close() {
200     if (this.closing) return;
201     this.closing = true;
202     // under test, we dont have a this.store
203     if (this.store != null)
204       this.store.deleteChangedReaderObserver(this);
205     if (this.heap != null)
206       this.heap.close();
207     this.heap = null; // CLOSED!
208     this.lastTop = null; // If both are null, we are closed.
209   }
210 
211   public synchronized boolean seek(KeyValue key) throws IOException {
212     if (this.heap == null) {
213 
214       List<KeyValueScanner> scanners = getScanners();
215 
216       heap = new KeyValueHeap(scanners, store.comparator);
217     }
218 
219     return this.heap.seek(key);
220   }
221 
222   /**
223    * Get the next row of values from this Store.
224    * @param outResult
225    * @param limit
226    * @return true if there are more rows, false if scanner is done
227    */
228   public synchronized boolean next(List<KeyValue> outResult, int limit) throws IOException {
229     //DebugPrint.println("SS.next");
230 
231     checkReseek();
232 
233     // if the heap was left null, then the scanners had previously run out anyways, close and
234     // return.
235     if (this.heap == null) {
236       close();
237       return false;
238     }
239 
240     KeyValue peeked = this.heap.peek();
241     if (peeked == null) {
242       close();
243       return false;
244     }
245 
246     // only call setRow if the row changes; avoids confusing the query matcher
247     // if scanning intra-row
248     if ((matcher.row == null) || !peeked.matchingRow(matcher.row)) {
249       matcher.setRow(peeked.getRow());
250     }
251 
252     KeyValue kv;
253     KeyValue prevKV = null;
254     List<KeyValue> results = new ArrayList<KeyValue>();
255 
256     // Only do a sanity-check if store and comparator are available.
257     KeyValue.KVComparator comparator =
258         store != null ? store.getComparator() : null;
259 
260     LOOP: while((kv = this.heap.peek()) != null) {
261       // Check that the heap gives us KVs in an increasing order.
262       if (prevKV != null && comparator != null
263           && comparator.compare(prevKV, kv) > 0) {
264         throw new IOException("Key " + prevKV + " followed by a " +
265             "smaller key " + kv + " in cf " + store);
266       }
267       prevKV = kv;
268       ScanQueryMatcher.MatchCode qcode = matcher.match(kv);
269 
270       switch(qcode) {
271         case INCLUDE:
272         case INCLUDE_AND_SEEK_NEXT_ROW:
273         case INCLUDE_AND_SEEK_NEXT_COL:
274 
275           Filter f = matcher.getFilter();
276           results.add(f == null ? kv : f.transform(kv));
277 
278           if (qcode == ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW) {
279             if (!matcher.moreRowsMayExistAfter(kv)) {
280               outResult.addAll(results);
281               return false;
282             }
283             reseek(matcher.getKeyForNextRow(kv));
284           } else if (qcode == ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL) {
285             reseek(matcher.getKeyForNextColumn(kv));
286           } else {
287             this.heap.next();
288           }
289 
290           if (limit > 0 && (results.size() == limit)) {
291             break LOOP;
292           }
293           continue;
294 
295         case DONE:
296           // copy jazz
297           outResult.addAll(results);
298           return true;
299 
300         case DONE_SCAN:
301           close();
302 
303           // copy jazz
304           outResult.addAll(results);
305 
306           return false;
307 
308         case SEEK_NEXT_ROW:
309           // This is just a relatively simple end of scan fix, to short-cut end us if there is a
310           // endKey in the scan.
311           if (!matcher.moreRowsMayExistAfter(kv)) {
312             outResult.addAll(results);
313             return false;
314           }
315 
316           reseek(matcher.getKeyForNextRow(kv));
317           break;
318 
319         case SEEK_NEXT_COL:
320           reseek(matcher.getKeyForNextColumn(kv));
321           break;
322 
323         case SKIP:
324           this.heap.next();
325           break;
326 
327         case SEEK_NEXT_USING_HINT:
328           KeyValue nextKV = matcher.getNextKeyHint(kv);
329           if (nextKV != null) {
330             reseek(nextKV);
331           } else {
332             heap.next();
333           }
334           break;
335 
336         default:
337           throw new RuntimeException("UNEXPECTED");
338       }
339     }
340 
341     if (!results.isEmpty()) {
342       // copy jazz
343       outResult.addAll(results);
344       return true;
345     }
346 
347     // No more keys
348     close();
349     return false;
350   }
351 
352   public synchronized boolean next(List<KeyValue> outResult) throws IOException {
353     return next(outResult, -1);
354   }
355 
356   // Implementation of ChangedReadersObserver
357   public synchronized void updateReaders() throws IOException {
358     if (this.closing) return;
359 
360     // All public synchronized API calls will call 'checkReseek' which will cause
361     // the scanner stack to reseek if this.heap==null && this.lastTop != null.
362     // But if two calls to updateReaders() happen without a 'next' or 'peek' then we
363     // will end up calling this.peek() which would cause a reseek in the middle of a updateReaders
364     // which is NOT what we want, not to mention could cause an NPE. So we early out here.
365     if (this.heap == null) return;
366 
367     // this could be null.
368     this.lastTop = this.peek();
369 
370     //DebugPrint.println("SS updateReaders, topKey = " + lastTop);
371 
372     // close scanners to old obsolete Store files
373     this.heap.close(); // bubble thru and close all scanners.
374     this.heap = null; // the re-seeks could be slow (access HDFS) free up memory ASAP
375 
376     // Let the next() call handle re-creating and seeking
377   }
378 
379   private void checkReseek() throws IOException {
380     if (this.heap == null && this.lastTop != null) {
381       resetScannerStack(this.lastTop);
382       this.lastTop = null; // gone!
383     }
384     // else dont need to reseek
385   }
386 
387   private void resetScannerStack(KeyValue lastTopKey) throws IOException {
388     if (heap != null) {
389       throw new RuntimeException("StoreScanner.reseek run on an existing heap!");
390     }
391 
392     /* When we have the scan object, should we not pass it to getScanners()
393      * to get a limited set of scanners? We did so in the constructor and we
394      * could have done it now by storing the scan object from the constructor */
395     List<KeyValueScanner> scanners = getScanners();
396 
397     for(KeyValueScanner scanner : scanners) {
398       scanner.seek(lastTopKey);
399     }
400 
401     // Combine all seeked scanners with a heap
402     heap = new KeyValueHeap(scanners, store.comparator);
403 
404     // Reset the state of the Query Matcher and set to top row.
405     // Only reset and call setRow if the row changes; avoids confusing the
406     // query matcher if scanning intra-row.
407     KeyValue kv = heap.peek();
408     if (kv == null) {
409       kv = lastTopKey;
410     }
411     if ((matcher.row == null) || !kv.matchingRow(matcher.row)) {
412       matcher.reset();
413       matcher.setRow(kv.getRow());
414     }
415   }
416 
417   @Override
418   public synchronized boolean reseek(KeyValue kv) throws IOException {
419     //Heap cannot be null, because this is only called from next() which
420     //guarantees that heap will never be null before this call.
421     return matcher.isExactColumnQuery() ? heap.seekExactly(kv, true) : 
422         heap.reseek(kv);
423   }
424 
425   @Override
426   public long getSequenceID() {
427     return 0;
428   }
429 
430   @Override
431   public boolean seekExactly(KeyValue kv, boolean forward) throws IOException {
432     throw new NotImplementedException();
433   }
434 
435   /**
436    * Used in testing.
437    * @return all scanners in no particular order
438    */
439   List<KeyValueScanner> getAllScannersForTesting() {
440     List<KeyValueScanner> allScanners = new ArrayList<KeyValueScanner>();
441     KeyValueScanner current = heap.getCurrentForTesting();
442     if (current != null)
443       allScanners.add(current);
444     for (KeyValueScanner scanner : heap.getHeap())
445       allScanners.add(scanner);
446     return allScanners;
447   }
448 }