View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import org.apache.hadoop.hbase.KeyValue;
24  import org.apache.hadoop.hbase.KeyValue.KVComparator;
25  
26  import java.io.IOException;
27  import java.util.Comparator;
28  import java.util.List;
29  import java.util.PriorityQueue;
30  
31  /**
32   * Implements a heap merge across any number of KeyValueScanners.
33   * <p>
34   * Implements KeyValueScanner itself.
35   * <p>
36   * This class is used at the Region level to merge across Stores
37   * and at the Store level to merge across the memstore and StoreFiles.
38   * <p>
39   * In the Region case, we also need InternalScanner.next(List), so this class
40   * also implements InternalScanner.  WARNING: As is, if you try to use this
41   * as an InternalScanner at the Store level, you will get runtime exceptions.
42   */
43  public class KeyValueHeap implements KeyValueScanner, InternalScanner {
44    private PriorityQueue<KeyValueScanner> heap = null;
45    private KeyValueScanner current = null;
46    private KVScannerComparator comparator;
47  
48    /**
49     * A helper enum that knows how to call the correct seek function within a
50     * {@link KeyValueScanner}.
51     */
52    public enum SeekType {
53      NORMAL {
54        @Override
55        public boolean seek(KeyValueScanner scanner, KeyValue kv,
56            boolean forward) throws IOException {
57          return forward ? scanner.reseek(kv) : scanner.seek(kv);
58        }
59      },
60      EXACT {
61        @Override
62        public boolean seek(KeyValueScanner scanner, KeyValue kv,
63            boolean forward) throws IOException {
64          return scanner.seekExactly(kv, forward);
65        }
66      };
67  
68      public abstract boolean seek(KeyValueScanner scanner, KeyValue kv,
69          boolean forward) throws IOException;
70    }
71  
72    /**
73     * Constructor.  This KeyValueHeap will handle closing of passed in
74     * KeyValueScanners.
75     * @param scanners
76     * @param comparator
77     */
78    public KeyValueHeap(List<? extends KeyValueScanner> scanners,
79        KVComparator comparator) {
80      this.comparator = new KVScannerComparator(comparator);
81      if (!scanners.isEmpty()) {
82        this.heap = new PriorityQueue<KeyValueScanner>(scanners.size(),
83            this.comparator);
84        for (KeyValueScanner scanner : scanners) {
85          if (scanner.peek() != null) {
86            this.heap.add(scanner);
87          } else {
88            scanner.close();
89          }
90        }
91        this.current = heap.poll();
92      }
93    }
94  
95    public KeyValue peek() {
96      if (this.current == null) {
97        return null;
98      }
99      return this.current.peek();
100   }
101 
102   public KeyValue next()  throws IOException {
103     if(this.current == null) {
104       return null;
105     }
106     KeyValue kvReturn = this.current.next();
107     KeyValue kvNext = this.current.peek();
108     if (kvNext == null) {
109       this.current.close();
110       this.current = this.heap.poll();
111     } else {
112       KeyValueScanner topScanner = this.heap.peek();
113       if (topScanner == null ||
114           this.comparator.compare(kvNext, topScanner.peek()) >= 0) {
115         this.heap.add(this.current);
116         this.current = this.heap.poll();
117       }
118     }
119     return kvReturn;
120   }
121 
122   /**
123    * Gets the next row of keys from the top-most scanner.
124    * <p>
125    * This method takes care of updating the heap.
126    * <p>
127    * This can ONLY be called when you are using Scanners that implement
128    * InternalScanner as well as KeyValueScanner (a {@link StoreScanner}).
129    * @param result
130    * @param limit
131    * @return true if there are more keys, false if all scanners are done
132    */
133   public boolean next(List<KeyValue> result, int limit) throws IOException {
134     if (this.current == null) {
135       return false;
136     }
137     InternalScanner currentAsInternal = (InternalScanner)this.current;
138     boolean mayContainsMoreRows = currentAsInternal.next(result, limit);
139     KeyValue pee = this.current.peek();
140     /*
141      * By definition, any InternalScanner must return false only when it has no
142      * further rows to be fetched. So, we can close a scanner if it returns
143      * false. All existing implementations seem to be fine with this. It is much
144      * more efficient to close scanners which are not needed than keep them in
145      * the heap. This is also required for certain optimizations.
146      */
147     if (pee == null || !mayContainsMoreRows) {
148       this.current.close();
149     } else {
150       this.heap.add(this.current);
151     }
152     this.current = this.heap.poll();
153     return (this.current != null);
154   }
155 
156   /**
157    * Gets the next row of keys from the top-most scanner.
158    * <p>
159    * This method takes care of updating the heap.
160    * <p>
161    * This can ONLY be called when you are using Scanners that implement
162    * InternalScanner as well as KeyValueScanner (a {@link StoreScanner}).
163    * @param result
164    * @return true if there are more keys, false if all scanners are done
165    */
166   public boolean next(List<KeyValue> result) throws IOException {
167     return next(result, -1);
168   }
169 
170   private static class KVScannerComparator implements Comparator<KeyValueScanner> {
171     private KVComparator kvComparator;
172     /**
173      * Constructor
174      * @param kvComparator
175      */
176     public KVScannerComparator(KVComparator kvComparator) {
177       this.kvComparator = kvComparator;
178     }
179     public int compare(KeyValueScanner left, KeyValueScanner right) {
180       int comparison = compare(left.peek(), right.peek());
181       if (comparison != 0) {
182         return comparison;
183       } else {
184         // Since both the keys are exactly the same, we break the tie in favor
185         // of the key which came latest.
186         long leftSequenceID = left.getSequenceID();
187         long rightSequenceID = right.getSequenceID();
188         if (leftSequenceID > rightSequenceID) {
189           return -1;
190         } else if (leftSequenceID < rightSequenceID) {
191           return 1;
192         } else {
193           return 0;
194         }
195       }
196     }
197     /**
198      * Compares two KeyValue
199      * @param left
200      * @param right
201      * @return less than 0 if left is smaller, 0 if equal etc..
202      */
203     public int compare(KeyValue left, KeyValue right) {
204       return this.kvComparator.compare(left, right);
205     }
206     /**
207      * @return KVComparator
208      */
209     public KVComparator getComparator() {
210       return this.kvComparator;
211     }
212   }
213 
214   public void close() {
215     if (this.current != null) {
216       this.current.close();
217     }
218     if (this.heap != null) {
219       KeyValueScanner scanner;
220       while ((scanner = this.heap.poll()) != null) {
221         scanner.close();
222       }
223     }
224   }
225 
226   /**
227    * Seeks all scanners at or below the specified seek key.  If we earlied-out
228    * of a row, we may end up skipping values that were never reached yet.
229    * Rather than iterating down, we want to give the opportunity to re-seek.
230    * <p>
231    * As individual scanners may run past their ends, those scanners are
232    * automatically closed and removed from the heap.
233    * @param seekKey KeyValue to seek at or after
234    * @return true if KeyValues exist at or after specified key, false if not
235    * @throws IOException
236    */
237   @Override
238   public boolean seek(KeyValue seekKey) throws IOException {
239     return generalizedSeek(seekKey, SeekType.NORMAL, false);
240   }
241 
242   /**
243    * This function is identical to the {@link #seek(KeyValue)} function except
244    * that scanner.seek(seekKey) is changed to scanner.reseek(seekKey).
245    */
246   @Override
247   public boolean reseek(KeyValue seekKey) throws IOException {
248     return generalizedSeek(seekKey, SeekType.NORMAL, true);
249   }
250 
251   /**
252    * {@inheritDoc}
253    */
254   @Override
255   public boolean seekExactly(KeyValue seekKey, boolean forward)
256       throws IOException {
257     return generalizedSeek(seekKey, SeekType.EXACT, forward);
258   }
259 
260   private boolean generalizedSeek(KeyValue seekKey, SeekType seekType,
261       boolean forward) throws IOException {
262     if (current == null) {
263       return false;
264     }
265     heap.add(current);
266     current = null;
267 
268     KeyValueScanner scanner;
269     while ((scanner = heap.poll()) != null) {
270       KeyValue topKey = scanner.peek();
271       if (comparator.getComparator().compare(seekKey, topKey) <= 0) {
272         // Top KeyValue is at-or-after Seek KeyValue
273         current = scanner;
274         return true;
275       }
276       
277       if (!seekType.seek(scanner, seekKey, forward)) {
278         scanner.close();
279       } else {
280         heap.add(scanner);
281       }
282     }
283 
284     // Heap is returning empty, scanner is done
285     return false;
286   }
287 
288   /**
289    * @return the current Heap
290    */
291   public PriorityQueue<KeyValueScanner> getHeap() {
292     return this.heap;
293   }
294 
295   @Override
296   public long getSequenceID() {
297     return 0;
298   }
299 
300   KeyValueScanner getCurrentForTesting() {
301     return current;
302   }
303 }