View Javadoc

1   /*
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.client;
22  
23  import org.apache.hadoop.conf.Configuration;
24  import org.apache.hadoop.hbase.HConstants;
25  import org.apache.hadoop.hbase.KeyValue;
26  import org.apache.hadoop.hbase.filter.Filter;
27  import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
28  import org.apache.hadoop.hbase.io.TimeRange;
29  import org.apache.hadoop.hbase.util.Bytes;
30  import org.apache.hadoop.io.Writable;
31  import org.apache.hadoop.io.WritableFactories;
32  
33  import java.io.DataInput;
34  import java.io.DataOutput;
35  import java.io.IOException;
36  import java.util.ArrayList;
37  import java.util.HashMap;
38  import java.util.List;
39  import java.util.Map;
40  import java.util.NavigableSet;
41  import java.util.TreeMap;
42  import java.util.TreeSet;
43  
44  /**
45   * Used to perform Scan operations.
46   * <p>
47   * All operations are identical to {@link Get} with the exception of
48   * instantiation.  Rather than specifying a single row, an optional startRow
49   * and stopRow may be defined.  If rows are not specified, the Scanner will
50   * iterate over all rows.
51   * <p>
52   * To scan everything for each row, instantiate a Scan object.
53   * <p>
54   * To modify scanner caching for just this scan, use {@link #setCaching(int) setCaching}.
55   * If caching is NOT set, we will use the caching value of the hosting
56   * {@link HTable}.  See {@link HTable#setScannerCaching(int)}.
57   * <p>
58   * To further define the scope of what to get when scanning, perform additional
59   * methods as outlined below.
60   * <p>
61   * To get all columns from specific families, execute {@link #addFamily(byte[]) addFamily}
62   * for each family to retrieve.
63   * <p>
64   * To get specific columns, execute {@link #addColumn(byte[], byte[]) addColumn}
65   * for each column to retrieve.
66   * <p>
67   * To only retrieve columns within a specific range of version timestamps,
68   * execute {@link #setTimeRange(long, long) setTimeRange}.
69   * <p>
70   * To only retrieve columns with a specific timestamp, execute
71   * {@link #setTimeStamp(long) setTimestamp}.
72   * <p>
73   * To limit the number of versions of each column to be returned, execute
74   * {@link #setMaxVersions(int) setMaxVersions}.
75   * <p>
76   * To limit the maximum number of values returned for each call to next(),
77   * execute {@link #setBatch(int) setBatch}.
78   * <p>
79   * To add a filter, execute {@link #setFilter(org.apache.hadoop.hbase.filter.Filter) setFilter}.
80   * <p>
81   * Expert: To explicitly disable server-side block caching for this scan,
82   * execute {@link #setCacheBlocks(boolean)}.
83   */
84  public class Scan extends OperationWithAttributes implements Writable {
85    private static final byte SCAN_VERSION = (byte)2;
86    private byte [] startRow = HConstants.EMPTY_START_ROW;
87    private byte [] stopRow  = HConstants.EMPTY_END_ROW;
88    private int maxVersions = 1;
89    private int batch = -1;
90  
91    /*
92     * -1 means no caching
93     */
94    private int caching = -1;
95    private boolean cacheBlocks = true;
96    private Filter filter = null;
97    private TimeRange tr = new TimeRange();
98    private Map<byte [], NavigableSet<byte []>> familyMap =
99      new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
100 
101   /**
102    * Create a Scan operation across all rows.
103    */
104   public Scan() {}
105 
106   public Scan(byte [] startRow, Filter filter) {
107     this(startRow);
108     this.filter = filter;
109   }
110 
111   /**
112    * Create a Scan operation starting at the specified row.
113    * <p>
114    * If the specified row does not exist, the Scanner will start from the
115    * next closest row after the specified row.
116    * @param startRow row to start scanner at or after
117    */
118   public Scan(byte [] startRow) {
119     this.startRow = startRow;
120   }
121 
122   /**
123    * Create a Scan operation for the range of rows specified.
124    * @param startRow row to start scanner at or after (inclusive)
125    * @param stopRow row to stop scanner before (exclusive)
126    */
127   public Scan(byte [] startRow, byte [] stopRow) {
128     this.startRow = startRow;
129     this.stopRow = stopRow;
130   }
131 
132   /**
133    * Creates a new instance of this class while copying all values.
134    *
135    * @param scan  The scan instance to copy from.
136    * @throws IOException When copying the values fails.
137    */
138   public Scan(Scan scan) throws IOException {
139     startRow = scan.getStartRow();
140     stopRow  = scan.getStopRow();
141     maxVersions = scan.getMaxVersions();
142     batch = scan.getBatch();
143     caching = scan.getCaching();
144     cacheBlocks = scan.getCacheBlocks();
145     filter = scan.getFilter(); // clone?
146     TimeRange ctr = scan.getTimeRange();
147     tr = new TimeRange(ctr.getMin(), ctr.getMax());
148     Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
149     for (Map.Entry<byte[],NavigableSet<byte[]>> entry : fams.entrySet()) {
150       byte [] fam = entry.getKey();
151       NavigableSet<byte[]> cols = entry.getValue();
152       if (cols != null && cols.size() > 0) {
153         for (byte[] col : cols) {
154           addColumn(fam, col);
155         }
156       } else {
157         addFamily(fam);
158       }
159     }
160   }
161 
162   /**
163    * Builds a scan object with the same specs as get.
164    * @param get get to model scan after
165    */
166   public Scan(Get get) {
167     this.startRow = get.getRow();
168     this.stopRow = get.getRow();
169     this.filter = get.getFilter();
170     this.cacheBlocks = get.getCacheBlocks();
171     this.maxVersions = get.getMaxVersions();
172     this.tr = get.getTimeRange();
173     this.familyMap = get.getFamilyMap();
174   }
175 
176   public boolean isGetScan() {
177     return this.startRow != null && this.startRow.length > 0 &&
178       Bytes.equals(this.startRow, this.stopRow);
179   }
180 
181   /**
182    * Get all columns from the specified family.
183    * <p>
184    * Overrides previous calls to addColumn for this family.
185    * @param family family name
186    * @return this
187    */
188   public Scan addFamily(byte [] family) {
189     familyMap.remove(family);
190     familyMap.put(family, null);
191     return this;
192   }
193 
194   /**
195    * Get the column from the specified family with the specified qualifier.
196    * <p>
197    * Overrides previous calls to addFamily for this family.
198    * @param family family name
199    * @param qualifier column qualifier
200    * @return this
201    */
202   public Scan addColumn(byte [] family, byte [] qualifier) {
203     NavigableSet<byte []> set = familyMap.get(family);
204     if(set == null) {
205       set = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
206     }
207     set.add(qualifier);
208     familyMap.put(family, set);
209 
210     return this;
211   }
212 
213   /**
214    * Get versions of columns only within the specified timestamp range,
215    * [minStamp, maxStamp).  Note, default maximum versions to return is 1.  If
216    * your time range spans more than one version and you want all versions
217    * returned, up the number of versions beyond the defaut.
218    * @param minStamp minimum timestamp value, inclusive
219    * @param maxStamp maximum timestamp value, exclusive
220    * @throws IOException if invalid time range
221    * @see #setMaxVersions()
222    * @see #setMaxVersions(int)
223    * @return this
224    */
225   public Scan setTimeRange(long minStamp, long maxStamp)
226   throws IOException {
227     tr = new TimeRange(minStamp, maxStamp);
228     return this;
229   }
230 
231   /**
232    * Get versions of columns with the specified timestamp. Note, default maximum
233    * versions to return is 1.  If your time range spans more than one version
234    * and you want all versions returned, up the number of versions beyond the
235    * defaut.
236    * @param timestamp version timestamp
237    * @see #setMaxVersions()
238    * @see #setMaxVersions(int)
239    * @return this
240    */
241   public Scan setTimeStamp(long timestamp) {
242     try {
243       tr = new TimeRange(timestamp, timestamp+1);
244     } catch(IOException e) {
245       // Will never happen
246     }
247     return this;
248   }
249 
250   /**
251    * Set the start row of the scan.
252    * @param startRow row to start scan on, inclusive
253    * @return this
254    */
255   public Scan setStartRow(byte [] startRow) {
256     this.startRow = startRow;
257     return this;
258   }
259 
260   /**
261    * Set the stop row.
262    * @param stopRow row to end at (exclusive)
263    * @return this
264    */
265   public Scan setStopRow(byte [] stopRow) {
266     this.stopRow = stopRow;
267     return this;
268   }
269 
270   /**
271    * Get all available versions.
272    * @return this
273    */
274   public Scan setMaxVersions() {
275     this.maxVersions = Integer.MAX_VALUE;
276     return this;
277   }
278 
279   /**
280    * Get up to the specified number of versions of each column.
281    * @param maxVersions maximum versions for each column
282    * @return this
283    */
284   public Scan setMaxVersions(int maxVersions) {
285     this.maxVersions = maxVersions;
286     return this;
287   }
288 
289   /**
290    * Set the maximum number of values to return for each call to next()
291    * @param batch the maximum number of values
292    */
293   public void setBatch(int batch) {
294 	if(this.hasFilter() && this.filter.hasFilterRow()) {
295 	  throw new IncompatibleFilterException(
296         "Cannot set batch on a scan using a filter" +
297         " that returns true for filter.hasFilterRow");
298 	}
299     this.batch = batch;
300   }
301 
302   /**
303    * Set the number of rows for caching that will be passed to scanners.
304    * If not set, the default setting from {@link HTable#getScannerCaching()} will apply.
305    * Higher caching values will enable faster scanners but will use more memory.
306    * @param caching the number of rows for caching
307    */
308   public void setCaching(int caching) {
309     this.caching = caching;
310   }
311 
312   /**
313    * Apply the specified server-side filter when performing the Scan.
314    * @param filter filter to run on the server
315    * @return this
316    */
317   public Scan setFilter(Filter filter) {
318     this.filter = filter;
319     return this;
320   }
321 
322   /**
323    * Setting the familyMap
324    * @param familyMap map of family to qualifier
325    * @return this
326    */
327   public Scan setFamilyMap(Map<byte [], NavigableSet<byte []>> familyMap) {
328     this.familyMap = familyMap;
329     return this;
330   }
331 
332   /**
333    * Getting the familyMap
334    * @return familyMap
335    */
336   public Map<byte [], NavigableSet<byte []>> getFamilyMap() {
337     return this.familyMap;
338   }
339 
340   /**
341    * @return the number of families in familyMap
342    */
343   public int numFamilies() {
344     if(hasFamilies()) {
345       return this.familyMap.size();
346     }
347     return 0;
348   }
349 
350   /**
351    * @return true if familyMap is non empty, false otherwise
352    */
353   public boolean hasFamilies() {
354     return !this.familyMap.isEmpty();
355   }
356 
357   /**
358    * @return the keys of the familyMap
359    */
360   public byte[][] getFamilies() {
361     if(hasFamilies()) {
362       return this.familyMap.keySet().toArray(new byte[0][0]);
363     }
364     return null;
365   }
366 
367   /**
368    * @return the startrow
369    */
370   public byte [] getStartRow() {
371     return this.startRow;
372   }
373 
374   /**
375    * @return the stoprow
376    */
377   public byte [] getStopRow() {
378     return this.stopRow;
379   }
380 
381   /**
382    * @return the max number of versions to fetch
383    */
384   public int getMaxVersions() {
385     return this.maxVersions;
386   }
387 
388   /**
389    * @return maximum number of values to return for a single call to next()
390    */
391   public int getBatch() {
392     return this.batch;
393   }
394 
395   /**
396    * @return caching the number of rows fetched when calling next on a scanner
397    */
398   public int getCaching() {
399     return this.caching;
400   }
401 
402   /**
403    * @return TimeRange
404    */
405   public TimeRange getTimeRange() {
406     return this.tr;
407   }
408 
409   /**
410    * @return RowFilter
411    */
412   public Filter getFilter() {
413     return filter;
414   }
415 
416   /**
417    * @return true is a filter has been specified, false if not
418    */
419   public boolean hasFilter() {
420     return filter != null;
421   }
422 
423   /**
424    * Set whether blocks should be cached for this Scan.
425    * <p>
426    * This is true by default.  When true, default settings of the table and
427    * family are used (this will never override caching blocks if the block
428    * cache is disabled for that family or entirely).
429    *
430    * @param cacheBlocks if false, default settings are overridden and blocks
431    * will not be cached
432    */
433   public void setCacheBlocks(boolean cacheBlocks) {
434     this.cacheBlocks = cacheBlocks;
435   }
436 
437   /**
438    * Get whether blocks should be cached for this Scan.
439    * @return true if default caching should be used, false if blocks should not
440    * be cached
441    */
442   public boolean getCacheBlocks() {
443     return cacheBlocks;
444   }
445 
446   /**
447    * Compile the table and column family (i.e. schema) information
448    * into a String. Useful for parsing and aggregation by debugging,
449    * logging, and administration tools.
450    * @return Map
451    */
452   @Override
453   public Map<String, Object> getFingerprint() {
454     Map<String, Object> map = new HashMap<String, Object>();
455     List<String> families = new ArrayList<String>();
456     if(this.familyMap.size() == 0) {
457       map.put("families", "ALL");
458       return map;
459     } else {
460       map.put("families", families);
461     }
462     for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
463         this.familyMap.entrySet()) {
464       families.add(Bytes.toStringBinary(entry.getKey()));
465     }
466     return map;
467   }
468 
469   /**
470    * Compile the details beyond the scope of getFingerprint (row, columns,
471    * timestamps, etc.) into a Map along with the fingerprinted information.
472    * Useful for debugging, logging, and administration tools.
473    * @param maxCols a limit on the number of columns output prior to truncation
474    * @return Map
475    */ 
476   @Override
477   public Map<String, Object> toMap(int maxCols) {
478     // start with the fingerpring map and build on top of it
479     Map<String, Object> map = getFingerprint();
480     // map from families to column list replaces fingerprint's list of families
481     Map<String, List<String>> familyColumns =
482       new HashMap<String, List<String>>();
483     map.put("families", familyColumns);
484     // add scalar information first
485     map.put("startRow", Bytes.toStringBinary(this.startRow));
486     map.put("stopRow", Bytes.toStringBinary(this.stopRow));
487     map.put("maxVersions", this.maxVersions);
488     map.put("batch", this.batch);
489     map.put("caching", this.caching);
490     map.put("cacheBlocks", this.cacheBlocks);
491     List<Long> timeRange = new ArrayList<Long>();
492     timeRange.add(this.tr.getMin());
493     timeRange.add(this.tr.getMax());
494     map.put("timeRange", timeRange);
495     int colCount = 0;
496     // iterate through affected families and list out up to maxCols columns
497     for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
498       this.familyMap.entrySet()) {
499       List<String> columns = new ArrayList<String>();
500       familyColumns.put(Bytes.toStringBinary(entry.getKey()), columns);
501       if(entry.getValue() == null) {
502         colCount++;
503         --maxCols;
504         columns.add("ALL");
505       } else {
506         colCount += entry.getValue().size();
507         if (maxCols <= 0) {
508           continue;
509         } 
510         for (byte [] column : entry.getValue()) {
511           if (--maxCols <= 0) {
512             continue;
513           }
514           columns.add(Bytes.toStringBinary(column));
515         }
516       } 
517     }       
518     map.put("totalColumns", colCount);
519     return map;
520   }
521 
522   @SuppressWarnings("unchecked")
523   private Writable createForName(String className) {
524     try {
525       Class<? extends Writable> clazz =
526         (Class<? extends Writable>) Class.forName(className);
527       return WritableFactories.newInstance(clazz, new Configuration());
528     } catch (ClassNotFoundException e) {
529       throw new RuntimeException("Can't find class " + className);
530     }
531   }
532 
533   //Writable
534   public void readFields(final DataInput in)
535   throws IOException {
536     int version = in.readByte();
537     if (version > (int)SCAN_VERSION) {
538       throw new IOException("version not supported");
539     }
540     this.startRow = Bytes.readByteArray(in);
541     this.stopRow = Bytes.readByteArray(in);
542     this.maxVersions = in.readInt();
543     this.batch = in.readInt();
544     this.caching = in.readInt();
545     this.cacheBlocks = in.readBoolean();
546     if(in.readBoolean()) {
547       this.filter = (Filter)createForName(Bytes.toString(Bytes.readByteArray(in)));
548       this.filter.readFields(in);
549     }
550     this.tr = new TimeRange();
551     tr.readFields(in);
552     int numFamilies = in.readInt();
553     this.familyMap =
554       new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
555     for(int i=0; i<numFamilies; i++) {
556       byte [] family = Bytes.readByteArray(in);
557       int numColumns = in.readInt();
558       TreeSet<byte []> set = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
559       for(int j=0; j<numColumns; j++) {
560         byte [] qualifier = Bytes.readByteArray(in);
561         set.add(qualifier);
562       }
563       this.familyMap.put(family, set);
564     }
565 
566     if (version > 1) {
567       readAttributes(in);
568     }
569   }
570 
571   public void write(final DataOutput out)
572   throws IOException {
573     out.writeByte(SCAN_VERSION);
574     Bytes.writeByteArray(out, this.startRow);
575     Bytes.writeByteArray(out, this.stopRow);
576     out.writeInt(this.maxVersions);
577     out.writeInt(this.batch);
578     out.writeInt(this.caching);
579     out.writeBoolean(this.cacheBlocks);
580     if(this.filter == null) {
581       out.writeBoolean(false);
582     } else {
583       out.writeBoolean(true);
584       Bytes.writeByteArray(out, Bytes.toBytes(filter.getClass().getName()));
585       filter.write(out);
586     }
587     tr.write(out);
588     out.writeInt(familyMap.size());
589     for(Map.Entry<byte [], NavigableSet<byte []>> entry : familyMap.entrySet()) {
590       Bytes.writeByteArray(out, entry.getKey());
591       NavigableSet<byte []> columnSet = entry.getValue();
592       if(columnSet != null){
593         out.writeInt(columnSet.size());
594         for(byte [] qualifier : columnSet) {
595           Bytes.writeByteArray(out, qualifier);
596         }
597       } else {
598         out.writeInt(0);
599       }
600     }
601     writeAttributes(out);
602   }
603 }