View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.regionserver;
22  
23  import java.io.IOException;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.HConstants;
28  import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
29  import org.apache.hadoop.hbase.util.Bytes;
30  
31  /**
32   * Keeps track of the columns for a scan if they are not explicitly specified
33   */
34  public class ScanWildcardColumnTracker implements ColumnTracker {
35    private static final Log LOG =
36      LogFactory.getLog(ScanWildcardColumnTracker.class);
37    private byte [] columnBuffer = null;
38    private int columnOffset = 0;
39    private int columnLength = 0;
40    private int currentCount = 0;
41    private int maxVersions;
42    private int minVersions;
43    /* Keeps track of the latest timestamp included for current column.
44     * Used to eliminate duplicates. */
45    private long latestTSOfCurrentColumn;
46    private long oldestStamp;
47  
48    /**
49     * Return maxVersions of every row.
50     * @param minVersion Minimum number of versions to keep
51     * @param maxVersion Maximum number of versions to return
52     * @param ttl TimeToLive to enforce
53     */
54    public ScanWildcardColumnTracker(int minVersion, int maxVersion, long ttl) {
55      this.maxVersions = maxVersion;
56      this.minVersions = minVersion;
57      this.oldestStamp = System.currentTimeMillis() - ttl;
58    }
59  
60    /**
61     * Can only return INCLUDE or SKIP, since returning "NEXT" or
62     * "DONE" would imply we have finished with this row, when
63     * this class can't figure that out.
64     *
65     * @param bytes
66     * @param offset
67     * @param length
68     * @param timestamp
69     * @param ignoreCount
70     * @return The match code instance.
71     */
72    @Override
73    public MatchCode checkColumn(byte[] bytes, int offset, int length,
74        long timestamp, boolean ignoreCount) throws IOException {
75      if (columnBuffer == null) {
76        // first iteration.
77        resetBuffer(bytes, offset, length);
78        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
79        return checkVersion(++currentCount, timestamp);
80      }
81      int cmp = Bytes.compareTo(bytes, offset, length,
82          columnBuffer, columnOffset, columnLength);
83      if (cmp == 0) {
84        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
85  
86        //If column matches, check if it is a duplicate timestamp
87        if (sameAsPreviousTS(timestamp)) {
88          return ScanQueryMatcher.MatchCode.SKIP;
89        }
90        return checkVersion(++currentCount, timestamp);
91      }
92  
93      resetTS();
94  
95      // new col > old col
96      if (cmp > 0) {
97        // switched columns, lets do something.x
98        resetBuffer(bytes, offset, length);
99        if (ignoreCount) return ScanQueryMatcher.MatchCode.INCLUDE;
100       return checkVersion(++currentCount, timestamp);
101     }
102 
103     // new col < oldcol
104     // WARNING: This means that very likely an edit for some other family
105     // was incorrectly stored into the store for this one. Throw an exception,
106     // because this might lead to data corruption.
107     throw new IOException(
108         "ScanWildcardColumnTracker.checkColumn ran into a column actually " +
109         "smaller than the previous column: " +
110         Bytes.toStringBinary(bytes, offset, length));
111   }
112 
113   private void resetBuffer(byte[] bytes, int offset, int length) {
114     columnBuffer = bytes;
115     columnOffset = offset;
116     columnLength = length;
117     currentCount = 0;
118   }
119 
120   private MatchCode checkVersion(int version, long timestamp) {
121     if (version > maxVersions) {
122       return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; // skip to next col
123     }
124     // keep the KV if required by minversions or it is not expired, yet
125     if (version <= minVersions || !isExpired(timestamp)) {
126       setTS(timestamp);
127       return ScanQueryMatcher.MatchCode.INCLUDE;
128     } else {
129       return MatchCode.SEEK_NEXT_COL;
130     }
131 
132   }
133 
134   @Override
135   public void update() {
136     // no-op, shouldn't even be called
137     throw new UnsupportedOperationException(
138         "ScanWildcardColumnTracker.update should never be called!");
139   }
140 
141   @Override
142   public void reset() {
143     columnBuffer = null;
144     resetTS();
145   }
146 
147   private void resetTS() {
148     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
149   }
150 
151   private void setTS(long timestamp) {
152     latestTSOfCurrentColumn = timestamp;
153   }
154 
155   private boolean sameAsPreviousTS(long timestamp) {
156     return timestamp == latestTSOfCurrentColumn;
157   }
158 
159   private boolean isExpired(long timestamp) {
160     return timestamp < oldestStamp;
161   }
162 
163   /**
164    * Used by matcher and scan/get to get a hint of the next column
165    * to seek to after checkColumn() returns SKIP.  Returns the next interesting
166    * column we want, or NULL there is none (wildcard scanner).
167    *
168    * @return The column count.
169    */
170   public ColumnCount getColumnHint() {
171     return null;
172   }
173 
174 
175   /**
176    * We can never know a-priori if we are done, so always return false.
177    * @return false
178    */
179   @Override
180   public boolean done() {
181     return false;
182   }
183 
184   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset,
185       int qualLength) {
186     return MatchCode.SEEK_NEXT_COL;
187   }
188 
189   public boolean isDone(long timestamp) {
190     return minVersions <=0 && isExpired(timestamp);
191   }
192 
193 }