/*
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.junit.Assert.*;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTestUtil;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

/**
 * Tests optimized scanning of multiple columns.
 */
@RunWith(Parameterized.class)
public class TestMultiColumnScanner {

  private static final Log LOG = LogFactory.getLog(TestMultiColumnScanner.class);

  private static final String TABLE_NAME = "TestMultiColumnScanner";
  static final String FAMILY = "CF";
  static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
  static final int MAX_VERSIONS = 50;

  /**
   * The size of the column qualifier set used. Increasing this parameter
   * exponentially increases test time.
   */
  private static final int NUM_COLUMNS = 8;

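  // Each non-empty subset of the NUM_COLUMNS qualifiers is encoded as a bit
  // mask: bit i set means the i-th qualifier is included in the scan. With
  // NUM_COLUMNS = 8, the masks below range over 1..255.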
  private static final int MAX_COLUMN_BIT_MASK = (1 << NUM_COLUMNS) - 1;
  private static final int NUM_FLUSHES = 10;
  private static final int NUM_ROWS = 20;

  /** A large value of type long for use as a timestamp. */
  private static final long BIG_LONG = 9111222333444555666L;

  /**
   * Timestamps to test with. Cannot use {@link Long#MAX_VALUE} here, because
   * it would be replaced by a timestamp auto-generated from the current time
   * at insertion.
   */
  private static final long[] TIMESTAMPS = new long[] { 1, 3, 5,
      Integer.MAX_VALUE, BIG_LONG, Long.MAX_VALUE - 1 };

  /** The probability that a column is skipped in a store file. */
  private static final double COLUMN_SKIP_IN_STORE_FILE_PROB = 0.7;

  /** The probability of skipping a column in a single row. */
  private static final double COLUMN_SKIP_IN_ROW_PROB = 0.1;

  /** The probability of skipping a column everywhere. */
  private static final double COLUMN_SKIP_EVERYWHERE_PROB = 0.1;

  /** The probability of deleting a row/column pair. */
  private static final double DELETE_PROBABILITY = 0.02;

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  private Compression.Algorithm comprAlgo;
  private StoreFile.BloomType bloomType;

  // Some static sanity-checking.
  static {
    assertTrue(BIG_LONG > 0.9 * Long.MAX_VALUE); // Guard against typos.

    // Ensure TIMESTAMPS are sorted.
    for (int i = 0; i < TIMESTAMPS.length - 1; ++i)
      assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]);
  }

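  // JUnit's Parameterized runner executes the test once per parameter tuple
  // returned here, i.e. once for every combination of compression algorithm
  // and Bloom filter type.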
  @Parameters
  public static Collection<Object[]> parameters() {
    List<Object[]> configurations = new ArrayList<Object[]>();
    for (Compression.Algorithm comprAlgo : HBaseTestingUtility.COMPRESSION_ALGORITHMS) {
      for (StoreFile.BloomType bloomType : StoreFile.BloomType.values()) {
        configurations.add(new Object[] { comprAlgo, bloomType });
      }
    }
    return configurations;
  }

  public TestMultiColumnScanner(Compression.Algorithm comprAlgo,
      StoreFile.BloomType bloomType) {
    this.comprAlgo = comprAlgo;
    this.bloomType = bloomType;
  }

  @Test
  public void testMultiColumnScanner() throws IOException {
    HRegion region = createRegion(TABLE_NAME, comprAlgo, bloomType);
    List<String> rows = sequentialStrings("row", NUM_ROWS);
    List<String> qualifiers = sequentialStrings("qual", NUM_COLUMNS);
    List<KeyValue> kvs = new ArrayList<KeyValue>();
    Set<String> keySet = new HashSet<String>();

    // A map from <row>_<qualifier> to the most recent delete timestamp for
    // that column.
    Map<String, Long> lastDelTimeMap = new HashMap<String, Long>();

    Random rand = new Random(29372937L);
    Set<String> rowQualSkip = new HashSet<String>();

    // Skip some columns in some rows. We need to test scanning over a set
    // of columns when some of the columns are not there.
    for (String row : rows)
      for (String qual : qualifiers)
        if (rand.nextDouble() < COLUMN_SKIP_IN_ROW_PROB) {
          LOG.info("Skipping " + qual + " in row " + row);
          rowQualSkip.add(rowQualKey(row, qual));
        }

    // Also skip some columns in all rows.
    for (String qual : qualifiers)
      if (rand.nextDouble() < COLUMN_SKIP_EVERYWHERE_PROB) {
        LOG.info("Skipping " + qual + " in all rows");
        for (String row : rows)
          rowQualSkip.add(rowQualKey(row, qual));
      }

    for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) {
      for (String qual : qualifiers) {
        // Decide whether to include this column in this store file at all,
        // regardless of row and timestamp.
        if (rand.nextDouble() < COLUMN_SKIP_IN_STORE_FILE_PROB)
          continue;

        byte[] qualBytes = Bytes.toBytes(qual);
        for (String row : rows) {
          // Skip row/column pairs chosen for omission above.
          if (rowQualSkip.contains(rowQualKey(row, qual)))
            continue;

          Put p = new Put(Bytes.toBytes(row));
          for (long ts : TIMESTAMPS) {
            String value = createValue(row, qual, ts);
            KeyValue kv = KeyValueTestUtil.create(row, FAMILY, qual, ts,
                value);
            assertEquals(ts, kv.getTimestamp());
            p.add(kv);
            String keyAsString = kv.toString();
            if (!keySet.contains(keyAsString)) {
              keySet.add(keyAsString);
              kvs.add(kv);
            }
          }
          region.put(p);

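          // Randomly delete some of the versions just written. Note that
          // deleteColumns(family, qualifier, ts) masks all versions of the
          // column with timestamps <= ts, so we track the newest delete
          // timestamp per row/column; only strictly newer KVs survive.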
          Delete d = new Delete(Bytes.toBytes(row));
          boolean deletedSomething = false;
          for (long ts : TIMESTAMPS)
            if (rand.nextDouble() < DELETE_PROBABILITY) {
              d.deleteColumns(FAMILY_BYTES, qualBytes, ts);
              String rowAndQual = row + "_" + qual;
              Long whenDeleted = lastDelTimeMap.get(rowAndQual);
              lastDelTimeMap.put(rowAndQual, whenDeleted == null ? ts
                  : Math.max(ts, whenDeleted));
              deletedSomething = true;
            }
          if (deletedSomething)
            region.delete(d, null, true);
        }
      }
      region.flushcache();
    }

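    // Sort the expected KVs into scanner order (by row, then column, then
    // descending timestamp) so the verification below can walk the expected
    // list and the scanner output in lock-step.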
    Collections.sort(kvs, KeyValue.COMPARATOR);
    for (int maxVersions = 1; maxVersions <= TIMESTAMPS.length; ++maxVersions) {
      for (int columnBitMask = 1; columnBitMask <= MAX_COLUMN_BIT_MASK; ++columnBitMask) {
        Scan scan = new Scan();
        scan.setMaxVersions(maxVersions);
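        // Decode the bit mask into the set of columns to query: bit i
        // selects the i-th qualifier, e.g. columnBitMask = 0b101 scans the
        // first and third qualifiers only.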
        Set<String> qualSet = new TreeSet<String>();
        {
          int columnMaskTmp = columnBitMask;
          for (String qual : qualifiers) {
            if ((columnMaskTmp & 1) != 0) {
              scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qual));
              qualSet.add(qual);
            }
            columnMaskTmp >>= 1;
          }
          assertEquals(0, columnMaskTmp);
        }

        InternalScanner scanner = region.getScanner(scan);
        List<KeyValue> results = new ArrayList<KeyValue>();

        int kvPos = 0;
        int numResults = 0;
        String queryInfo = "columns queried: " + qualSet + " (columnBitMask="
            + columnBitMask + "), maxVersions=" + maxVersions;

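        // InternalScanner.next(List) loads the next row's KVs into results
        // and returns whether more rows remain; the last row can arrive
        // together with a false return value, hence the extra size check.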
        while (scanner.next(results) || results.size() > 0) {
          for (KeyValue kv : results) {
            while (kvPos < kvs.size()
                && !matchesQuery(kvs.get(kvPos), qualSet, maxVersions,
                    lastDelTimeMap)) {
              ++kvPos;
            }
            String rowQual = getRowQualStr(kv);
            String deleteInfo = "";
            Long lastDelTS = lastDelTimeMap.get(rowQual);
            if (lastDelTS != null) {
              deleteInfo = "; last timestamp when row/column " + rowQual
                  + " was deleted: " + lastDelTS;
            }
            assertTrue("Scanner returned additional key/value: " + kv + ", "
                + queryInfo + deleteInfo + ";", kvPos < kvs.size());
            assertEquals("Scanner returned wrong key/value; " + queryInfo
                + deleteInfo + ";", kvs.get(kvPos), kv);
            ++kvPos;
            ++numResults;
          }
          results.clear();
        }
        for (; kvPos < kvs.size(); ++kvPos) {
          KeyValue remainingKV = kvs.get(kvPos);
          assertFalse("Matching column not returned by scanner: "
              + remainingKV + ", " + queryInfo + ", results returned: "
              + numResults, matchesQuery(remainingKV, qualSet, maxVersions,
              lastDelTimeMap));
        }
      }
    }
    assertTrue("This test is supposed to delete at least some row/column " +
        "pairs", lastDelTimeMap.size() > 0);
    LOG.info("Number of row/col pairs deleted at least once: " +
        lastDelTimeMap.size());
    region.close();
  }

  static HRegion createRegion(String tableName,
      Compression.Algorithm comprAlgo, BloomType bloomType)
      throws IOException {
    HColumnDescriptor hcd =
        new HColumnDescriptor(FAMILY_BYTES, MAX_VERSIONS,
            comprAlgo.getName(),
            HColumnDescriptor.DEFAULT_IN_MEMORY,
            HColumnDescriptor.DEFAULT_BLOCKCACHE,
            HColumnDescriptor.DEFAULT_TTL,
            bloomType.toString());
    HTableDescriptor htd = new HTableDescriptor(tableName);
    htd.addFamily(hcd);
    HRegionInfo info =
        new HRegionInfo(Bytes.toBytes(tableName), null, null, false);
    HRegion region = HRegion.createHRegion(
        info, TEST_UTIL.getDataTestDir(), TEST_UTIL.getConfiguration(),
        htd);
    return region;
  }

  private static String getRowQualStr(KeyValue kv) {
    String rowStr = Bytes.toString(kv.getBuffer(), kv.getRowOffset(),
        kv.getRowLength());
    return rowStr + "_" + qualStr(kv);
  }

  private static boolean matchesQuery(KeyValue kv, Set<String> qualSet,
      int maxVersions, Map<String, Long> lastDelTimeMap) {
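    // A KV belongs in the expected scan output iff its qualifier was
    // queried, its timestamp is among the maxVersions newest entries of the
    // ascending TIMESTAMPS array, and it is strictly newer than the last
    // delete on its row/column (deletes mask timestamps <= lastDelTS).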
    Long lastDelTS = lastDelTimeMap.get(getRowQualStr(kv));
    long ts = kv.getTimestamp();
    return qualSet.contains(qualStr(kv))
        && ts >= TIMESTAMPS[TIMESTAMPS.length - maxVersions]
        && (lastDelTS == null || ts > lastDelTS);
  }

  private static String qualStr(KeyValue kv) {
    return Bytes.toString(kv.getBuffer(), kv.getQualifierOffset(),
        kv.getQualifierLength());
  }

  private static String rowQualKey(String row, String qual) {
    return row + "_" + qual;
  }

  static String createValue(String row, String qual, long ts) {
    return "value_for_" + row + "_" + qual + "_" + ts;
  }

  private static List<String> sequentialStrings(String prefix, int n) {
    List<String> lst = new ArrayList<String>();
    for (int i = 0; i < n; ++i) {
      StringBuilder sb = new StringBuilder();
      sb.append(prefix).append(i);

      // Make column length depend on i.
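      // For example, i = 5 (binary 101) appends "bab", so the "qual" prefix
      // yields "qual5bab"; qualifier lengths thus vary across the set.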
      int iBitShifted = i;
      while (iBitShifted != 0) {
        sb.append((iBitShifted & 1) == 0 ? 'a' : 'b');
        iBitShifted >>= 1;
      }

      lst.add(sb.toString());
    }

    return lst;
  }

}