1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.regionserver;
21
22 import static org.junit.Assert.*;
23
24 import java.io.IOException;
25 import java.util.ArrayList;
26 import java.util.Collection;
27 import java.util.Collections;
28 import java.util.HashMap;
29 import java.util.HashSet;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.Random;
33 import java.util.Set;
34 import java.util.TreeSet;
35
36 import org.apache.commons.logging.Log;
37 import org.apache.commons.logging.LogFactory;
38 import org.apache.hadoop.hbase.HBaseTestingUtility;
39 import org.apache.hadoop.hbase.HColumnDescriptor;
40 import org.apache.hadoop.hbase.HRegionInfo;
41 import org.apache.hadoop.hbase.HTableDescriptor;
42 import org.apache.hadoop.hbase.KeyValue;
43 import org.apache.hadoop.hbase.KeyValueTestUtil;
44 import org.apache.hadoop.hbase.client.Delete;
45 import org.apache.hadoop.hbase.client.Put;
46 import org.apache.hadoop.hbase.client.Scan;
47 import org.apache.hadoop.hbase.io.hfile.Compression;
48 import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
49 import org.apache.hadoop.hbase.util.Bytes;
50 import org.junit.Test;
51 import org.junit.runner.RunWith;
52 import org.junit.runners.Parameterized;
53 import org.junit.runners.Parameterized.Parameters;
54
55
56
57
58 @RunWith(Parameterized.class)
59 public class TestMultiColumnScanner {
60
61 private static final Log LOG = LogFactory.getLog(TestMultiColumnScanner.class);
62
63 private static final String TABLE_NAME = "TestMultiColumnScanner";
64 static final String FAMILY = "CF";
65 static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
66 static final int MAX_VERSIONS = 50;
67
68
69
70
71
72 private static final int NUM_COLUMNS = 8;
73
74 private static final int MAX_COLUMN_BIT_MASK = 1 << NUM_COLUMNS - 1;
75 private static final int NUM_FLUSHES = 10;
76 private static final int NUM_ROWS = 20;
77
78
79 private static final long BIG_LONG = 9111222333444555666L;
80
81
82
83
84
85 private static final long[] TIMESTAMPS = new long[] { 1, 3, 5,
86 Integer.MAX_VALUE, BIG_LONG, Long.MAX_VALUE - 1 };
87
88
89 private static final double COLUMN_SKIP_IN_STORE_FILE_PROB = 0.7;
90
91
92 private static final double COLUMN_SKIP_IN_ROW_PROB = 0.1;
93
94
95 private static final double COLUMN_SKIP_EVERYWHERE_PROB = 0.1;
96
97
98 private static final double DELETE_PROBABILITY = 0.02;
99
100 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
101
102 private Compression.Algorithm comprAlgo;
103 private StoreFile.BloomType bloomType;
104
105
106 static {
107 assertTrue(BIG_LONG > 0.9 * Long.MAX_VALUE);
108
109
110 for (int i = 0; i < TIMESTAMPS.length - 1; ++i)
111 assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]);
112 }
113
114 @Parameters
115 public static final Collection<Object[]> parameters() {
116 List<Object[]> configurations = new ArrayList<Object[]>();
117 for (Compression.Algorithm comprAlgo : HBaseTestingUtility.COMPRESSION_ALGORITHMS) {
118 for (StoreFile.BloomType bloomType : StoreFile.BloomType.values()) {
119 configurations.add(new Object[] { comprAlgo, bloomType });
120 }
121 }
122 return configurations;
123 }
124
125 public TestMultiColumnScanner(Compression.Algorithm comprAlgo,
126 StoreFile.BloomType bloomType) {
127 this.comprAlgo = comprAlgo;
128 this.bloomType = bloomType;
129 }
130
131 @Test
132 public void testMultiColumnScanner() throws IOException {
133 HRegion region = createRegion(TABLE_NAME, comprAlgo, bloomType);
134 List<String> rows = sequentialStrings("row", NUM_ROWS);
135 List<String> qualifiers = sequentialStrings("qual", NUM_COLUMNS);
136 List<KeyValue> kvs = new ArrayList<KeyValue>();
137 Set<String> keySet = new HashSet<String>();
138
139
140
141 Map<String, Long> lastDelTimeMap = new HashMap<String, Long>();
142
143 Random rand = new Random(29372937L);
144 Set<String> rowQualSkip = new HashSet<String>();
145
146
147
148 for (String row : rows)
149 for (String qual : qualifiers)
150 if (rand.nextDouble() < COLUMN_SKIP_IN_ROW_PROB) {
151 LOG.info("Skipping " + qual + " in row " + row);
152 rowQualSkip.add(rowQualKey(row, qual));
153 }
154
155
156 for (String qual : qualifiers)
157 if (rand.nextDouble() < COLUMN_SKIP_EVERYWHERE_PROB) {
158 LOG.info("Skipping " + qual + " in all rows");
159 for (String row : rows)
160 rowQualSkip.add(rowQualKey(row, qual));
161 }
162
163 for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) {
164 for (String qual : qualifiers) {
165
166
167 if (rand.nextDouble() < COLUMN_SKIP_IN_STORE_FILE_PROB)
168 continue;
169
170 byte[] qualBytes = Bytes.toBytes(qual);
171 for (String row : rows) {
172 Put p = new Put(Bytes.toBytes(row));
173 for (long ts : TIMESTAMPS) {
174 String value = createValue(row, qual, ts);
175 KeyValue kv = KeyValueTestUtil.create(row, FAMILY, qual, ts,
176 value);
177 assertEquals(kv.getTimestamp(), ts);
178 p.add(kv);
179 String keyAsString = kv.toString();
180 if (!keySet.contains(keyAsString)) {
181 keySet.add(keyAsString);
182 kvs.add(kv);
183 }
184 }
185 region.put(p);
186
187 Delete d = new Delete(Bytes.toBytes(row));
188 boolean deletedSomething = false;
189 for (long ts : TIMESTAMPS)
190 if (rand.nextDouble() < DELETE_PROBABILITY) {
191 d.deleteColumns(FAMILY_BYTES, qualBytes, ts);
192 String rowAndQual = row + "_" + qual;
193 Long whenDeleted = lastDelTimeMap.get(rowAndQual);
194 lastDelTimeMap.put(rowAndQual, whenDeleted == null ? ts
195 : Math.max(ts, whenDeleted));
196 deletedSomething = true;
197 }
198 if (deletedSomething)
199 region.delete(d, null, true);
200 }
201 }
202 region.flushcache();
203 }
204
205 Collections.sort(kvs, KeyValue.COMPARATOR);
206 for (int maxVersions = 1; maxVersions <= TIMESTAMPS.length; ++maxVersions) {
207 for (int columnBitMask = 1; columnBitMask <= MAX_COLUMN_BIT_MASK; ++columnBitMask) {
208 Scan scan = new Scan();
209 scan.setMaxVersions(maxVersions);
210 Set<String> qualSet = new TreeSet<String>();
211 {
212 int columnMaskTmp = columnBitMask;
213 for (String qual : qualifiers) {
214 if ((columnMaskTmp & 1) != 0) {
215 scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qual));
216 qualSet.add(qual);
217 }
218 columnMaskTmp >>= 1;
219 }
220 assertEquals(0, columnMaskTmp);
221 }
222
223 InternalScanner scanner = region.getScanner(scan);
224 List<KeyValue> results = new ArrayList<KeyValue>();
225
226 int kvPos = 0;
227 int numResults = 0;
228 String queryInfo = "columns queried: " + qualSet + " (columnBitMask="
229 + columnBitMask + "), maxVersions=" + maxVersions;
230
231 while (scanner.next(results) || results.size() > 0) {
232 for (KeyValue kv : results) {
233 while (kvPos < kvs.size()
234 && !matchesQuery(kvs.get(kvPos), qualSet, maxVersions,
235 lastDelTimeMap)) {
236 ++kvPos;
237 }
238 String rowQual = getRowQualStr(kv);
239 String deleteInfo = "";
240 Long lastDelTS = lastDelTimeMap.get(rowQual);
241 if (lastDelTS != null) {
242 deleteInfo = "; last timestamp when row/column " + rowQual
243 + " was deleted: " + lastDelTS;
244 }
245 assertTrue("Scanner returned additional key/value: " + kv + ", "
246 + queryInfo + deleteInfo + ";", kvPos < kvs.size());
247 assertEquals("Scanner returned wrong key/value; " + queryInfo
248 + deleteInfo + ";", kvs.get(kvPos), kv);
249 ++kvPos;
250 ++numResults;
251 }
252 results.clear();
253 }
254 for (; kvPos < kvs.size(); ++kvPos) {
255 KeyValue remainingKV = kvs.get(kvPos);
256 assertFalse("Matching column not returned by scanner: "
257 + remainingKV + ", " + queryInfo + ", results returned: "
258 + numResults, matchesQuery(remainingKV, qualSet, maxVersions,
259 lastDelTimeMap));
260 }
261 }
262 }
263 assertTrue("This test is supposed to delete at least some row/column " +
264 "pairs", lastDelTimeMap.size() > 0);
265 LOG.info("Number of row/col pairs deleted at least once: " +
266 lastDelTimeMap.size());
267 region.close();
268 }
269
270 static HRegion createRegion(String tableName,
271 Compression.Algorithm comprAlgo, BloomType bloomType)
272 throws IOException {
273 HColumnDescriptor hcd =
274 new HColumnDescriptor(FAMILY_BYTES, MAX_VERSIONS,
275 comprAlgo.getName(),
276 HColumnDescriptor.DEFAULT_IN_MEMORY,
277 HColumnDescriptor.DEFAULT_BLOCKCACHE,
278 HColumnDescriptor.DEFAULT_TTL,
279 bloomType.toString());
280 HTableDescriptor htd = new HTableDescriptor(tableName);
281 htd.addFamily(hcd);
282 HRegionInfo info =
283 new HRegionInfo(Bytes.toBytes(tableName), null, null, false);
284 HRegion region = HRegion.createHRegion(
285 info, TEST_UTIL.getDataTestDir(), TEST_UTIL.getConfiguration(),
286 htd);
287 return region;
288 }
289
290 private static String getRowQualStr(KeyValue kv) {
291 String rowStr = Bytes.toString(kv.getBuffer(), kv.getRowOffset(),
292 kv.getRowLength());
293 String qualStr = Bytes.toString(kv.getBuffer(), kv.getQualifierOffset(),
294 kv.getQualifierLength());
295 return rowStr + "_" + qualStr;
296 }
297
298 private static boolean matchesQuery(KeyValue kv, Set<String> qualSet,
299 int maxVersions, Map<String, Long> lastDelTimeMap) {
300 Long lastDelTS = lastDelTimeMap.get(getRowQualStr(kv));
301 long ts = kv.getTimestamp();
302 return qualSet.contains(qualStr(kv))
303 && ts >= TIMESTAMPS[TIMESTAMPS.length - maxVersions]
304 && (lastDelTS == null || ts > lastDelTS);
305 }
306
307 private static String qualStr(KeyValue kv) {
308 return Bytes.toString(kv.getBuffer(), kv.getQualifierOffset(),
309 kv.getQualifierLength());
310 }
311
312 private static String rowQualKey(String row, String qual) {
313 return row + "_" + qual;
314 }
315
316 static String createValue(String row, String qual, long ts) {
317 return "value_for_" + row + "_" + qual + "_" + ts;
318 }
319
320 private static List<String> sequentialStrings(String prefix, int n) {
321 List<String> lst = new ArrayList<String>();
322 for (int i = 0; i < n; ++i) {
323 StringBuilder sb = new StringBuilder();
324 sb.append(prefix + i);
325
326
327 int iBitShifted = i;
328 while (iBitShifted != 0) {
329 sb.append((iBitShifted & 1) == 0 ? 'a' : 'b');
330 iBitShifted >>= 1;
331 }
332
333 lst.add(sb.toString());
334 }
335
336 return lst;
337 }
338
339 }