/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Test cases for the "load" half of the HFileOutputFormat bulk load
 * functionality. These tests run faster than the full MR cluster
 * tests in TestHFileOutputFormat.
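 *
 * <p>Outside of tests, the same loader is normally driven through the HBase
 * jar's driver; a hedged sketch (exact jar name and arguments vary by
 * version):
 * <pre>
 *   hadoop jar hbase-VERSION.jar completebulkload /path/to/hfile/output mytable
 * </pre>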
 */
public class TestLoadIncrementalHFiles {
  private static final byte[] QUALIFIER = Bytes.toBytes("myqual");
  private static final byte[] FAMILY = Bytes.toBytes("myfam");

  private static final byte[][] SPLIT_KEYS = new byte[][] {
    Bytes.toBytes("ddd"),
    Bytes.toBytes("ppp")
  };

  public static final int BLOCKSIZE = 64 * 1024;
  public static final String COMPRESSION =
    Compression.Algorithm.NONE.getName();

  private static HBaseTestingUtility util = new HBaseTestingUtility();

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    util.startMiniCluster();
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    util.shutdownMiniCluster();
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that fit snugly inside those regions.
   */
  @Test
  public void testSimpleLoad() throws Exception {
    runTest("testSimpleLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("cccc") },
          new byte[][]{ Bytes.toBytes("ddd"), Bytes.toBytes("ooo") },
    });
  }

  /**
   * Test case that creates some regions and loads
   * HFiles that cross the boundaries of those regions.
   */
  @Test
  public void testRegionCrossingLoad() throws Exception {
    runTest("testRegionCrossingLoad", BloomType.NONE,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROW bloom filter.
   */
  @Test
  public void testRegionCrossingRowBloom() throws Exception {
    runTest("testRegionCrossingLoadRowBloom", BloomType.ROW,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }

  /**
   * Test loading into a column family that has a ROWCOL bloom filter.
   */
  @Test
  public void testRegionCrossingRowColBloom() throws Exception {
    runTest("testRegionCrossingLoadRowColBloom", BloomType.ROWCOL,
        new byte[][][] {
          new byte[][]{ Bytes.toBytes("aaaa"), Bytes.toBytes("eee") },
          new byte[][]{ Bytes.toBytes("fff"), Bytes.toBytes("zzz") },
    });
  }
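  /**
   * Shared driver for the load tests: writes one 1000-row HFile per
   * (from, to) range into a single family directory, creates a table
   * pre-split at SPLIT_KEYS whose family uses the given bloom filter type,
   * bulk loads the directory via LoadIncrementalHFiles, and checks that
   * every written row is visible.
   */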
  private void runTest(String testName, BloomType bloomType,
          byte[][][] hfileRanges) throws Exception {
    Path dir = util.getDataTestDir(testName);
    FileSystem fs = util.getTestFileSystem();
    dir = dir.makeQualified(fs);
    Path familyDir = new Path(dir, Bytes.toString(FAMILY));

    int hfileIdx = 0;
    for (byte[][] range : hfileRanges) {
      byte[] from = range[0];
      byte[] to = range[1];
      createHFile(util.getConfiguration(), fs, new Path(familyDir, "hfile_"
          + hfileIdx++), FAMILY, QUALIFIER, from, to, 1000);
    }
    int expectedRows = hfileIdx * 1000;

    final byte[] TABLE = Bytes.toBytes("mytable_" + testName);

    HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
    HTableDescriptor htd = new HTableDescriptor(TABLE);
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    familyDesc.setBloomFilterType(bloomType);
    htd.addFamily(familyDesc);
    admin.createTable(htd, SPLIT_KEYS);

    HTable table = new HTable(util.getConfiguration(), TABLE);
    util.waitTableAvailable(TABLE, 30000);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(
      util.getConfiguration());
    loader.doBulkLoad(dir, table);

    assertEquals(expectedRows, util.countRows(table));
  }
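  /**
   * Verify that splitStoreFile splits a single 1000-row HFile at "ggg" into
   * bottom and top halves whose row counts sum back to 1000.
   */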
  @Test
  public void testSplitStoreFile() throws IOException {
    Path dir = util.getDataTestDir("testSplitHFile");
    FileSystem fs = util.getTestFileSystem();
    Path testIn = new Path(dir, "testhfile");
    HColumnDescriptor familyDesc = new HColumnDescriptor(FAMILY);
    createHFile(util.getConfiguration(), fs, testIn, FAMILY, QUALIFIER,
        Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);

    Path bottomOut = new Path(dir, "bottom.out");
    Path topOut = new Path(dir, "top.out");

    LoadIncrementalHFiles.splitStoreFile(
        util.getConfiguration(), testIn,
        familyDesc, Bytes.toBytes("ggg"),
        bottomOut,
        topOut);

    int rowCount = verifyHFile(bottomOut);
    rowCount += verifyHFile(topOut);
    assertEquals(1000, rowCount);
  }
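  /**
   * Open the HFile at the given path, scan all of its entries, assert it is
   * non-empty, and return the number of key-values seen.
   */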
  private int verifyHFile(Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(
        p.getFileSystem(conf), p, new CacheConfig(conf));
    try {
      reader.loadFileInfo();
      HFileScanner scanner = reader.getScanner(false, false);
      scanner.seekTo();
      int count = 0;
      do {
        count++;
      } while (scanner.next());
      assertTrue(count > 0);
      return count;
    } finally {
      reader.close();
    }
  }
  /**
   * Create an HFile with the given number of rows between a given
   * start key and end key.
   * TODO put me in an HFileTestUtil or something?
   */
  static void createHFile(
      Configuration conf,
      FileSystem fs, Path path,
      byte[] family, byte[] qualifier,
      byte[] startKey, byte[] endKey, int numRows) throws IOException
  {
    HFile.Writer writer =
      HFile.getWriterFactory(conf, new CacheConfig(conf)).createWriter(fs, path,
        BLOCKSIZE, COMPRESSION,
        KeyValue.KEY_COMPARATOR);
    long now = System.currentTimeMillis();
    try {
      // ask for numRows-2 split points: iterateOnSplits also yields the two
      // boundary keys, so numRows keys are written in total
      for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, numRows - 2)) {
        KeyValue kv = new KeyValue(key, family, qualifier, now, key);
        writer.append(kv);
      }
    } finally {
      writer.close();
    }
  }
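  /**
   * Sweep-line bookkeeping for testInferBoundaries: record +1 at a range's
   * start key and -1 at its end key, the delta form that
   * LoadIncrementalHFiles.inferBoundaries consumes.
   */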
  private void addStartEndKeysForTest(TreeMap<byte[], Integer> map, byte[] first, byte[] last) {
    Integer value = map.containsKey(first) ? map.get(first) : 0;
    map.put(first, value + 1);

    value = map.containsKey(last) ? map.get(last) : 0;
    map.put(last, value - 1);
  }
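  /**
   * Check that inferBoundaries merges overlapping key ranges and returns the
   * boundaries between the merged groups (see the toy example below).
   */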
  @Test
  public void testInferBoundaries() {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

    /* Toy example
     *     c---------i            o------p          s---------t     v------x
     * a------e    g-----k   m-------------q   r----s            u----w
     *
     * Should be inferred as:
     * a-----------------k   m-------------q   r--------------t  u---------x
     *
     * The output should be (m,r,u)
     */

    // ranges are added out of order on purpose; the sorted map makes the
    // insertion order irrelevant
    String[][] ranges = {
        {"a", "e"}, {"r", "s"}, {"o", "p"}, {"g", "k"}, {"v", "x"},
        {"c", "i"}, {"m", "q"}, {"s", "t"}, {"u", "w"}
    };
    for (String[] range : ranges) {
      addStartEndKeysForTest(map, Bytes.toBytes(range[0]), Bytes.toBytes(range[1]));
    }

    byte[][] keysArray = LoadIncrementalHFiles.inferBoundaries(map);
    byte[][] compare = new byte[][] {
        Bytes.toBytes("m"), Bytes.toBytes("r"), Bytes.toBytes("u")
    };

    assertEquals(3, keysArray.length);

    for (int row = 0; row < keysArray.length; row++) {
      assertArrayEquals(compare[row], keysArray[row]);
    }
  }

}