/**
 * Copyright 2011 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.*;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import org.junit.Before;
import org.junit.Test;

/**
 * Testing writing a version 2 {@link HFile}. This is a low-level test written
 * during the development of {@link HFileWriterV2}.
 */
public class TestHFileWriterV2 {

  private static final Log LOG = LogFactory.getLog(TestHFileWriterV2.class);

  private static final HBaseTestingUtility TEST_UTIL =
      new HBaseTestingUtility();

  private Configuration conf;
  private FileSystem fs;

  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    fs = FileSystem.get(conf);
  }

  @Test
  public void testHFileFormatV2() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
        "testHFileFormatV2");

    final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.GZ;
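    // Write a v2 HFile with 4 KB data blocks, GZ compression, and
    // KeyValue's raw key comparator.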
    HFileWriterV2 writer = new HFileWriterV2(conf, new CacheConfig(conf), fs,
        hfilePath, 4096, COMPRESS_ALGO, KeyValue.KEY_COMPARATOR);

    Random rand = new Random(9713312); // Just a fixed seed.

    final int ENTRY_COUNT = 10000;
    List<byte[]> keys = new ArrayList<byte[]>();
    List<byte[]> values = new ArrayList<byte[]>();

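    // Keys must be appended in increasing order; randomOrderedKey
    // guarantees this by encoding i into a deterministic prefix.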
    for (int i = 0; i < ENTRY_COUNT; ++i) {
      byte[] keyBytes = randomOrderedKey(rand, i);

      // A random-length random value.
      byte[] valueBytes = randomValue(rand);
      writer.append(keyBytes, valueBytes);

      keys.add(keyBytes);
      values.add(valueBytes);
    }

    // Add meta blocks in an arbitrary order; they are stored sorted
    // lexicographically by name.
    writer.appendMetaBlock("CAPITAL_OF_USA", new Text("Washington, D.C."));
    writer.appendMetaBlock("CAPITAL_OF_RUSSIA", new Text("Moscow"));
    writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));

    writer.close();

    FSDataInputStream fsdis = fs.open(hfilePath);

    // A "manual" version of a new-format HFile reader. This unit test was
    // written before the V2 reader was fully implemented.
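    // In outline, a v2 HFile is laid out as: data blocks, meta blocks,
    // the "load-on-open" section (root data index, meta block index,
    // file info), and a fixed-size trailer at the very end of the file.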

    long fileSize = fs.getFileStatus(hfilePath).getLen();
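    // Read the trailer first; it records where everything else lives
    // (load-on-open offset, index entry counts, comparator class name).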
    FixedFileTrailer trailer =
        FixedFileTrailer.readFromStream(fsdis, fileSize);

    assertEquals(2, trailer.getVersion());
    assertEquals(ENTRY_COUNT, trailer.getEntryCount());

    HFileBlock.FSReader blockReader =
        new HFileBlock.FSReaderV2(fsdis, COMPRESS_ALGO, fileSize);
    // Comparator class name is stored in the trailer in version 2.
    RawComparator<byte[]> comparator = trailer.createComparator();
    HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
        new HFileBlockIndex.BlockIndexReader(comparator,
            trailer.getNumDataIndexLevels());
    HFileBlockIndex.BlockIndexReader metaBlockIndexReader =
        new HFileBlockIndex.BlockIndexReader(Bytes.BYTES_RAWCOMPARATOR, 1);
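    // The data block index may have several levels (the trailer reports
    // how many); the meta block index in v2 is always single-level.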

    // Iterate over exactly the load-on-open section, which spans from the
    // load-on-open offset up to the start of the trailer.
    HFileBlock.BlockIterator blockIter = blockReader.blockRange(
        trailer.getLoadOnOpenDataOffset(),
        fileSize - trailer.getTrailerSize());
    // Data index. We also read statistics about the block index written after
    // the root level.
    dataBlockIndexReader.readMultiLevelIndexRoot(
        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
        trailer.getDataIndexCount());

    // Meta index.
    metaBlockIndexReader.readRootIndex(
        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
        trailer.getMetaIndexCount());
    // File info
    FileInfo fileInfo = new FileInfo();
    fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
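    // A KEY_VALUE_VERSION greater than zero means each cell is followed
    // by a vlong memstore timestamp.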
    byte[] keyValueFormatVersion =
        fileInfo.get(HFileWriterV2.KEY_VALUE_VERSION);
    boolean includeMemstoreTS = (keyValueFormatVersion != null
        && Bytes.toInt(keyValueFormatVersion) > 0);

    // Counters for the number of key/value pairs and the number of blocks
    int entriesRead = 0;
    int blocksRead = 0;
    long memstoreTS = 0;

    // Scan blocks the way the reader would scan them
    fsdis.seek(0);
    long curBlockPos = 0;
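    // Blocks are contiguous on disk, so advancing by each block's on-disk
    // size (header included) lands exactly on the next block.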
    while (curBlockPos <= trailer.getLastDataBlockOffset()) {
      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false);
      assertEquals(BlockType.DATA, block.getBlockType());
      ByteBuffer buf = block.getBufferWithoutHeader();
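      // Each cell is serialized as: a 4-byte key length, a 4-byte value
      // length, the key bytes, the value bytes, and (optionally) the
      // memstore timestamp as a vlong.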
      while (buf.hasRemaining()) {
        int keyLen = buf.getInt();
        int valueLen = buf.getInt();

        byte[] key = new byte[keyLen];
        buf.get(key);

        byte[] value = new byte[valueLen];
        buf.get(value);

        if (includeMemstoreTS) {
          // Decode the timestamp through a stream view of the buffer; the
          // stream read does not advance the ByteBuffer, so bump its
          // position manually afterwards.
          ByteArrayInputStream byteInput = new ByteArrayInputStream(
              buf.array(), buf.arrayOffset() + buf.position(),
              buf.remaining());
          DataInputStream dataInput = new DataInputStream(byteInput);

          memstoreTS = WritableUtils.readVLong(dataInput);
          buf.position(buf.position() + WritableUtils.getVIntSize(memstoreTS));
        }

        // A brute-force check to see that all keys and values are correct.
        assertArrayEquals(keys.get(entriesRead), key);
        assertArrayEquals(values.get(entriesRead), value);

        ++entriesRead;
      }
      ++blocksRead;
      curBlockPos += block.getOnDiskSizeWithHeader();
    }
    LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead="
        + blocksRead);
    assertEquals(ENTRY_COUNT, entriesRead);

    // Meta blocks. We can scan until the load-on-open data offset (which is
    // the root block index offset in version 2) because we are not testing
    // intermediate-level index blocks here.

    int metaCounter = 0;
    while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
      LOG.info("Current offset: " + fsdis.getPos() + ", scanning until " +
          trailer.getLoadOnOpenDataOffset());
      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false);
      assertEquals(BlockType.META, block.getBlockType());
      Text t = new Text();
      block.readInto(t);
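      // Meta blocks come back sorted by name, so the expected order is
      // CAPITAL_OF_FRANCE, CAPITAL_OF_RUSSIA, CAPITAL_OF_USA.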
      Text expectedText =
          (metaCounter == 0 ? new Text("Paris") : metaCounter == 1 ? new Text(
              "Moscow") : new Text("Washington, D.C."));
      assertEquals(expectedText, t);
      LOG.info("Read meta block data: " + t);
      ++metaCounter;
      curBlockPos += block.getOnDiskSizeWithHeader();
    }

    fsdis.close();
  }

  // Static stuff used by various HFile v2 unit tests

  private static final String COLUMN_FAMILY_NAME = "_-myColumnFamily-_";
  private static final int MIN_ROW_OR_QUALIFIER_LENGTH = 64;
  private static final int MAX_ROW_OR_QUALIFIER_LENGTH = 128;

  /**
   * Generates a random key that is guaranteed to increase as the given index
   * i increases. The result consists of a prefix, which is a deterministic
   * increasing function of i, and a random suffix. For example, i = 5
   * (binary 101) yields a 32-character prefix of 29 'a's followed by "bab".
   *
   * @param rand random number generator to use
   * @param i index encoded into the increasing prefix of the key
   * @return the generated key bytes
   */
  public static byte[] randomOrderedKey(Random rand, int i) {
    StringBuilder k = new StringBuilder();

    // The fixed-length lexicographically increasing part of the key: emit
    // the 32 bits of i from most significant to least significant, mapping
    // 0 to 'a' and 1 to 'b', so that string order matches integer order.
    for (int bitIndex = 31; bitIndex >= 0; --bitIndex) {
      if ((i & (1 << bitIndex)) == 0)
        k.append("a");
      else
        k.append("b");
    }

    // A random-length random suffix of the key. Pick the length once up
    // front; calling rand.nextInt(50) in the loop condition would re-roll
    // the bound on every iteration.
    int suffixLength = rand.nextInt(50);
    for (int j = 0; j < suffixLength; ++j)
      k.append(randomReadableChar(rand));

    byte[] keyBytes = k.toString().getBytes();
    return keyBytes;
  }

  public static byte[] randomValue(Random rand) {
    StringBuilder v = new StringBuilder();
    // Between 1 and 2000 printable ASCII characters (code points 32..126).
    int valueLength = 1 + rand.nextInt(2000);
    for (int j = 0; j < valueLength; ++j) {
      v.append((char) (32 + rand.nextInt(95)));
    }

    byte[] valueBytes = v.toString().getBytes();
    return valueBytes;
  }

  public static char randomReadableChar(Random rand) {
    // Pick uniformly from 26 + 26 + 10 + 1 = 63 characters:
    // [A-Z], [a-z], [0-9] and '_'.
    int i = rand.nextInt(26 * 2 + 10 + 1);
    if (i < 26)
      return (char) ('A' + i);
    i -= 26;

    if (i < 26)
      return (char) ('a' + i);
    i -= 26;

    if (i < 10)
      return (char) ('0' + i);
    i -= 10;

    assert i == 0;
    return '_';
  }

  public static byte[] randomRowOrQualifier(Random rand) {
    StringBuilder field = new StringBuilder();
    int fieldLen = MIN_ROW_OR_QUALIFIER_LENGTH
        + rand.nextInt(MAX_ROW_OR_QUALIFIER_LENGTH
            - MIN_ROW_OR_QUALIFIER_LENGTH + 1);
    for (int i = 0; i < fieldLen; ++i)
      field.append(randomReadableChar(rand));
    return field.toString().getBytes();
  }

  public static KeyValue randomKeyValue(Random rand) {
    return new KeyValue(randomRowOrQualifier(rand),
        COLUMN_FAMILY_NAME.getBytes(), randomRowOrQualifier(rand),
        randomValue(rand));
  }

}