1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  import java.text.DateFormat;
23  import java.text.SimpleDateFormat;
24  import java.util.Random;
25  
26  import junit.framework.TestCase;
27  
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.fs.FSDataInputStream;
30  import org.apache.hadoop.fs.FSDataOutputStream;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.HBaseTestingUtility;
34  import org.apache.hadoop.io.BytesWritable;
35  import org.apache.hadoop.io.SequenceFile;
36  import org.apache.hadoop.io.compress.CompressionCodec;
37  import org.apache.hadoop.io.compress.GzipCodec;
38  
39  /**
40   *  Set of long-running tests to measure performance of HFile.
41   * <p>
42   * Copied from
43   * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
44   * Remove after tfile is committed and use the tfile version of this class
45   * instead.</p>
46   */
47  public class TestHFilePerformance extends TestCase {
48    private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
49    private static String ROOT_DIR =
50      TEST_UTIL.getDataTestDir("TestHFilePerformance").toString();
51    private FileSystem fs;
52    private Configuration conf;
53    private long startTimeEpoch;
54    private long finishTimeEpoch;
55    private DateFormat formatter;
56  
57    @Override
58    public void setUp() throws IOException {
59      conf = new Configuration();
60      fs = FileSystem.get(conf);
61      formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
62    }
63  
64    public void startTime() {
65      startTimeEpoch = System.currentTimeMillis();
66      System.out.println(formatTime() + " Started timing.");
67    }
68  
69    public void stopTime() {
70      finishTimeEpoch = System.currentTimeMillis();
71      System.out.println(formatTime() + " Stopped timing.");
72    }
73  
74    public long getIntervalMillis() {
75      return finishTimeEpoch - startTimeEpoch;
76    }
77  
78    public void printlnWithTimestamp(String message) {
79      System.out.println(formatTime() + "  " +  message);
80    }
81  
82    /*
83     * Format millis into minutes and seconds.
84     */
85    public String formatTime(long milis){
86      return formatter.format(milis);
87    }
88  
89    public String formatTime(){
90      return formatTime(System.currentTimeMillis());
91    }
92  
93    private FSDataOutputStream createFSOutput(Path name) throws IOException {
94      if (fs.exists(name))
95        fs.delete(name, true);
96      FSDataOutputStream fout = fs.create(name);
97      return fout;
98    }
99  
100   //TODO have multiple ways of generating key/value e.g. dictionary words
101   //TODO to have a sample compressable data, for now, made 1 out of 3 values random
102   //     keys are all random.
103 
104   private static class KeyValueGenerator {
105     Random keyRandomizer;
106     Random valueRandomizer;
107     long randomValueRatio = 3; // 1 out of randomValueRatio generated values will be random.
108     long valueSequence = 0 ;
109 
110 
111     KeyValueGenerator() {
112       keyRandomizer = new Random(0L); //TODO with seed zero
113       valueRandomizer = new Random(1L); //TODO with seed one
114     }
115 
116     // Key is always random now.
117     void getKey(byte[] key) {
118       keyRandomizer.nextBytes(key);
119     }
120 
121     void getValue(byte[] value) {
122       if (valueSequence % randomValueRatio == 0)
123           valueRandomizer.nextBytes(value);
124       valueSequence++;
125     }
126   }
127 
128   /**
129    *
130    * @param fileType "HFile" or "SequenceFile"
131    * @param keyLength
132    * @param valueLength
133    * @param codecName "none", "lzo", "gz", "snappy"
134    * @param rows number of rows to be written.
135    * @param writeMethod used for HFile only.
136    * @param minBlockSize used for HFile only.
137    * @throws IOException
138    */
139    //TODO writeMethod: implement multiple ways of writing e.g. A) known length (no chunk) B) using a buffer and streaming (for many chunks).
140   public void timeWrite(String fileType, int keyLength, int valueLength,
141     String codecName, long rows, String writeMethod, int minBlockSize)
142   throws IOException {
143     System.out.println("File Type: " + fileType);
144     System.out.println("Writing " + fileType + " with codecName: " + codecName);
145     long totalBytesWritten = 0;
146 
147 
148     //Using separate randomizer for key/value with seeds matching Sequence File.
149     byte[] key = new byte[keyLength];
150     byte[] value = new byte[valueLength];
151     KeyValueGenerator generator = new KeyValueGenerator();
152 
153     startTime();
154 
155     Path path = new Path(ROOT_DIR, fileType + ".Performance");
156     System.out.println(ROOT_DIR + path.getName());
157     FSDataOutputStream fout =  createFSOutput(path);
158 
159     if ("HFile".equals(fileType)){
160         System.out.println("HFile write method: ");
161         HFile.Writer writer =
162           HFile.getWriterFactory(conf).createWriter(fout,
163              minBlockSize, codecName, null);
164 
165         // Writing value in one shot.
166         for (long l=0 ; l<rows ; l++ ) {
167           generator.getKey(key);
168           generator.getValue(value);
169           writer.append(key, value);
170           totalBytesWritten += key.length;
171           totalBytesWritten += value.length;
172          }
173         writer.close();
174     } else if ("SequenceFile".equals(fileType)){
175         CompressionCodec codec = null;
176         if ("gz".equals(codecName))
177           codec = new GzipCodec();
178         else if (!"none".equals(codecName))
179           throw new IOException("Codec not supported.");
180 
181         SequenceFile.Writer writer;
182 
183         //TODO
184         //JobConf conf = new JobConf();
185 
186         if (!"none".equals(codecName))
187           writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
188             BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
189         else
190           writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
191             BytesWritable.class, SequenceFile.CompressionType.NONE, null);
192 
193         BytesWritable keyBsw;
194         BytesWritable valBsw;
195         for (long l=0 ; l<rows ; l++ ) {
196 
197            generator.getKey(key);
198            keyBsw = new BytesWritable(key);
199            totalBytesWritten += keyBsw.getSize();
200 
201            generator.getValue(value);
202            valBsw = new BytesWritable(value);
203            writer.append(keyBsw, valBsw);
204            totalBytesWritten += valBsw.getSize();
205         }
206 
207         writer.close();
208     } else
209        throw new IOException("File Type is not supported");
210 
211     fout.close();
212     stopTime();
213 
214     printlnWithTimestamp("Data written: ");
215     printlnWithTimestamp("  rate  = " +
216       totalBytesWritten / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
217     printlnWithTimestamp("  total = " + totalBytesWritten + "B");
218 
219     printlnWithTimestamp("File written: ");
220     printlnWithTimestamp("  rate  = " +
221       fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
222     printlnWithTimestamp("  total = " + fs.getFileStatus(path).getLen() + "B");
223   }
224 
225   public void timeReading(String fileType, int keyLength, int valueLength,
226       long rows, int method) throws IOException {
227     System.out.println("Reading file of type: " + fileType);
228     Path path = new Path(ROOT_DIR, fileType + ".Performance");
229     System.out.println("Input file size: " + fs.getFileStatus(path).getLen());
230     long totalBytesRead = 0;
231 
232 
233     ByteBuffer val;
234 
235     ByteBuffer key;
236 
237     startTime();
238     FSDataInputStream fin = fs.open(path);
239 
240     if ("HFile".equals(fileType)){
241         HFile.Reader reader = HFile.createReader(path, fs.open(path),
242           fs.getFileStatus(path).getLen(), new CacheConfig(conf));
243         reader.loadFileInfo();
244         switch (method) {
245 
246           case 0:
247           case 1:
248           default:
249             {
250               HFileScanner scanner = reader.getScanner(false, false);
251               scanner.seekTo();
252               for (long l=0 ; l<rows ; l++ ) {
253                 key = scanner.getKey();
254                 val = scanner.getValue();
255                 totalBytesRead += key.limit() + val.limit();
256                 scanner.next();
257               }
258             }
259             break;
260         }
261     } else if("SequenceFile".equals(fileType)){
262 
263         SequenceFile.Reader reader;
264         reader = new SequenceFile.Reader(fs, path, new Configuration());
265 
266         if (reader.getCompressionCodec() != null) {
267             printlnWithTimestamp("Compression codec class: " + reader.getCompressionCodec().getClass());
268         } else
269             printlnWithTimestamp("Compression codec class: " + "none");
270 
271         BytesWritable keyBsw = new BytesWritable();
272         BytesWritable valBsw = new BytesWritable();
273 
274         for (long l=0 ; l<rows ; l++ ) {
275           reader.next(keyBsw, valBsw);
276           totalBytesRead += keyBsw.getSize() + valBsw.getSize();
277         }
278         reader.close();
279 
280         //TODO make a tests for other types of SequenceFile reading scenarios
281 
282     } else {
283         throw new IOException("File Type not supported.");
284     }
285 
286 
287     //printlnWithTimestamp("Closing reader");
288     fin.close();
289     stopTime();
290     //printlnWithTimestamp("Finished close");
291 
292     printlnWithTimestamp("Finished in " + getIntervalMillis() + "ms");
293     printlnWithTimestamp("Data read: ");
294     printlnWithTimestamp("  rate  = " +
295       totalBytesRead / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
296     printlnWithTimestamp("  total = " + totalBytesRead + "B");
297 
298     printlnWithTimestamp("File read: ");
299     printlnWithTimestamp("  rate  = " +
300       fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
301     printlnWithTimestamp("  total = " + fs.getFileStatus(path).getLen() + "B");
302 
303     //TODO uncomment this for final committing so test files is removed.
304     //fs.delete(path, true);
305   }
306 
307   public void testRunComparisons() throws IOException {
308 
309     int keyLength = 100; // 100B
310     int valueLength = 5*1024; // 5KB
311     int minBlockSize = 10*1024*1024; // 10MB
312     int rows = 10000;
313 
314     System.out.println("****************************** Sequence File *****************************");
315 
316     timeWrite("SequenceFile", keyLength, valueLength, "none", rows, null, minBlockSize);
317     System.out.println("\n+++++++\n");
318     timeReading("SequenceFile", keyLength, valueLength, rows, -1);
319 
320     System.out.println("");
321     System.out.println("----------------------");
322     System.out.println("");
323 
324     /* DISABLED LZO
325     timeWrite("SequenceFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
326     System.out.println("\n+++++++\n");
327     timeReading("SequenceFile", keyLength, valueLength, rows, -1);
328 
329     System.out.println("");
330     System.out.println("----------------------");
331     System.out.println("");
332 
333     /* Sequence file can only use native hadoop libs gzipping so commenting out.
334      */
335     try {
336       timeWrite("SequenceFile", keyLength, valueLength, "gz", rows, null,
337         minBlockSize);
338       System.out.println("\n+++++++\n");
339       timeReading("SequenceFile", keyLength, valueLength, rows, -1);
340     } catch (IllegalArgumentException e) {
341       System.out.println("Skipping sequencefile gz: " + e.getMessage());
342     }
343 
344 
345     System.out.println("\n\n\n");
346     System.out.println("****************************** HFile *****************************");
347 
348     timeWrite("HFile", keyLength, valueLength, "none", rows, null, minBlockSize);
349     System.out.println("\n+++++++\n");
350     timeReading("HFile", keyLength, valueLength, rows, 0 );
351 
352     System.out.println("");
353     System.out.println("----------------------");
354     System.out.println("");
355 /* DISABLED LZO
356     timeWrite("HFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
357     System.out.println("\n+++++++\n");
358     timeReading("HFile", keyLength, valueLength, rows, 0 );
359     System.out.println("\n+++++++\n");
360     timeReading("HFile", keyLength, valueLength, rows, 1 );
361     System.out.println("\n+++++++\n");
362     timeReading("HFile", keyLength, valueLength, rows, 2 );
363 
364     System.out.println("");
365     System.out.println("----------------------");
366     System.out.println("");
367 */
368     timeWrite("HFile", keyLength, valueLength, "gz", rows, null, minBlockSize);
369     System.out.println("\n+++++++\n");
370     timeReading("HFile", keyLength, valueLength, rows, 0 );
371 
372     System.out.println("\n\n\n\nNotes: ");
373     System.out.println(" * Timing includes open/closing of files.");
374     System.out.println(" * Timing includes reading both Key and Value");
375     System.out.println(" * Data is generated as random bytes. Other methods e.g. using " +
376             "dictionary with care for distributation of words is under development.");
377     System.out.println(" * Timing of write currently, includes random value/key generations. " +
378             "Which is the same for Sequence File and HFile. Another possibility is to generate " +
379             "test data beforehand");
380     System.out.println(" * We need to mitigate cache effect on benchmark. We can apply several " +
381             "ideas, for next step we do a large dummy read between benchmark read to dismantle " +
382             "caching of data. Renaming of file may be helpful. We can have a loop that reads with" +
383             " the same method several times and flood cache every time and average it to get a" +
384             " better number.");
385   }
386 }