1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements. See the NOTICE file distributed with this
4    * work for additional information regarding copyright ownership. The ASF
5    * licenses this file to you under the Apache License, Version 2.0 (the
6    * "License"); you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    * http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14   * License for the specific language governing permissions and limitations under
15   * the License.
16   */
17  
18  package org.apache.hadoop.hbase.io.hfile;
19  
20  import java.io.IOException;
21  import java.nio.ByteBuffer;
22  import java.util.Random;
23  import java.util.StringTokenizer;
24  
25  import junit.framework.TestCase;
26  
27  import org.apache.commons.cli.CommandLine;
28  import org.apache.commons.cli.CommandLineParser;
29  import org.apache.commons.cli.GnuParser;
30  import org.apache.commons.cli.HelpFormatter;
31  import org.apache.commons.cli.Option;
32  import org.apache.commons.cli.OptionBuilder;
33  import org.apache.commons.cli.Options;
34  import org.apache.commons.cli.ParseException;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.fs.FSDataInputStream;
37  import org.apache.hadoop.fs.FSDataOutputStream;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.fs.RawLocalFileSystem;
41  import org.apache.hadoop.hbase.HBaseTestingUtility;
42  import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
43  import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
44  import org.apache.hadoop.io.BytesWritable;
45  import org.mortbay.log.Log;
46  
/**
 * Tests the performance of seeks against an HFile.
 * <p>
 * Copied from
 * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
 * Remove this class once tfile is committed, and use the tfile version of it
 * instead.</p>
 */
55  public class TestHFileSeek extends TestCase {
  // Passed as the second argument to reader.getScanner() in seekTFile().
  private static final boolean USE_PREAD = true;
  // Benchmark settings. main() injects the parsed command line here;
  // otherwise setUp() falls back to the defaults.
  private MyOptions options;
  // Hadoop configuration, rebuilt by every setUp() call.
  private Configuration conf;
  // Location of the benchmark file: options.rootDir/options.file.
  private Path path;
  // File system that owns path; closed in tearDown().
  private FileSystem fs;
  // Stopwatch shared by the write phase and the seek phase.
  private NanoTimer timer;
  // Master random source, seeded from options.seed.
  private Random rng;
  // Key-length distribution; also handed to the KeySampler in seekTFile().
  private RandomDistribution.DiscreteRNG keyLenGen;
  // Produces the random key/value pairs written by createTFile().
  private KVGenerator kvGen;
65  
66    @Override
67    public void setUp() throws IOException {
68      if (options == null) {
69        options = new MyOptions(new String[0]);
70      }
71  
72      conf = new Configuration();
73      
74      if (options.useRawFs) {
75        conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
76      }
77      
78      conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
79      conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
80      path = new Path(new Path(options.rootDir), options.file);
81      fs = path.getFileSystem(conf);
82      timer = new NanoTimer(false);
83      rng = new Random(options.seed);
84      keyLenGen =
85          new RandomDistribution.Zipf(new Random(rng.nextLong()),
86              options.minKeyLen, options.maxKeyLen, 1.2);
87      RandomDistribution.DiscreteRNG valLenGen =
88          new RandomDistribution.Flat(new Random(rng.nextLong()),
89              options.minValLength, options.maxValLength);
90      RandomDistribution.DiscreteRNG wordLenGen =
91          new RandomDistribution.Flat(new Random(rng.nextLong()),
92              options.minWordLen, options.maxWordLen);
93      kvGen =
94          new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
95              options.dictSize);
96    }
97  
98    @Override
99    public void tearDown() {
100     try {
101       fs.close();
102     }
103     catch (Exception e) {
104       // Nothing
105     }
106   }
107 
108   private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
109     throws IOException {
110     if (fs.exists(name)) {
111       fs.delete(name, true);
112     }
113     FSDataOutputStream fout = fs.create(name);
114     return fout;
115   }
116 
117   private void createTFile() throws IOException {
118     long totalBytes = 0;
119     FSDataOutputStream fout = createFSOutput(path, fs);
120     try {
121       Writer writer =
122         HFile.getWriterFactory(conf).createWriter(fout,
123           options.minBlockSize, options.compress, null);
124       try {
125         BytesWritable key = new BytesWritable();
126         BytesWritable val = new BytesWritable();
127         timer.start();
128         for (long i = 0; true; ++i) {
129           if (i % 1000 == 0) { // test the size for every 1000 rows.
130             if (fs.getFileStatus(path).getLen() >= options.fileSize) {
131               break;
132             }
133           }
134           kvGen.next(key, val, false);
135           byte [] k = new byte [key.getLength()];
136           System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
137           byte [] v = new byte [val.getLength()];
138           System.arraycopy(val.getBytes(), 0, v, 0, key.getLength());
139           writer.append(k, v);
140           totalBytes += key.getLength();
141           totalBytes += val.getLength();
142         }
143         timer.stop();
144       }
145       finally {
146         writer.close();
147       }
148     }
149     finally {
150       fout.close();
151     }
152     double duration = (double)timer.read()/1000; // in us.
153     long fsize = fs.getFileStatus(path).getLen();
154 
155     System.out.printf(
156         "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
157         timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
158             / duration);
159     System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
160         timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
161   }
162 
163   public void seekTFile() throws IOException {
164     int miss = 0;
165     long totalBytes = 0;
166     FSDataInputStream fsdis = fs.open(path);
167     Reader reader = HFile.createReader(path, fsdis,
168         fs.getFileStatus(path).getLen(), new CacheConfig(conf));
169     reader.loadFileInfo();
170     KeySampler kSampler =
171         new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
172             keyLenGen);
173     HFileScanner scanner = reader.getScanner(false, USE_PREAD);
174     BytesWritable key = new BytesWritable();
175     timer.reset();
176     timer.start();
177     for (int i = 0; i < options.seekCount; ++i) {
178       kSampler.next(key);
179       byte [] k = new byte [key.getLength()];
180       System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
181       if (scanner.seekTo(k) >= 0) {
182         ByteBuffer bbkey = scanner.getKey();
183         ByteBuffer bbval = scanner.getValue();
184         totalBytes += bbkey.limit();
185         totalBytes += bbval.limit();
186       }
187       else {
188         ++miss;
189       }
190     }
191     timer.stop();
192     System.out.printf(
193         "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
194         timer.toString(), NanoTimer.nanoTimeToString(timer.read()
195             / options.seekCount), options.seekCount - miss, miss,
196         (double) totalBytes / 1024 / (options.seekCount - miss));
197 
198   }
199 
200   public void testSeeks() throws IOException {
201     if (options.doCreate()) {
202       createTFile();
203     }
204 
205     if (options.doRead()) {
206       seekTFile();
207     }
208 
209     if (options.doCreate()) {
210       fs.delete(path, true);
211     }
212   }
213 
214   private static class IntegerRange {
215     private final int from, to;
216 
217     public IntegerRange(int from, int to) {
218       this.from = from;
219       this.to = to;
220     }
221 
222     public static IntegerRange parse(String s) throws ParseException {
223       StringTokenizer st = new StringTokenizer(s, " \t,");
224       if (st.countTokens() != 2) {
225         throw new ParseException("Bad integer specification: " + s);
226       }
227       int from = Integer.parseInt(st.nextToken());
228       int to = Integer.parseInt(st.nextToken());
229       return new IntegerRange(from, to);
230     }
231 
232     public int from() {
233       return from;
234     }
235 
236     public int to() {
237       return to;
238     }
239   }
240 
241   private static class MyOptions {
    // hard coded constants
    // Dictionary size and word-length bounds handed to the KVGenerator and
    // its word-length distribution in setUp().
    int dictSize = 1000;
    int minWordLen = 5;
    int maxWordLen = 20;

    // Supplies the default data directory used for rootDir below.
    private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
    // Directory that holds the benchmark file (-r).
    String rootDir =
      TEST_UTIL.getDataTestDir("TestTFileSeek").toString();
    // Name of the benchmark file inside rootDir (-f).
    String file = "TestTFileSeek";
    // String compress = "lzo"; DISABLED
    // Compression scheme name (-c); checked by validateOptions().
    String compress = "none";
    // Bounds of the key-length distribution, in bytes (-k).
    int minKeyLen = 10;
    int maxKeyLen = 50;
    // Bounds of the value-length distribution, in bytes (-v).
    int minValLength = 1024;
    int maxValLength = 2 * 1024;
    // Block size in bytes passed to the HFile writer (-b takes it in KB).
    int minBlockSize = 1 * 1024 * 1024;
    // Stored by setUp() under "tfile.fs.output.buffer.size" and
    // "tfile.fs.input.buffer.size" respectively (-o / -i).
    int fsOutputBufferSize = 1;
    int fsInputBufferSize = 0;
    // Default writing 10MB.
    long fileSize = 10 * 1024 * 1024;
    // Number of seeks performed by seekTFile() (-n).
    long seekCount = 1000;
    // Number of times main() repeats the whole benchmark (-t).
    long trialCount = 1;
    // Seed for the master Random; the constructor defaults it to
    // System.nanoTime().
    long seed;
    // When true, setUp() maps "fs.file.impl" to RawLocalFileSystem (--rawfs).
    boolean useRawFs = false;

    // Bit flags combined in op to select which phases testSeeks() runs.
    static final int OP_CREATE = 1;
    static final int OP_READ = 2;
    int op = OP_CREATE | OP_READ;

    // True only once the command line has been processed and validated
    // without error; stays false after --help or a parse failure.
    boolean proceed = false;
272 
273     public MyOptions(String[] args) {
274       seed = System.nanoTime();
275 
276       try {
277         Options opts = buildOptions();
278         CommandLineParser parser = new GnuParser();
279         CommandLine line = parser.parse(opts, args, true);
280         processOptions(line, opts);
281         validateOptions();
282       }
283       catch (ParseException e) {
284         System.out.println(e.getMessage());
285         System.out.println("Try \"--help\" option for details.");
286         setStopProceed();
287       }
288     }
289 
290     public boolean proceed() {
291       return proceed;
292     }
293 
294     private Options buildOptions() {
295       Option compress =
296           OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz|snappy]")
297               .hasArg().withDescription("compression scheme").create('c');
298 
299       Option fileSize =
300           OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
301               .hasArg().withDescription("target size of the file (in MB).")
302               .create('s');
303 
304       Option fsInputBufferSz =
305           OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
306               .hasArg().withDescription(
307                   "size of the file system input buffer (in bytes).").create(
308                   'i');
309 
310       Option fsOutputBufferSize =
311           OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
312               .hasArg().withDescription(
313                   "size of the file system output buffer (in bytes).").create(
314                   'o');
315 
316       Option keyLen =
317           OptionBuilder
318               .withLongOpt("key-length")
319               .withArgName("min,max")
320               .hasArg()
321               .withDescription(
322                   "the length range of the key (in bytes)")
323               .create('k');
324 
325       Option valueLen =
326           OptionBuilder
327               .withLongOpt("value-length")
328               .withArgName("min,max")
329               .hasArg()
330               .withDescription(
331                   "the length range of the value (in bytes)")
332               .create('v');
333 
334       Option blockSz =
335           OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
336               .withDescription("minimum block size (in KB)").create('b');
337 
338       Option operation =
339           OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
340               .withDescription(
341                   "action: seek-only, create-only, seek-after-create").create(
342                   'x');
343 
344       Option rootDir =
345           OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
346               .withDescription(
347                   "specify root directory where files will be created.")
348               .create('r');
349 
350       Option file =
351           OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
352               .withDescription("specify the file name to be created or read.")
353               .create('f');
354 
355       Option seekCount =
356           OptionBuilder
357               .withLongOpt("seek")
358               .withArgName("count")
359               .hasArg()
360               .withDescription(
361                   "specify how many seek operations we perform (requires -x r or -x rw.")
362               .create('n');
363       
364       Option trialCount =
365           OptionBuilder 
366               .withLongOpt("trials")
367               .withArgName("n")
368               .hasArg()
369               .withDescription(
370                   "specify how many times to run the whole benchmark")
371               .create('t');
372 
373       Option useRawFs =
374           OptionBuilder
375             .withLongOpt("rawfs")
376             .withDescription("use raw instead of checksummed file system")
377             .create();
378       
379       Option help =
380           OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
381               "show this screen").create("h");
382 
383       return new Options().addOption(compress).addOption(fileSize).addOption(
384           fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
385           .addOption(blockSz).addOption(rootDir).addOption(valueLen)
386           .addOption(operation).addOption(seekCount).addOption(file)
387           .addOption(trialCount).addOption(useRawFs).addOption(help);
388 
389     }
390 
391     private void processOptions(CommandLine line, Options opts)
392         throws ParseException {
393       // --help -h and --version -V must be processed first.
394       if (line.hasOption('h')) {
395         HelpFormatter formatter = new HelpFormatter();
396         System.out.println("TFile and SeqFile benchmark.");
397         System.out.println();
398         formatter.printHelp(100,
399             "java ... TestTFileSeqFileComparison [options]",
400             "\nSupported options:", opts, "");
401         return;
402       }
403 
404       if (line.hasOption('c')) {
405         compress = line.getOptionValue('c');
406       }
407 
408       if (line.hasOption('d')) {
409         dictSize = Integer.parseInt(line.getOptionValue('d'));
410       }
411 
412       if (line.hasOption('s')) {
413         fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
414       }
415 
416       if (line.hasOption('i')) {
417         fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
418       }
419 
420       if (line.hasOption('o')) {
421         fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
422       }
423 
424       if (line.hasOption('n')) {
425         seekCount = Integer.parseInt(line.getOptionValue('n'));
426       }
427       
428       if (line.hasOption('t')) {
429         trialCount = Integer.parseInt(line.getOptionValue('t'));
430       }
431 
432       if (line.hasOption('k')) {
433         IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
434         minKeyLen = ir.from();
435         maxKeyLen = ir.to();
436       }
437 
438       if (line.hasOption('v')) {
439         IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
440         minValLength = ir.from();
441         maxValLength = ir.to();
442       }
443 
444       if (line.hasOption('b')) {
445         minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
446       }
447 
448       if (line.hasOption('r')) {
449         rootDir = line.getOptionValue('r');
450       }
451 
452       if (line.hasOption('f')) {
453         file = line.getOptionValue('f');
454       }
455 
456       if (line.hasOption('S')) {
457         seed = Long.parseLong(line.getOptionValue('S'));
458       }
459 
460       if (line.hasOption('x')) {
461         String strOp = line.getOptionValue('x');
462         if (strOp.equals("r")) {
463           op = OP_READ;
464         }
465         else if (strOp.equals("w")) {
466           op = OP_CREATE;
467         }
468         else if (strOp.equals("rw")) {
469           op = OP_CREATE | OP_READ;
470         }
471         else {
472           throw new ParseException("Unknown action specifier: " + strOp);
473         }
474       }
475       
476       useRawFs = line.hasOption("rawfs");
477 
478       proceed = true;
479     }
480 
481     private void validateOptions() throws ParseException {
482       if (!compress.equals("none") && !compress.equals("lzo")
483           && !compress.equals("gz") && !compress.equals("snappy")) {
484         throw new ParseException("Unknown compression scheme: " + compress);
485       }
486 
487       if (minKeyLen >= maxKeyLen) {
488         throw new ParseException(
489             "Max key length must be greater than min key length.");
490       }
491 
492       if (minValLength >= maxValLength) {
493         throw new ParseException(
494             "Max value length must be greater than min value length.");
495       }
496 
497       if (minWordLen >= maxWordLen) {
498         throw new ParseException(
499             "Max word length must be greater than min word length.");
500       }
501       return;
502     }
503 
504     private void setStopProceed() {
505       proceed = false;
506     }
507 
508     public boolean doCreate() {
509       return (op & OP_CREATE) != 0;
510     }
511 
512     public boolean doRead() {
513       return (op & OP_READ) != 0;
514     }
515   }
516 
517   public static void main(String[] argv) throws IOException {
518     TestHFileSeek testCase = new TestHFileSeek();
519     MyOptions options = new MyOptions(argv);
520 
521     if (options.proceed == false) {
522       return;
523     }
524 
525     testCase.options = options;
526     for (int i = 0; i < options.trialCount; i++) {
527       Log.info("Beginning trial " + (i+1));
528       testCase.setUp();
529       testCase.testSeeks();
530       testCase.tearDown();
531     }
532   }
533 }