1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22 import java.util.Random;
23 import java.util.StringTokenizer;
24
25 import junit.framework.TestCase;
26
27 import org.apache.commons.cli.CommandLine;
28 import org.apache.commons.cli.CommandLineParser;
29 import org.apache.commons.cli.GnuParser;
30 import org.apache.commons.cli.HelpFormatter;
31 import org.apache.commons.cli.Option;
32 import org.apache.commons.cli.OptionBuilder;
33 import org.apache.commons.cli.Options;
34 import org.apache.commons.cli.ParseException;
35 import org.apache.hadoop.conf.Configuration;
36 import org.apache.hadoop.fs.FSDataInputStream;
37 import org.apache.hadoop.fs.FSDataOutputStream;
38 import org.apache.hadoop.fs.FileSystem;
39 import org.apache.hadoop.fs.Path;
40 import org.apache.hadoop.fs.RawLocalFileSystem;
41 import org.apache.hadoop.hbase.HBaseTestingUtility;
42 import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
43 import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
44 import org.apache.hadoop.io.BytesWritable;
45 import org.mortbay.log.Log;
46
47
48
49
50
51
52
53
54
55 public class TestHFileSeek extends TestCase {
56 private static final boolean USE_PREAD = true;
57 private MyOptions options;
58 private Configuration conf;
59 private Path path;
60 private FileSystem fs;
61 private NanoTimer timer;
62 private Random rng;
63 private RandomDistribution.DiscreteRNG keyLenGen;
64 private KVGenerator kvGen;
65
66 @Override
67 public void setUp() throws IOException {
68 if (options == null) {
69 options = new MyOptions(new String[0]);
70 }
71
72 conf = new Configuration();
73
74 if (options.useRawFs) {
75 conf.setClass("fs.file.impl", RawLocalFileSystem.class, FileSystem.class);
76 }
77
78 conf.setInt("tfile.fs.input.buffer.size", options.fsInputBufferSize);
79 conf.setInt("tfile.fs.output.buffer.size", options.fsOutputBufferSize);
80 path = new Path(new Path(options.rootDir), options.file);
81 fs = path.getFileSystem(conf);
82 timer = new NanoTimer(false);
83 rng = new Random(options.seed);
84 keyLenGen =
85 new RandomDistribution.Zipf(new Random(rng.nextLong()),
86 options.minKeyLen, options.maxKeyLen, 1.2);
87 RandomDistribution.DiscreteRNG valLenGen =
88 new RandomDistribution.Flat(new Random(rng.nextLong()),
89 options.minValLength, options.maxValLength);
90 RandomDistribution.DiscreteRNG wordLenGen =
91 new RandomDistribution.Flat(new Random(rng.nextLong()),
92 options.minWordLen, options.maxWordLen);
93 kvGen =
94 new KVGenerator(rng, true, keyLenGen, valLenGen, wordLenGen,
95 options.dictSize);
96 }
97
98 @Override
99 public void tearDown() {
100 try {
101 fs.close();
102 }
103 catch (Exception e) {
104
105 }
106 }
107
108 private static FSDataOutputStream createFSOutput(Path name, FileSystem fs)
109 throws IOException {
110 if (fs.exists(name)) {
111 fs.delete(name, true);
112 }
113 FSDataOutputStream fout = fs.create(name);
114 return fout;
115 }
116
117 private void createTFile() throws IOException {
118 long totalBytes = 0;
119 FSDataOutputStream fout = createFSOutput(path, fs);
120 try {
121 Writer writer =
122 HFile.getWriterFactory(conf).createWriter(fout,
123 options.minBlockSize, options.compress, null);
124 try {
125 BytesWritable key = new BytesWritable();
126 BytesWritable val = new BytesWritable();
127 timer.start();
128 for (long i = 0; true; ++i) {
129 if (i % 1000 == 0) {
130 if (fs.getFileStatus(path).getLen() >= options.fileSize) {
131 break;
132 }
133 }
134 kvGen.next(key, val, false);
135 byte [] k = new byte [key.getLength()];
136 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
137 byte [] v = new byte [val.getLength()];
138 System.arraycopy(val.getBytes(), 0, v, 0, key.getLength());
139 writer.append(k, v);
140 totalBytes += key.getLength();
141 totalBytes += val.getLength();
142 }
143 timer.stop();
144 }
145 finally {
146 writer.close();
147 }
148 }
149 finally {
150 fout.close();
151 }
152 double duration = (double)timer.read()/1000;
153 long fsize = fs.getFileStatus(path).getLen();
154
155 System.out.printf(
156 "time: %s...uncompressed: %.2fMB...raw thrpt: %.2fMB/s\n",
157 timer.toString(), (double) totalBytes / 1024 / 1024, totalBytes
158 / duration);
159 System.out.printf("time: %s...file size: %.2fMB...disk thrpt: %.2fMB/s\n",
160 timer.toString(), (double) fsize / 1024 / 1024, fsize / duration);
161 }
162
163 public void seekTFile() throws IOException {
164 int miss = 0;
165 long totalBytes = 0;
166 FSDataInputStream fsdis = fs.open(path);
167 Reader reader = HFile.createReader(path, fsdis,
168 fs.getFileStatus(path).getLen(), new CacheConfig(conf));
169 reader.loadFileInfo();
170 KeySampler kSampler =
171 new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(),
172 keyLenGen);
173 HFileScanner scanner = reader.getScanner(false, USE_PREAD);
174 BytesWritable key = new BytesWritable();
175 timer.reset();
176 timer.start();
177 for (int i = 0; i < options.seekCount; ++i) {
178 kSampler.next(key);
179 byte [] k = new byte [key.getLength()];
180 System.arraycopy(key.getBytes(), 0, k, 0, key.getLength());
181 if (scanner.seekTo(k) >= 0) {
182 ByteBuffer bbkey = scanner.getKey();
183 ByteBuffer bbval = scanner.getValue();
184 totalBytes += bbkey.limit();
185 totalBytes += bbval.limit();
186 }
187 else {
188 ++miss;
189 }
190 }
191 timer.stop();
192 System.out.printf(
193 "time: %s...avg seek: %s...%d hit...%d miss...avg I/O size: %.2fKB\n",
194 timer.toString(), NanoTimer.nanoTimeToString(timer.read()
195 / options.seekCount), options.seekCount - miss, miss,
196 (double) totalBytes / 1024 / (options.seekCount - miss));
197
198 }
199
200 public void testSeeks() throws IOException {
201 if (options.doCreate()) {
202 createTFile();
203 }
204
205 if (options.doRead()) {
206 seekTFile();
207 }
208
209 if (options.doCreate()) {
210 fs.delete(path, true);
211 }
212 }
213
214 private static class IntegerRange {
215 private final int from, to;
216
217 public IntegerRange(int from, int to) {
218 this.from = from;
219 this.to = to;
220 }
221
222 public static IntegerRange parse(String s) throws ParseException {
223 StringTokenizer st = new StringTokenizer(s, " \t,");
224 if (st.countTokens() != 2) {
225 throw new ParseException("Bad integer specification: " + s);
226 }
227 int from = Integer.parseInt(st.nextToken());
228 int to = Integer.parseInt(st.nextToken());
229 return new IntegerRange(from, to);
230 }
231
232 public int from() {
233 return from;
234 }
235
236 public int to() {
237 return to;
238 }
239 }
240
241 private static class MyOptions {
242
243 int dictSize = 1000;
244 int minWordLen = 5;
245 int maxWordLen = 20;
246
247 private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
248 String rootDir =
249 TEST_UTIL.getDataTestDir("TestTFileSeek").toString();
250 String file = "TestTFileSeek";
251
252 String compress = "none";
253 int minKeyLen = 10;
254 int maxKeyLen = 50;
255 int minValLength = 1024;
256 int maxValLength = 2 * 1024;
257 int minBlockSize = 1 * 1024 * 1024;
258 int fsOutputBufferSize = 1;
259 int fsInputBufferSize = 0;
260
261 long fileSize = 10 * 1024 * 1024;
262 long seekCount = 1000;
263 long trialCount = 1;
264 long seed;
265 boolean useRawFs = false;
266
267 static final int OP_CREATE = 1;
268 static final int OP_READ = 2;
269 int op = OP_CREATE | OP_READ;
270
271 boolean proceed = false;
272
273 public MyOptions(String[] args) {
274 seed = System.nanoTime();
275
276 try {
277 Options opts = buildOptions();
278 CommandLineParser parser = new GnuParser();
279 CommandLine line = parser.parse(opts, args, true);
280 processOptions(line, opts);
281 validateOptions();
282 }
283 catch (ParseException e) {
284 System.out.println(e.getMessage());
285 System.out.println("Try \"--help\" option for details.");
286 setStopProceed();
287 }
288 }
289
290 public boolean proceed() {
291 return proceed;
292 }
293
294 private Options buildOptions() {
295 Option compress =
296 OptionBuilder.withLongOpt("compress").withArgName("[none|lzo|gz|snappy]")
297 .hasArg().withDescription("compression scheme").create('c');
298
299 Option fileSize =
300 OptionBuilder.withLongOpt("file-size").withArgName("size-in-MB")
301 .hasArg().withDescription("target size of the file (in MB).")
302 .create('s');
303
304 Option fsInputBufferSz =
305 OptionBuilder.withLongOpt("fs-input-buffer").withArgName("size")
306 .hasArg().withDescription(
307 "size of the file system input buffer (in bytes).").create(
308 'i');
309
310 Option fsOutputBufferSize =
311 OptionBuilder.withLongOpt("fs-output-buffer").withArgName("size")
312 .hasArg().withDescription(
313 "size of the file system output buffer (in bytes).").create(
314 'o');
315
316 Option keyLen =
317 OptionBuilder
318 .withLongOpt("key-length")
319 .withArgName("min,max")
320 .hasArg()
321 .withDescription(
322 "the length range of the key (in bytes)")
323 .create('k');
324
325 Option valueLen =
326 OptionBuilder
327 .withLongOpt("value-length")
328 .withArgName("min,max")
329 .hasArg()
330 .withDescription(
331 "the length range of the value (in bytes)")
332 .create('v');
333
334 Option blockSz =
335 OptionBuilder.withLongOpt("block").withArgName("size-in-KB").hasArg()
336 .withDescription("minimum block size (in KB)").create('b');
337
338 Option operation =
339 OptionBuilder.withLongOpt("operation").withArgName("r|w|rw").hasArg()
340 .withDescription(
341 "action: seek-only, create-only, seek-after-create").create(
342 'x');
343
344 Option rootDir =
345 OptionBuilder.withLongOpt("root-dir").withArgName("path").hasArg()
346 .withDescription(
347 "specify root directory where files will be created.")
348 .create('r');
349
350 Option file =
351 OptionBuilder.withLongOpt("file").withArgName("name").hasArg()
352 .withDescription("specify the file name to be created or read.")
353 .create('f');
354
355 Option seekCount =
356 OptionBuilder
357 .withLongOpt("seek")
358 .withArgName("count")
359 .hasArg()
360 .withDescription(
361 "specify how many seek operations we perform (requires -x r or -x rw.")
362 .create('n');
363
364 Option trialCount =
365 OptionBuilder
366 .withLongOpt("trials")
367 .withArgName("n")
368 .hasArg()
369 .withDescription(
370 "specify how many times to run the whole benchmark")
371 .create('t');
372
373 Option useRawFs =
374 OptionBuilder
375 .withLongOpt("rawfs")
376 .withDescription("use raw instead of checksummed file system")
377 .create();
378
379 Option help =
380 OptionBuilder.withLongOpt("help").hasArg(false).withDescription(
381 "show this screen").create("h");
382
383 return new Options().addOption(compress).addOption(fileSize).addOption(
384 fsInputBufferSz).addOption(fsOutputBufferSize).addOption(keyLen)
385 .addOption(blockSz).addOption(rootDir).addOption(valueLen)
386 .addOption(operation).addOption(seekCount).addOption(file)
387 .addOption(trialCount).addOption(useRawFs).addOption(help);
388
389 }
390
391 private void processOptions(CommandLine line, Options opts)
392 throws ParseException {
393
394 if (line.hasOption('h')) {
395 HelpFormatter formatter = new HelpFormatter();
396 System.out.println("TFile and SeqFile benchmark.");
397 System.out.println();
398 formatter.printHelp(100,
399 "java ... TestTFileSeqFileComparison [options]",
400 "\nSupported options:", opts, "");
401 return;
402 }
403
404 if (line.hasOption('c')) {
405 compress = line.getOptionValue('c');
406 }
407
408 if (line.hasOption('d')) {
409 dictSize = Integer.parseInt(line.getOptionValue('d'));
410 }
411
412 if (line.hasOption('s')) {
413 fileSize = Long.parseLong(line.getOptionValue('s')) * 1024 * 1024;
414 }
415
416 if (line.hasOption('i')) {
417 fsInputBufferSize = Integer.parseInt(line.getOptionValue('i'));
418 }
419
420 if (line.hasOption('o')) {
421 fsOutputBufferSize = Integer.parseInt(line.getOptionValue('o'));
422 }
423
424 if (line.hasOption('n')) {
425 seekCount = Integer.parseInt(line.getOptionValue('n'));
426 }
427
428 if (line.hasOption('t')) {
429 trialCount = Integer.parseInt(line.getOptionValue('t'));
430 }
431
432 if (line.hasOption('k')) {
433 IntegerRange ir = IntegerRange.parse(line.getOptionValue('k'));
434 minKeyLen = ir.from();
435 maxKeyLen = ir.to();
436 }
437
438 if (line.hasOption('v')) {
439 IntegerRange ir = IntegerRange.parse(line.getOptionValue('v'));
440 minValLength = ir.from();
441 maxValLength = ir.to();
442 }
443
444 if (line.hasOption('b')) {
445 minBlockSize = Integer.parseInt(line.getOptionValue('b')) * 1024;
446 }
447
448 if (line.hasOption('r')) {
449 rootDir = line.getOptionValue('r');
450 }
451
452 if (line.hasOption('f')) {
453 file = line.getOptionValue('f');
454 }
455
456 if (line.hasOption('S')) {
457 seed = Long.parseLong(line.getOptionValue('S'));
458 }
459
460 if (line.hasOption('x')) {
461 String strOp = line.getOptionValue('x');
462 if (strOp.equals("r")) {
463 op = OP_READ;
464 }
465 else if (strOp.equals("w")) {
466 op = OP_CREATE;
467 }
468 else if (strOp.equals("rw")) {
469 op = OP_CREATE | OP_READ;
470 }
471 else {
472 throw new ParseException("Unknown action specifier: " + strOp);
473 }
474 }
475
476 useRawFs = line.hasOption("rawfs");
477
478 proceed = true;
479 }
480
481 private void validateOptions() throws ParseException {
482 if (!compress.equals("none") && !compress.equals("lzo")
483 && !compress.equals("gz") && !compress.equals("snappy")) {
484 throw new ParseException("Unknown compression scheme: " + compress);
485 }
486
487 if (minKeyLen >= maxKeyLen) {
488 throw new ParseException(
489 "Max key length must be greater than min key length.");
490 }
491
492 if (minValLength >= maxValLength) {
493 throw new ParseException(
494 "Max value length must be greater than min value length.");
495 }
496
497 if (minWordLen >= maxWordLen) {
498 throw new ParseException(
499 "Max word length must be greater than min word length.");
500 }
501 return;
502 }
503
504 private void setStopProceed() {
505 proceed = false;
506 }
507
508 public boolean doCreate() {
509 return (op & OP_CREATE) != 0;
510 }
511
512 public boolean doRead() {
513 return (op & OP_READ) != 0;
514 }
515 }
516
517 public static void main(String[] argv) throws IOException {
518 TestHFileSeek testCase = new TestHFileSeek();
519 MyOptions options = new MyOptions(argv);
520
521 if (options.proceed == false) {
522 return;
523 }
524
525 testCase.options = options;
526 for (int i = 0; i < options.trialCount; i++) {
527 Log.info("Beginning trial " + (i+1));
528 testCase.setUp();
529 testCase.testSeeks();
530 testCase.tearDown();
531 }
532 }
533 }