1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.IOException;
21 import java.nio.ByteBuffer;
22 import java.text.DateFormat;
23 import java.text.SimpleDateFormat;
24 import java.util.Random;
25
26 import junit.framework.TestCase;
27
28 import org.apache.hadoop.conf.Configuration;
29 import org.apache.hadoop.fs.FSDataInputStream;
30 import org.apache.hadoop.fs.FSDataOutputStream;
31 import org.apache.hadoop.fs.FileSystem;
32 import org.apache.hadoop.fs.Path;
33 import org.apache.hadoop.hbase.HBaseTestingUtility;
34 import org.apache.hadoop.io.BytesWritable;
35 import org.apache.hadoop.io.SequenceFile;
36 import org.apache.hadoop.io.compress.CompressionCodec;
37 import org.apache.hadoop.io.compress.GzipCodec;
38
39
40
41
42
43
44
45
46
47 public class TestHFilePerformance extends TestCase {
48 private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
49 private static String ROOT_DIR =
50 TEST_UTIL.getDataTestDir("TestHFilePerformance").toString();
51 private FileSystem fs;
52 private Configuration conf;
53 private long startTimeEpoch;
54 private long finishTimeEpoch;
55 private DateFormat formatter;
56
57 @Override
58 public void setUp() throws IOException {
59 conf = new Configuration();
60 fs = FileSystem.get(conf);
61 formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
62 }
63
64 public void startTime() {
65 startTimeEpoch = System.currentTimeMillis();
66 System.out.println(formatTime() + " Started timing.");
67 }
68
69 public void stopTime() {
70 finishTimeEpoch = System.currentTimeMillis();
71 System.out.println(formatTime() + " Stopped timing.");
72 }
73
74 public long getIntervalMillis() {
75 return finishTimeEpoch - startTimeEpoch;
76 }
77
78 public void printlnWithTimestamp(String message) {
79 System.out.println(formatTime() + " " + message);
80 }
81
82
83
84
85 public String formatTime(long milis){
86 return formatter.format(milis);
87 }
88
89 public String formatTime(){
90 return formatTime(System.currentTimeMillis());
91 }
92
93 private FSDataOutputStream createFSOutput(Path name) throws IOException {
94 if (fs.exists(name))
95 fs.delete(name, true);
96 FSDataOutputStream fout = fs.create(name);
97 return fout;
98 }
99
100
101
102
103
104 private static class KeyValueGenerator {
105 Random keyRandomizer;
106 Random valueRandomizer;
107 long randomValueRatio = 3;
108 long valueSequence = 0 ;
109
110
111 KeyValueGenerator() {
112 keyRandomizer = new Random(0L);
113 valueRandomizer = new Random(1L);
114 }
115
116
117 void getKey(byte[] key) {
118 keyRandomizer.nextBytes(key);
119 }
120
121 void getValue(byte[] value) {
122 if (valueSequence % randomValueRatio == 0)
123 valueRandomizer.nextBytes(value);
124 valueSequence++;
125 }
126 }
127
128
129
130
131
132
133
134
135
136
137
138
139
140 public void timeWrite(String fileType, int keyLength, int valueLength,
141 String codecName, long rows, String writeMethod, int minBlockSize)
142 throws IOException {
143 System.out.println("File Type: " + fileType);
144 System.out.println("Writing " + fileType + " with codecName: " + codecName);
145 long totalBytesWritten = 0;
146
147
148
149 byte[] key = new byte[keyLength];
150 byte[] value = new byte[valueLength];
151 KeyValueGenerator generator = new KeyValueGenerator();
152
153 startTime();
154
155 Path path = new Path(ROOT_DIR, fileType + ".Performance");
156 System.out.println(ROOT_DIR + path.getName());
157 FSDataOutputStream fout = createFSOutput(path);
158
159 if ("HFile".equals(fileType)){
160 System.out.println("HFile write method: ");
161 HFile.Writer writer =
162 HFile.getWriterFactory(conf).createWriter(fout,
163 minBlockSize, codecName, null);
164
165
166 for (long l=0 ; l<rows ; l++ ) {
167 generator.getKey(key);
168 generator.getValue(value);
169 writer.append(key, value);
170 totalBytesWritten += key.length;
171 totalBytesWritten += value.length;
172 }
173 writer.close();
174 } else if ("SequenceFile".equals(fileType)){
175 CompressionCodec codec = null;
176 if ("gz".equals(codecName))
177 codec = new GzipCodec();
178 else if (!"none".equals(codecName))
179 throw new IOException("Codec not supported.");
180
181 SequenceFile.Writer writer;
182
183
184
185
186 if (!"none".equals(codecName))
187 writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
188 BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
189 else
190 writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
191 BytesWritable.class, SequenceFile.CompressionType.NONE, null);
192
193 BytesWritable keyBsw;
194 BytesWritable valBsw;
195 for (long l=0 ; l<rows ; l++ ) {
196
197 generator.getKey(key);
198 keyBsw = new BytesWritable(key);
199 totalBytesWritten += keyBsw.getSize();
200
201 generator.getValue(value);
202 valBsw = new BytesWritable(value);
203 writer.append(keyBsw, valBsw);
204 totalBytesWritten += valBsw.getSize();
205 }
206
207 writer.close();
208 } else
209 throw new IOException("File Type is not supported");
210
211 fout.close();
212 stopTime();
213
214 printlnWithTimestamp("Data written: ");
215 printlnWithTimestamp(" rate = " +
216 totalBytesWritten / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
217 printlnWithTimestamp(" total = " + totalBytesWritten + "B");
218
219 printlnWithTimestamp("File written: ");
220 printlnWithTimestamp(" rate = " +
221 fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
222 printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
223 }
224
225 public void timeReading(String fileType, int keyLength, int valueLength,
226 long rows, int method) throws IOException {
227 System.out.println("Reading file of type: " + fileType);
228 Path path = new Path(ROOT_DIR, fileType + ".Performance");
229 System.out.println("Input file size: " + fs.getFileStatus(path).getLen());
230 long totalBytesRead = 0;
231
232
233 ByteBuffer val;
234
235 ByteBuffer key;
236
237 startTime();
238 FSDataInputStream fin = fs.open(path);
239
240 if ("HFile".equals(fileType)){
241 HFile.Reader reader = HFile.createReader(path, fs.open(path),
242 fs.getFileStatus(path).getLen(), new CacheConfig(conf));
243 reader.loadFileInfo();
244 switch (method) {
245
246 case 0:
247 case 1:
248 default:
249 {
250 HFileScanner scanner = reader.getScanner(false, false);
251 scanner.seekTo();
252 for (long l=0 ; l<rows ; l++ ) {
253 key = scanner.getKey();
254 val = scanner.getValue();
255 totalBytesRead += key.limit() + val.limit();
256 scanner.next();
257 }
258 }
259 break;
260 }
261 } else if("SequenceFile".equals(fileType)){
262
263 SequenceFile.Reader reader;
264 reader = new SequenceFile.Reader(fs, path, new Configuration());
265
266 if (reader.getCompressionCodec() != null) {
267 printlnWithTimestamp("Compression codec class: " + reader.getCompressionCodec().getClass());
268 } else
269 printlnWithTimestamp("Compression codec class: " + "none");
270
271 BytesWritable keyBsw = new BytesWritable();
272 BytesWritable valBsw = new BytesWritable();
273
274 for (long l=0 ; l<rows ; l++ ) {
275 reader.next(keyBsw, valBsw);
276 totalBytesRead += keyBsw.getSize() + valBsw.getSize();
277 }
278 reader.close();
279
280
281
282 } else {
283 throw new IOException("File Type not supported.");
284 }
285
286
287
288 fin.close();
289 stopTime();
290
291
292 printlnWithTimestamp("Finished in " + getIntervalMillis() + "ms");
293 printlnWithTimestamp("Data read: ");
294 printlnWithTimestamp(" rate = " +
295 totalBytesRead / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
296 printlnWithTimestamp(" total = " + totalBytesRead + "B");
297
298 printlnWithTimestamp("File read: ");
299 printlnWithTimestamp(" rate = " +
300 fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
301 printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
302
303
304
305 }
306
307 public void testRunComparisons() throws IOException {
308
309 int keyLength = 100;
310 int valueLength = 5*1024;
311 int minBlockSize = 10*1024*1024;
312 int rows = 10000;
313
314 System.out.println("****************************** Sequence File *****************************");
315
316 timeWrite("SequenceFile", keyLength, valueLength, "none", rows, null, minBlockSize);
317 System.out.println("\n+++++++\n");
318 timeReading("SequenceFile", keyLength, valueLength, rows, -1);
319
320 System.out.println("");
321 System.out.println("----------------------");
322 System.out.println("");
323
324
325
326
327
328
329
330
331
332
333
334
335 try {
336 timeWrite("SequenceFile", keyLength, valueLength, "gz", rows, null,
337 minBlockSize);
338 System.out.println("\n+++++++\n");
339 timeReading("SequenceFile", keyLength, valueLength, rows, -1);
340 } catch (IllegalArgumentException e) {
341 System.out.println("Skipping sequencefile gz: " + e.getMessage());
342 }
343
344
345 System.out.println("\n\n\n");
346 System.out.println("****************************** HFile *****************************");
347
348 timeWrite("HFile", keyLength, valueLength, "none", rows, null, minBlockSize);
349 System.out.println("\n+++++++\n");
350 timeReading("HFile", keyLength, valueLength, rows, 0 );
351
352 System.out.println("");
353 System.out.println("----------------------");
354 System.out.println("");
355
356
357
358
359
360
361
362
363
364
365
366
367
368 timeWrite("HFile", keyLength, valueLength, "gz", rows, null, minBlockSize);
369 System.out.println("\n+++++++\n");
370 timeReading("HFile", keyLength, valueLength, rows, 0 );
371
372 System.out.println("\n\n\n\nNotes: ");
373 System.out.println(" * Timing includes open/closing of files.");
374 System.out.println(" * Timing includes reading both Key and Value");
375 System.out.println(" * Data is generated as random bytes. Other methods e.g. using " +
376 "dictionary with care for distributation of words is under development.");
377 System.out.println(" * Timing of write currently, includes random value/key generations. " +
378 "Which is the same for Sequence File and HFile. Another possibility is to generate " +
379 "test data beforehand");
380 System.out.println(" * We need to mitigate cache effect on benchmark. We can apply several " +
381 "ideas, for next step we do a large dummy read between benchmark read to dismantle " +
382 "caching of data. Renaming of file may be helpful. We can have a loop that reads with" +
383 " the same method several times and flood cache every time and average it to get a" +
384 " better number.");
385 }
386 }