1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.apache.hadoop.hbase.io.hfile;
22
23 import java.io.DataInput;
24 import java.io.IOException;
25 import java.util.ArrayList;
26 import java.util.List;
27 import java.util.Map;
28
29 import org.apache.commons.logging.Log;
30 import org.apache.commons.logging.LogFactory;
31
32 import org.apache.commons.cli.CommandLine;
33 import org.apache.commons.cli.CommandLineParser;
34 import org.apache.commons.cli.HelpFormatter;
35 import org.apache.commons.cli.Options;
36 import org.apache.commons.cli.ParseException;
37 import org.apache.commons.cli.PosixParser;
38 import org.apache.hadoop.conf.Configuration;
39 import org.apache.hadoop.fs.FileSystem;
40 import org.apache.hadoop.fs.Path;
41 import org.apache.hadoop.hbase.HBaseConfiguration;
42 import org.apache.hadoop.hbase.HRegionInfo;
43 import org.apache.hadoop.hbase.KeyValue;
44 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
45 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
46 import org.apache.hadoop.hbase.util.BloomFilter;
47 import org.apache.hadoop.hbase.util.BloomFilterFactory;
48 import org.apache.hadoop.hbase.util.ByteBloomFilter;
49 import org.apache.hadoop.hbase.util.Bytes;
50 import org.apache.hadoop.hbase.util.FSUtils;
51 import org.apache.hadoop.hbase.util.Writables;
52
53
54
55
56 public class HFilePrettyPrinter {
57
58 private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
59
60 private Options options = new Options();
61
62 private boolean verbose;
63 private boolean printValue;
64 private boolean printKey;
65 private boolean shouldPrintMeta;
66 private boolean printBlocks;
67 private boolean printStats;
68 private boolean checkRow;
69 private boolean checkFamily;
70
71 private Configuration conf;
72
73 private List<Path> files = new ArrayList<Path>();
74 private int count;
75
76 private static final String FOUR_SPACES = " ";
77
/**
 * Creates a pretty printer and registers every command-line option it
 * understands. All options are boolean flags except {@code -f/--file} and
 * {@code -r/--region}, which each take one argument.
 */
public HFilePrettyPrinter() {
  // Options.addOption returns the Options instance, so the registrations
  // can be chained in the same order as before.
  options
      .addOption("v", "verbose", false,
          "Verbose output; emits file and meta data delimiters")
      .addOption("p", "printkv", false, "Print key/value pairs")
      .addOption("e", "printkey", false, "Print keys")
      .addOption("m", "printmeta", false, "Print meta data of file")
      .addOption("b", "printblocks", false, "Print block index meta data")
      .addOption("k", "checkrow", false,
          "Enable row order check; looks for out-of-order keys")
      .addOption("a", "checkfamily", false, "Enable family check")
      .addOption("f", "file", true,
          "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/.META./12/34")
      .addOption("r", "region", true,
          "Region to scan. Pass region name; e.g. '.META.,,1'")
      .addOption("s", "stats", false, "Print statistics");
}
94
95 public boolean parseOptions(String args[]) throws ParseException,
96 IOException {
97 if (args.length == 0) {
98 HelpFormatter formatter = new HelpFormatter();
99 formatter.printHelp("HFile", options, true);
100 return false;
101 }
102 CommandLineParser parser = new PosixParser();
103 CommandLine cmd = parser.parse(options, args);
104
105 verbose = cmd.hasOption("v");
106 printValue = cmd.hasOption("p");
107 printKey = cmd.hasOption("e") || printValue;
108 shouldPrintMeta = cmd.hasOption("m");
109 printBlocks = cmd.hasOption("b");
110 printStats = cmd.hasOption("s");
111 checkRow = cmd.hasOption("k");
112 checkFamily = cmd.hasOption("a");
113
114 if (cmd.hasOption("f")) {
115 files.add(new Path(cmd.getOptionValue("f")));
116 }
117
118 if (cmd.hasOption("r")) {
119 String regionName = cmd.getOptionValue("r");
120 byte[] rn = Bytes.toBytes(regionName);
121 byte[][] hri = HRegionInfo.parseRegionName(rn);
122 Path rootDir = FSUtils.getRootDir(conf);
123 Path tableDir = new Path(rootDir, Bytes.toString(hri[0]));
124 String enc = HRegionInfo.encodeRegionName(rn);
125 Path regionDir = new Path(tableDir, enc);
126 if (verbose)
127 System.out.println("region dir -> " + regionDir);
128 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
129 regionDir);
130 if (verbose)
131 System.out.println("Number of region files found -> "
132 + regionFiles.size());
133 if (verbose) {
134 int i = 1;
135 for (Path p : regionFiles) {
136 if (verbose)
137 System.out.println("Found file[" + i++ + "] -> " + p);
138 }
139 }
140 files.addAll(regionFiles);
141 }
142
143 return true;
144 }
145
146
147
148
149
150 public int run(String[] args) {
151 conf = HBaseConfiguration.create();
152 conf.set("fs.defaultFS",
153 conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
154 conf.set("fs.default.name",
155 conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
156 try {
157 if (!parseOptions(args))
158 return 1;
159 } catch (IOException ex) {
160 LOG.error("Error parsing command-line options", ex);
161 return 1;
162 } catch (ParseException ex) {
163 LOG.error("Error parsing command-line options", ex);
164 return 1;
165 }
166
167
168 for (Path fileName : files) {
169 try {
170 processFile(fileName);
171 } catch (IOException ex) {
172 LOG.error("Error reading " + fileName, ex);
173 }
174 }
175
176 if (verbose || printKey) {
177 System.out.println("Scanned kv count -> " + count);
178 }
179
180 return 0;
181 }
182
183 private void processFile(Path file) throws IOException {
184 if (verbose)
185 System.out.println("Scanning -> " + file);
186 FileSystem fs = file.getFileSystem(conf);
187 if (!fs.exists(file)) {
188 System.err.println("ERROR, file doesnt exist: " + file);
189 }
190
191 HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
192
193 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
194
195 KeyValueStatsCollector fileStats = null;
196
197 if (verbose || printKey || checkRow || checkFamily || printStats) {
198
199 HFileScanner scanner = reader.getScanner(false, false, false);
200 fileStats = new KeyValueStatsCollector();
201 if (scanner.seekTo())
202 scanKeysValues(file, fileStats, scanner);
203 }
204
205
206 if (shouldPrintMeta) {
207 printMeta(reader, fileInfo);
208 }
209
210 if (printBlocks) {
211 System.out.println("Block Index:");
212 System.out.println(reader.getDataBlockIndexReader());
213 }
214
215 if (printStats) {
216 fileStats.finish();
217 System.out.println("Stats:\n" + fileStats);
218 }
219
220 reader.close();
221 }
222
223 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats, HFileScanner scanner)
224 throws IOException {
225 KeyValue pkv = null;
226 do {
227 KeyValue kv = scanner.getKeyValue();
228
229 if (printStats) {
230 fileStats.collect(kv);
231 }
232
233 if (printKey) {
234 System.out.print("K: " + kv);
235 if (printValue) {
236 System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
237 }
238 System.out.println();
239 }
240
241 if (checkRow && pkv != null) {
242 if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
243 System.err.println("WARNING, previous row is greater then"
244 + " current row\n\tfilename -> " + file + "\n\tprevious -> "
245 + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent -> "
246 + Bytes.toStringBinary(kv.getKey()));
247 }
248 }
249
250 if (checkFamily) {
251 String fam = Bytes.toString(kv.getFamily());
252 if (!file.toString().contains(fam)) {
253 System.err.println("WARNING, filename does not match kv family,"
254 + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
255 + Bytes.toStringBinary(kv.getKey()));
256 }
257 if (pkv != null
258 && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
259 System.err.println("WARNING, previous kv has different family"
260 + " compared to current key\n\tfilename -> " + file
261 + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
262 + "\n\tcurrent -> " + Bytes.toStringBinary(kv.getKey()));
263 }
264 }
265 pkv = kv;
266 ++count;
267 } while (scanner.next());
268 }
269
270
271
272
273
274 private static String asSeparateLines(String keyValueStr) {
275 return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
276 ",\n" + FOUR_SPACES + "$1");
277 }
278
279 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
280 throws IOException {
281 System.out.println("Block index size as per heapsize: "
282 + reader.indexSize());
283 System.out.println(asSeparateLines(reader.toString()));
284 System.out.println("Trailer:\n "
285 + asSeparateLines(reader.getTrailer().toString()));
286 System.out.println("Fileinfo:");
287 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
288 System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
289 if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
290 long seqid = Bytes.toLong(e.getValue());
291 System.out.println(seqid);
292 } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
293 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
294 Writables.copyWritable(e.getValue(), timeRangeTracker);
295 System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
296 + timeRangeTracker.getMaximumTimestamp());
297 } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
298 || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
299 System.out.println(Bytes.toInt(e.getValue()));
300 } else {
301 System.out.println(Bytes.toStringBinary(e.getValue()));
302 }
303 }
304
305 System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
306
307
308 DataInput bloomMeta = reader.getBloomFilterMetadata();
309 BloomFilter bloomFilter = null;
310 if (bloomMeta != null)
311 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
312
313 System.out.println("Bloom filter:");
314 if (bloomFilter != null) {
315 System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
316 ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
317 } else {
318 System.out.println(FOUR_SPACES + "Not present");
319 }
320 }
321
322 private static class LongStats {
323 private long min = Long.MAX_VALUE;
324 private long max = Long.MIN_VALUE;
325 private long sum = 0;
326 private long count = 0;
327
328 void collect(long d) {
329 if (d < min) min = d;
330 if (d > max) max = d;
331 sum += d;
332 count++;
333 }
334
335 public String toString() {
336 return "count: " + count +
337 "\tmin: " + min +
338 "\tmax: " + max +
339 "\tmean: " + ((double)sum/count);
340 }
341 }
342
343 private static class KeyValueStatsCollector {
344 LongStats keyLen = new LongStats();
345 LongStats valLen = new LongStats();
346 LongStats rowSizeBytes = new LongStats();
347 LongStats rowSizeCols = new LongStats();
348
349 long curRowBytes = 0;
350 long curRowCols = 0;
351
352 byte[] biggestRow = null;
353
354 private KeyValue prevKV = null;
355 private long maxRowBytes = 0;
356
357 public void collect(KeyValue kv) {
358 keyLen.collect(kv.getKeyLength());
359 valLen.collect(kv.getValueLength());
360 if (prevKV != null &&
361 KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
362
363 collectRow();
364 }
365 curRowBytes += kv.getLength();
366 curRowCols++;
367 prevKV = kv;
368 }
369
370 private void collectRow() {
371 rowSizeBytes.collect(curRowBytes);
372 rowSizeCols.collect(curRowCols);
373
374 if (curRowBytes > maxRowBytes && prevKV != null) {
375 biggestRow = prevKV.getRow();
376 }
377
378 curRowBytes = 0;
379 curRowCols = 0;
380 }
381
382 public void finish() {
383 if (curRowCols > 0) {
384 collectRow();
385 }
386 }
387
388 @Override
389 public String toString() {
390 if (prevKV == null)
391 return "no data available for statistics";
392
393 return
394 "Key length: " + keyLen + "\n" +
395 "Val length: " + valLen + "\n" +
396 "Row size (bytes): " + rowSizeBytes + "\n" +
397 "Row size (columns): " + rowSizeCols + "\n" +
398 "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
399 }
400 }
401 }