View Javadoc

1   
2   /*
3    * Copyright 2011 The Apache Software Foundation
4    *
5    * Licensed to the Apache Software Foundation (ASF) under one
6    * or more contributor license agreements.  See the NOTICE file
7    * distributed with this work for additional information
8    * regarding copyright ownership.  The ASF licenses this file
9    * to you under the Apache License, Version 2.0 (the
10   * "License"); you may not use this file except in compliance
11   * with the License.  You may obtain a copy of the License at
12   *
13   *     http://www.apache.org/licenses/LICENSE-2.0
14   *
15   * Unless required by applicable law or agreed to in writing, software
16   * distributed under the License is distributed on an "AS IS" BASIS,
17   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18   * See the License for the specific language governing permissions and
19   * limitations under the License.
20   */
21  package org.apache.hadoop.hbase.io.hfile;
22  
23  import java.io.DataInput;
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.List;
27  import java.util.Map;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  
32  import org.apache.commons.cli.CommandLine;
33  import org.apache.commons.cli.CommandLineParser;
34  import org.apache.commons.cli.HelpFormatter;
35  import org.apache.commons.cli.Options;
36  import org.apache.commons.cli.ParseException;
37  import org.apache.commons.cli.PosixParser;
38  import org.apache.hadoop.conf.Configuration;
39  import org.apache.hadoop.fs.FileSystem;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.hbase.HBaseConfiguration;
42  import org.apache.hadoop.hbase.HRegionInfo;
43  import org.apache.hadoop.hbase.KeyValue;
44  import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
45  import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
46  import org.apache.hadoop.hbase.util.BloomFilter;
47  import org.apache.hadoop.hbase.util.BloomFilterFactory;
48  import org.apache.hadoop.hbase.util.ByteBloomFilter;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.FSUtils;
51  import org.apache.hadoop.hbase.util.Writables;
52  
53  /**
54   * Implements pretty-printing functionality for {@link HFile}s.
55   */
56  public class HFilePrettyPrinter {
57  
58    private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
59  
60    private Options options = new Options();
61  
62    private boolean verbose;
63    private boolean printValue;
64    private boolean printKey;
65    private boolean shouldPrintMeta;
66    private boolean printBlocks;
67    private boolean printStats;
68    private boolean checkRow;
69    private boolean checkFamily;
70  
71    private Configuration conf;
72  
73    private List<Path> files = new ArrayList<Path>();
74    private int count;
75  
76    private static final String FOUR_SPACES = "    ";
77  
78    public HFilePrettyPrinter() {
79      options.addOption("v", "verbose", false,
80          "Verbose output; emits file and meta data delimiters");
81      options.addOption("p", "printkv", false, "Print key/value pairs");
82      options.addOption("e", "printkey", false, "Print keys");
83      options.addOption("m", "printmeta", false, "Print meta data of file");
84      options.addOption("b", "printblocks", false, "Print block index meta data");
85      options.addOption("k", "checkrow", false,
86          "Enable row order check; looks for out-of-order keys");
87      options.addOption("a", "checkfamily", false, "Enable family check");
88      options.addOption("f", "file", true,
89          "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/.META./12/34");
90      options.addOption("r", "region", true,
91          "Region to scan. Pass region name; e.g. '.META.,,1'");
92      options.addOption("s", "stats", false, "Print statistics");
93    }
94  
95    public boolean parseOptions(String args[]) throws ParseException,
96        IOException {
97      if (args.length == 0) {
98        HelpFormatter formatter = new HelpFormatter();
99        formatter.printHelp("HFile", options, true);
100       return false;
101     }
102     CommandLineParser parser = new PosixParser();
103     CommandLine cmd = parser.parse(options, args);
104 
105     verbose = cmd.hasOption("v");
106     printValue = cmd.hasOption("p");
107     printKey = cmd.hasOption("e") || printValue;
108     shouldPrintMeta = cmd.hasOption("m");
109     printBlocks = cmd.hasOption("b");
110     printStats = cmd.hasOption("s");
111     checkRow = cmd.hasOption("k");
112     checkFamily = cmd.hasOption("a");
113 
114     if (cmd.hasOption("f")) {
115       files.add(new Path(cmd.getOptionValue("f")));
116     }
117 
118     if (cmd.hasOption("r")) {
119       String regionName = cmd.getOptionValue("r");
120       byte[] rn = Bytes.toBytes(regionName);
121       byte[][] hri = HRegionInfo.parseRegionName(rn);
122       Path rootDir = FSUtils.getRootDir(conf);
123       Path tableDir = new Path(rootDir, Bytes.toString(hri[0]));
124       String enc = HRegionInfo.encodeRegionName(rn);
125       Path regionDir = new Path(tableDir, enc);
126       if (verbose)
127         System.out.println("region dir -> " + regionDir);
128       List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(conf),
129           regionDir);
130       if (verbose)
131         System.out.println("Number of region files found -> "
132             + regionFiles.size());
133       if (verbose) {
134         int i = 1;
135         for (Path p : regionFiles) {
136           if (verbose)
137             System.out.println("Found file[" + i++ + "] -> " + p);
138         }
139       }
140       files.addAll(regionFiles);
141     }
142 
143     return true;
144   }
145 
146   /**
147    * Runs the command-line pretty-printer, and returns the desired command
148    * exit code (zero for success, non-zero for failure).
149    */
150   public int run(String[] args) {
151     conf = HBaseConfiguration.create();
152     conf.set("fs.defaultFS",
153         conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
154     conf.set("fs.default.name",
155         conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
156     try {
157       if (!parseOptions(args))
158         return 1;
159     } catch (IOException ex) {
160       LOG.error("Error parsing command-line options", ex);
161       return 1;
162     } catch (ParseException ex) {
163       LOG.error("Error parsing command-line options", ex);
164       return 1;
165     }
166 
167     // iterate over all files found
168     for (Path fileName : files) {
169       try {
170         processFile(fileName);
171       } catch (IOException ex) {
172         LOG.error("Error reading " + fileName, ex);
173       }
174     }
175 
176     if (verbose || printKey) {
177       System.out.println("Scanned kv count -> " + count);
178     }
179 
180     return 0;
181   }
182 
183   private void processFile(Path file) throws IOException {
184     if (verbose)
185       System.out.println("Scanning -> " + file);
186     FileSystem fs = file.getFileSystem(conf);
187     if (!fs.exists(file)) {
188       System.err.println("ERROR, file doesnt exist: " + file);
189     }
190 
191     HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(conf));
192 
193     Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
194 
195     KeyValueStatsCollector fileStats = null;
196 
197     if (verbose || printKey || checkRow || checkFamily || printStats) {
198       // scan over file and read key/value's and check if requested
199       HFileScanner scanner = reader.getScanner(false, false, false);
200       fileStats = new KeyValueStatsCollector();
201       if (scanner.seekTo())
202         scanKeysValues(file, fileStats, scanner);
203     }
204 
205     // print meta data
206     if (shouldPrintMeta) {
207       printMeta(reader, fileInfo);
208     }
209 
210     if (printBlocks) {
211       System.out.println("Block Index:");
212       System.out.println(reader.getDataBlockIndexReader());
213     }
214 
215     if (printStats) {
216       fileStats.finish();
217       System.out.println("Stats:\n" + fileStats);
218     }
219 
220     reader.close();
221   }
222 
223   private void scanKeysValues(Path file, KeyValueStatsCollector fileStats, HFileScanner scanner)
224       throws IOException {
225     KeyValue pkv = null;
226     do {
227       KeyValue kv = scanner.getKeyValue();
228       // collect stats
229       if (printStats) {
230         fileStats.collect(kv);
231       }
232       // dump key value
233       if (printKey) {
234         System.out.print("K: " + kv);
235         if (printValue) {
236           System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
237         }
238         System.out.println();
239       }
240       // check if rows are in order
241       if (checkRow && pkv != null) {
242         if (Bytes.compareTo(pkv.getRow(), kv.getRow()) > 0) {
243           System.err.println("WARNING, previous row is greater then"
244               + " current row\n\tfilename -> " + file + "\n\tprevious -> "
245               + Bytes.toStringBinary(pkv.getKey()) + "\n\tcurrent  -> "
246               + Bytes.toStringBinary(kv.getKey()));
247         }
248       }
249       // check if families are consistent
250       if (checkFamily) {
251         String fam = Bytes.toString(kv.getFamily());
252         if (!file.toString().contains(fam)) {
253           System.err.println("WARNING, filename does not match kv family,"
254               + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
255               + Bytes.toStringBinary(kv.getKey()));
256         }
257         if (pkv != null
258             && !Bytes.equals(pkv.getFamily(), kv.getFamily())) {
259           System.err.println("WARNING, previous kv has different family"
260               + " compared to current key\n\tfilename -> " + file
261               + "\n\tprevious -> " + Bytes.toStringBinary(pkv.getKey())
262               + "\n\tcurrent  -> " + Bytes.toStringBinary(kv.getKey()));
263         }
264       }
265       pkv = kv;
266       ++count;
267     } while (scanner.next());
268   }
269 
270   /**
271    * Format a string of the form "k1=v1, k2=v2, ..." into separate lines
272    * with a four-space indentation.
273    */
274   private static String asSeparateLines(String keyValueStr) {
275     return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
276                                   ",\n" + FOUR_SPACES + "$1");
277   }
278 
279   private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
280       throws IOException {
281     System.out.println("Block index size as per heapsize: "
282         + reader.indexSize());
283     System.out.println(asSeparateLines(reader.toString()));
284     System.out.println("Trailer:\n    "
285         + asSeparateLines(reader.getTrailer().toString()));
286     System.out.println("Fileinfo:");
287     for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
288       System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
289       if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
290         long seqid = Bytes.toLong(e.getValue());
291         System.out.println(seqid);
292       } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
293         TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
294         Writables.copyWritable(e.getValue(), timeRangeTracker);
295         System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
296             + timeRangeTracker.getMaximumTimestamp());
297       } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
298           || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
299         System.out.println(Bytes.toInt(e.getValue()));
300       } else {
301         System.out.println(Bytes.toStringBinary(e.getValue()));
302       }
303     }
304 
305     System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
306 
307     // Printing bloom information
308     DataInput bloomMeta = reader.getBloomFilterMetadata();
309     BloomFilter bloomFilter = null;
310     if (bloomMeta != null)
311       bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
312 
313     System.out.println("Bloom filter:");
314     if (bloomFilter != null) {
315       System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
316           ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
317     } else {
318       System.out.println(FOUR_SPACES + "Not present");
319     }
320   }
321 
322   private static class LongStats {
323     private long min = Long.MAX_VALUE;
324     private long max = Long.MIN_VALUE;
325     private long sum = 0;
326     private long count = 0;
327 
328     void collect(long d) {
329       if (d < min) min = d;
330       if (d > max) max = d;
331       sum += d;
332       count++;
333     }
334 
335     public String toString() {
336       return "count: " + count +
337         "\tmin: " + min +
338         "\tmax: " + max +
339         "\tmean: " + ((double)sum/count);
340     }
341   }
342 
343   private static class KeyValueStatsCollector {
344     LongStats keyLen = new LongStats();
345     LongStats valLen = new LongStats();
346     LongStats rowSizeBytes = new LongStats();
347     LongStats rowSizeCols = new LongStats();
348 
349     long curRowBytes = 0;
350     long curRowCols = 0;
351 
352     byte[] biggestRow = null;
353 
354     private KeyValue prevKV = null;
355     private long maxRowBytes = 0;
356 
357     public void collect(KeyValue kv) {
358       keyLen.collect(kv.getKeyLength());
359       valLen.collect(kv.getValueLength());
360       if (prevKV != null &&
361           KeyValue.COMPARATOR.compareRows(prevKV, kv) != 0) {
362         // new row
363         collectRow();
364       }
365       curRowBytes += kv.getLength();
366       curRowCols++;
367       prevKV = kv;
368     }
369 
370     private void collectRow() {
371       rowSizeBytes.collect(curRowBytes);
372       rowSizeCols.collect(curRowCols);
373 
374       if (curRowBytes > maxRowBytes && prevKV != null) {
375         biggestRow = prevKV.getRow();
376       }
377 
378       curRowBytes = 0;
379       curRowCols = 0;
380     }
381 
382     public void finish() {
383       if (curRowCols > 0) {
384         collectRow();
385       }
386     }
387 
388     @Override
389     public String toString() {
390       if (prevKV == null)
391         return "no data available for statistics";
392 
393       return
394         "Key length: " + keyLen + "\n" +
395         "Val length: " + valLen + "\n" +
396         "Row size (bytes): " + rowSizeBytes + "\n" +
397         "Row size (columns): " + rowSizeCols + "\n" +
398         "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
399     }
400   }
401 }