/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Export an HBase table.
 * Writes content to sequence files up in HDFS.  Use {@link Import} to read it
 * back in again.
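 * <p>
 * Illustrative command-line invocation (the argument list mirrors the text
 * printed by this class's private {@code usage()} helper; the {@code hbase}
 * launcher script is assumed to provide the classpath):
 * <pre>
 *   hbase org.apache.hadoop.hbase.mapreduce.Export [-D &lt;property=value&gt;]*
 *       &lt;tablename&gt; &lt;outputdir&gt; [&lt;versions&gt; [&lt;starttime&gt; [&lt;endtime&gt;]]]
 * </pre>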
 */
public class Export {
  private static final Log LOG = LogFactory.getLog(Export.class);
  final static String NAME = "export";

  /**
   * Mapper.
   */
  static class Exporter
  extends TableMapper<ImmutableBytesWritable, Result> {
    /**
     * @param row  The current table row key.
     * @param value  The columns.
     * @param context  The current context.
     * @throws IOException When something is broken with the data.
     * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
     *   org.apache.hadoop.mapreduce.Mapper.Context)
     */
    @Override
    public void map(ImmutableBytesWritable row, Result value,
      Context context)
    throws IOException {
      try {
        context.write(row, value);
      } catch (InterruptedException e) {
        // Restore the interrupt flag and fail the task instead of silently
        // swallowing the exception.
        Thread.currentThread().interrupt();
        throw new IOException("Interrupted while emitting row", e);
      }
    }
  }

  /**
   * Sets up the actual job.
   *
   * @param conf  The current configuration.
   * @param args  The command line parameters:
   *   {@code <tablename> <outputdir> [<versions> [<starttime> [<endtime>]] [filter]]}.
   * @return The newly created job.
   * @throws IOException When setting up the job fails.
   */
  public static Job createSubmittableJob(Configuration conf, String[] args)
  throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJobName(NAME + "_" + tableName);
    job.setJarByClass(Exporter.class);
    // Set optional scan parameters
    Scan s = getConfiguredScanForJob(conf, args);
    TableMapReduceUtil.initTableMapperJob(tableName, s, Exporter.class, null,
      null, job);
    // No reducers.  Just write straight to output files.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Result.class);
    FileOutputFormat.setOutputPath(job, outputDir);
    return job;
  }

  private static Scan getConfiguredScanForJob(Configuration conf, String[] args)
  throws IOException {
    Scan s = new Scan();
    // Optional arguments.
    // Set Scan Versions
    int versions = args.length > 2 ? Integer.parseInt(args[2]) : 1;
    s.setMaxVersions(versions);
    // Set Scan Range
    long startTime = args.length > 3 ? Long.parseLong(args[3]) : 0L;
    long endTime = args.length > 4 ? Long.parseLong(args[4]) : Long.MAX_VALUE;
    s.setTimeRange(startTime, endTime);
    // Set cache blocks
    s.setCacheBlocks(false);
    // Set Scan Column Family
    if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
      s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
    }
    // Set RowFilter or Prefix Filter if applicable.
    Filter exportFilter = getExportFilter(args);
    if (exportFilter != null) {
      LOG.info("Setting Scan Filter for Export.");
      s.setFilter(exportFilter);
    }
    LOG.info("versions=" + versions + ", starttime=" + startTime +
      ", endtime=" + endTime);
    return s;
  }

  private static Filter getExportFilter(String[] args) {
    Filter exportFilter = null;
    String filterCriteria = (args.length > 5) ? args[5] : null;
    if (filterCriteria == null) return null;
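    // Interpretation of the optional filter argument (summary of the two
    // branches below; the row keys shown are illustrative examples only):
    //   - a leading '^' means the remainder is handed to RegexStringComparator
    //     as a regular expression over row keys, e.g. "^myrow.*"
    //   - anything else is used verbatim as a row-key prefix for PrefixFilter,
    //     e.g. "myrow"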
    if (filterCriteria.startsWith("^")) {
      String regexPattern = filterCriteria.substring(1);
      exportFilter = new RowFilter(CompareOp.EQUAL,
        new RegexStringComparator(regexPattern));
    } else {
      exportFilter = new PrefixFilter(Bytes.toBytes(filterCriteria));
    }
    return exportFilter;
  }

  /*
   * @param errorMsg Error message.  Can be null.
   */
  private static void usage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
    }
    System.err.println("Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
      "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]\n");
    System.err.println("  Note: -D properties will be applied to the conf used. ");
    System.err.println("  For example: ");
    System.err.println("   -D mapred.output.compress=true");
    System.err.println("   -D mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec");
    System.err.println("   -D mapred.output.compression.type=BLOCK");
    System.err.println("  Additionally, the following SCAN properties can be specified");
    System.err.println("  to control/limit what is exported.");
    System.err.println("   -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
  }

  /**
   * Main entry point.
   *
   * @param args  The command line parameters.
   * @throws Exception When running the job fails.
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
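    // GenericOptionsParser consumes the generic Hadoop options (including the
    // -D property=value pairs shown in usage()) and applies them to conf;
    // only the remaining tool-specific arguments are returned here.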
    if (otherArgs.length < 2) {
      usage("Wrong number of arguments: " + otherArgs.length);
      System.exit(-1);
    }
    Job job = createSubmittableJob(conf, otherArgs);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}