package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Export an HBase table. Writes the table content, row by row, to sequence
 * files in HDFS. Use the companion {@link Import} tool to read it back in.
 */
public class Export {
  private static final Log LOG = LogFactory.getLog(Export.class);
  final static String NAME = "export";

  /**
   * Mapper that writes each input row straight to the job output, unchanged.
   */
  static class Exporter
      extends TableMapper<ImmutableBytesWritable, Result> {

    /**
     * @param row  The current table row key.
     * @param value  The columns for the row.
     * @param context  The current task context.
     * @throws IOException When writing the output fails.
     */
    @Override
    public void map(ImmutableBytesWritable row, Result value,
        Context context)
        throws IOException {
      try {
        context.write(row, value);
      } catch (InterruptedException e) {
        // Log and restore the interrupt status instead of swallowing it.
        LOG.error("Interrupted while writing row", e);
        Thread.currentThread().interrupt();
      }
    }
  }

  /**
   * Sets up the actual job.
   *
   * @param conf  The current configuration.
   * @param args  The command line parameters.
   * @return The newly created job.
   * @throws IOException When setting up the job fails.
   */
  public static Job createSubmittableJob(Configuration conf, String[] args)
      throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    // The Job constructor already sets the job name.
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Exporter.class);

    Scan s = getConfiguredScanForJob(conf, args);
    TableMapReduceUtil.initTableMapperJob(tableName, s, Exporter.class, null,
        null, job);

    // Map-only job: the Exporter writes scanned rows directly to the output.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Result.class);
    FileOutputFormat.setOutputPath(job, outputDir);
    return job;
  }

  private static Scan getConfiguredScanForJob(Configuration conf, String[] args)
      throws IOException {
    Scan s = new Scan();

    // Optional arguments: max versions, start time, end time.
    int versions = args.length > 2 ? Integer.parseInt(args[2]) : 1;
    s.setMaxVersions(versions);

    long startTime = args.length > 3 ? Long.parseLong(args[3]) : 0L;
    long endTime = args.length > 4 ? Long.parseLong(args[4]) : Long.MAX_VALUE;
    s.setTimeRange(startTime, endTime);

    // A full-table export should not pollute the block cache.
    s.setCacheBlocks(false);

    if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
      s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
    }

    Filter exportFilter = getExportFilter(args);
    if (exportFilter != null) {
      LOG.info("Setting Scan Filter for Export.");
      s.setFilter(exportFilter);
    }
    LOG.info("versions=" + versions + ", starttime=" + startTime +
        ", endtime=" + endTime);
    return s;
  }

  private static Filter getExportFilter(String[] args) {
    Filter exportFilter = null;
    String filterCriteria = (args.length > 5) ? args[5] : null;
    if (filterCriteria == null) return null;
    if (filterCriteria.startsWith("^")) {
      // A leading "^" marks the criteria as a row-key regex.
      String regexPattern = filterCriteria.substring(1);
      exportFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator(regexPattern));
    } else {
      // Otherwise the criteria is treated as a plain row-key prefix.
      exportFilter = new PrefixFilter(Bytes.toBytes(filterCriteria));
    }
    return exportFilter;
  }
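
  // A minimal sketch of the two filter forms accepted as args[5]; the row
  // keys shown are hypothetical:
  //
  //   "^row-[0-9]+"  -> RowFilter(RegexStringComparator("row-[0-9]+")),
  //                     keeping rows whose key matches the regex
  //   "row-1"        -> PrefixFilter("row-1"),
  //                     keeping rows whose key starts with "row-1"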

  /*
   * @param errorMsg Error message. Can be null.
   */
  private static void usage(final String errorMsg) {
    if (errorMsg != null && errorMsg.length() > 0) {
      System.err.println("ERROR: " + errorMsg);
    }
    System.err.println("Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> " +
      "[<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]\n");
    System.err.println("  Note: -D properties will be applied to the conf used.");
    System.err.println("  For example: ");
    System.err.println("   -D mapred.output.compress=true");
    System.err.println("   -D mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec");
    System.err.println("   -D mapred.output.compression.type=BLOCK");
    System.err.println("  Additionally, the following SCAN properties can be specified");
    System.err.println("  to control/limit what is exported:");
    System.err.println("   -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
  }
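
  // A sketch of one way to invoke this tool from the shell; the table name,
  // output path, and argument values below are hypothetical:
  //
  //   hbase org.apache.hadoop.hbase.mapreduce.Export \
  //     -D mapred.output.compress=true \
  //     MyTable /export/MyTable 1 0 1300000000000 ^row-[0-9]+
  //
  // This would export at most one version of each cell written before the
  // given end time, keeping only rows whose key matches the regex, to
  // compressed sequence files under /export/MyTable.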

  /**
   * Main entry point.
   *
   * @param args  The command line parameters.
   * @throws Exception When running the job fails.
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      usage("Wrong number of arguments: " + otherArgs.length);
      System.exit(-1);
    }
    Job job = createSubmittableJob(conf, otherArgs);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}