/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapred;

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * Utility for {@link TableMap} and {@link TableReduce}
 */
@Deprecated
@SuppressWarnings("unchecked")
public class TableMapReduceUtil {

  /**
   * Use this before submitting a TableMap job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The table name to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   */
  public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, JobConf job) {

    job.setInputFormat(TableInputFormat.class);
    job.setMapOutputValueClass(outputValueClass);
    job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    FileInputFormat.addInputPaths(job, table);
    job.set(TableInputFormat.COLUMN_LIST, columns);
    try {
      addDependencyJars(job);
    } catch (IOException ioe) {
      throw new RuntimeException(ioe);
    }
  }
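
  // A minimal driver sketch ("access_logs", the column list, and MyDriver are
  // hypothetical, not part of this class). The output key/value classes must
  // match what the mapper emits; IdentityTableMap, for instance, emits the row
  // key and the full Result:
  //
  //   JobConf job = new JobConf(new HBaseConfiguration(), MyDriver.class);
  //   TableMapReduceUtil.initTableMapJob("access_logs", "info:url info:user",
  //       IdentityTableMap.class, ImmutableBytesWritable.class,
  //       org.apache.hadoop.hbase.client.Result.class, job);
  //   JobClient.runJob(job);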

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job)
  throws IOException {
    initTableReduceJob(table, reducer, job, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   *   the default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
  throws IOException {
    job.setOutputFormat(TableOutputFormat.class);
    job.setReducerClass(reducer);
    job.set(TableOutputFormat.OUTPUT_TABLE, table);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      HTable outputTable = new HTable(new HBaseConfiguration(job), table);
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }
    addDependencyJars(job);
  }
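
  // Sketch of wiring a reduce stage that writes Puts back to HBase; the table
  // name and MyTableReducer are hypothetical. Passing HRegionPartitioner.class
  // additionally caps the reduce count at the output table's region count, as
  // the branch above shows:
  //
  //   TableMapReduceUtil.initTableReduceJob("summaries", MyTableReducer.class,
  //       job, HRegionPartitioner.class);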

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions)
      job.setNumReduceTasks(regions);
  }

  /**
   * Ensures that the given number of map tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumMapTasks() > regions)
      job.setNumMapTasks(regions);
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of map tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumMapTasks(regions);
  }
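
  // The two limit* helpers above only ever lower an existing task count to the
  // region count, while the two set* helpers overwrite it unconditionally; for
  // example (with a hypothetical table name):
  //
  //   TableMapReduceUtil.limitNumReduceTasks("summaries", job); // cap only
  //   TableMapReduceUtil.setNumReduceTasks("summaries", job);   // force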

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster mapreduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
   * @param job  The current job configuration to adjust.
   * @param batchSize  The number of rows to return in batch with each scanner
   *   iteration.
   */
  public static void setScannerCaching(JobConf job, int batchSize) {
    job.setInt("hbase.client.scanner.caching", batchSize);
  }
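
  // For example, fetching 500 rows per scanner round trip trades mapper heap
  // for fewer RPCs; 500 is an illustrative value, not a recommendation made by
  // this class:
  //
  //   TableMapReduceUtil.setScannerCaching(job, 500);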

  /**
   * Adds the HBase, ZooKeeper and Guava jars, plus the jars containing the
   * job's configured input/output formats, key/value classes, partitioner and
   * combiner, to the job via the distributed cache.
   *
   * @param job  The current job configuration to adjust.
   * @throws IOException When adding the jars fails.
   * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars
   */
  public static void addDependencyJars(JobConf job) throws IOException {
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
      job,
      org.apache.zookeeper.ZooKeeper.class,
      com.google.common.base.Function.class,
      job.getMapOutputKeyClass(),
      job.getMapOutputValueClass(),
      job.getOutputKeyClass(),
      job.getOutputValueClass(),
      job.getPartitionerClass(),
      job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
      job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
      job.getCombinerClass());
  }
}