/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapred;

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * Utility for {@link TableMap} and {@link TableReduce}.
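 * <p>
 * A minimal driver sketch; <code>MyDriver</code>, <code>MyMap</code>,
 * <code>MyReduce</code>, and the table and column names below are
 * hypothetical stand-ins for your own classes and schema:
 * <pre>
 *   JobConf job = new JobConf(new HBaseConfiguration(), MyDriver.class);
 *   TableMapReduceUtil.initTableMapJob("mytable", "info:col1 info:col2",
 *     MyMap.class, ImmutableBytesWritable.class, Result.class, job);
 *   TableMapReduceUtil.initTableReduceJob("mytable", MyReduce.class, job);
 *   JobClient.runJob(job);
 * </pre>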
 */
@Deprecated
@SuppressWarnings("unchecked")
public class TableMapReduceUtil {

  /**
   * Use this before submitting a TableMap job. It will
   * appropriately set up the JobConf.
   *
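   * A minimal usage sketch; <code>MyMap</code> and the table and column
   * names are hypothetical:
   * <pre>
   *   TableMapReduceUtil.initTableMapJob("mytable", "info:col1 info:col2",
   *     MyMap.class, ImmutableBytesWritable.class, Result.class, job);
   * </pre>
   *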
   * @param table  The table name to read from.
   * @param columns  The columns to scan.
   * @param mapper  The mapper class to use.
   * @param outputKeyClass  The class of the output key.
   * @param outputValueClass  The class of the output value.
   * @param job  The current job configuration to adjust.
   */
  public static void initTableMapJob(String table, String columns,
    Class<? extends TableMap> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, JobConf job) {

    job.setInputFormat(TableInputFormat.class);
    job.setMapOutputValueClass(outputValueClass);
    job.setMapOutputKeyClass(outputKeyClass);
    job.setMapperClass(mapper);
    // TableInputFormat takes the table name from the job's input path.
    FileInputFormat.addInputPaths(job, table);
    job.set(TableInputFormat.COLUMN_LIST, columns);
    try {
      addDependencyJars(job);
    } catch (IOException ioe) {
      throw new RuntimeException(ioe);
    }
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
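   * A minimal usage sketch; <code>MyReduce</code> and the table name are
   * hypothetical:
   * <pre>
   *   TableMapReduceUtil.initTableReduceJob("mytable", MyReduce.class, job);
   * </pre>
   *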
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job)
  throws IOException {
    initTableReduceJob(table, reducer, job, null);
  }

  /**
   * Use this before submitting a TableReduce job. It will
   * appropriately set up the JobConf.
   *
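   * For example, to group reduce output by the region it will be written to
   * (names hypothetical):
   * <pre>
   *   TableMapReduceUtil.initTableReduceJob("mytable", MyReduce.class, job,
   *     HRegionPartitioner.class);
   * </pre>
   *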
   * @param table  The output table.
   * @param reducer  The reducer class to use.
   * @param job  The current job configuration to adjust.
   * @param partitioner  Partitioner to use. Pass <code>null</code> to use
   * the default partitioner.
   * @throws IOException When determining the region count fails.
   */
  public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
  throws IOException {
    job.setOutputFormat(TableOutputFormat.class);
    job.setReducerClass(reducer);
    job.set(TableOutputFormat.OUTPUT_TABLE, table);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    if (partitioner == HRegionPartitioner.class) {
      job.setPartitionerClass(HRegionPartitioner.class);
      // More reducers than regions would leave some reducers with no region
      // to write to, so cap the reduce count at the table's region count.
      HTable outputTable = new HTable(new HBaseConfiguration(job), table);
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } else if (partitioner != null) {
      job.setPartitionerClass(partitioner);
    }
    addDependencyJars(job);
  }

  /**
   * Ensures that the given number of reduce tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
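   * For example (table name hypothetical):
   * <pre>
   *   job.setNumReduceTasks(50);
   *   // Caps the reduce count at the table's region count, if smaller.
   *   TableMapReduceUtil.limitNumReduceTasks("mytable", job);
   * </pre>
   *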
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  }

  /**
   * Ensures that the given number of map tasks for the given job
   * configuration does not exceed the number of regions for the given table.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void limitNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumMapTasks() > regions) {
      job.setNumMapTasks(regions);
    }
  }

  /**
   * Sets the number of reduce tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumReduceTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
  }

  /**
   * Sets the number of map tasks for the given job configuration to the
   * number of regions the given table has.
   *
   * @param table  The table to get the region count for.
   * @param job  The current job configuration to adjust.
   * @throws IOException When retrieving the table details fails.
   */
  public static void setNumMapTasks(String table, JobConf job)
  throws IOException {
    HTable outputTable = new HTable(new HBaseConfiguration(job), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumMapTasks(regions);
  }

  /**
   * Sets the number of rows to return and cache with each scanner iteration.
   * Higher caching values will enable faster MapReduce jobs at the expense of
   * requiring more heap to contain the cached rows.
   *
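   * For example, to fetch 500 rows per scanner round trip (the value is
   * illustrative; tune it to your row size and available heap):
   * <pre>
   *   TableMapReduceUtil.setScannerCaching(job, 500);
   * </pre>
   *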
   * @param job The current job configuration to adjust.
   * @param batchSize The number of rows to return and cache with each
   * scanner iteration.
   */
  public static void setScannerCaching(JobConf job, int batchSize) {
    job.setInt("hbase.client.scanner.caching", batchSize);
  }

  /**
   * Adds the jars containing the job's dependency classes (ZooKeeper, Guava,
   * and the job's configured key, value, partitioner, input format, output
   * format and combiner classes) to the job's distributed cache.
   *
   * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(Job)
   */
  public static void addDependencyJars(JobConf job) throws IOException {
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
      job,
      org.apache.zookeeper.ZooKeeper.class,
      com.google.common.base.Function.class,
      job.getMapOutputKeyClass(),
      job.getMapOutputValueClass(),
      job.getOutputKeyClass(),
      job.getOutputValueClass(),
      job.getPartitionerClass(),
      job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
      job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
      job.getCombinerClass());
  }
}