/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
 * while the output value <u>must</u> be either a {@link Put} or a
 * {@link Delete} instance.
 *
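 * <p>A minimal job setup might look like the sketch below (the table name
 * {@code "mytable"} is illustrative, not part of this class):
 * <pre>
 * Configuration conf = HBaseConfiguration.create();
 * conf.set(TableOutputFormat.OUTPUT_TABLE, "mytable");
 * Job job = new Job(conf, "write to hbase");
 * job.setOutputFormatClass(TableOutputFormat.class);
 * job.setOutputValueClass(Put.class);
 * </pre>
 * In practice {@link TableMapReduceUtil#initTableReducerJob} performs this
 * wiring for you.
 *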
 * @param <KEY>  The type of the key. Ignored in this class.
 */
public class TableOutputFormat<KEY> extends OutputFormat<KEY, Writable>
implements Configurable {

  private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);

  /** Job parameter that specifies the output table. */
  public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";

  /**
   * Optional job parameter to specify a peer cluster.
   * Used to specify a remote cluster when copying between HBase clusters (the
   * source cluster is picked up from <code>hbase-site.xml</code>).
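   * <p>The value is a ZooKeeper cluster key; for example (the host names
   * shown are illustrative):
   * <pre>
   * conf.set(TableOutputFormat.QUORUM_ADDRESS,
   *     "zk1.example.com,zk2.example.com:2181:/hbase");
   * </pre>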
   * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
   */
  public static final String QUORUM_ADDRESS = "hbase.mapred.output.quorum";

  /** Optional specification of the region server class name of the peer cluster. */
  public static final String
      REGION_SERVER_CLASS = "hbase.mapred.output.rs.class";
  /** Optional specification of the region server implementation name of the peer cluster. */
  public static final String
      REGION_SERVER_IMPL = "hbase.mapred.output.rs.impl";

  /** The configuration. */
  private Configuration conf = null;

  /** The table to write to, created in {@link #setConf(Configuration)}. */
  private HTable table;

  /**
   * Writes the reducer output to an HBase table.
   *
   * @param <KEY>  The type of the key.
   */
  protected static class TableRecordWriter<KEY>
  extends RecordWriter<KEY, Writable> {

    /** The table to write to. */
    private HTable table;

    /**
     * Instantiates a TableRecordWriter with an HBase client for writing.
     *
     * @param table  The table to write to.
     */
    public TableRecordWriter(HTable table) {
      this.table = table;
    }

    /**
     * Closes the writer, in this case flushing any pending table commits.
     *
     * @param context  The context.
     * @throws IOException When closing the writer fails.
     * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext)
     */
    @Override
    public void close(TaskAttemptContext context)
    throws IOException {
      table.close();
    }

    /**
     * Writes a key/value pair into the table.
     *
     * @param key  The key.
     * @param value  The value.
     * @throws IOException When writing fails.
     * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object)
     */
    @Override
    public void write(KEY key, Writable value)
    throws IOException {
      if (value instanceof Put) {
        this.table.put(new Put((Put) value));
      } else if (value instanceof Delete) {
        this.table.delete(new Delete((Delete) value));
      } else {
        throw new IOException("Pass a Delete or a Put");
      }
    }
  }

  /**
   * Creates a new record writer.
   *
   * @param context  The current task context.
   * @return The newly created writer instance.
   * @throws IOException When creating the writer fails.
   * @throws InterruptedException When the job is cancelled.
   * @see org.apache.hadoop.mapreduce.OutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public RecordWriter<KEY, Writable> getRecordWriter(
    TaskAttemptContext context)
  throws IOException, InterruptedException {
    return new TableRecordWriter<KEY>(this.table);
  }

  /**
   * Checks if the output target exists.
   *
   * @param context  The current context.
   * @throws IOException When the check fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.OutputFormat#checkOutputSpecs(org.apache.hadoop.mapreduce.JobContext)
   */
  @Override
  public void checkOutputSpecs(JobContext context) throws IOException,
      InterruptedException {
    // TODO Check if the table exists?
  }

  /**
   * Returns the output committer.
   *
   * @param context  The current context.
   * @return The committer.
   * @throws IOException When creating the committer fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.OutputFormat#getOutputCommitter(org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
  throws IOException, InterruptedException {
    return new TableOutputCommitter();
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

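  /**
   * Sets the configuration. Reads the output table name (and the optional
   * peer-cluster settings) from the given configuration and creates the
   * {@link HTable} instance the record writers will use.
   *
   * @param otherConf  The configuration to set.
   * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf.Configuration)
   */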
  @Override
  public void setConf(Configuration otherConf) {
    this.conf = HBaseConfiguration.create(otherConf);
    String tableName = this.conf.get(OUTPUT_TABLE);
    if (tableName == null || tableName.isEmpty()) {
      throw new IllegalArgumentException("Must specify table name");
    }
    String address = this.conf.get(QUORUM_ADDRESS);
    String serverClass = this.conf.get(REGION_SERVER_CLASS);
    String serverImpl = this.conf.get(REGION_SERVER_IMPL);
    try {
      // If a peer cluster is specified, point the ZooKeeper quorum settings
      // at it so the output goes to the remote cluster.
      if (address != null) {
        ZKUtil.applyClusterKeyToConf(this.conf, address);
      }
      // The class and implementation names are expected to be set together.
      if (serverClass != null && serverImpl != null) {
        this.conf.set(HConstants.REGION_SERVER_CLASS, serverClass);
        this.conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
      }
      this.table = new HTable(this.conf, tableName);
      // Buffer writes client-side; they are flushed when the writer closes.
      this.table.setAutoFlush(false);
      LOG.info("Created table instance for " + tableName);
    } catch (IOException e) {
      LOG.error(e);
      throw new RuntimeException(e);
    }
  }
}