/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
/**
 * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
 * while the output value <u>must</u> be either a {@link Put} or a
 * {@link Delete} instance.
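 * <p>A minimal job wiring sketch (the job name and output table
 * <code>"mytable"</code> are illustrative; in practice
 * {@link TableMapReduceUtil#initTableReducerJob} does this setup for you):
 * <pre>
 * Job job = new Job(HBaseConfiguration.create(), "example-hbase-output");
 * job.setOutputFormatClass(TableOutputFormat.class);
 * job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, "mytable");
 * job.setOutputValueClass(Writable.class);
 * </pre>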
 *
 * @param <KEY>  The type of the key. Ignored in this class.
 */
public class TableOutputFormat<KEY> extends OutputFormat<KEY, Writable>
implements Configurable {

  private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);

  /** Job parameter that specifies the output table. */
  public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";

  /**
   * Optional job parameter to specify a peer cluster.
   * Used to specify the remote cluster to write to when copying between
   * HBase clusters (the source cluster is picked up from
   * <code>hbase-site.xml</code>).
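   * <p>The value is a ZooKeeper cluster key of the form
   * <code>hbase.zookeeper.quorum:hbase.zookeeper.property.clientPort:zookeeper.znode.parent</code>.
   * For example (host names and port here are illustrative):
   * <pre>
   * conf.set(TableOutputFormat.QUORUM_ADDRESS,
   *     "zk1.example.com,zk2.example.com,zk3.example.com:2181:/hbase");
   * </pre>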
   * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
   */
  public static final String QUORUM_ADDRESS = "hbase.mapred.output.quorum";

  /** Optional specification of the rs class name of the peer cluster. */
  public static final String
      REGION_SERVER_CLASS = "hbase.mapred.output.rs.class";
  /** Optional specification of the rs impl name of the peer cluster. */
  public static final String
      REGION_SERVER_IMPL = "hbase.mapred.output.rs.impl";

  /** The configuration. */
  private Configuration conf = null;

  /** The HBase table to write to. */
  private HTable table;

  /**
   * Writes the reducer output to an HBase table.
   *
   * @param <KEY>  The type of the key.
   */
  protected static class TableRecordWriter<KEY>
  extends RecordWriter<KEY, Writable> {

    /** The table to write to. */
    private HTable table;

    /**
     * Instantiates a TableRecordWriter for the given {@link HTable}.
     *
     * @param table  The table to write to.
     */
    public TableRecordWriter(HTable table) {
      this.table = table;
    }

    /**
     * Closes the writer; this flushes any buffered commits to the table.
     *
     * @param context  The context.
     * @throws IOException When closing the writer fails.
     * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext)
     */
    @Override
    public void close(TaskAttemptContext context)
    throws IOException {
      // HTable.close() flushes the write buffer before releasing resources.
      table.close();
    }

    /**
     * Writes a key/value pair into the table.
     *
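     * <p>A sketch of what a reducer typically hands to this writer (row key,
     * column family and qualifier are illustrative):
     * <pre>
     * Put put = new Put(Bytes.toBytes("row1"));
     * put.add(Bytes.toBytes("cf"), Bytes.toBytes("qual"), Bytes.toBytes("value"));
     * context.write(key, put);   // the framework routes this to write()
     * </pre>
     *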
     * @param key  The key.
     * @param value  The value.
     * @throws IOException When writing fails.
     * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object)
     */
    @Override
    public void write(KEY key, Writable value)
    throws IOException {
      // Copy the mutation: MapReduce may reuse the same Writable instance
      // across calls.
      if (value instanceof Put) {
        this.table.put(new Put((Put) value));
      } else if (value instanceof Delete) {
        this.table.delete(new Delete((Delete) value));
      } else {
        throw new IOException("Pass a Delete or a Put");
      }
    }
  }

  /**
   * Creates a new record writer.
   *
   * @param context  The current task context.
   * @return The newly created writer instance.
   * @throws IOException When creating the writer fails.
   * @throws InterruptedException When the job is cancelled.
   * @see org.apache.hadoop.mapreduce.OutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public RecordWriter<KEY, Writable> getRecordWriter(
      TaskAttemptContext context)
  throws IOException, InterruptedException {
    return new TableRecordWriter<KEY>(this.table);
  }

  /**
   * Checks if the output target exists.
   *
   * @param context  The current context.
   * @throws IOException When the check fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.OutputFormat#checkOutputSpecs(org.apache.hadoop.mapreduce.JobContext)
   */
  @Override
  public void checkOutputSpecs(JobContext context) throws IOException,
      InterruptedException {
    // TODO Check if the table exists?
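    // A possible shape for that check, left as a sketch rather than enabled
    // here (failing fast on a missing table is an assumption about desired
    // behavior, not what this class does today):
    //
    //   HBaseAdmin admin = new HBaseAdmin(this.conf);
    //   String tableName = this.conf.get(OUTPUT_TABLE);
    //   if (!admin.tableExists(tableName)) {
    //     throw new IOException("Output table '" + tableName + "' does not exist");
    //   }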

  }

  /**
   * Returns the output committer.
   *
   * @param context  The current context.
   * @return The committer.
   * @throws IOException When creating the committer fails.
   * @throws InterruptedException When the job is aborted.
   * @see org.apache.hadoop.mapreduce.OutputFormat#getOutputCommitter(org.apache.hadoop.mapreduce.TaskAttemptContext)
   */
  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
  throws IOException, InterruptedException {
    // TableOutputCommitter is a no-op: writes go straight to the table, so
    // there is nothing to commit or roll back per task.
    return new TableOutputCommitter();
  }

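  /**
   * Returns the current configuration.
   *
   * @return The current configuration.
   * @see org.apache.hadoop.conf.Configurable#getConf()
   */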
  @Override
  public Configuration getConf() {
    return conf;
  }
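  /**
   * Reads the job configuration, applies any peer-cluster overrides, and
   * creates the {@link HTable} instance to write to.
   *
   * <p>A sketch of pointing the output at a remote cluster (host name and
   * table name are illustrative):
   * <pre>
   * conf.set(TableOutputFormat.OUTPUT_TABLE, "mytable");
   * conf.set(TableOutputFormat.QUORUM_ADDRESS, "zk1.example.com:2181:/hbase");
   * </pre>
   *
   * @param otherConf  The configuration to take the settings from.
   * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf.Configuration)
   */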
  @Override
  public void setConf(Configuration otherConf) {
    this.conf = HBaseConfiguration.create(otherConf);
    String tableName = this.conf.get(OUTPUT_TABLE);
    if (tableName == null || tableName.length() <= 0) {
      throw new IllegalArgumentException("Must specify table name");
    }
    String address = this.conf.get(QUORUM_ADDRESS);
    String serverClass = this.conf.get(REGION_SERVER_CLASS);
    String serverImpl = this.conf.get(REGION_SERVER_IMPL);
    try {
      if (address != null) {
        ZKUtil.applyClusterKeyToConf(this.conf, address);
      }
      // Guard both values: the class/impl overrides only make sense as a
      // pair, and setting a null value into the Configuration would fail.
      if (serverClass != null && serverImpl != null) {
        this.conf.set(HConstants.REGION_SERVER_CLASS, serverClass);
        this.conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
      }
      this.table = new HTable(this.conf, tableName);
      // Buffer puts client-side; the buffer is flushed when the writer closes.
      this.table.setAutoFlush(false);
      LOG.info("Created table instance for " + tableName);
    } catch (IOException e) {
      LOG.error(e);
      throw new RuntimeException(e);
    }
  }
}