1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.mapred;
21
22 import java.io.File;
23 import java.io.IOException;
24 import java.util.Map;
25 import java.util.NavigableMap;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.fs.FileUtil;
30 import org.apache.hadoop.hbase.HBaseTestingUtility;
31 import org.apache.hadoop.hbase.HColumnDescriptor;
32 import org.apache.hadoop.hbase.HConstants;
33 import org.apache.hadoop.hbase.HTableDescriptor;
34 import org.apache.hadoop.hbase.KeyValue;
35 import org.apache.hadoop.hbase.client.HBaseAdmin;
36 import org.apache.hadoop.hbase.client.HTable;
37 import org.apache.hadoop.hbase.client.Put;
38 import org.apache.hadoop.hbase.client.Result;
39 import org.apache.hadoop.hbase.client.ResultScanner;
40 import org.apache.hadoop.hbase.client.Scan;
41 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
42 import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
43 import org.apache.hadoop.hbase.util.Bytes;
44 import org.apache.hadoop.mapred.JobClient;
45 import org.apache.hadoop.mapred.JobConf;
46 import org.apache.hadoop.mapred.MapReduceBase;
47 import org.apache.hadoop.mapred.OutputCollector;
48 import org.apache.hadoop.mapred.Reporter;
49 import org.junit.AfterClass;
50 import org.junit.BeforeClass;
51 import org.junit.Test;
52
53
54
55
56
57
58 public class TestTableMapReduce {
59 private static final Log LOG =
60 LogFactory.getLog(TestTableMapReduce.class.getName());
61 private static final HBaseTestingUtility UTIL =
62 new HBaseTestingUtility();
63 static final String MULTI_REGION_TABLE_NAME = "mrtest";
64 static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
65 static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
66
67 private static final byte [][] columns = new byte [][] {
68 INPUT_FAMILY,
69 OUTPUT_FAMILY
70 };
71
72 @BeforeClass
73 public static void beforeClass() throws Exception {
74 HTableDescriptor desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
75 desc.addFamily(new HColumnDescriptor(INPUT_FAMILY));
76 desc.addFamily(new HColumnDescriptor(OUTPUT_FAMILY));
77 UTIL.startMiniCluster();
78 HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
79 admin.createTable(desc, HBaseTestingUtility.KEYS);
80 UTIL.startMiniMapReduceCluster();
81 }
82
83 @AfterClass
84 public static void afterClass() throws Exception {
85 UTIL.shutdownMiniMapReduceCluster();
86 UTIL.shutdownMiniCluster();
87 }
88
89
90
91
92 public static class ProcessContentsMapper
93 extends MapReduceBase
94 implements TableMap<ImmutableBytesWritable, Put> {
95
96
97
98
99
100
101
102
103 public void map(ImmutableBytesWritable key, Result value,
104 OutputCollector<ImmutableBytesWritable, Put> output,
105 Reporter reporter)
106 throws IOException {
107 if (value.size() != 1) {
108 throw new IOException("There should only be one input column");
109 }
110 Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
111 cf = value.getMap();
112 if(!cf.containsKey(INPUT_FAMILY)) {
113 throw new IOException("Wrong input columns. Missing: '" +
114 Bytes.toString(INPUT_FAMILY) + "'.");
115 }
116
117
118
119 String originalValue = new String(value.getValue(INPUT_FAMILY, null),
120 HConstants.UTF8_ENCODING);
121 StringBuilder newValue = new StringBuilder(originalValue);
122 newValue.reverse();
123
124
125
126 Put outval = new Put(key.get());
127 outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
128 output.collect(key, outval);
129 }
130 }
131
132
133
134
135
136 @Test
137 public void testMultiRegionTable() throws IOException {
138 runTestOnTable(new HTable(UTIL.getConfiguration(), MULTI_REGION_TABLE_NAME));
139 }
140
141 private void runTestOnTable(HTable table) throws IOException {
142 JobConf jobConf = null;
143 try {
144 LOG.info("Before map/reduce startup");
145 jobConf = new JobConf(UTIL.getConfiguration(), TestTableMapReduce.class);
146 jobConf.setJobName("process column contents");
147 jobConf.setNumReduceTasks(1);
148 TableMapReduceUtil.initTableMapJob(Bytes.toString(table.getTableName()),
149 Bytes.toString(INPUT_FAMILY), ProcessContentsMapper.class,
150 ImmutableBytesWritable.class, Put.class, jobConf);
151 TableMapReduceUtil.initTableReduceJob(Bytes.toString(table.getTableName()),
152 IdentityTableReduce.class, jobConf);
153
154 LOG.info("Started " + Bytes.toString(table.getTableName()));
155 JobClient.runJob(jobConf);
156 LOG.info("After map/reduce completion");
157
158
159 verify(Bytes.toString(table.getTableName()));
160 } finally {
161 if (jobConf != null) {
162 FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
163 }
164 }
165 }
166
167 private void verify(String tableName) throws IOException {
168 HTable table = new HTable(UTIL.getConfiguration(), tableName);
169 boolean verified = false;
170 long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
171 int numRetries = UTIL.getConfiguration().getInt("hbase.client.retries.number", 5);
172 for (int i = 0; i < numRetries; i++) {
173 try {
174 LOG.info("Verification attempt #" + i);
175 verifyAttempt(table);
176 verified = true;
177 break;
178 } catch (NullPointerException e) {
179
180
181 LOG.debug("Verification attempt failed: " + e.getMessage());
182 }
183 try {
184 Thread.sleep(pause);
185 } catch (InterruptedException e) {
186
187 }
188 }
189 org.junit.Assert.assertTrue(verified);
190 }
191
192
193
194
195
196
197
198
199 private void verifyAttempt(final HTable table) throws IOException, NullPointerException {
200 Scan scan = new Scan();
201 TableInputFormat.addColumns(scan, columns);
202 ResultScanner scanner = table.getScanner(scan);
203 try {
204 for (Result r : scanner) {
205 if (LOG.isDebugEnabled()) {
206 if (r.size() > 2 ) {
207 throw new IOException("Too many results, expected 2 got " +
208 r.size());
209 }
210 }
211 byte[] firstValue = null;
212 byte[] secondValue = null;
213 int count = 0;
214 for(KeyValue kv : r.list()) {
215 if (count == 0) {
216 firstValue = kv.getValue();
217 }
218 if (count == 1) {
219 secondValue = kv.getValue();
220 }
221 count++;
222 if (count == 2) {
223 break;
224 }
225 }
226
227
228 String first = "";
229 if (firstValue == null) {
230 throw new NullPointerException(Bytes.toString(r.getRow()) +
231 ": first value is null");
232 }
233 first = new String(firstValue, HConstants.UTF8_ENCODING);
234
235 String second = "";
236 if (secondValue == null) {
237 throw new NullPointerException(Bytes.toString(r.getRow()) +
238 ": second value is null");
239 }
240 byte[] secondReversed = new byte[secondValue.length];
241 for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
242 secondReversed[i] = secondValue[j];
243 }
244 second = new String(secondReversed, HConstants.UTF8_ENCODING);
245
246 if (first.compareTo(second) != 0) {
247 if (LOG.isDebugEnabled()) {
248 LOG.debug("second key is not the reverse of first. row=" +
249 r.getRow() + ", first value=" + first + ", second value=" +
250 second);
251 }
252 org.junit.Assert.fail();
253 }
254 }
255 } finally {
256 scanner.close();
257 }
258 }
259 }