1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.mapreduce;
21
22 import java.io.File;
23 import java.io.IOException;
24 import java.util.Map;
25 import java.util.NavigableMap;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.conf.Configuration;
30 import org.apache.hadoop.fs.FileUtil;
31 import org.apache.hadoop.fs.Path;
32 import org.apache.hadoop.hbase.HBaseTestingUtility;
33 import org.apache.hadoop.hbase.HColumnDescriptor;
34 import org.apache.hadoop.hbase.HConstants;
35 import org.apache.hadoop.hbase.HTableDescriptor;
36 import org.apache.hadoop.hbase.KeyValue;
37 import org.apache.hadoop.hbase.client.HBaseAdmin;
38 import org.apache.hadoop.hbase.client.HTable;
39 import org.apache.hadoop.hbase.client.Put;
40 import org.apache.hadoop.hbase.client.Result;
41 import org.apache.hadoop.hbase.client.ResultScanner;
42 import org.apache.hadoop.hbase.client.Scan;
43 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
44 import org.apache.hadoop.hbase.util.Bytes;
45 import org.apache.hadoop.mapreduce.Job;
46 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
47 import org.junit.AfterClass;
48 import org.junit.BeforeClass;
49 import org.junit.Test;
50 import static org.junit.Assert.fail;
51 import static org.junit.Assert.assertTrue;
52 import static org.junit.Assert.assertFalse;
53
54
55
56
57
58
59 public class TestTableMapReduce {
60 private static final Log LOG = LogFactory.getLog(TestTableMapReduce.class);
61 private static final HBaseTestingUtility UTIL =
62 new HBaseTestingUtility();
63 static final String MULTI_REGION_TABLE_NAME = "mrtest";
64 static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
65 static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
66
67 @BeforeClass
68 public static void beforeClass() throws Exception {
69 HTableDescriptor desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
70 desc.addFamily(new HColumnDescriptor(INPUT_FAMILY));
71 desc.addFamily(new HColumnDescriptor(OUTPUT_FAMILY));
72 UTIL.startMiniCluster();
73 HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
74 admin.createTable(desc, HBaseTestingUtility.KEYS);
75 UTIL.startMiniMapReduceCluster();
76 }
77
78 @AfterClass
79 public static void afterClass() throws Exception {
80 UTIL.shutdownMiniMapReduceCluster();
81 UTIL.shutdownMiniCluster();
82 }
83
84
85
86
87 public static class ProcessContentsMapper
88 extends TableMapper<ImmutableBytesWritable, Put> {
89
90
91
92
93
94
95
96
97
98 public void map(ImmutableBytesWritable key, Result value,
99 Context context)
100 throws IOException, InterruptedException {
101 if (value.size() != 1) {
102 throw new IOException("There should only be one input column");
103 }
104 Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
105 cf = value.getMap();
106 if(!cf.containsKey(INPUT_FAMILY)) {
107 throw new IOException("Wrong input columns. Missing: '" +
108 Bytes.toString(INPUT_FAMILY) + "'.");
109 }
110
111
112 String originalValue = new String(value.getValue(INPUT_FAMILY, null),
113 HConstants.UTF8_ENCODING);
114 StringBuilder newValue = new StringBuilder(originalValue);
115 newValue.reverse();
116
117 Put outval = new Put(key.get());
118 outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
119 context.write(key, outval);
120 }
121 }
122
123
124
125
126
127
128
129 @Test
130 public void testMultiRegionTable()
131 throws IOException, InterruptedException, ClassNotFoundException {
132 runTestOnTable(new HTable(new Configuration(UTIL.getConfiguration()),
133 MULTI_REGION_TABLE_NAME));
134 }
135
136 private void runTestOnTable(HTable table)
137 throws IOException, InterruptedException, ClassNotFoundException {
138 Job job = null;
139 try {
140 LOG.info("Before map/reduce startup");
141 job = new Job(table.getConfiguration(), "process column contents");
142 job.setNumReduceTasks(1);
143 Scan scan = new Scan();
144 scan.addFamily(INPUT_FAMILY);
145 TableMapReduceUtil.initTableMapperJob(
146 Bytes.toString(table.getTableName()), scan,
147 ProcessContentsMapper.class, ImmutableBytesWritable.class,
148 Put.class, job);
149 TableMapReduceUtil.initTableReducerJob(
150 Bytes.toString(table.getTableName()),
151 IdentityTableReducer.class, job);
152 FileOutputFormat.setOutputPath(job, new Path("test"));
153 LOG.info("Started " + Bytes.toString(table.getTableName()));
154 job.waitForCompletion(true);
155 LOG.info("After map/reduce completion");
156
157
158 verify(Bytes.toString(table.getTableName()));
159 } finally {
160 table.close();
161 if (job != null) {
162 FileUtil.fullyDelete(
163 new File(job.getConfiguration().get("hadoop.tmp.dir")));
164 }
165 }
166 }
167
168 private void verify(String tableName) throws IOException {
169 HTable table = new HTable(new Configuration(UTIL.getConfiguration()), tableName);
170 boolean verified = false;
171 long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
172 int numRetries = UTIL.getConfiguration().getInt("hbase.client.retries.number", 5);
173 for (int i = 0; i < numRetries; i++) {
174 try {
175 LOG.info("Verification attempt #" + i);
176 verifyAttempt(table);
177 verified = true;
178 break;
179 } catch (NullPointerException e) {
180
181
182 LOG.debug("Verification attempt failed: " + e.getMessage());
183 }
184 try {
185 Thread.sleep(pause);
186 } catch (InterruptedException e) {
187
188 }
189 }
190 assertTrue(verified);
191 table.close();
192 }
193
194
195
196
197
198
199
200
201
202 private void verifyAttempt(final HTable table) throws IOException, NullPointerException {
203 Scan scan = new Scan();
204 scan.addFamily(INPUT_FAMILY);
205 scan.addFamily(OUTPUT_FAMILY);
206 ResultScanner scanner = table.getScanner(scan);
207 try {
208 for (Result r : scanner) {
209 if (LOG.isDebugEnabled()) {
210 if (r.size() > 2 ) {
211 throw new IOException("Too many results, expected 2 got " +
212 r.size());
213 }
214 }
215 byte[] firstValue = null;
216 byte[] secondValue = null;
217 int count = 0;
218 for(KeyValue kv : r.list()) {
219 if (count == 0) {
220 firstValue = kv.getValue();
221 }
222 if (count == 1) {
223 secondValue = kv.getValue();
224 }
225 count++;
226 if (count == 2) {
227 break;
228 }
229 }
230
231 String first = "";
232 if (firstValue == null) {
233 throw new NullPointerException(Bytes.toString(r.getRow()) +
234 ": first value is null");
235 }
236 first = new String(firstValue, HConstants.UTF8_ENCODING);
237
238 String second = "";
239 if (secondValue == null) {
240 throw new NullPointerException(Bytes.toString(r.getRow()) +
241 ": second value is null");
242 }
243 byte[] secondReversed = new byte[secondValue.length];
244 for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
245 secondReversed[i] = secondValue[j];
246 }
247 second = new String(secondReversed, HConstants.UTF8_ENCODING);
248
249 if (first.compareTo(second) != 0) {
250 if (LOG.isDebugEnabled()) {
251 LOG.debug("second key is not the reverse of first. row=" +
252 Bytes.toStringBinary(r.getRow()) + ", first value=" + first +
253 ", second value=" + second);
254 }
255 fail();
256 }
257 }
258 } finally {
259 scanner.close();
260 }
261 }
262
263
264
265
266 public void testAddDependencyJars() throws Exception {
267 Job job = new Job();
268 TableMapReduceUtil.addDependencyJars(job);
269 String tmpjars = job.getConfiguration().get("tmpjars");
270
271 System.err.println("tmpjars: " + tmpjars);
272 assertTrue(tmpjars.contains("zookeeper"));
273 assertFalse(tmpjars.contains("guava"));
274
275 System.err.println("appending guava jar");
276 TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
277 com.google.common.base.Function.class);
278 tmpjars = job.getConfiguration().get("tmpjars");
279 assertTrue(tmpjars.contains("guava"));
280 }
281 }