1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapred;
21  
22  import java.io.File;
23  import java.io.IOException;
24  import java.util.Map;
25  import java.util.NavigableMap;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.fs.FileUtil;
30  import org.apache.hadoop.hbase.HBaseTestingUtility;
31  import org.apache.hadoop.hbase.HColumnDescriptor;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HTableDescriptor;
34  import org.apache.hadoop.hbase.KeyValue;
35  import org.apache.hadoop.hbase.client.HBaseAdmin;
36  import org.apache.hadoop.hbase.client.HTable;
37  import org.apache.hadoop.hbase.client.Put;
38  import org.apache.hadoop.hbase.client.Result;
39  import org.apache.hadoop.hbase.client.ResultScanner;
40  import org.apache.hadoop.hbase.client.Scan;
41  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
42  import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
43  import org.apache.hadoop.hbase.util.Bytes;
44  import org.apache.hadoop.mapred.JobClient;
45  import org.apache.hadoop.mapred.JobConf;
46  import org.apache.hadoop.mapred.MapReduceBase;
47  import org.apache.hadoop.mapred.OutputCollector;
48  import org.apache.hadoop.mapred.Reporter;
49  import org.junit.AfterClass;
50  import org.junit.BeforeClass;
51  import org.junit.Test;
52  
53  /**
54   * Test Map/Reduce job over HBase tables. The map/reduce process we're testing
55   * on our tables is simple - take every row in the table, reverse the value of
56   * a particular cell, and write it back to the table.
57   */
58  public class TestTableMapReduce {
59    private static final Log LOG =
60      LogFactory.getLog(TestTableMapReduce.class.getName());
61    private static final HBaseTestingUtility UTIL =
62      new HBaseTestingUtility();
63    static final String MULTI_REGION_TABLE_NAME = "mrtest";
64    static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
65    static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
66  
67    private static final byte [][] columns = new byte [][] {
68      INPUT_FAMILY,
69      OUTPUT_FAMILY
70    };
71  
72    @BeforeClass
73    public static void beforeClass() throws Exception {
74      HTableDescriptor desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
75      desc.addFamily(new HColumnDescriptor(INPUT_FAMILY));
76      desc.addFamily(new HColumnDescriptor(OUTPUT_FAMILY));
77      UTIL.startMiniCluster();
78      HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
79      admin.createTable(desc, HBaseTestingUtility.KEYS);
80      UTIL.startMiniMapReduceCluster();
81    }
82  
83    @AfterClass
84    public static void afterClass() throws Exception {
85      UTIL.shutdownMiniMapReduceCluster();
86      UTIL.shutdownMiniCluster();
87    }
88  
89    /**
90     * Pass the given key and processed record reduce
91     */
92    public static class ProcessContentsMapper
93    extends MapReduceBase
94    implements TableMap<ImmutableBytesWritable, Put> {
95      /**
96       * Pass the key, and reversed value to reduce
97       * @param key
98       * @param value
99       * @param output
100      * @param reporter
101      * @throws IOException
102      */
103     public void map(ImmutableBytesWritable key, Result value,
104       OutputCollector<ImmutableBytesWritable, Put> output,
105       Reporter reporter)
106     throws IOException {
107       if (value.size() != 1) {
108         throw new IOException("There should only be one input column");
109       }
110       Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>
111         cf = value.getMap();
112       if(!cf.containsKey(INPUT_FAMILY)) {
113         throw new IOException("Wrong input columns. Missing: '" +
114           Bytes.toString(INPUT_FAMILY) + "'.");
115       }
116 
117       // Get the original value and reverse it
118 
119       String originalValue = new String(value.getValue(INPUT_FAMILY, null),
120         HConstants.UTF8_ENCODING);
121       StringBuilder newValue = new StringBuilder(originalValue);
122       newValue.reverse();
123 
124       // Now set the value to be collected
125 
126       Put outval = new Put(key.get());
127       outval.add(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
128       output.collect(key, outval);
129     }
130   }
131 
132   /**
133    * Test a map/reduce against a multi-region table
134    * @throws IOException
135    */
136   @Test
137   public void testMultiRegionTable() throws IOException {
138     runTestOnTable(new HTable(UTIL.getConfiguration(), MULTI_REGION_TABLE_NAME));
139   }
140 
141   private void runTestOnTable(HTable table) throws IOException {
142     JobConf jobConf = null;
143     try {
144       LOG.info("Before map/reduce startup");
145       jobConf = new JobConf(UTIL.getConfiguration(), TestTableMapReduce.class);
146       jobConf.setJobName("process column contents");
147       jobConf.setNumReduceTasks(1);
148       TableMapReduceUtil.initTableMapJob(Bytes.toString(table.getTableName()),
149         Bytes.toString(INPUT_FAMILY), ProcessContentsMapper.class,
150         ImmutableBytesWritable.class, Put.class, jobConf);
151       TableMapReduceUtil.initTableReduceJob(Bytes.toString(table.getTableName()),
152         IdentityTableReduce.class, jobConf);
153 
154       LOG.info("Started " + Bytes.toString(table.getTableName()));
155       JobClient.runJob(jobConf);
156       LOG.info("After map/reduce completion");
157 
158       // verify map-reduce results
159       verify(Bytes.toString(table.getTableName()));
160     } finally {
161       if (jobConf != null) {
162         FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
163       }
164     }
165   }
166 
167   private void verify(String tableName) throws IOException {
168     HTable table = new HTable(UTIL.getConfiguration(), tableName);
169     boolean verified = false;
170     long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
171     int numRetries = UTIL.getConfiguration().getInt("hbase.client.retries.number", 5);
172     for (int i = 0; i < numRetries; i++) {
173       try {
174         LOG.info("Verification attempt #" + i);
175         verifyAttempt(table);
176         verified = true;
177         break;
178       } catch (NullPointerException e) {
179         // If here, a cell was empty.  Presume its because updates came in
180         // after the scanner had been opened.  Wait a while and retry.
181         LOG.debug("Verification attempt failed: " + e.getMessage());
182       }
183       try {
184         Thread.sleep(pause);
185       } catch (InterruptedException e) {
186         // continue
187       }
188     }
189     org.junit.Assert.assertTrue(verified);
190   }
191 
192   /**
193    * Looks at every value of the mapreduce output and verifies that indeed
194    * the values have been reversed.
195    * @param table Table to scan.
196    * @throws IOException
197    * @throws NullPointerException if we failed to find a cell value
198    */
199   private void verifyAttempt(final HTable table) throws IOException, NullPointerException {
200     Scan scan = new Scan();
201     TableInputFormat.addColumns(scan, columns);
202     ResultScanner scanner = table.getScanner(scan);
203     try {
204       for (Result r : scanner) {
205         if (LOG.isDebugEnabled()) {
206           if (r.size() > 2 ) {
207             throw new IOException("Too many results, expected 2 got " +
208               r.size());
209           }
210         }
211         byte[] firstValue = null;
212         byte[] secondValue = null;
213         int count = 0;
214          for(KeyValue kv : r.list()) {
215           if (count == 0) {
216             firstValue = kv.getValue();
217           }
218           if (count == 1) {
219             secondValue = kv.getValue();
220           }
221           count++;
222           if (count == 2) {
223             break;
224           }
225         }
226 
227 
228         String first = "";
229         if (firstValue == null) {
230           throw new NullPointerException(Bytes.toString(r.getRow()) +
231             ": first value is null");
232         }
233         first = new String(firstValue, HConstants.UTF8_ENCODING);
234 
235         String second = "";
236         if (secondValue == null) {
237           throw new NullPointerException(Bytes.toString(r.getRow()) +
238             ": second value is null");
239         }
240         byte[] secondReversed = new byte[secondValue.length];
241         for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
242           secondReversed[i] = secondValue[j];
243         }
244         second = new String(secondReversed, HConstants.UTF8_ENCODING);
245 
246         if (first.compareTo(second) != 0) {
247           if (LOG.isDebugEnabled()) {
248             LOG.debug("second key is not the reverse of first. row=" +
249                 r.getRow() + ", first value=" + first + ", second value=" +
250                 second);
251           }
252           org.junit.Assert.fail();
253         }
254       }
255     } finally {
256       scanner.close();
257     }
258   }
259 }