View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapreduce;
21  
22  import java.io.IOException;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.DoNotRetryIOException;
27  import org.apache.hadoop.hbase.client.HTable;
28  import org.apache.hadoop.hbase.client.Result;
29  import org.apache.hadoop.hbase.client.ResultScanner;
30  import org.apache.hadoop.hbase.client.Scan;
31  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
32  import org.apache.hadoop.util.StringUtils;
33  
34  /**
35   * Iterate over an HBase table data, return (ImmutableBytesWritable, Result)
36   * pairs.
37   */
38  public class TableRecordReaderImpl {
39  
40  
41    static final Log LOG = LogFactory.getLog(TableRecordReader.class);
42  
43    private ResultScanner scanner = null;
44    private Scan scan = null;
45    private HTable htable = null;
46    private byte[] lastSuccessfulRow = null;
47    private ImmutableBytesWritable key = null;
48    private Result value = null;
49  
50    /**
51     * Restart from survivable exceptions by creating a new scanner.
52     *
53     * @param firstRow  The first row to start at.
54     * @throws IOException When restarting fails.
55     */
56    public void restart(byte[] firstRow) throws IOException {
57      Scan newScan = new Scan(scan);
58      newScan.setStartRow(firstRow);
59      this.scanner = this.htable.getScanner(newScan);
60    }
61  
62    /**
63     * Build the scanner. Not done in constructor to allow for extension.
64     *
65     * @throws IOException When restarting the scan fails.
66     */
67    public void init() throws IOException {
68      restart(scan.getStartRow());
69    }
70  
71    /**
72     * Sets the HBase table.
73     *
74     * @param htable  The {@link HTable} to scan.
75     */
76    public void setHTable(HTable htable) {
77      this.htable = htable;
78    }
79  
80    /**
81     * Sets the scan defining the actual details like columns etc.
82     *
83     * @param scan  The scan to set.
84     */
85    public void setScan(Scan scan) {
86      this.scan = scan;
87    }
88  
89    /**
90     * Closes the split.
91     *
92     *
93     */
94    public void close() {
95      this.scanner.close();
96    }
97  
98    /**
99     * Returns the current key.
100    *
101    * @return The current key.
102    * @throws IOException
103    * @throws InterruptedException When the job is aborted.
104    */
105   public ImmutableBytesWritable getCurrentKey() throws IOException,
106       InterruptedException {
107     return key;
108   }
109 
110   /**
111    * Returns the current value.
112    *
113    * @return The current value.
114    * @throws IOException When the value is faulty.
115    * @throws InterruptedException When the job is aborted.
116    */
117   public Result getCurrentValue() throws IOException, InterruptedException {
118     return value;
119   }
120 
121 
122   /**
123    * Positions the record reader to the next record.
124    *
125    * @return <code>true</code> if there was another record.
126    * @throws IOException When reading the record failed.
127    * @throws InterruptedException When the job was aborted.
128    */
129   public boolean nextKeyValue() throws IOException, InterruptedException {
130     if (key == null) key = new ImmutableBytesWritable();
131     if (value == null) value = new Result();
132     try {
133       value = this.scanner.next();
134     } catch (DoNotRetryIOException e) {
135       throw e;
136     } catch (IOException e) {
137       LOG.debug("recovered from " + StringUtils.stringifyException(e));
138       if (lastSuccessfulRow == null) {
139         LOG.warn("We are restarting the first next() invocation," +
140             " if your mapper's restarted a few other times like this" +
141             " then you should consider killing this job and investigate" +
142             " why it's taking so long.");
143       }
144       if (lastSuccessfulRow == null) {
145         restart(scan.getStartRow());
146       } else {
147         restart(lastSuccessfulRow);
148         scanner.next();    // skip presumed already mapped row
149       }
150       value = scanner.next();
151     }
152     if (value != null && value.size() > 0) {
153       key.set(value.getRow());
154       lastSuccessfulRow = key.get();
155       return true;
156     }
157     return false;
158   }
159 
160   /**
161    * The current progress of the record reader through its data.
162    *
163    * @return A number between 0.0 and 1.0, the fraction of the data read.
164    */
165   public float getProgress() {
166     // Depends on the total number of tuples
167     return 0;
168   }
169 
170 }