/*
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.replication.regionserver;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.Stoppable;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

/**
 * This class is responsible for replicating the edits coming
 * from another cluster.
 * <p/>
 * This replication process currently waits for the edits to be applied
 * before the method returns. This means that the replication of edits
 * is synchronous (after reading from HLogs in ReplicationSource) and that a
 * single region server cannot receive edits from two sources at the same time.
 * <p/>
 * This class uses the native HBase client in order to replicate entries.
 * <p/>
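 * A minimal usage sketch; the Configuration, the Stoppable implementation and
 * the HLog.Entry array are assumed to be supplied by the surrounding region
 * server code and are not shown here:
 * <pre>
 *   ReplicationSink sink = new ReplicationSink(conf, stopper);
 *   // entries were read from another cluster's HLogs by ReplicationSource
 *   sink.replicateEntries(entries);
 * </pre>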
 *
 * TODO make this class more like ReplicationSource with regard to log handling
 */
public class ReplicationSink {

  private static final Log LOG = LogFactory.getLog(ReplicationSink.class);
  // Name of the HDFS directory that contains the temporary rep logs
  public static final String REPLICATION_LOG_DIR = ".replogs";
  private final Configuration conf;
  // Pool of HTables used to replicate entries
  private final HTablePool pool;
  // Chain to pull on when we want all to stop.
  private final Stoppable stopper;
  private final ReplicationSinkMetrics metrics;

  /**
   * Create a sink for replication
   *
   * @param conf    configuration object for this cluster
   * @param stopper the Stoppable used to tell this sink to stop
   * @throws IOException thrown when HDFS goes bad or a bad file name is given
   */
  public ReplicationSink(Configuration conf, Stoppable stopper)
      throws IOException {
    this.conf = conf;
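    // The pool size comes from "replication.sink.htablepool.capacity" and
    // defaults to 10. As a sketch (assuming the usual hbase-site.xml based
    // configuration), it could be overridden with e.g.:
    //   <property>
    //     <name>replication.sink.htablepool.capacity</name>
    //     <value>25</value>
    //   </property>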
    this.pool = new HTablePool(this.conf,
        conf.getInt("replication.sink.htablepool.capacity", 10));
    this.stopper = stopper;
    this.metrics = new ReplicationSinkMetrics();
  }

  /**
   * Replicate this array of entries directly into the local cluster
   * using the native client.
   *
   * @param entries the HLog entries read from the source cluster
   * @throws IOException if applying an edit to the local cluster fails
   */
  public void replicateEntries(HLog.Entry[] entries)
      throws IOException {
    if (entries.length == 0) {
      return;
    }
    // Very simple optimization where we batch sequences of rows going
    // to the same table.
    try {
      long totalReplicated = 0;
      // Map of table => list of puts, we only want to flushCommits once per
      // invocation of this method per table.
      Map<byte[], List<Put>> puts = new TreeMap<byte[], List<Put>>(Bytes.BYTES_COMPARATOR);
      for (HLog.Entry entry : entries) {
        WALEdit edit = entry.getEdit();
        List<KeyValue> kvs = edit.getKeyValues();
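        // The first KeyValue of the edit decides how the whole entry is
        // treated: as a Delete (family or column deletes) or as a batch of Puts.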
        if (kvs.get(0).isDelete()) {
          Delete delete = new Delete(kvs.get(0).getRow(),
              kvs.get(0).getTimestamp(), null);
          delete.setClusterId(entry.getKey().getClusterId());
          for (KeyValue kv : kvs) {
            if (kv.isDeleteFamily()) {
              delete.deleteFamily(kv.getFamily());
            } else if (!kv.isEmptyColumn()) {
              delete.deleteColumn(kv.getFamily(),
                  kv.getQualifier());
            }
          }
          delete(entry.getKey().getTablename(), delete);
        } else {
          byte[] table = entry.getKey().getTablename();
          List<Put> tableList = puts.get(table);
          if (tableList == null) {
            tableList = new ArrayList<Put>();
            puts.put(table, tableList);
          }
          // With mini-batching, we need to expect multiple rows per edit
          byte[] lastKey = kvs.get(0).getRow();
          Put put = new Put(kvs.get(0).getRow(),
              kvs.get(0).getTimestamp());
          put.setClusterId(entry.getKey().getClusterId());
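          // Group consecutive KeyValues that share a row key into the same
          // Put; start a new Put whenever the row changes.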
          for (KeyValue kv : kvs) {
            if (!Bytes.equals(lastKey, kv.getRow())) {
              tableList.add(put);
              put = new Put(kv.getRow(), kv.getTimestamp());
              put.setClusterId(entry.getKey().getClusterId());
            }
            put.add(kv.getFamily(), kv.getQualifier(), kv.getValue());
            lastKey = kv.getRow();
          }
          tableList.add(put);
        }
        totalReplicated++;
      }
      for (byte[] table : puts.keySet()) {
        put(table, puts.get(table));
      }
      this.metrics.setAgeOfLastAppliedOp(
          entries[entries.length-1].getKey().getWriteTime());
      this.metrics.appliedBatchesRate.inc(1);
      LOG.info("Total replicated: " + totalReplicated);
    } catch (IOException ex) {
      LOG.error("Unable to accept edit because:", ex);
      throw ex;
    }
  }

  /**
   * Do the puts and handle the pool
   * @param tableName table to insert into
   * @param puts list of puts
   * @throws IOException if the puts fail
   */
  private void put(byte[] tableName, List<Put> puts) throws IOException {
    if (puts.isEmpty()) {
      return;
    }
    HTableInterface table = null;
    try {
      table = this.pool.getTable(tableName);
      table.put(puts);
      this.metrics.appliedOpsRate.inc(puts.size());
    } finally {
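      // Always return the HTable to the pool, even if the put failed,
      // so that table instances are not leaked.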
      if (table != null) {
        this.pool.putTable(table);
      }
    }
  }

  /**
   * Do the delete and handle the pool
   * @param tableName table to delete in
   * @param delete the delete to use
   * @throws IOException if the delete fails
   */
  private void delete(byte[] tableName, Delete delete) throws IOException {
    HTableInterface table = null;
    try {
      table = this.pool.getTable(tableName);
      table.delete(delete);
      this.metrics.appliedOpsRate.inc(1);
    } finally {
      if (table != null) {
        this.pool.putTable(table);
      }
    }
  }
}