View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapreduce;
21  
22  import org.apache.hadoop.conf.Configuration;
23  import org.apache.hadoop.hbase.HBaseConfiguration;
24  import org.apache.hadoop.hbase.HConstants;
25  import org.apache.hadoop.hbase.util.Bytes;
26  import org.apache.hadoop.hbase.client.Scan;
27  import org.apache.hadoop.mapreduce.Job;
28  
29  import java.io.IOException;
30  import java.util.HashMap;
31  import java.util.Map;
32  
33  /**
34   * Tool used to copy a table to another one which can be on a different setup.
35   * It is also configurable with a start and time as well as a specification
36   * of the region server implementation if different from the local cluster.
37   */
38  public class CopyTable {
39  
40    final static String NAME = "copytable";
41    static String rsClass = null;
42    static String rsImpl = null;
43    static long startTime = 0;
44    static long endTime = 0;
45    static String tableName = null;
46    static String newTableName = null;
47    static String peerAddress = null;
48    static String families = null;
49  
50    /**
51     * Sets up the actual job.
52     *
53     * @param conf  The current configuration.
54     * @param args  The command line parameters.
55     * @return The newly created job.
56     * @throws IOException When setting up the job fails.
57     */
58    public static Job createSubmittableJob(Configuration conf, String[] args)
59    throws IOException {
60      if (!doCommandLine(args)) {
61        return null;
62      }
63      Job job = new Job(conf, NAME + "_" + tableName);
64      job.setJarByClass(CopyTable.class);
65      Scan scan = new Scan();
66      if (startTime != 0) {
67        scan.setTimeRange(startTime,
68            endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
69      }
70      if(families != null) {
71        String[] fams = families.split(",");
72        Map<String,String> cfRenameMap = new HashMap<String,String>();
73        for(String fam : fams) {
74          String sourceCf;
75          if(fam.contains(":")) { 
76              // fam looks like "sourceCfName:destCfName"
77              String[] srcAndDest = fam.split(":", 2);
78              sourceCf = srcAndDest[0];
79              String destCf = srcAndDest[1];
80              cfRenameMap.put(sourceCf, destCf);
81          } else {
82              // fam is just "sourceCf"
83              sourceCf = fam; 
84          }
85          scan.addFamily(Bytes.toBytes(sourceCf));
86        }
87        Import.configureCfRenaming(job.getConfiguration(), cfRenameMap);
88      }
89      TableMapReduceUtil.initTableMapperJob(tableName, scan,
90          Import.Importer.class, null, null, job);
91      TableMapReduceUtil.initTableReducerJob(
92          newTableName == null ? tableName : newTableName, null, job,
93          null, peerAddress, rsClass, rsImpl);
94      job.setNumReduceTasks(0);
95      return job;
96    }
97  
98    /*
99     * @param errorMsg Error message.  Can be null.
100    */
101   private static void printUsage(final String errorMsg) {
102     if (errorMsg != null && errorMsg.length() > 0) {
103       System.err.println("ERROR: " + errorMsg);
104     }
105     System.err.println("Usage: CopyTable [--rs.class=CLASS] " +
106         "[--rs.impl=IMPL] [--starttime=X] [--endtime=Y] " +
107         "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
108     System.err.println();
109     System.err.println("Options:");
110     System.err.println(" rs.class     hbase.regionserver.class of the peer cluster");
111     System.err.println("              specify if different from current cluster");
112     System.err.println(" rs.impl      hbase.regionserver.impl of the peer cluster");
113     System.err.println(" starttime    beginning of the time range");
114     System.err.println("              without endtime means from starttime to forever");
115     System.err.println(" endtime      end of the time range");
116     System.err.println(" new.name     new table's name");
117     System.err.println(" peer.adr     Address of the peer cluster given in the format");
118     System.err.println("              hbase.zookeeer.quorum:hbase.zookeeper.client.port:zookeeper.znode.parent");
119     System.err.println(" families     comma-separated list of families to copy");
120     System.err.println("              To copy from cf1 to cf2, give sourceCfName:destCfName. ");
121     System.err.println("              To keep the same name, just give \"cfName\"");
122     System.err.println();
123     System.err.println("Args:");
124     System.err.println(" tablename    Name of the table to copy");
125     System.err.println();
126     System.err.println("Examples:");
127     System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");
128     System.err.println(" $ bin/hbase " +
129         "org.apache.hadoop.hbase.mapreduce.CopyTable --rs.class=org.apache.hadoop.hbase.ipc.ReplicationRegionInterface " +
130         "--rs.impl=org.apache.hadoop.hbase.regionserver.replication.ReplicationRegionServer --starttime=1265875194289 --endtime=1265878794289 " +
131         "--peer.adr=server1,server2,server3:2181:/hbase --families=myOldCf:myNewCf,cf2,cf3 TestTable ");
132   }
133 
134   private static boolean doCommandLine(final String[] args) {
135     // Process command-line args. TODO: Better cmd-line processing
136     // (but hopefully something not as painful as cli options).
137     if (args.length < 1) {
138       printUsage(null);
139       return false;
140     }
141     try {
142       for (int i = 0; i < args.length; i++) {
143         String cmd = args[i];
144         if (cmd.equals("-h") || cmd.startsWith("--h")) {
145           printUsage(null);
146           return false;
147         }
148 
149         final String rsClassArgKey = "--rs.class=";
150         if (cmd.startsWith(rsClassArgKey)) {
151           rsClass = cmd.substring(rsClassArgKey.length());
152           continue;
153         }
154 
155         final String rsImplArgKey = "--rs.impl=";
156         if (cmd.startsWith(rsImplArgKey)) {
157           rsImpl = cmd.substring(rsImplArgKey.length());
158           continue;
159         }
160 
161         final String startTimeArgKey = "--starttime=";
162         if (cmd.startsWith(startTimeArgKey)) {
163           startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
164           continue;
165         }
166 
167         final String endTimeArgKey = "--endtime=";
168         if (cmd.startsWith(endTimeArgKey)) {
169           endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
170           continue;
171         }
172 
173         final String newNameArgKey = "--new.name=";
174         if (cmd.startsWith(newNameArgKey)) {
175           newTableName = cmd.substring(newNameArgKey.length());
176           continue;
177         }
178 
179         final String peerAdrArgKey = "--peer.adr=";
180         if (cmd.startsWith(peerAdrArgKey)) {
181           peerAddress = cmd.substring(peerAdrArgKey.length());
182           continue;
183         }
184 
185         final String familiesArgKey = "--families=";
186         if (cmd.startsWith(familiesArgKey)) {
187           families = cmd.substring(familiesArgKey.length());
188           continue;
189         }
190 
191         if (i == args.length-1) {
192           tableName = cmd;
193         }
194       }
195       if (newTableName == null && peerAddress == null) {
196         printUsage("At least a new table name or a " +
197             "peer address must be specified");
198         return false;
199       }
200     } catch (Exception e) {
201       e.printStackTrace();
202       printUsage("Can't start because " + e.getMessage());
203       return false;
204     }
205     return true;
206   }
207 
208   /**
209    * Main entry point.
210    *
211    * @param args  The command line parameters.
212    * @throws Exception When running the job fails.
213    */
214   public static void main(String[] args) throws Exception {
215     Configuration conf = HBaseConfiguration.create();
216     Job job = createSubmittableJob(conf, args);
217     if (job != null) {
218       System.exit(job.waitForCompletion(true) ? 0 : 1);
219     }
220   }
221 }