View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.util;
21  
22  import java.io.IOException;
23  import java.util.List;
24  
25  import org.apache.hadoop.conf.Configuration;
26  import org.apache.hadoop.hbase.HRegionInfo;
27  import org.apache.hadoop.hbase.NotServingRegionException;
28  import org.apache.hadoop.hbase.ServerName;
29  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
30  import org.apache.hadoop.hbase.client.HBaseAdmin;
31  import org.apache.hadoop.hbase.client.HConnection;
32  import org.apache.hadoop.hbase.client.HConnectionManager;
33  import org.apache.hadoop.hbase.ipc.HRegionInterface;
34  import org.apache.zookeeper.KeeperException;
35  
36  public class HBaseFsckRepair {
37  
38    /**
39     * Fix dupe assignment by doing silent closes on each RS hosting the region
40     * and then force ZK unassigned node to OFFLINE to trigger assignment by
41     * master.
42     * @param admin
43     * @param region
44     * @param servers
45     * @throws IOException
46     * @throws KeeperException
47     * @throws InterruptedException
48     */
49    public static void fixDupeAssignment(HBaseAdmin admin, HRegionInfo region,
50        List<ServerName> servers)
51    throws IOException, KeeperException, InterruptedException {
52  
53      HRegionInfo actualRegion = new HRegionInfo(region);
54  
55      // Close region on the servers silently
56      for(ServerName server : servers) {
57        closeRegionSilentlyAndWait(admin.getConfiguration(), server, actualRegion);
58      }
59  
60      // Force ZK node to OFFLINE so master assigns
61      forceOfflineInZK(admin, actualRegion);
62    }
63  
64    /**
65     * Fix unassigned by creating/transition the unassigned ZK node for this
66     * region to OFFLINE state with a special flag to tell the master that this
67     * is a forced operation by HBCK.
68     * @param admin
69     * @param region
70     * @throws IOException
71     * @throws KeeperException
72     */
73    public static void fixUnassigned(HBaseAdmin admin, HRegionInfo region)
74    throws IOException, KeeperException {
75      HRegionInfo actualRegion = new HRegionInfo(region);
76  
77      // Force ZK node to OFFLINE so master assigns
78      forceOfflineInZK(admin, actualRegion);
79    }
80  
81    private static void forceOfflineInZK(HBaseAdmin admin, final HRegionInfo region)
82    throws ZooKeeperConnectionException, KeeperException, IOException {
83      admin.assign(region.getRegionName());
84    }
85  
86    private static void closeRegionSilentlyAndWait(Configuration conf,
87        ServerName server, HRegionInfo region) throws IOException,
88        InterruptedException {
89      HConnection connection = HConnectionManager.getConnection(conf);
90      boolean success = false;
91      try {
92        HRegionInterface rs =
93          connection.getHRegionConnection(server.getHostname(), server.getPort());
94        rs.closeRegion(region, false);
95        long timeout = conf.getLong("hbase.hbck.close.timeout", 120000);
96        long expiration = timeout + System.currentTimeMillis();
97        while (System.currentTimeMillis() < expiration) {
98          try {
99            HRegionInfo rsRegion = rs.getRegionInfo(region.getRegionName());
100           if (rsRegion == null)
101             throw new NotServingRegionException();
102         } catch (Exception e) {
103           success = true;
104           return;
105         }
106         Thread.sleep(1000);
107       }
108       throw new IOException("Region " + region + " failed to close within"
109           + " timeout " + timeout);
110     } finally {
111       try {
112         connection.close();
113       } catch (IOException ioe) {
114         if (success) {
115           throw ioe;
116         }
117       }
118     }
119   }
120 }