1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase;
19  
20  
21  import java.io.IOException;
22  import java.util.List;
23  
24  import junit.framework.Assert;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.fs.FileSystem;
29  import org.apache.hadoop.hbase.client.HBaseAdmin;
30  import org.apache.hadoop.hbase.master.HMaster;
31  import org.apache.hadoop.hbase.regionserver.HRegionServer;
32  import org.apache.hadoop.hbase.util.Bytes;
33  import org.apache.hadoop.hbase.util.FSUtils;
34  import org.apache.hadoop.hbase.util.FSTableDescriptors;
35  import org.apache.hadoop.hbase.util.Threads;
36  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
37  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
38  import org.apache.zookeeper.KeeperException;
39  import org.junit.AfterClass;
40  import org.junit.BeforeClass;
41  import org.junit.Test;
42  
43  /**
44   * Test the draining servers feature.
45   * @see <a href="https://issues.apache.org/jira/browse/HBASE-4298">HBASE-4298</a>
46   */
47  public class TestDrainingServer {
48    private static final Log LOG = LogFactory.getLog(TestDrainingServer.class);
49    private static final HBaseTestingUtility TEST_UTIL =
50      new HBaseTestingUtility();
51    private static final byte [] TABLENAME = Bytes.toBytes("t");
52    private static final byte [] FAMILY = Bytes.toBytes("f");
53    private static final int COUNT_OF_REGIONS = HBaseTestingUtility.KEYS.length;
54  
55    /**
56     * Spin up a cluster with a bunch of regions on it.
57     */
58    @BeforeClass
59    public static void setUpBeforeClass() throws Exception {
60      TEST_UTIL.startMiniCluster(5);
61      HTableDescriptor htd = new HTableDescriptor(TABLENAME);
62      htd.addFamily(new HColumnDescriptor(FAMILY));
63      TEST_UTIL.createMultiRegionsInMeta(TEST_UTIL.getConfiguration(), htd,
64          HBaseTestingUtility.KEYS);
65      // Make a mark for the table in the filesystem.
66      FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration());
67      FSTableDescriptors.
68        createTableDescriptor(fs, FSUtils.getRootDir(TEST_UTIL.getConfiguration()), htd);
69      // Assign out the regions we just created.
70      HBaseAdmin admin = new HBaseAdmin(TEST_UTIL.getConfiguration());
71      admin.disableTable(TABLENAME);
72      admin.enableTable(TABLENAME);
73      // Assert that every regionserver has some regions on it.
74      MiniHBaseCluster cluster = TEST_UTIL.getMiniHBaseCluster();
75      for (int i = 0; i < cluster.getRegionServerThreads().size(); i++) {
76        HRegionServer hrs = cluster.getRegionServer(i);
77        Assert.assertFalse(hrs.getOnlineRegions().isEmpty());
78      }
79    }
80  
81    private static HRegionServer setDrainingServer(final HRegionServer hrs)
82    throws KeeperException {
83      LOG.info("Making " + hrs.getServerName() + " the draining server; " +
84        "it has " + hrs.getNumberOfOnlineRegions() + " online regions");
85      ZooKeeperWatcher zkw = hrs.getZooKeeper();
86      String hrsDrainingZnode =
87        ZKUtil.joinZNode(zkw.drainingZNode, hrs.getServerName().toString());
88      ZKUtil.createWithParents(zkw, hrsDrainingZnode);
89      return hrs;
90    }
91  
92    private static HRegionServer unsetDrainingServer(final HRegionServer hrs)
93    throws KeeperException {
94      ZooKeeperWatcher zkw = hrs.getZooKeeper();
95      String hrsDrainingZnode =
96        ZKUtil.joinZNode(zkw.drainingZNode, hrs.getServerName().toString());
97      ZKUtil.deleteNode(zkw, hrsDrainingZnode);
98      return hrs;
99    }
100 
101   @AfterClass
102   public static void tearDownAfterClass() throws Exception {
103     TEST_UTIL.shutdownMiniCluster();
104   }
105 
106   /**
107    * Test adding server to draining servers and then move regions off it.
108    * Make sure that no regions are moved back to the draining server.
109    * @throws IOException 
110    * @throws KeeperException 
111    */
112   @Test  // (timeout=30000)
113   public void testDrainingServerOffloading()
114   throws IOException, KeeperException {
115     // I need master in the below.
116     HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
117     HRegionInfo hriToMoveBack = null;
118     // Set first server as draining server.
119     HRegionServer drainingServer =
120       setDrainingServer(TEST_UTIL.getMiniHBaseCluster().getRegionServer(0));
121     try {
122       final int regionsOnDrainingServer =
123         drainingServer.getNumberOfOnlineRegions();
124       Assert.assertTrue(regionsOnDrainingServer > 0);
125       List<HRegionInfo> hris = drainingServer.getOnlineRegions();
126       for (HRegionInfo hri : hris) {
127         // Pass null and AssignmentManager will chose a random server BUT it
128         // should exclude draining servers.
129         master.move(hri.getEncodedNameAsBytes(), null);
130         // Save off region to move back.
131         hriToMoveBack = hri;
132       }
133       // Wait for regions to come back on line again.
134       waitForAllRegionsOnline();
135       Assert.assertEquals(0, drainingServer.getNumberOfOnlineRegions());
136     } finally {
137       unsetDrainingServer(drainingServer);
138     }
139     // Now we've unset the draining server, we should be able to move a region
140     // to what was the draining server.
141     master.move(hriToMoveBack.getEncodedNameAsBytes(),
142       Bytes.toBytes(drainingServer.getServerName().toString()));
143     // Wait for regions to come back on line again.
144     waitForAllRegionsOnline();
145     Assert.assertEquals(1, drainingServer.getNumberOfOnlineRegions());
146   }
147 
148   /**
149    * Test that draining servers are ignored even after killing regionserver(s).
150    * Verify that the draining server is not given any of the dead servers regions.
151    * @throws KeeperException
152    * @throws IOException
153    */
154   @Test  (timeout=30000)
155   public void testDrainingServerWithAbort() throws KeeperException, IOException {
156     // Add first server to draining servers up in zk.
157     HRegionServer drainingServer =
158       setDrainingServer(TEST_UTIL.getMiniHBaseCluster().getRegionServer(0));
159     try {
160       final int regionsOnDrainingServer =
161         drainingServer.getNumberOfOnlineRegions();
162       Assert.assertTrue(regionsOnDrainingServer > 0);
163       // Kill a few regionservers.
164       int aborted = 0;
165       final int numberToAbort = 2;
166       for (int i = 1; i < TEST_UTIL.getMiniHBaseCluster().countServedRegions(); i++) {
167         HRegionServer hrs = TEST_UTIL.getMiniHBaseCluster().getRegionServer(i);
168         if (hrs.getServerName().equals(drainingServer.getServerName())) continue;
169         hrs.abort("Aborting");
170         aborted++;
171         if (aborted >= numberToAbort) break;
172       }
173       // Wait for regions to come back on line again.
174       waitForAllRegionsOnline();
175       // Assert the draining server still has the same number of regions.
176       Assert.assertEquals(regionsOnDrainingServer,
177         drainingServer.getNumberOfOnlineRegions());
178     } finally {
179       unsetDrainingServer(drainingServer);
180     }
181   }
182 
183   private void waitForAllRegionsOnline() {
184     while (TEST_UTIL.getMiniHBaseCluster().getMaster().
185         getAssignmentManager().isRegionsInTransition()) {
186       Threads.sleep(10);
187     }
188     // Wait for regions to come back on line again.
189     while (!isAllRegionsOnline()) {
190       Threads.sleep(10);
191     }
192   }
193 
194   private boolean isAllRegionsOnline() {
195     return TEST_UTIL.getMiniHBaseCluster().countServedRegions() ==
196       (COUNT_OF_REGIONS + 2 /*catalog regions*/);
197   }
198 }