1   /**
2    * Copyright 2008 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase;
21  
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.fail;
24  
25  import java.io.IOException;
26  import java.util.ArrayList;
27  import java.util.List;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.hbase.catalog.CatalogTracker;
32  import org.apache.hadoop.hbase.catalog.MetaReader;
33  import org.apache.hadoop.hbase.client.HBaseAdmin;
34  import org.apache.hadoop.hbase.client.HTable;
35  import org.apache.hadoop.hbase.regionserver.HRegionServer;
36  import org.apache.hadoop.hbase.util.Bytes;
37  import org.apache.hadoop.hbase.util.JVMClusterUtil;
38  import org.junit.AfterClass;
39  import org.junit.Before;
40  import org.junit.BeforeClass;
41  import org.junit.Test;
42  
43  /**
44   * Test whether region rebalancing works. (HBASE-71)
45   */
46  public class TestRegionRebalancing {
47    final Log LOG = LogFactory.getLog(this.getClass().getName());
48    private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
49    HTable table;
50    HTableDescriptor desc;
51    private static final byte [] FAMILY_NAME = Bytes.toBytes("col");
52  
53    @BeforeClass
54    public static void beforeClass() throws Exception {
55      UTIL.startMiniCluster(1);
56    }
57  
58    @AfterClass
59    public static void afterClass() throws Exception {
60      UTIL.shutdownMiniCluster();
61    }
62  
63    @Before
64    public void before() {
65      this.desc = new HTableDescriptor("test");
66      this.desc.addFamily(new HColumnDescriptor(FAMILY_NAME));
67    }
68  
69    /**
70     * For HBASE-71. Try a few different configurations of starting and stopping
71     * region servers to see if the assignment or regions is pretty balanced.
72     * @throws IOException
73     * @throws InterruptedException
74     */
75    @Test
76    public void testRebalanceOnRegionServerNumberChange()
77    throws IOException, InterruptedException {
78      HBaseAdmin admin = new HBaseAdmin(UTIL.getConfiguration());
79      admin.createTable(this.desc, HBaseTestingUtility.KEYS);
80      this.table = new HTable(UTIL.getConfiguration(), this.desc.getName());
81      CatalogTracker ct = new CatalogTracker(UTIL.getConfiguration());
82      ct.start();
83      try {
84        MetaReader.fullScanMetaAndPrint(ct);
85      } finally {
86        ct.stop();
87      }
88      assertEquals("Test table should have right number of regions",
89        HBaseTestingUtility.KEYS.length + 1/*One extra to account for start/end keys*/,
90        this.table.getStartKeys().length);
91  
92      // verify that the region assignments are balanced to start out
93      assertRegionsAreBalanced();
94  
95      // add a region server - total of 2
96      LOG.info("Started second server=" +
97        UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
98      UTIL.getHBaseCluster().getMaster().balance();
99      assertRegionsAreBalanced();
100 
101     // add a region server - total of 3
102     LOG.info("Started third server=" +
103         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
104     UTIL.getHBaseCluster().getMaster().balance();
105     assertRegionsAreBalanced();
106 
107     // kill a region server - total of 2
108     LOG.info("Stopped third server=" + UTIL.getHBaseCluster().stopRegionServer(2, false));
109     UTIL.getHBaseCluster().waitOnRegionServer(2);
110     UTIL.getHBaseCluster().getMaster().balance();
111     assertRegionsAreBalanced();
112 
113     // start two more region servers - total of 4
114     LOG.info("Readding third server=" +
115         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
116     LOG.info("Added fourth server=" +
117         UTIL.getHBaseCluster().startRegionServer().getRegionServer().getServerName());
118     UTIL.getHBaseCluster().getMaster().balance();
119     assertRegionsAreBalanced();
120 
121     for (int i = 0; i < 6; i++){
122       LOG.info("Adding " + (i + 5) + "th region server");
123       UTIL.getHBaseCluster().startRegionServer();
124     }
125     UTIL.getHBaseCluster().getMaster().balance();
126     assertRegionsAreBalanced();
127   }
128 
129   /** figure out how many regions are currently being served. */
130   private int getRegionCount() throws IOException {
131     int total = 0;
132     for (HRegionServer server : getOnlineRegionServers()) {
133       total += server.getOnlineRegions().size();
134     }
135     return total;
136   }
137 
138   /**
139    * Determine if regions are balanced. Figure out the total, divide by the
140    * number of online servers, then test if each server is +/- 1 of average
141    * rounded up.
142    */
143   private void assertRegionsAreBalanced() throws IOException {
144     // TODO: Fix this test.  Old balancer used to run with 'slop'.  New
145     // balancer does not.
146     boolean success = false;
147     float slop = (float)UTIL.getConfiguration().getFloat("hbase.regions.slop", 0.1f);
148     if (slop <= 0) slop = 1;
149 
150     for (int i = 0; i < 5; i++) {
151       success = true;
152       // make sure all the regions are reassigned before we test balance
153       waitForAllRegionsAssigned();
154 
155       int regionCount = getRegionCount();
156       List<HRegionServer> servers = getOnlineRegionServers();
157       double avg = UTIL.getHBaseCluster().getMaster().getAverageLoad();
158       int avgLoadPlusSlop = (int)Math.ceil(avg * (1 + slop));
159       int avgLoadMinusSlop = (int)Math.floor(avg * (1 - slop)) - 1;
160       LOG.debug("There are " + servers.size() + " servers and " + regionCount
161         + " regions. Load Average: " + avg + " low border: " + avgLoadMinusSlop
162         + ", up border: " + avgLoadPlusSlop + "; attempt: " + i);
163 
164       for (HRegionServer server : servers) {
165         int serverLoad = server.getOnlineRegions().size();
166         LOG.debug(server.getServerName() + " Avg: " + avg + " actual: " + serverLoad);
167         if (!(avg > 2.0 && serverLoad <= avgLoadPlusSlop
168             && serverLoad >= avgLoadMinusSlop)) {
169           LOG.debug(server.getServerName() + " Isn't balanced!!! Avg: " + avg +
170               " actual: " + serverLoad + " slop: " + slop);
171           success = false;
172         }
173       }
174 
175       if (!success) {
176         // one or more servers are not balanced. sleep a little to give it a
177         // chance to catch up. then, go back to the retry loop.
178         try {
179           Thread.sleep(10000);
180         } catch (InterruptedException e) {}
181 
182         UTIL.getHBaseCluster().getMaster().balance();
183         continue;
184       }
185 
186       // if we get here, all servers were balanced, so we should just return.
187       return;
188     }
189     // if we get here, we tried 5 times and never got to short circuit out of
190     // the retry loop, so this is a failure.
191     fail("After 5 attempts, region assignments were not balanced.");
192   }
193 
194   private List<HRegionServer> getOnlineRegionServers() {
195     List<HRegionServer> list = new ArrayList<HRegionServer>();
196     for (JVMClusterUtil.RegionServerThread rst :
197         UTIL.getHBaseCluster().getRegionServerThreads()) {
198       if (rst.getRegionServer().isOnline()) {
199         list.add(rst.getRegionServer());
200       }
201     }
202     return list;
203   }
204 
205   /**
206    * Wait until all the regions are assigned.
207    */
208   private void waitForAllRegionsAssigned() throws IOException {
209     while (getRegionCount() < 22) {
210     // while (!cluster.getMaster().allRegionsAssigned()) {
211       LOG.debug("Waiting for there to be 22 regions, but there are " + getRegionCount() + " right now.");
212       try {
213         Thread.sleep(1000);
214       } catch (InterruptedException e) {}
215     }
216   }
217 }