1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.master;
21  
22  import static org.junit.Assert.assertEquals;
23  import static org.junit.Assert.assertFalse;
24  import static org.junit.Assert.assertTrue;
25  
26  import java.util.ArrayList;
27  import java.util.List;
28  import java.util.Set;
29  import java.util.TreeSet;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.conf.Configuration;
34  import org.apache.hadoop.fs.FileSystem;
35  import org.apache.hadoop.fs.Path;
36  import org.apache.hadoop.hbase.Abortable;
37  import org.apache.hadoop.hbase.HBaseConfiguration;
38  import org.apache.hadoop.hbase.HBaseTestingUtility;
39  import org.apache.hadoop.hbase.HColumnDescriptor;
40  import org.apache.hadoop.hbase.HConstants;
41  import org.apache.hadoop.hbase.HRegionInfo;
42  import org.apache.hadoop.hbase.HTableDescriptor;
43  import org.apache.hadoop.hbase.MiniHBaseCluster;
44  import org.apache.hadoop.hbase.ServerName;
45  import org.apache.hadoop.hbase.executor.EventHandler.EventType;
46  import org.apache.hadoop.hbase.executor.RegionTransitionData;
47  import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
48  import org.apache.hadoop.hbase.regionserver.HRegion;
49  import org.apache.hadoop.hbase.regionserver.HRegionServer;
50  import org.apache.hadoop.hbase.util.Bytes;
51  import org.apache.hadoop.hbase.util.FSTableDescriptors;
52  import org.apache.hadoop.hbase.util.JVMClusterUtil;
53  import org.apache.hadoop.hbase.util.Threads;
54  import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
55  import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
56  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
57  import org.apache.hadoop.hbase.zookeeper.ZKTable;
58  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
59  import org.junit.Test;
60  
61  public class TestMasterFailover {
  // Shared commons-logging logger for all master-failover test scenarios below.
  private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
63  
64    /**
65     * Simple test of master failover.
66     * <p>
67     * Starts with three masters.  Kills a backup master.  Then kills the active
68     * master.  Ensures the final master becomes active and we can still contact
69     * the cluster.
70     * @throws Exception
71     */
72    @Test (timeout=240000)
73    public void testSimpleMasterFailover() throws Exception {
74  
75      final int NUM_MASTERS = 3;
76      final int NUM_RS = 3;
77  
78      // Start the cluster
79      HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
80      TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
81      MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
82  
83      // get all the master threads
84      List<MasterThread> masterThreads = cluster.getMasterThreads();
85  
86      // wait for each to come online
87      for (MasterThread mt : masterThreads) {
88        assertTrue(mt.isAlive());
89      }
90  
91      // verify only one is the active master and we have right number
92      int numActive = 0;
93      int activeIndex = -1;
94      ServerName activeName = null;
95      for (int i = 0; i < masterThreads.size(); i++) {
96        if (masterThreads.get(i).getMaster().isActiveMaster()) {
97          numActive++;
98          activeIndex = i;
99          activeName = masterThreads.get(i).getMaster().getServerName();
100       }
101     }
102     assertEquals(1, numActive);
103     assertEquals(NUM_MASTERS, masterThreads.size());
104 
105     // attempt to stop one of the inactive masters
106     LOG.debug("\n\nStopping a backup master\n");
107     int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
108     cluster.stopMaster(backupIndex, false);
109     cluster.waitOnMaster(backupIndex);
110 
111     // verify still one active master and it's the same
112     for (int i = 0; i < masterThreads.size(); i++) {
113       if (masterThreads.get(i).getMaster().isActiveMaster()) {
114         assertTrue(activeName.equals(
115             masterThreads.get(i).getMaster().getServerName()));
116         activeIndex = i;
117       }
118     }
119     assertEquals(1, numActive);
120     assertEquals(2, masterThreads.size());
121 
122     // kill the active master
123     LOG.debug("\n\nStopping the active master\n");
124     cluster.stopMaster(activeIndex, false);
125     cluster.waitOnMaster(activeIndex);
126 
127     // wait for an active master to show up and be ready
128     assertTrue(cluster.waitForActiveAndReadyMaster());
129 
130     LOG.debug("\n\nVerifying backup master is now active\n");
131     // should only have one master now
132     assertEquals(1, masterThreads.size());
133     // and he should be active
134     assertTrue(masterThreads.get(0).getMaster().isActiveMaster());
135 
136     // Stop the cluster
137     TEST_UTIL.shutdownMiniCluster();
138   }
139 
140   @Test
141   public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
142       throws Exception {
143     final int NUM_MASTERS = 1;
144     final int NUM_RS = 2;
145 
146     Configuration conf = HBaseConfiguration.create();
147     conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
148     conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 8000);
149     // Start the cluster
150     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
151     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
152     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
153 
154     // get all the master threads
155     List<MasterThread> masterThreads = cluster.getMasterThreads();
156 
157     // wait for each to come online
158     for (MasterThread mt : masterThreads) {
159       assertTrue(mt.isAlive());
160     }
161 
162     // verify only one is the active master and we have right number
163     int numActive = 0;
164     ServerName activeName = null;
165     for (int i = 0; i < masterThreads.size(); i++) {
166       if (masterThreads.get(i).getMaster().isActiveMaster()) {
167         numActive++;
168         activeName = masterThreads.get(i).getMaster().getServerName();
169       }
170     }
171     assertEquals(1, numActive);
172     assertEquals(NUM_MASTERS, masterThreads.size());
173 
174     // verify still one active master and it's the same
175     for (int i = 0; i < masterThreads.size(); i++) {
176       if (masterThreads.get(i).getMaster().isActiveMaster()) {
177         assertTrue(activeName.equals(masterThreads.get(i).getMaster()
178             .getServerName()));
179       }
180     }
181     assertEquals(1, numActive);
182     assertEquals(1, masterThreads.size());
183 
184     List<RegionServerThread> regionServerThreads = cluster
185         .getRegionServerThreads();
186     int count = -1;
187     HRegion metaRegion = null;
188     for (RegionServerThread regionServerThread : regionServerThreads) {
189       HRegionServer regionServer = regionServerThread.getRegionServer();
190       metaRegion = regionServer
191           .getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
192       count++;
193       regionServer.abort("");
194       if (null != metaRegion) {
195         break;
196       }
197     }
198     HRegionServer regionServer = cluster.getRegionServer(count);
199 
200     cluster.shutdown();
201     // Create a ZKW to use in the test
202     ZooKeeperWatcher zkw = 
203       HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL, 
204           metaRegion, regionServer.getServerName());
205 
206     TEST_UTIL.startMiniHBaseCluster(1, 1);
207 
208     // Failover should be completed, now wait for no RIT
209     log("Waiting for no more RIT");
210     ZKAssign.blockUntilNoRIT(zkw);
211 
212     // Stop the cluster
213     TEST_UTIL.shutdownMiniCluster();
214   }
215 
216 
217   /**
218    * Complex test of master failover that tests as many permutations of the
219    * different possible states that regions in transition could be in within ZK.
220    * <p>
221    * This tests the proper handling of these states by the failed-over master
222    * and includes a thorough testing of the timeout code as well.
223    * <p>
224    * Starts with a single master and three regionservers.
225    * <p>
 * Creates two tables, enabledTable and disabledTable, each pre-split into
 * multiple regions.  The disabledTable is then disabled.
228    * <p>
229    * After reaching steady-state, the master is killed.  We then mock several
230    * states in ZK.
231    * <p>
232    * After mocking them, we will startup a new master which should become the
233    * active master and also detect that it is a failover.  The primary test
234    * passing condition will be that all regions of the enabled table are
235    * assigned and all the regions of the disabled table are not assigned.
236    * <p>
237    * The different scenarios to be tested are below:
238    * <p>
239    * <b>ZK State:  OFFLINE</b>
240    * <p>A node can get into OFFLINE state if</p>
241    * <ul>
242    * <li>An RS fails to open a region, so it reverts the state back to OFFLINE
243    * <li>The Master is assigning the region to a RS before it sends RPC
244    * </ul>
245    * <p>We will mock the scenarios</p>
246    * <ul>
247    * <li>Master has assigned an enabled region but RS failed so a region is
248    *     not assigned anywhere and is sitting in ZK as OFFLINE</li>
249    * <li>This seems to cover both cases?</li>
250    * </ul>
251    * <p>
252    * <b>ZK State:  CLOSING</b>
253    * <p>A node can get into CLOSING state if</p>
254    * <ul>
255    * <li>An RS has begun to close a region
256    * </ul>
257    * <p>We will mock the scenarios</p>
258    * <ul>
259    * <li>Region of enabled table was being closed but did not complete
260    * <li>Region of disabled table was being closed but did not complete
261    * </ul>
262    * <p>
263    * <b>ZK State:  CLOSED</b>
264    * <p>A node can get into CLOSED state if</p>
265    * <ul>
266    * <li>An RS has completed closing a region but not acknowledged by master yet
267    * </ul>
268    * <p>We will mock the scenarios</p>
269    * <ul>
270    * <li>Region of a table that should be enabled was closed on an RS
271    * <li>Region of a table that should be disabled was closed on an RS
272    * </ul>
273    * <p>
274    * <b>ZK State:  OPENING</b>
275    * <p>A node can get into OPENING state if</p>
276    * <ul>
277    * <li>An RS has begun to open a region
278    * </ul>
279    * <p>We will mock the scenarios</p>
280    * <ul>
281    * <li>RS was opening a region of enabled table but never finishes
282    * </ul>
283    * <p>
284    * <b>ZK State:  OPENED</b>
285    * <p>A node can get into OPENED state if</p>
286    * <ul>
287    * <li>An RS has finished opening a region but not acknowledged by master yet
288    * </ul>
289    * <p>We will mock the scenarios</p>
290    * <ul>
291    * <li>Region of a table that should be enabled was opened on an RS
292    * <li>Region of a table that should be disabled was opened on an RS
293    * </ul>
294    * @throws Exception
295    */
296   @Test (timeout=180000)
297   public void testMasterFailoverWithMockedRIT() throws Exception {
298 
299     final int NUM_MASTERS = 1;
300     final int NUM_RS = 3;
301 
302     // Create config to use for this cluster
303     Configuration conf = HBaseConfiguration.create();
304     // Need to drop the timeout much lower
305     conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
306     conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
307 
308     // Start the cluster
309     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
310     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
311     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
312     log("Cluster started");
313 
314     // Create a ZKW to use in the test
315     ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);
316 
317     // get all the master threads
318     List<MasterThread> masterThreads = cluster.getMasterThreads();
319     assertEquals(1, masterThreads.size());
320 
321     // only one master thread, let's wait for it to be initialized
322     assertTrue(cluster.waitForActiveAndReadyMaster());
323     HMaster master = masterThreads.get(0).getMaster();
324     assertTrue(master.isActiveMaster());
325     assertTrue(master.isInitialized());
326 
327     // disable load balancing on this master
328     master.balanceSwitch(false);
329 
330     // create two tables in META, each with 10 regions
331     byte [] FAMILY = Bytes.toBytes("family");
332     byte [][] SPLIT_KEYS = new byte [][] {
333         new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
334         Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
335         Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
336         Bytes.toBytes("iii"), Bytes.toBytes("jjj")
337     };
338 
339     byte [] enabledTable = Bytes.toBytes("enabledTable");
340     HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable);
341     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
342 
343     FileSystem filesystem = FileSystem.get(conf);
344     Path rootdir = filesystem.makeQualified(
345         new Path(conf.get(HConstants.HBASE_DIR)));
346     // Write the .tableinfo
347     FSTableDescriptors.createTableDescriptor(filesystem, rootdir, htdEnabled);
348 
349     HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getName(), null, null);
350     HRegion.createHRegion(hriEnabled, rootdir, conf, htdEnabled);
351 
352     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
353         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
354 
355     byte [] disabledTable = Bytes.toBytes("disabledTable");
356     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
357     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
358     // Write the .tableinfo
359     FSTableDescriptors.createTableDescriptor(filesystem, rootdir, htdDisabled);
360     HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getName(), null, null);
361     HRegion.createHRegion(hriDisabled, rootdir, conf, htdDisabled);
362     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
363         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
364 
365     log("Regions in META have been created");
366 
367     // at this point we only expect 2 regions to be assigned out (catalogs)
368     assertEquals(2, cluster.countServedRegions());
369 
370     // Let's just assign everything to first RS
371     HRegionServer hrs = cluster.getRegionServer(0);
372     ServerName serverName = hrs.getServerName();
373     HRegionInfo closingRegion = enabledRegions.remove(0);
374     // we'll need some regions to already be assigned out properly on live RS
375     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
376     enabledAndAssignedRegions.add(enabledRegions.remove(0));
377     enabledAndAssignedRegions.add(enabledRegions.remove(0));
378     enabledAndAssignedRegions.add(closingRegion);
379     
380     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
381     disabledAndAssignedRegions.add(disabledRegions.remove(0));
382     disabledAndAssignedRegions.add(disabledRegions.remove(0));
383 
384     // now actually assign them
385     for (HRegionInfo hri : enabledAndAssignedRegions) {
386       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
387           new RegionPlan(hri, null, serverName));
388       master.assignRegion(hri);
389     }
390     for (HRegionInfo hri : disabledAndAssignedRegions) {
391       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
392           new RegionPlan(hri, null, serverName));
393       master.assignRegion(hri);
394     }
395 
396     // wait for no more RIT
397     log("Waiting for assignment to finish");
398     ZKAssign.blockUntilNoRIT(zkw);
399     log("Assignment completed");
400 
401     // Stop the master
402     log("Aborting master");
403     cluster.abortMaster(0);
404     cluster.waitOnMaster(0);
405     log("Master has aborted");
406 
407     /*
408      * Now, let's start mocking up some weird states as described in the method
409      * javadoc.
410      */
411 
412     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
413     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
414 
415     log("Beginning to mock scenarios");
416 
417     // Disable the disabledTable in ZK
418     ZKTable zktable = new ZKTable(zkw);
419     zktable.setDisabledTable(Bytes.toString(disabledTable));
420 
421     /*
422      *  ZK = OFFLINE
423      */
424 
425     // Region that should be assigned but is not and is in ZK as OFFLINE
426     HRegionInfo region = enabledRegions.remove(0);
427     regionsThatShouldBeOnline.add(region);
428     ZKAssign.createNodeOffline(zkw, region, serverName);
429 
430     /*
431      * ZK = CLOSING
432      */
433     regionsThatShouldBeOnline.add(closingRegion);
434     ZKAssign.createNodeClosing(zkw, closingRegion, serverName);
435 
436     /*
437      * ZK = CLOSED
438      */
439 
440     // Region of enabled table closed but not ack
441     region = enabledRegions.remove(0);
442     regionsThatShouldBeOnline.add(region);
443     int version = ZKAssign.createNodeClosing(zkw, region, serverName);
444     ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
445 
446     // Region of disabled table closed but not ack
447     region = disabledRegions.remove(0);
448     regionsThatShouldBeOffline.add(region);
449     version = ZKAssign.createNodeClosing(zkw, region, serverName);
450     ZKAssign.transitionNodeClosed(zkw, region, serverName, version);
451 
452     /*
453      * ZK = OPENING
454      */
455 
456     // RS was opening a region of enabled table but never finishes
457     region = enabledRegions.remove(0);
458     regionsThatShouldBeOnline.add(region);
459     ZKAssign.createNodeOffline(zkw, region, serverName);
460     ZKAssign.transitionNodeOpening(zkw, region, serverName);
461 
462     /*
463      * ZK = OPENED
464      */
465 
466     // Region of enabled table was opened on RS
467     region = enabledRegions.remove(0);
468     regionsThatShouldBeOnline.add(region);
469     ZKAssign.createNodeOffline(zkw, region, serverName);
470     hrs.openRegion(region);
471     while (true) {
472       RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
473       if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
474         break;
475       }
476       Thread.sleep(100);
477     }
478 
479     // Region of disable table was opened on RS
480     region = disabledRegions.remove(0);
481     regionsThatShouldBeOffline.add(region);
482     ZKAssign.createNodeOffline(zkw, region, serverName);
483     hrs.openRegion(region);
484     while (true) {
485       RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
486       if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
487         break;
488       }
489       Thread.sleep(100);
490     }
491 
492     /*
493      * ZK = NONE
494      */
495 
496     /*
497      * DONE MOCKING
498      */
499 
500     log("Done mocking data up in ZK");
501 
502     // Start up a new master
503     log("Starting up a new master");
504     master = cluster.startMaster().getMaster();
505     log("Waiting for master to be ready");
506     cluster.waitForActiveAndReadyMaster();
507     log("Master is ready");
508 
509     // Failover should be completed, now wait for no RIT
510     log("Waiting for no more RIT");
511     ZKAssign.blockUntilNoRIT(zkw);
512     log("No more RIT in ZK, now doing final test verification");
513 
514     // Grab all the regions that are online across RSs
515     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
516     for (JVMClusterUtil.RegionServerThread rst :
517       cluster.getRegionServerThreads()) {
518       onlineRegions.addAll(rst.getRegionServer().getOnlineRegions());
519     }
520 
521     // Now, everything that should be online should be online
522     for (HRegionInfo hri : regionsThatShouldBeOnline) {
523       assertTrue(onlineRegions.contains(hri));
524     }
525 
526     // Everything that should be offline should not be online
527     for (HRegionInfo hri : regionsThatShouldBeOffline) {
528       assertFalse(onlineRegions.contains(hri));
529     }
530 
531     log("Done with verification, all passed, shutting down cluster");
532 
533     // Done, shutdown the cluster
534     TEST_UTIL.shutdownMiniCluster();
535   }
536 
537 
538   /**
539    * Complex test of master failover that tests as many permutations of the
540    * different possible states that regions in transition could be in within ZK
541    * pointing to an RS that has died while no master is around to process it.
542    * <p>
543    * This tests the proper handling of these states by the failed-over master
544    * and includes a thorough testing of the timeout code as well.
545    * <p>
546    * Starts with a single master and two regionservers.
547    * <p>
 * Creates two tables, enabledTable and disabledTable, each pre-split into
 * multiple regions.  The disabledTable is then disabled.
550    * <p>
551    * After reaching steady-state, the master is killed.  We then mock several
552    * states in ZK.  And one of the RS will be killed.
553    * <p>
554    * After mocking them and killing an RS, we will startup a new master which
555    * should become the active master and also detect that it is a failover.  The
556    * primary test passing condition will be that all regions of the enabled
557    * table are assigned and all the regions of the disabled table are not
558    * assigned.
559    * <p>
560    * The different scenarios to be tested are below:
561    * <p>
562    * <b>ZK State:  CLOSING</b>
563    * <p>A node can get into CLOSING state if</p>
564    * <ul>
565    * <li>An RS has begun to close a region
566    * </ul>
567    * <p>We will mock the scenarios</p>
568    * <ul>
569    * <li>Region was being closed but the RS died before finishing the close
570    * </ul>
571    * <b>ZK State:  OPENED</b>
572    * <p>A node can get into OPENED state if</p>
573    * <ul>
574    * <li>An RS has finished opening a region but not acknowledged by master yet
575    * </ul>
576    * <p>We will mock the scenarios</p>
577    * <ul>
578    * <li>Region of a table that should be enabled was opened by a now-dead RS
579    * <li>Region of a table that should be disabled was opened by a now-dead RS
580    * </ul>
581    * <p>
582    * <b>ZK State:  NONE</b>
583    * <p>A region could not have a transition node if</p>
584    * <ul>
585    * <li>The server hosting the region died and no master processed it
586    * </ul>
587    * <p>We will mock the scenarios</p>
588    * <ul>
589    * <li>Region of enabled table was on a dead RS that was not yet processed
590    * <li>Region of disabled table was on a dead RS that was not yet processed
591    * </ul>
592    * @throws Exception
593    */
594   @Test (timeout=180000)
595   public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
596 
597     final int NUM_MASTERS = 1;
598     final int NUM_RS = 2;
599 
600     // Create config to use for this cluster
601     Configuration conf = HBaseConfiguration.create();
602     // Need to drop the timeout much lower
603     conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
604     conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
605 
606     // Create and start the cluster
607     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
608     TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
609     MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
610     log("Cluster started");
611 
612     // Create a ZKW to use in the test
613     ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
614         "unittest", new Abortable() {
615           
616           @Override
617           public void abort(String why, Throwable e) {
618             LOG.error("Fatal ZK Error: " + why, e);
619             org.junit.Assert.assertFalse("Fatal ZK error", true);
620           }
621           
622           @Override
623           public boolean isAborted() {
624             return false;
625           }
626           
627     });
628 
629     // get all the master threads
630     List<MasterThread> masterThreads = cluster.getMasterThreads();
631     assertEquals(1, masterThreads.size());
632 
633     // only one master thread, let's wait for it to be initialized
634     assertTrue(cluster.waitForActiveAndReadyMaster());
635     HMaster master = masterThreads.get(0).getMaster();
636     assertTrue(master.isActiveMaster());
637     assertTrue(master.isInitialized());
638 
639     // disable load balancing on this master
640     master.balanceSwitch(false);
641 
642     // create two tables in META, each with 10 regions
643     byte [] FAMILY = Bytes.toBytes("family");
644     byte [][] SPLIT_KEYS = new byte [][] {
645         new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
646         Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
647         Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
648         Bytes.toBytes("iii"), Bytes.toBytes("jjj")
649     };
650 
651     byte [] enabledTable = Bytes.toBytes("enabledTable");
652     HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable);
653     htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
654     FileSystem filesystem = FileSystem.get(conf);
655     Path rootdir = filesystem.makeQualified(
656            new Path(conf.get(HConstants.HBASE_DIR)));
657     // Write the .tableinfo
658     FSTableDescriptors.createTableDescriptor(filesystem, rootdir, htdEnabled);
659     HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getName(),
660         null, null);
661     HRegion.createHRegion(hriEnabled, rootdir, conf, htdEnabled);
662 
663     List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
664         TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
665 
666     byte [] disabledTable = Bytes.toBytes("disabledTable");
667     HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
668     htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
669     // Write the .tableinfo
670     FSTableDescriptors.createTableDescriptor(filesystem, rootdir, htdDisabled);
671     HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getName(), null, null);
672     HRegion.createHRegion(hriDisabled, rootdir, conf, htdDisabled);
673 
674     List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
675         TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
676 
677     log("Regions in META have been created");
678 
679     // at this point we only expect 2 regions to be assigned out (catalogs)
680     assertEquals(2, cluster.countServedRegions());
681 
682     // The first RS will stay online
683     List<RegionServerThread> regionservers =
684       cluster.getRegionServerThreads();
685     HRegionServer hrs = regionservers.get(0).getRegionServer();
686 
687     // The second RS is going to be hard-killed
688     RegionServerThread hrsDeadThread = regionservers.get(1);
689     HRegionServer hrsDead = hrsDeadThread.getRegionServer();
690     ServerName deadServerName = hrsDead.getServerName();
691 
692     // we'll need some regions to already be assigned out properly on live RS
693     List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
694     enabledAndAssignedRegions.add(enabledRegions.remove(0));
695     enabledAndAssignedRegions.add(enabledRegions.remove(0));
696     List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
697     disabledAndAssignedRegions.add(disabledRegions.remove(0));
698     disabledAndAssignedRegions.add(disabledRegions.remove(0));
699 
700     // now actually assign them
701     for (HRegionInfo hri : enabledAndAssignedRegions) {
702       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
703           new RegionPlan(hri, null, hrs.getServerName()));
704       master.assignRegion(hri);
705     }
706     for (HRegionInfo hri : disabledAndAssignedRegions) {
707       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
708           new RegionPlan(hri, null, hrs.getServerName()));
709       master.assignRegion(hri);
710     }
711 
712     // we also need regions assigned out on the dead server
713     List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
714     enabledAndOnDeadRegions.add(enabledRegions.remove(0));
715     enabledAndOnDeadRegions.add(enabledRegions.remove(0));
716     List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
717     disabledAndOnDeadRegions.add(disabledRegions.remove(0));
718     disabledAndOnDeadRegions.add(disabledRegions.remove(0));
719 
720     // set region plan to server to be killed and trigger assign
721     for (HRegionInfo hri : enabledAndOnDeadRegions) {
722       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
723           new RegionPlan(hri, null, deadServerName));
724       master.assignRegion(hri);
725     }
726     for (HRegionInfo hri : disabledAndOnDeadRegions) {
727       master.assignmentManager.regionPlans.put(hri.getEncodedName(),
728           new RegionPlan(hri, null, deadServerName));
729       master.assignRegion(hri);
730     }
731 
732     // wait for no more RIT
733     log("Waiting for assignment to finish");
734     ZKAssign.blockUntilNoRIT(zkw);
735     log("Assignment completed");
736 
737     // Stop the master
738     log("Aborting master");
739     cluster.abortMaster(0);
740     cluster.waitOnMaster(0);
741     log("Master has aborted");
742 
743     /*
744      * Now, let's start mocking up some weird states as described in the method
745      * javadoc.
746      */
747 
748     List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
749     List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
750 
751     log("Beginning to mock scenarios");
752 
753     // Disable the disabledTable in ZK
754     ZKTable zktable = new ZKTable(zkw);
755     zktable.setDisabledTable(Bytes.toString(disabledTable));
756 
757     /*
758      * ZK = CLOSING
759      */
760 
761     // Region of enabled table being closed on dead RS but not finished
762     HRegionInfo region = enabledAndOnDeadRegions.remove(0);
763     regionsThatShouldBeOnline.add(region);
764     ZKAssign.createNodeClosing(zkw, region, deadServerName);
765     LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
766         region + "\n\n");
767 
768     // Region of disabled table being closed on dead RS but not finished
769     region = disabledAndOnDeadRegions.remove(0);
770     regionsThatShouldBeOffline.add(region);
771     ZKAssign.createNodeClosing(zkw, region, deadServerName);
772     LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
773         region + "\n\n");
774 
775     /*
776      * ZK = CLOSED
777      */
778 
779     // Region of enabled on dead server gets closed but not ack'd by master
780     region = enabledAndOnDeadRegions.remove(0);
781     regionsThatShouldBeOnline.add(region);
782     int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
783     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
784     LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
785         region + "\n\n");
786 
787     // Region of disabled on dead server gets closed but not ack'd by master
788     region = disabledAndOnDeadRegions.remove(0);
789     regionsThatShouldBeOffline.add(region);
790     version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
791     ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
792     LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
793         region + "\n\n");
794 
795     /*
796      * ZK = OPENING
797      */
798 
799     // RS was opening a region of enabled table then died
800     region = enabledRegions.remove(0);
801     regionsThatShouldBeOnline.add(region);
802     ZKAssign.createNodeOffline(zkw, region, deadServerName);
803     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
804     LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
805         region + "\n\n");
806 
807     // RS was opening a region of disabled table then died
808     region = disabledRegions.remove(0);
809     regionsThatShouldBeOffline.add(region);
810     ZKAssign.createNodeOffline(zkw, region, deadServerName);
811     ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
812     LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
813         region + "\n\n");
814 
815     /*
816      * ZK = OPENED
817      */
818 
819     // Region of enabled table was opened on dead RS
820     region = enabledRegions.remove(0);
821     regionsThatShouldBeOnline.add(region);
822     ZKAssign.createNodeOffline(zkw, region, deadServerName);
823     hrsDead.openRegion(region);
824     while (true) {
825       RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
826       if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
827         break;
828       }
829       Thread.sleep(100);
830     }
831     LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
832         region + "\n\n");
833 
834     // Region of disabled table was opened on dead RS
835     region = disabledRegions.remove(0);
836     regionsThatShouldBeOffline.add(region);
837     ZKAssign.createNodeOffline(zkw, region, deadServerName);
838     hrsDead.openRegion(region);
839     while (true) {
840       RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
841       if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
842         break;
843       }
844       Thread.sleep(100);
845     }
846     LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
847         region + "\n\n");
848 
849     /*
850      * ZK = NONE
851      */
852 
853     // Region of enabled table was open at steady-state on dead RS
854     region = enabledRegions.remove(0);
855     regionsThatShouldBeOnline.add(region);
856     ZKAssign.createNodeOffline(zkw, region, deadServerName);
857     hrsDead.openRegion(region);
858     while (true) {
859       RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
860       if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
861         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName());
862         break;
863       }
864       Thread.sleep(100);
865     }
866     LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
867         + "\n" + region + "\n\n");
868 
869     // Region of disabled table was open at steady-state on dead RS
870     region = disabledRegions.remove(0);
871     regionsThatShouldBeOffline.add(region);
872     ZKAssign.createNodeOffline(zkw, region, deadServerName);
873     hrsDead.openRegion(region);
874     while (true) {
875       RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
876       if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
877         ZKAssign.deleteOpenedNode(zkw, region.getEncodedName());
878         break;
879       }
880       Thread.sleep(100);
881     }
882     LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
883         + "\n" + region + "\n\n");
884 
885     /*
886      * DONE MOCKING
887      */
888 
889     log("Done mocking data up in ZK");
890 
891     // Kill the RS that had a hard death
892     log("Killing RS " + deadServerName);
893     hrsDead.abort("Killing for unit test");
894     log("RS " + deadServerName + " killed");
895 
896     // Start up a new master.  Wait until regionserver is completely down
897     // before starting new master because of hbase-4511.
898     while (hrsDeadThread.isAlive()) {
899       Threads.sleep(10);
900     }
901     log("Starting up a new master");
902     master = cluster.startMaster().getMaster();
903     log("Waiting for master to be ready");
904     assertTrue(cluster.waitForActiveAndReadyMaster());
905     log("Master is ready");
906 
907     // Let's add some weird states to master in-memory state
908 
909     // After HBASE-3181, we need to have some ZK state if we're PENDING_OPEN
910     // b/c it is impossible for us to get into this state w/o a zk node
911     // this is not true of PENDING_CLOSE
912 
913     // PENDING_OPEN and enabled
914     region = enabledRegions.remove(0);
915     regionsThatShouldBeOnline.add(region);
916     master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
917         new RegionState(region, RegionState.State.PENDING_OPEN, 0, null));
918     ZKAssign.createNodeOffline(zkw, region, master.getServerName());
919     // PENDING_OPEN and disabled
920     region = disabledRegions.remove(0);
921     regionsThatShouldBeOffline.add(region);
922     master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
923         new RegionState(region, RegionState.State.PENDING_OPEN, 0, null));
924     ZKAssign.createNodeOffline(zkw, region, master.getServerName());
925     // This test is bad.  It puts up a PENDING_CLOSE but doesn't say what
926     // server we were PENDING_CLOSE against -- i.e. an entry in
927     // AssignmentManager#regions.  W/o a server, we NPE trying to resend close.
928     // In past, there was wonky logic that had us reassign region if no server
929     // at tail of the unassign.  This was removed.  Commenting out for now.
930     // TODO: Remove completely.
931     /*
932     // PENDING_CLOSE and enabled
933     region = enabledRegions.remove(0);
934     LOG.info("Setting PENDING_CLOSE enabled " + region.getEncodedName());
935     regionsThatShouldBeOnline.add(region);
936     master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
937       new RegionState(region, RegionState.State.PENDING_CLOSE, 0));
938     // PENDING_CLOSE and disabled
939     region = disabledRegions.remove(0);
940     LOG.info("Setting PENDING_CLOSE disabled " + region.getEncodedName());
941     regionsThatShouldBeOffline.add(region);
942     master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
943       new RegionState(region, RegionState.State.PENDING_CLOSE, 0));
944       */
945 
946     // Failover should be completed, now wait for no RIT
947     log("Waiting for no more RIT");
948     ZKAssign.blockUntilNoRIT(zkw);
949     log("No more RIT in ZK");
950     long now = System.currentTimeMillis();
951     final long maxTime = 120000;
952     boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
953     if (!done) {
954       LOG.info("rit=" + master.assignmentManager.getRegionsInTransition());
955     }
956     long elapsed = System.currentTimeMillis() - now;
957     assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
958       elapsed < maxTime);
959     log("No more RIT in RIT map, doing final test verification");
960 
961     // Grab all the regions that are online across RSs
962     Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
963     for (JVMClusterUtil.RegionServerThread rst :
964         cluster.getRegionServerThreads()) {
965       try {
966         onlineRegions.addAll(rst.getRegionServer().getOnlineRegions());
967       } catch (org.apache.hadoop.hbase.regionserver.RegionServerStoppedException e) {
968         LOG.info("Got RegionServerStoppedException", e);
969       }
970     }
971 
972     // Now, everything that should be online should be online
973     for (HRegionInfo hri : regionsThatShouldBeOnline) {
974       assertTrue("region=" + hri.getRegionNameAsString(), onlineRegions.contains(hri));
975     }
976 
977     // Everything that should be offline should not be online
978     for (HRegionInfo hri : regionsThatShouldBeOffline) {
979       assertFalse(onlineRegions.contains(hri));
980     }
981 
982     log("Done with verification, all passed, shutting down cluster");
983 
984     // Done, shutdown the cluster
985     TEST_UTIL.shutdownMiniCluster();
986   }
987 
988   // TODO: Next test to add is with testing permutations of the RIT or the RS
989   //       killed are hosting ROOT and META regions.
990 
991   private void log(String string) {
992     LOG.info("\n\n" + string + " \n\n");
993   }
994 }