1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master;
21
22 import static org.junit.Assert.assertEquals;
23 import static org.junit.Assert.assertFalse;
24 import static org.junit.Assert.assertTrue;
25
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.Set;
29 import java.util.TreeSet;
30
31 import org.apache.commons.logging.Log;
32 import org.apache.commons.logging.LogFactory;
33 import org.apache.hadoop.conf.Configuration;
34 import org.apache.hadoop.fs.FileSystem;
35 import org.apache.hadoop.fs.Path;
36 import org.apache.hadoop.hbase.Abortable;
37 import org.apache.hadoop.hbase.HBaseConfiguration;
38 import org.apache.hadoop.hbase.HBaseTestingUtility;
39 import org.apache.hadoop.hbase.HColumnDescriptor;
40 import org.apache.hadoop.hbase.HConstants;
41 import org.apache.hadoop.hbase.HRegionInfo;
42 import org.apache.hadoop.hbase.HTableDescriptor;
43 import org.apache.hadoop.hbase.MiniHBaseCluster;
44 import org.apache.hadoop.hbase.ServerName;
45 import org.apache.hadoop.hbase.executor.EventHandler.EventType;
46 import org.apache.hadoop.hbase.executor.RegionTransitionData;
47 import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
48 import org.apache.hadoop.hbase.regionserver.HRegion;
49 import org.apache.hadoop.hbase.regionserver.HRegionServer;
50 import org.apache.hadoop.hbase.util.Bytes;
51 import org.apache.hadoop.hbase.util.FSTableDescriptors;
52 import org.apache.hadoop.hbase.util.JVMClusterUtil;
53 import org.apache.hadoop.hbase.util.Threads;
54 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
55 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
56 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
57 import org.apache.hadoop.hbase.zookeeper.ZKTable;
58 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
59 import org.junit.Test;
60
61 public class TestMasterFailover {
/** Logger shared by every test method in this class and by the {@code log} helper. */
private static final Log LOG = LogFactory.getLog(TestMasterFailover.class);
63
64
65
66
67
68
69
70
71
72 @Test (timeout=240000)
73 public void testSimpleMasterFailover() throws Exception {
74
75 final int NUM_MASTERS = 3;
76 final int NUM_RS = 3;
77
78
79 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
80 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
81 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
82
83
84 List<MasterThread> masterThreads = cluster.getMasterThreads();
85
86
87 for (MasterThread mt : masterThreads) {
88 assertTrue(mt.isAlive());
89 }
90
91
92 int numActive = 0;
93 int activeIndex = -1;
94 ServerName activeName = null;
95 for (int i = 0; i < masterThreads.size(); i++) {
96 if (masterThreads.get(i).getMaster().isActiveMaster()) {
97 numActive++;
98 activeIndex = i;
99 activeName = masterThreads.get(i).getMaster().getServerName();
100 }
101 }
102 assertEquals(1, numActive);
103 assertEquals(NUM_MASTERS, masterThreads.size());
104
105
106 LOG.debug("\n\nStopping a backup master\n");
107 int backupIndex = (activeIndex == 0 ? 1 : activeIndex - 1);
108 cluster.stopMaster(backupIndex, false);
109 cluster.waitOnMaster(backupIndex);
110
111
112 for (int i = 0; i < masterThreads.size(); i++) {
113 if (masterThreads.get(i).getMaster().isActiveMaster()) {
114 assertTrue(activeName.equals(
115 masterThreads.get(i).getMaster().getServerName()));
116 activeIndex = i;
117 }
118 }
119 assertEquals(1, numActive);
120 assertEquals(2, masterThreads.size());
121
122
123 LOG.debug("\n\nStopping the active master\n");
124 cluster.stopMaster(activeIndex, false);
125 cluster.waitOnMaster(activeIndex);
126
127
128 assertTrue(cluster.waitForActiveAndReadyMaster());
129
130 LOG.debug("\n\nVerifying backup master is now active\n");
131
132 assertEquals(1, masterThreads.size());
133
134 assertTrue(masterThreads.get(0).getMaster().isActiveMaster());
135
136
137 TEST_UTIL.shutdownMiniCluster();
138 }
139
140 @Test
141 public void testShouldCheckMasterFailOverWhenMETAIsInOpenedState()
142 throws Exception {
143 final int NUM_MASTERS = 1;
144 final int NUM_RS = 2;
145
146 Configuration conf = HBaseConfiguration.create();
147 conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
148 conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 8000);
149
150 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
151 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
152 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
153
154
155 List<MasterThread> masterThreads = cluster.getMasterThreads();
156
157
158 for (MasterThread mt : masterThreads) {
159 assertTrue(mt.isAlive());
160 }
161
162
163 int numActive = 0;
164 ServerName activeName = null;
165 for (int i = 0; i < masterThreads.size(); i++) {
166 if (masterThreads.get(i).getMaster().isActiveMaster()) {
167 numActive++;
168 activeName = masterThreads.get(i).getMaster().getServerName();
169 }
170 }
171 assertEquals(1, numActive);
172 assertEquals(NUM_MASTERS, masterThreads.size());
173
174
175 for (int i = 0; i < masterThreads.size(); i++) {
176 if (masterThreads.get(i).getMaster().isActiveMaster()) {
177 assertTrue(activeName.equals(masterThreads.get(i).getMaster()
178 .getServerName()));
179 }
180 }
181 assertEquals(1, numActive);
182 assertEquals(1, masterThreads.size());
183
184 List<RegionServerThread> regionServerThreads = cluster
185 .getRegionServerThreads();
186 int count = -1;
187 HRegion metaRegion = null;
188 for (RegionServerThread regionServerThread : regionServerThreads) {
189 HRegionServer regionServer = regionServerThread.getRegionServer();
190 metaRegion = regionServer
191 .getOnlineRegion(HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
192 count++;
193 regionServer.abort("");
194 if (null != metaRegion) {
195 break;
196 }
197 }
198 HRegionServer regionServer = cluster.getRegionServer(count);
199
200 cluster.shutdown();
201
202 ZooKeeperWatcher zkw =
203 HBaseTestingUtility.createAndForceNodeToOpenedState(TEST_UTIL,
204 metaRegion, regionServer.getServerName());
205
206 TEST_UTIL.startMiniHBaseCluster(1, 1);
207
208
209 log("Waiting for no more RIT");
210 ZKAssign.blockUntilNoRIT(zkw);
211
212
213 TEST_UTIL.shutdownMiniCluster();
214 }
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
/**
 * Master failover test with regions in transition mocked up in ZooKeeper while
 * no master is running.
 *
 * <p>Sequence: start a one-master, three-region-server cluster with the
 * balancer switched off; create an enabled and a disabled table directly in
 * META; assign a few regions of each to the first region server; abort the
 * master; create OFFLINE, CLOSING, CLOSED, OPENING and OPENED nodes through
 * {@code ZKAssign} and {@code HRegionServer.openRegion}; start a new master;
 * and finally verify that every region recorded as "should be online" is
 * online on some region server and every region recorded as "should be
 * offline" is not.
 *
 * @throws Exception if the mini cluster cannot be started, driven or stopped
 */
@Test (timeout=180000)
public void testMasterFailoverWithMockedRIT() throws Exception {

  final int NUM_MASTERS = 1;
  final int NUM_RS = 3;

  // Create the configuration for this cluster, with short assignment
  // timeout-monitor settings.
  Configuration conf = HBaseConfiguration.create();
  conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
  conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);

  // Start the cluster. The start call stays outside the try block so that a
  // failed start-up is reported as such rather than masked by the shutdown.
  HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
  TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
  try {
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    log("Cluster started");

    // ZooKeeper watcher used to create and inspect nodes directly.
    ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL);

    // Get all the master threads.
    List<MasterThread> masterThreads = cluster.getMasterThreads();
    assertEquals(1, masterThreads.size());

    // The only master must be active and initialized.
    assertTrue(cluster.waitForActiveAndReadyMaster());
    HMaster master = masterThreads.get(0).getMaster();
    assertTrue(master.isActiveMaster());
    assertTrue(master.isInitialized());

    // Switch the load balancer off.
    master.balanceSwitch(false);

    // Create two tables directly in META, each split on these keys.
    byte [] FAMILY = Bytes.toBytes("family");
    byte [][] SPLIT_KEYS = new byte [][] {
      new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
      Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
      Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
      Bytes.toBytes("iii"), Bytes.toBytes("jjj")
    };

    byte [] enabledTable = Bytes.toBytes("enabledTable");
    HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable);
    htdEnabled.addFamily(new HColumnDescriptor(FAMILY));

    FileSystem filesystem = FileSystem.get(conf);
    Path rootdir = filesystem.makeQualified(
        new Path(conf.get(HConstants.HBASE_DIR)));
    // Write the table descriptor to the filesystem.
    FSTableDescriptors.createTableDescriptor(filesystem, rootdir, htdEnabled);

    HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getName(), null, null);
    HRegion.createHRegion(hriEnabled, rootdir, conf, htdEnabled);

    List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
        TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);

    byte [] disabledTable = Bytes.toBytes("disabledTable");
    HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
    htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
    // Write the table descriptor to the filesystem.
    FSTableDescriptors.createTableDescriptor(filesystem, rootdir, htdDisabled);
    HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getName(), null, null);
    HRegion.createHRegion(hriDisabled, rootdir, conf, htdDisabled);
    List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
        TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);

    log("Regions in META have been created");

    // Exactly two regions are expected to be served before any manual
    // assignment (presumably the catalog regions -- TODO confirm).
    assertEquals(2, cluster.countServedRegions());

    // Every mocked transition in this test targets the first region server.
    HRegionServer hrs = cluster.getRegionServer(0);
    ServerName serverName = hrs.getServerName();
    HRegionInfo closingRegion = enabledRegions.remove(0);

    // Regions of the enabled table that get assigned up front.
    List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
    enabledAndAssignedRegions.add(enabledRegions.remove(0));
    enabledAndAssignedRegions.add(enabledRegions.remove(0));
    enabledAndAssignedRegions.add(closingRegion);

    // Regions of the (soon to be) disabled table that get assigned up front.
    List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
    disabledAndAssignedRegions.add(disabledRegions.remove(0));
    disabledAndAssignedRegions.add(disabledRegions.remove(0));

    // Register a plan targeting the chosen server for each region, then assign.
    for (HRegionInfo hri : enabledAndAssignedRegions) {
      master.assignmentManager.regionPlans.put(hri.getEncodedName(),
          new RegionPlan(hri, null, serverName));
      master.assignRegion(hri);
    }
    for (HRegionInfo hri : disabledAndAssignedRegions) {
      master.assignmentManager.regionPlans.put(hri.getEncodedName(),
          new RegionPlan(hri, null, serverName));
      master.assignRegion(hri);
    }

    // Wait for no more regions in transition.
    log("Waiting for assignment to finish");
    ZKAssign.blockUntilNoRIT(zkw);
    log("Assignment completed");

    // Stop the master.
    log("Aborting master");
    cluster.abortMaster(0);
    cluster.waitOnMaster(0);
    log("Master has aborted");

    // With no master running, mock up the regions-in-transition scenarios.
    // These two lists record the state each region is expected to end up in.
    List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
    List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();

    log("Beginning to mock scenarios");

    // Mark the second table as disabled in ZooKeeper.
    ZKTable zktable = new ZKTable(zkw);
    zktable.setDisabledTable(Bytes.toString(disabledTable));

    // Scenario: OFFLINE node for a region of the enabled table.
    HRegionInfo region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);

    // Scenario: CLOSING node for a region of the enabled table that was
    // assigned earlier.
    regionsThatShouldBeOnline.add(closingRegion);
    ZKAssign.createNodeClosing(zkw, closingRegion, serverName);

    // Scenario: CLOSED node for a region of the enabled table.
    region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    int version = ZKAssign.createNodeClosing(zkw, region, serverName);
    ZKAssign.transitionNodeClosed(zkw, region, serverName, version);

    // Scenario: CLOSED node for a region of the disabled table.
    region = disabledRegions.remove(0);
    regionsThatShouldBeOffline.add(region);
    version = ZKAssign.createNodeClosing(zkw, region, serverName);
    ZKAssign.transitionNodeClosed(zkw, region, serverName, version);

    // Scenario: OPENING node for a region of the enabled table.
    region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);
    ZKAssign.transitionNodeOpening(zkw, region, serverName);

    // Scenario: OPENED node for a region of the enabled table. The region
    // server performs the open; poll until the node reports OPENED. The loop
    // has no bound of its own and relies on the @Test timeout.
    region = enabledRegions.remove(0);
    regionsThatShouldBeOnline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);
    hrs.openRegion(region);
    while (true) {
      RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
      if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
        break;
      }
      Thread.sleep(100);
    }

    // Scenario: OPENED node for a region of the disabled table.
    region = disabledRegions.remove(0);
    regionsThatShouldBeOffline.add(region);
    ZKAssign.createNodeOffline(zkw, region, serverName);
    hrs.openRegion(region);
    while (true) {
      RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
      if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
        break;
      }
      Thread.sleep(100);
    }

    log("Done mocking data up in ZK");

    // Start up a new master. Its readiness is asserted, as it is everywhere
    // else in this class, so a failed start is reported here rather than as a
    // later, less specific failure.
    log("Starting up a new master");
    master = cluster.startMaster().getMaster();
    log("Waiting for master to be ready");
    assertTrue(cluster.waitForActiveAndReadyMaster());
    log("Master is ready");

    // Failover should complete; wait for no more regions in transition in ZK.
    log("Waiting for no more RIT");
    ZKAssign.blockUntilNoRIT(zkw);
    log("No more RIT in ZK, now doing final test verification");

    // Collect every region that is online on any region server.
    Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
    for (JVMClusterUtil.RegionServerThread rst :
        cluster.getRegionServerThreads()) {
      onlineRegions.addAll(rst.getRegionServer().getOnlineRegions());
    }

    // Regions that should be online are online.
    for (HRegionInfo hri : regionsThatShouldBeOnline) {
      assertTrue("region=" + hri.getRegionNameAsString(),
          onlineRegions.contains(hri));
    }

    // Regions that should be offline are not online.
    for (HRegionInfo hri : regionsThatShouldBeOffline) {
      assertFalse("region=" + hri.getRegionNameAsString(),
          onlineRegions.contains(hri));
    }

    log("Done with verification, all passed, shutting down cluster");
  } finally {
    // Always tear the cluster down, even when an assertion above failed.
    TEST_UTIL.shutdownMiniCluster();
  }
}
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594 @Test (timeout=180000)
595 public void testMasterFailoverWithMockedRITOnDeadRS() throws Exception {
596
597 final int NUM_MASTERS = 1;
598 final int NUM_RS = 2;
599
600
601 Configuration conf = HBaseConfiguration.create();
602
603 conf.setInt("hbase.master.assignment.timeoutmonitor.period", 2000);
604 conf.setInt("hbase.master.assignment.timeoutmonitor.timeout", 4000);
605
606
607 HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf);
608 TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
609 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
610 log("Cluster started");
611
612
613 ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
614 "unittest", new Abortable() {
615
616 @Override
617 public void abort(String why, Throwable e) {
618 LOG.error("Fatal ZK Error: " + why, e);
619 org.junit.Assert.assertFalse("Fatal ZK error", true);
620 }
621
622 @Override
623 public boolean isAborted() {
624 return false;
625 }
626
627 });
628
629
630 List<MasterThread> masterThreads = cluster.getMasterThreads();
631 assertEquals(1, masterThreads.size());
632
633
634 assertTrue(cluster.waitForActiveAndReadyMaster());
635 HMaster master = masterThreads.get(0).getMaster();
636 assertTrue(master.isActiveMaster());
637 assertTrue(master.isInitialized());
638
639
640 master.balanceSwitch(false);
641
642
643 byte [] FAMILY = Bytes.toBytes("family");
644 byte [][] SPLIT_KEYS = new byte [][] {
645 new byte[0], Bytes.toBytes("aaa"), Bytes.toBytes("bbb"),
646 Bytes.toBytes("ccc"), Bytes.toBytes("ddd"), Bytes.toBytes("eee"),
647 Bytes.toBytes("fff"), Bytes.toBytes("ggg"), Bytes.toBytes("hhh"),
648 Bytes.toBytes("iii"), Bytes.toBytes("jjj")
649 };
650
651 byte [] enabledTable = Bytes.toBytes("enabledTable");
652 HTableDescriptor htdEnabled = new HTableDescriptor(enabledTable);
653 htdEnabled.addFamily(new HColumnDescriptor(FAMILY));
654 FileSystem filesystem = FileSystem.get(conf);
655 Path rootdir = filesystem.makeQualified(
656 new Path(conf.get(HConstants.HBASE_DIR)));
657
658 FSTableDescriptors.createTableDescriptor(filesystem, rootdir, htdEnabled);
659 HRegionInfo hriEnabled = new HRegionInfo(htdEnabled.getName(),
660 null, null);
661 HRegion.createHRegion(hriEnabled, rootdir, conf, htdEnabled);
662
663 List<HRegionInfo> enabledRegions = TEST_UTIL.createMultiRegionsInMeta(
664 TEST_UTIL.getConfiguration(), htdEnabled, SPLIT_KEYS);
665
666 byte [] disabledTable = Bytes.toBytes("disabledTable");
667 HTableDescriptor htdDisabled = new HTableDescriptor(disabledTable);
668 htdDisabled.addFamily(new HColumnDescriptor(FAMILY));
669
670 FSTableDescriptors.createTableDescriptor(filesystem, rootdir, htdDisabled);
671 HRegionInfo hriDisabled = new HRegionInfo(htdDisabled.getName(), null, null);
672 HRegion.createHRegion(hriDisabled, rootdir, conf, htdDisabled);
673
674 List<HRegionInfo> disabledRegions = TEST_UTIL.createMultiRegionsInMeta(
675 TEST_UTIL.getConfiguration(), htdDisabled, SPLIT_KEYS);
676
677 log("Regions in META have been created");
678
679
680 assertEquals(2, cluster.countServedRegions());
681
682
683 List<RegionServerThread> regionservers =
684 cluster.getRegionServerThreads();
685 HRegionServer hrs = regionservers.get(0).getRegionServer();
686
687
688 RegionServerThread hrsDeadThread = regionservers.get(1);
689 HRegionServer hrsDead = hrsDeadThread.getRegionServer();
690 ServerName deadServerName = hrsDead.getServerName();
691
692
693 List<HRegionInfo> enabledAndAssignedRegions = new ArrayList<HRegionInfo>();
694 enabledAndAssignedRegions.add(enabledRegions.remove(0));
695 enabledAndAssignedRegions.add(enabledRegions.remove(0));
696 List<HRegionInfo> disabledAndAssignedRegions = new ArrayList<HRegionInfo>();
697 disabledAndAssignedRegions.add(disabledRegions.remove(0));
698 disabledAndAssignedRegions.add(disabledRegions.remove(0));
699
700
701 for (HRegionInfo hri : enabledAndAssignedRegions) {
702 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
703 new RegionPlan(hri, null, hrs.getServerName()));
704 master.assignRegion(hri);
705 }
706 for (HRegionInfo hri : disabledAndAssignedRegions) {
707 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
708 new RegionPlan(hri, null, hrs.getServerName()));
709 master.assignRegion(hri);
710 }
711
712
713 List<HRegionInfo> enabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
714 enabledAndOnDeadRegions.add(enabledRegions.remove(0));
715 enabledAndOnDeadRegions.add(enabledRegions.remove(0));
716 List<HRegionInfo> disabledAndOnDeadRegions = new ArrayList<HRegionInfo>();
717 disabledAndOnDeadRegions.add(disabledRegions.remove(0));
718 disabledAndOnDeadRegions.add(disabledRegions.remove(0));
719
720
721 for (HRegionInfo hri : enabledAndOnDeadRegions) {
722 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
723 new RegionPlan(hri, null, deadServerName));
724 master.assignRegion(hri);
725 }
726 for (HRegionInfo hri : disabledAndOnDeadRegions) {
727 master.assignmentManager.regionPlans.put(hri.getEncodedName(),
728 new RegionPlan(hri, null, deadServerName));
729 master.assignRegion(hri);
730 }
731
732
733 log("Waiting for assignment to finish");
734 ZKAssign.blockUntilNoRIT(zkw);
735 log("Assignment completed");
736
737
738 log("Aborting master");
739 cluster.abortMaster(0);
740 cluster.waitOnMaster(0);
741 log("Master has aborted");
742
743
744
745
746
747
748 List<HRegionInfo> regionsThatShouldBeOnline = new ArrayList<HRegionInfo>();
749 List<HRegionInfo> regionsThatShouldBeOffline = new ArrayList<HRegionInfo>();
750
751 log("Beginning to mock scenarios");
752
753
754 ZKTable zktable = new ZKTable(zkw);
755 zktable.setDisabledTable(Bytes.toString(disabledTable));
756
757
758
759
760
761
762 HRegionInfo region = enabledAndOnDeadRegions.remove(0);
763 regionsThatShouldBeOnline.add(region);
764 ZKAssign.createNodeClosing(zkw, region, deadServerName);
765 LOG.debug("\n\nRegion of enabled table was CLOSING on dead RS\n" +
766 region + "\n\n");
767
768
769 region = disabledAndOnDeadRegions.remove(0);
770 regionsThatShouldBeOffline.add(region);
771 ZKAssign.createNodeClosing(zkw, region, deadServerName);
772 LOG.debug("\n\nRegion of disabled table was CLOSING on dead RS\n" +
773 region + "\n\n");
774
775
776
777
778
779
780 region = enabledAndOnDeadRegions.remove(0);
781 regionsThatShouldBeOnline.add(region);
782 int version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
783 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
784 LOG.debug("\n\nRegion of enabled table was CLOSED on dead RS\n" +
785 region + "\n\n");
786
787
788 region = disabledAndOnDeadRegions.remove(0);
789 regionsThatShouldBeOffline.add(region);
790 version = ZKAssign.createNodeClosing(zkw, region, deadServerName);
791 ZKAssign.transitionNodeClosed(zkw, region, deadServerName, version);
792 LOG.debug("\n\nRegion of disabled table was CLOSED on dead RS\n" +
793 region + "\n\n");
794
795
796
797
798
799
800 region = enabledRegions.remove(0);
801 regionsThatShouldBeOnline.add(region);
802 ZKAssign.createNodeOffline(zkw, region, deadServerName);
803 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
804 LOG.debug("\n\nRegion of enabled table was OPENING on dead RS\n" +
805 region + "\n\n");
806
807
808 region = disabledRegions.remove(0);
809 regionsThatShouldBeOffline.add(region);
810 ZKAssign.createNodeOffline(zkw, region, deadServerName);
811 ZKAssign.transitionNodeOpening(zkw, region, deadServerName);
812 LOG.debug("\n\nRegion of disabled table was OPENING on dead RS\n" +
813 region + "\n\n");
814
815
816
817
818
819
820 region = enabledRegions.remove(0);
821 regionsThatShouldBeOnline.add(region);
822 ZKAssign.createNodeOffline(zkw, region, deadServerName);
823 hrsDead.openRegion(region);
824 while (true) {
825 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
826 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
827 break;
828 }
829 Thread.sleep(100);
830 }
831 LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" +
832 region + "\n\n");
833
834
835 region = disabledRegions.remove(0);
836 regionsThatShouldBeOffline.add(region);
837 ZKAssign.createNodeOffline(zkw, region, deadServerName);
838 hrsDead.openRegion(region);
839 while (true) {
840 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
841 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
842 break;
843 }
844 Thread.sleep(100);
845 }
846 LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" +
847 region + "\n\n");
848
849
850
851
852
853
854 region = enabledRegions.remove(0);
855 regionsThatShouldBeOnline.add(region);
856 ZKAssign.createNodeOffline(zkw, region, deadServerName);
857 hrsDead.openRegion(region);
858 while (true) {
859 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
860 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
861 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName());
862 break;
863 }
864 Thread.sleep(100);
865 }
866 LOG.debug("\n\nRegion of enabled table was open at steady-state on dead RS"
867 + "\n" + region + "\n\n");
868
869
870 region = disabledRegions.remove(0);
871 regionsThatShouldBeOffline.add(region);
872 ZKAssign.createNodeOffline(zkw, region, deadServerName);
873 hrsDead.openRegion(region);
874 while (true) {
875 RegionTransitionData rtd = ZKAssign.getData(zkw, region.getEncodedName());
876 if (rtd != null && rtd.getEventType() == EventType.RS_ZK_REGION_OPENED) {
877 ZKAssign.deleteOpenedNode(zkw, region.getEncodedName());
878 break;
879 }
880 Thread.sleep(100);
881 }
882 LOG.debug("\n\nRegion of disabled table was open at steady-state on dead RS"
883 + "\n" + region + "\n\n");
884
885
886
887
888
889 log("Done mocking data up in ZK");
890
891
892 log("Killing RS " + deadServerName);
893 hrsDead.abort("Killing for unit test");
894 log("RS " + deadServerName + " killed");
895
896
897
898 while (hrsDeadThread.isAlive()) {
899 Threads.sleep(10);
900 }
901 log("Starting up a new master");
902 master = cluster.startMaster().getMaster();
903 log("Waiting for master to be ready");
904 assertTrue(cluster.waitForActiveAndReadyMaster());
905 log("Master is ready");
906
907
908
909
910
911
912
913
914 region = enabledRegions.remove(0);
915 regionsThatShouldBeOnline.add(region);
916 master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
917 new RegionState(region, RegionState.State.PENDING_OPEN, 0, null));
918 ZKAssign.createNodeOffline(zkw, region, master.getServerName());
919
920 region = disabledRegions.remove(0);
921 regionsThatShouldBeOffline.add(region);
922 master.assignmentManager.regionsInTransition.put(region.getEncodedName(),
923 new RegionState(region, RegionState.State.PENDING_OPEN, 0, null));
924 ZKAssign.createNodeOffline(zkw, region, master.getServerName());
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947 log("Waiting for no more RIT");
948 ZKAssign.blockUntilNoRIT(zkw);
949 log("No more RIT in ZK");
950 long now = System.currentTimeMillis();
951 final long maxTime = 120000;
952 boolean done = master.assignmentManager.waitUntilNoRegionsInTransition(maxTime);
953 if (!done) {
954 LOG.info("rit=" + master.assignmentManager.getRegionsInTransition());
955 }
956 long elapsed = System.currentTimeMillis() - now;
957 assertTrue("Elapsed=" + elapsed + ", maxTime=" + maxTime + ", done=" + done,
958 elapsed < maxTime);
959 log("No more RIT in RIT map, doing final test verification");
960
961
962 Set<HRegionInfo> onlineRegions = new TreeSet<HRegionInfo>();
963 for (JVMClusterUtil.RegionServerThread rst :
964 cluster.getRegionServerThreads()) {
965 try {
966 onlineRegions.addAll(rst.getRegionServer().getOnlineRegions());
967 } catch (org.apache.hadoop.hbase.regionserver.RegionServerStoppedException e) {
968 LOG.info("Got RegionServerStoppedException", e);
969 }
970 }
971
972
973 for (HRegionInfo hri : regionsThatShouldBeOnline) {
974 assertTrue("region=" + hri.getRegionNameAsString(), onlineRegions.contains(hri));
975 }
976
977
978 for (HRegionInfo hri : regionsThatShouldBeOffline) {
979 assertFalse(onlineRegions.contains(hri));
980 }
981
982 log("Done with verification, all passed, shutting down cluster");
983
984
985 TEST_UTIL.shutdownMiniCluster();
986 }
987
988
989
990
991 private void log(String string) {
992 LOG.info("\n\n" + string + " \n\n");
993 }
994 }