1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.master;
21
22 import java.io.FileNotFoundException;
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.Collection;
27 import java.util.Collections;
28 import java.util.Comparator;
29 import java.util.HashMap;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.NavigableMap;
33 import java.util.Random;
34 import java.util.Set;
35 import java.util.TreeMap;
36
37 import org.apache.commons.logging.Log;
38 import org.apache.commons.logging.LogFactory;
39 import org.apache.hadoop.conf.Configuration;
40 import org.apache.hadoop.fs.FileSystem;
41 import org.apache.hadoop.hbase.ClusterStatus;
42 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
43 import org.apache.hadoop.hbase.HRegionInfo;
44 import org.apache.hadoop.hbase.HTableDescriptor;
45 import org.apache.hadoop.hbase.ServerName;
46 import org.apache.hadoop.hbase.TableExistsException;
47 import org.apache.hadoop.hbase.regionserver.HRegion;
48 import org.apache.hadoop.hbase.util.Bytes;
49
50 import com.google.common.base.Joiner;
51 import com.google.common.collect.ArrayListMultimap;
52 import com.google.common.collect.MinMaxPriorityQueue;
53 import com.google.common.collect.Sets;
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71 public class DefaultLoadBalancer implements LoadBalancer {
72 private static final Log LOG = LogFactory.getLog(LoadBalancer.class);
73 private static final Random RANDOM = new Random(System.currentTimeMillis());
74
75 private float slop;
76 private Configuration config;
77 private ClusterStatus status;
78 private MasterServices services;
79
80 public void setClusterStatus(ClusterStatus st) {
81 this.status = st;
82 }
83
84 public void setMasterServices(MasterServices masterServices) {
85 this.services = masterServices;
86 }
87
88 @Override
89 public void setConf(Configuration conf) {
90 this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
91 if (slop < 0) slop = 0;
92 else if (slop > 1) slop = 1;
93 this.config = conf;
94 }
95
96 @Override
97 public Configuration getConf() {
98 return this.config;
99 }
100
101
102
103
104
105
106
107
108
109 private class RegionInfoComparator implements Comparator<HRegionInfo> {
110 @Override
111 public int compare(HRegionInfo l, HRegionInfo r) {
112 long diff = r.getRegionId() - l.getRegionId();
113 if (diff < 0) return -1;
114 if (diff > 0) return 1;
115 return 0;
116 }
117 }
118
119
120 RegionInfoComparator riComparator = new RegionInfoComparator();
121
122 private class RegionPlanComparator implements Comparator<RegionPlan> {
123 @Override
124 public int compare(RegionPlan l, RegionPlan r) {
125 long diff = r.getRegionInfo().getRegionId() - l.getRegionInfo().getRegionId();
126 if (diff < 0) return -1;
127 if (diff > 0) return 1;
128 return 0;
129 }
130 }
131
132 RegionPlanComparator rpComparator = new RegionPlanComparator();
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219 public List<RegionPlan> balanceCluster(
220 Map<ServerName, List<HRegionInfo>> clusterState) {
221 boolean emptyRegionServerPresent = false;
222 long startTime = System.currentTimeMillis();
223
224 int numServers = clusterState.size();
225 if (numServers == 0) {
226 LOG.debug("numServers=0 so skipping load balancing");
227 return null;
228 }
229 NavigableMap<ServerAndLoad, List<HRegionInfo>> serversByLoad =
230 new TreeMap<ServerAndLoad, List<HRegionInfo>>();
231 int numRegions = 0;
232
233 for (Map.Entry<ServerName, List<HRegionInfo>> server: clusterState.entrySet()) {
234 List<HRegionInfo> regions = server.getValue();
235 int sz = regions.size();
236 if (sz == 0) emptyRegionServerPresent = true;
237 numRegions += sz;
238 serversByLoad.put(new ServerAndLoad(server.getKey(), sz), regions);
239 }
240
241 float average = (float)numRegions / numServers;
242
243 int floor = (int) Math.floor(average * (1 - slop));
244 int ceiling = (int) Math.ceil(average * (1 + slop));
245 if (serversByLoad.lastKey().getLoad() <= ceiling &&
246 serversByLoad.firstKey().getLoad() >= floor) {
247
248 LOG.info("Skipping load balancing because balanced cluster; " +
249 "servers=" + numServers + " " +
250 "regions=" + numRegions + " average=" + average + " " +
251 "mostloaded=" + serversByLoad.lastKey().getLoad() +
252 " leastloaded=" + serversByLoad.firstKey().getLoad());
253 return null;
254 }
255 int min = numRegions / numServers;
256 int max = numRegions % numServers == 0 ? min : min + 1;
257
258
259 StringBuilder strBalanceParam = new StringBuilder();
260 strBalanceParam.append("Balance parameter: numRegions=").append(numRegions)
261 .append(", numServers=").append(numServers).append(", max=").append(max)
262 .append(", min=").append(min);
263 LOG.debug(strBalanceParam.toString());
264
265
266
267 MinMaxPriorityQueue<RegionPlan> regionsToMove =
268 MinMaxPriorityQueue.orderedBy(rpComparator).create();
269 List<RegionPlan> regionsToReturn = new ArrayList<RegionPlan>();
270
271
272 int serversOverloaded = 0;
273
274 boolean fetchFromTail = false;
275 Map<ServerName, BalanceInfo> serverBalanceInfo =
276 new TreeMap<ServerName, BalanceInfo>();
277 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server:
278 serversByLoad.descendingMap().entrySet()) {
279 ServerAndLoad sal = server.getKey();
280 int regionCount = sal.getLoad();
281 if (regionCount <= max) {
282 serverBalanceInfo.put(sal.getServerName(), new BalanceInfo(0, 0));
283 break;
284 }
285 serversOverloaded++;
286 List<HRegionInfo> regions = server.getValue();
287 int numToOffload = Math.min(regionCount - max, regions.size());
288
289
290 Collections.sort(regions, riComparator);
291 int numTaken = 0;
292 for (int i = 0; i <= numToOffload; ) {
293 HRegionInfo hri = regions.get(i);
294 if (fetchFromTail) {
295 hri = regions.get(regions.size() - 1 - i);
296 }
297 i++;
298
299 if (hri.isMetaRegion()) continue;
300 regionsToMove.add(new RegionPlan(hri, sal.getServerName(), null));
301 numTaken++;
302 if (numTaken >= numToOffload) break;
303
304 if (emptyRegionServerPresent) {
305 fetchFromTail = !fetchFromTail;
306 }
307 }
308 serverBalanceInfo.put(sal.getServerName(),
309 new BalanceInfo(numToOffload, (-1)*numTaken));
310 }
311 int totalNumMoved = regionsToMove.size();
312
313
314 int neededRegions = 0;
315 fetchFromTail = false;
316
317 Map<ServerName, Integer> underloadedServers = new HashMap<ServerName, Integer>();
318 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server:
319 serversByLoad.entrySet()) {
320 int regionCount = server.getKey().getLoad();
321 if (regionCount >= min) {
322 break;
323 }
324 underloadedServers.put(server.getKey().getServerName(), min - regionCount);
325 }
326
327 int serversUnderloaded = underloadedServers.size();
328 int incr = 1;
329 List<ServerName> sns =
330 Arrays.asList(underloadedServers.keySet().toArray(new ServerName[serversUnderloaded]));
331 Collections.shuffle(sns, RANDOM);
332 while (regionsToMove.size() > 0) {
333 int cnt = 0;
334 int i = incr > 0 ? 0 : underloadedServers.size()-1;
335 for (; i >= 0 && i < underloadedServers.size(); i += incr) {
336 if (regionsToMove.isEmpty()) break;
337 ServerName si = sns.get(i);
338 int numToTake = underloadedServers.get(si);
339 if (numToTake == 0) continue;
340
341 addRegionPlan(regionsToMove, fetchFromTail, si, regionsToReturn);
342 if (emptyRegionServerPresent) {
343 fetchFromTail = !fetchFromTail;
344 }
345
346 underloadedServers.put(si, numToTake-1);
347 cnt++;
348 BalanceInfo bi = serverBalanceInfo.get(si);
349 if (bi == null) {
350 bi = new BalanceInfo(0, 0);
351 serverBalanceInfo.put(si, bi);
352 }
353 bi.setNumRegionsAdded(bi.getNumRegionsAdded()+1);
354 }
355 if (cnt == 0) break;
356
357 incr = -incr;
358 }
359 for (Integer i : underloadedServers.values()) {
360
361 neededRegions += i;
362 }
363
364
365
366 if (neededRegions == 0 && regionsToMove.isEmpty()) {
367 long endTime = System.currentTimeMillis();
368 LOG.info("Calculated a load balance in " + (endTime-startTime) + "ms. " +
369 "Moving " + totalNumMoved + " regions off of " +
370 serversOverloaded + " overloaded servers onto " +
371 serversUnderloaded + " less loaded servers");
372 return regionsToReturn;
373 }
374
375
376
377
378
379 if (neededRegions != 0) {
380
381 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
382 serversByLoad.descendingMap().entrySet()) {
383 BalanceInfo balanceInfo =
384 serverBalanceInfo.get(server.getKey().getServerName());
385 int idx =
386 balanceInfo == null ? 0 : balanceInfo.getNextRegionForUnload();
387 if (idx >= server.getValue().size()) break;
388 HRegionInfo region = server.getValue().get(idx);
389 if (region.isMetaRegion()) continue;
390 regionsToMove.add(new RegionPlan(region, server.getKey().getServerName(), null));
391 totalNumMoved++;
392 if (--neededRegions == 0) {
393
394 break;
395 }
396 }
397 }
398
399
400
401
402
403 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
404 serversByLoad.entrySet()) {
405 int regionCount = server.getKey().getLoad();
406 if (regionCount >= min) break;
407 BalanceInfo balanceInfo = serverBalanceInfo.get(server.getKey().getServerName());
408 if(balanceInfo != null) {
409 regionCount += balanceInfo.getNumRegionsAdded();
410 }
411 if(regionCount >= min) {
412 continue;
413 }
414 int numToTake = min - regionCount;
415 int numTaken = 0;
416 while(numTaken < numToTake && 0 < regionsToMove.size()) {
417 addRegionPlan(regionsToMove, fetchFromTail,
418 server.getKey().getServerName(), regionsToReturn);
419 numTaken++;
420 if (emptyRegionServerPresent) {
421 fetchFromTail = !fetchFromTail;
422 }
423 }
424 }
425
426
427 if (0 < regionsToMove.size()) {
428 for (Map.Entry<ServerAndLoad, List<HRegionInfo>> server :
429 serversByLoad.entrySet()) {
430 int regionCount = server.getKey().getLoad();
431 if(regionCount >= max) {
432 break;
433 }
434 addRegionPlan(regionsToMove, fetchFromTail,
435 server.getKey().getServerName(), regionsToReturn);
436 if (emptyRegionServerPresent) {
437 fetchFromTail = !fetchFromTail;
438 }
439 if (regionsToMove.isEmpty()) {
440 break;
441 }
442 }
443 }
444
445 long endTime = System.currentTimeMillis();
446
447 if (!regionsToMove.isEmpty() || neededRegions != 0) {
448
449 LOG.warn("regionsToMove=" + totalNumMoved +
450 ", numServers=" + numServers + ", serversOverloaded=" + serversOverloaded +
451 ", serversUnderloaded=" + serversUnderloaded);
452 StringBuilder sb = new StringBuilder();
453 for (Map.Entry<ServerName, List<HRegionInfo>> e: clusterState.entrySet()) {
454 if (sb.length() > 0) sb.append(", ");
455 sb.append(e.getKey().toString());
456 sb.append(" ");
457 sb.append(e.getValue().size());
458 }
459 LOG.warn("Input " + sb.toString());
460 }
461
462
463 LOG.info("Done. Calculated a load balance in " + (endTime-startTime) + "ms. " +
464 "Moving " + totalNumMoved + " regions off of " +
465 serversOverloaded + " overloaded servers onto " +
466 serversUnderloaded + " less loaded servers");
467
468 return regionsToReturn;
469 }
470
471
472
473
474 void addRegionPlan(final MinMaxPriorityQueue<RegionPlan> regionsToMove,
475 final boolean fetchFromTail, final ServerName sn, List<RegionPlan> regionsToReturn) {
476 RegionPlan rp = null;
477 if (!fetchFromTail) rp = regionsToMove.remove();
478 else rp = regionsToMove.removeLast();
479 rp.setDestination(sn);
480 regionsToReturn.add(rp);
481 }
482
483
484
485
486
487
488
489
490
491 private static class BalanceInfo {
492
493 private final int nextRegionForUnload;
494 private int numRegionsAdded;
495
496 public BalanceInfo(int nextRegionForUnload, int numRegionsAdded) {
497 this.nextRegionForUnload = nextRegionForUnload;
498 this.numRegionsAdded = numRegionsAdded;
499 }
500
501 public int getNextRegionForUnload() {
502 return nextRegionForUnload;
503 }
504
505 public int getNumRegionsAdded() {
506 return numRegionsAdded;
507 }
508
509 public void setNumRegionsAdded(int numAdded) {
510 this.numRegionsAdded = numAdded;
511 }
512 }
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531 public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(
532 List<HRegionInfo> regions, List<ServerName> servers) {
533 if (regions.isEmpty() || servers.isEmpty()) {
534 return null;
535 }
536 Map<ServerName, List<HRegionInfo>> assignments =
537 new TreeMap<ServerName,List<HRegionInfo>>();
538 int numRegions = regions.size();
539 int numServers = servers.size();
540 int max = (int)Math.ceil((float)numRegions/numServers);
541 int serverIdx = 0;
542 if (numServers > 1) {
543 serverIdx = RANDOM.nextInt(numServers);
544 }
545 int regionIdx = 0;
546 for (int j = 0; j < numServers; j++) {
547 ServerName server = servers.get((j + serverIdx) % numServers);
548 List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
549 for (int i=regionIdx; i<numRegions; i += numServers) {
550 serverRegions.add(regions.get(i % numRegions));
551 }
552 assignments.put(server, serverRegions);
553 regionIdx++;
554 }
555 return assignments;
556 }
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574 public Map<ServerName, List<HRegionInfo>> retainAssignment(
575 Map<HRegionInfo, ServerName> regions, List<ServerName> servers) {
576
577
578
579
580
581
582 ArrayListMultimap<String, ServerName> serversByHostname =
583 ArrayListMultimap.create();
584 for (ServerName server : servers) {
585 serversByHostname.put(server.getHostname(), server);
586 }
587
588
589 Map<ServerName, List<HRegionInfo>> assignments =
590 new TreeMap<ServerName, List<HRegionInfo>>();
591
592 for (ServerName server : servers) {
593 assignments.put(server, new ArrayList<HRegionInfo>());
594 }
595
596
597
598
599 Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
600
601 int numRandomAssignments = 0;
602 int numRetainedAssigments = 0;
603 for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
604 HRegionInfo region = entry.getKey();
605 ServerName oldServerName = entry.getValue();
606 List<ServerName> localServers = new ArrayList<ServerName>();
607 if (oldServerName != null) {
608 localServers = serversByHostname.get(oldServerName.getHostname());
609 }
610 if (localServers.isEmpty()) {
611
612
613 ServerName randomServer = servers.get(RANDOM.nextInt(servers.size()));
614 assignments.get(randomServer).add(region);
615 numRandomAssignments++;
616 if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
617 } else if (localServers.size() == 1) {
618
619 assignments.get(localServers.get(0)).add(region);
620 numRetainedAssigments++;
621 } else {
622
623 int size = localServers.size();
624 ServerName target = localServers.get(RANDOM.nextInt(size));
625 assignments.get(target).add(region);
626 numRetainedAssigments++;
627 }
628 }
629
630 String randomAssignMsg = "";
631 if (numRandomAssignments > 0) {
632 randomAssignMsg = numRandomAssignments + " regions were assigned " +
633 "to random hosts, since the old hosts for these regions are no " +
634 "longer present in the cluster. These hosts were:\n " +
635 Joiner.on("\n ").join(oldHostsNoLongerPresent);
636 }
637
638 LOG.info("Reassigned " + regions.size() + " regions. " +
639 numRetainedAssigments + " retained the pre-restart assignment. " +
640 randomAssignMsg);
641 return assignments;
642 }
643
644
645
646
647
648
649
650
651
652
653
654 @SuppressWarnings("unused")
655 private List<ServerName> getTopBlockLocations(FileSystem fs,
656 HRegionInfo region) {
657 List<ServerName> topServerNames = null;
658 try {
659 HTableDescriptor tableDescriptor = getTableDescriptor(
660 region.getTableName());
661 if (tableDescriptor != null) {
662 HDFSBlocksDistribution blocksDistribution =
663 HRegion.computeHDFSBlocksDistribution(config, tableDescriptor,
664 region.getEncodedName());
665 List<String> topHosts = blocksDistribution.getTopHosts();
666 topServerNames = mapHostNameToServerName(topHosts);
667 }
668 } catch (IOException ioe) {
669 LOG.debug("IOException during HDFSBlocksDistribution computation. for " +
670 "region = " + region.getEncodedName() , ioe);
671 }
672
673 return topServerNames;
674 }
675
676
677
678
679
680
681
682 private HTableDescriptor getTableDescriptor(byte[] tableName)
683 throws IOException {
684 HTableDescriptor tableDescriptor = null;
685 try {
686 if ( this.services != null)
687 {
688 tableDescriptor = this.services.getTableDescriptors().
689 get(Bytes.toString(tableName));
690 }
691 } catch (TableExistsException tee) {
692 LOG.debug("TableExistsException during getTableDescriptors." +
693 " Current table name = " + tableName , tee);
694 } catch (FileNotFoundException fnfe) {
695 LOG.debug("FileNotFoundException during getTableDescriptors." +
696 " Current table name = " + tableName , fnfe);
697 }
698
699 return tableDescriptor;
700 }
701
702
703
704
705
706
707
708 private List<ServerName> mapHostNameToServerName(List<String> hosts) {
709 if ( hosts == null || status == null) {
710 return null;
711 }
712
713 List<ServerName> topServerNames = new ArrayList<ServerName>();
714 Collection<ServerName> regionServers = status.getServers();
715
716
717 HashMap<String, ServerName> hostToServerName =
718 new HashMap<String, ServerName>();
719 for (ServerName sn : regionServers) {
720 hostToServerName.put(sn.getHostname(), sn);
721 }
722
723 for (String host : hosts ) {
724 ServerName sn = hostToServerName.get(host);
725
726
727 if (sn != null) {
728 topServerNames.add(sn);
729 }
730 }
731 return topServerNames;
732 }
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753 public Map<HRegionInfo, ServerName> immediateAssignment(
754 List<HRegionInfo> regions, List<ServerName> servers) {
755 Map<HRegionInfo,ServerName> assignments =
756 new TreeMap<HRegionInfo,ServerName>();
757 for(HRegionInfo region : regions) {
758 assignments.put(region, servers.get(RANDOM.nextInt(servers.size())));
759 }
760 return assignments;
761 }
762
763 public ServerName randomAssignment(List<ServerName> servers) {
764 if (servers == null || servers.isEmpty()) {
765 LOG.warn("Wanted to do random assignment but no servers to assign to");
766 return null;
767 }
768 return servers.get(RANDOM.nextInt(servers.size()));
769 }
770
771 }