001 /** 002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved. 003 * Licensed under the Apache License, Version 2.0 (the "License"); 004 * you may not use this file except in compliance with the License. 005 * You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software 010 * distributed under the License is distributed on an "AS IS" BASIS, 011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 * See the License for the specific language governing permissions and 013 * limitations under the License. See accompanying LICENSE file. 014 */ 015 package org.apache.oozie.util; 016 017 import org.apache.hadoop.conf.Configuration; 018 import org.apache.oozie.service.ConfigurationService; 019 020 import java.util.ArrayList; 021 import java.util.Collection; 022 import java.util.Collections; 023 import java.util.HashMap; 024 import java.util.LinkedHashMap; 025 import java.util.LinkedHashSet; 026 import java.util.List; 027 import java.util.Map; 028 import java.util.Set; 029 import java.util.concurrent.ConcurrentHashMap; 030 import java.util.concurrent.ScheduledExecutorService; 031 import java.util.concurrent.TimeUnit; 032 import java.util.concurrent.atomic.AtomicLong; 033 import java.util.concurrent.locks.Lock; 034 import java.util.concurrent.locks.ReentrantLock; 035 036 /** 037 * Instrumentation framework that supports Timers, Counters, Variables and Sampler instrumentation elements. <p/> All 038 * instrumentation elements have a group and a name. 039 */ 040 public class Instrumentation { 041 private ScheduledExecutorService scheduler; 042 private Lock counterLock; 043 private Lock timerLock; 044 private Lock variableLock; 045 private Lock samplerLock; 046 private Configuration configuration; 047 private Map<String, Map<String, Map<String, Object>>> all; 048 private Map<String, Map<String, Element<Long>>> counters; 049 private Map<String, Map<String, Element<Timer>>> timers; 050 private Map<String, Map<String, Element<Variable>>> variables; 051 private Map<String, Map<String, Element<Double>>> samplers; 052 053 /** 054 * Instrumentation constructor. 055 */ 056 @SuppressWarnings("unchecked") 057 public Instrumentation() { 058 counterLock = new ReentrantLock(); 059 timerLock = new ReentrantLock(); 060 variableLock = new ReentrantLock(); 061 samplerLock = new ReentrantLock(); 062 all = new LinkedHashMap<String, Map<String, Map<String, Object>>>(); 063 counters = new ConcurrentHashMap<String, Map<String, Element<Long>>>(); 064 timers = new ConcurrentHashMap<String, Map<String, Element<Timer>>>(); 065 variables = new ConcurrentHashMap<String, Map<String, Element<Variable>>>(); 066 samplers = new ConcurrentHashMap<String, Map<String, Element<Double>>>(); 067 all.put("variables", (Map<String, Map<String, Object>>) (Object) variables); 068 all.put("samplers", (Map<String, Map<String, Object>>) (Object) samplers); 069 all.put("counters", (Map<String, Map<String, Object>>) (Object) counters); 070 all.put("timers", (Map<String, Map<String, Object>>) (Object) timers); 071 } 072 073 /** 074 * Set the scheduler instance to handle the samplers. 075 * 076 * @param scheduler scheduler instance. 077 */ 078 public void setScheduler(ScheduledExecutorService scheduler) { 079 this.scheduler = scheduler; 080 } 081 082 /** 083 * Cron is a stopwatch that can be started/stopped several times. <p/> This class is not thread safe, it does not 084 * need to be. <p/> It keeps track of the total time (first start to last stop) and the running time (total time 085 * minus the stopped intervals). <p/> Once a Cron is complete it must be added to the corresponding group/name in a 086 * Instrumentation instance. 087 */ 088 public static class Cron { 089 private long start; 090 private long end; 091 private long lapStart; 092 private long own; 093 private long total; 094 private boolean running; 095 096 /** 097 * Creates new Cron, stopped, in zero. 098 */ 099 public Cron() { 100 running = false; 101 } 102 103 /** 104 * Start the cron. It cannot be already started. 105 */ 106 public void start() { 107 if (!running) { 108 if (lapStart == 0) { 109 lapStart = System.currentTimeMillis(); 110 if (start == 0) { 111 start = lapStart; 112 end = start; 113 } 114 } 115 running = true; 116 } 117 } 118 119 /** 120 * Stops the cron. It cannot be already stopped. 121 */ 122 public void stop() { 123 if (running) { 124 end = System.currentTimeMillis(); 125 if (start == 0) { 126 start = end; 127 } 128 total = end - start; 129 if (lapStart > 0) { 130 own += end - lapStart; 131 lapStart = 0; 132 } 133 running = false; 134 } 135 } 136 137 /** 138 * Return the start time of the cron. It must be stopped. 139 * 140 * @return the start time of the cron. 141 */ 142 public long getStart() { 143 if (running) { 144 throw new IllegalStateException("Timer running"); 145 } 146 return start; 147 } 148 149 /** 150 * Return the end time of the cron. It must be stopped. 151 * 152 * @return the end time of the cron. 153 */ 154 public long getEnd() { 155 if (running) { 156 throw new IllegalStateException("Timer running"); 157 } 158 return end; 159 } 160 161 /** 162 * Return the total time of the cron. It must be stopped. 163 * 164 * @return the total time of the cron. 165 */ 166 public long getTotal() { 167 if (running) { 168 throw new IllegalStateException("Timer running"); 169 } 170 return total; 171 } 172 173 /** 174 * Return the own time of the cron. It must be stopped. 175 * 176 * @return the own time of the cron. 177 */ 178 public long getOwn() { 179 if (running) { 180 throw new IllegalStateException("Timer running"); 181 } 182 return own; 183 } 184 185 } 186 187 /** 188 * Gives access to a snapshot of an Instrumentation element (Counter, Timer). <p/> Instrumentation element snapshots 189 * are returned by the {@link Instrumentation#getCounters()} and {@link Instrumentation#getTimers()} ()} methods. 190 */ 191 public interface Element<T> { 192 193 /** 194 * Return the snapshot value of the Intrumentation element. 195 * 196 * @return the snapshot value of the Intrumentation element. 197 */ 198 T getValue(); 199 } 200 201 /** 202 * Counter Instrumentation element. 203 */ 204 private static class Counter extends AtomicLong implements Element<Long> { 205 206 /** 207 * Return the counter snapshot. 208 * 209 * @return the counter snapshot. 210 */ 211 public Long getValue() { 212 return get(); 213 } 214 215 /** 216 * Return the String representation of the counter value. 217 * 218 * @return the String representation of the counter value. 219 */ 220 public String toString() { 221 return Long.toString(get()); 222 } 223 224 } 225 226 /** 227 * Timer Instrumentation element. 228 */ 229 public static class Timer implements Element<Timer> { 230 Lock lock = new ReentrantLock(); 231 private long ownTime; 232 private long totalTime; 233 private long ticks; 234 private long ownSquareTime; 235 private long totalSquareTime; 236 private long ownMinTime; 237 private long ownMaxTime; 238 private long totalMinTime; 239 private long totalMaxTime; 240 241 /** 242 * Timer constructor. <p/> It is project private for test purposes. 243 */ 244 Timer() { 245 } 246 247 /** 248 * Return the String representation of the timer value. 249 * 250 * @return the String representation of the timer value. 251 */ 252 public String toString() { 253 return XLog.format("ticks[{0}] totalAvg[{1}] ownAvg[{2}]", ticks, getTotalAvg(), getOwnAvg()); 254 } 255 256 /** 257 * Return the timer snapshot. 258 * 259 * @return the timer snapshot. 260 */ 261 public Timer getValue() { 262 try { 263 lock.lock(); 264 Timer timer = new Timer(); 265 timer.ownTime = ownTime; 266 timer.totalTime = totalTime; 267 timer.ticks = ticks; 268 timer.ownSquareTime = ownSquareTime; 269 timer.totalSquareTime = totalSquareTime; 270 timer.ownMinTime = ownMinTime; 271 timer.ownMaxTime = ownMaxTime; 272 timer.totalMinTime = totalMinTime; 273 timer.totalMaxTime = totalMaxTime; 274 return timer; 275 } 276 finally { 277 lock.unlock(); 278 } 279 } 280 281 /** 282 * Add a cron to a timer. <p/> It is project private for test purposes. 283 * 284 * @param cron Cron to add. 285 */ 286 void addCron(Cron cron) { 287 try { 288 lock.lock(); 289 long own = cron.getOwn(); 290 long total = cron.getTotal(); 291 ownTime += own; 292 totalTime += total; 293 ticks++; 294 ownSquareTime += own * own; 295 totalSquareTime += total * total; 296 if (ticks == 1) { 297 ownMinTime = own; 298 ownMaxTime = own; 299 totalMinTime = total; 300 totalMaxTime = total; 301 } 302 else { 303 ownMinTime = Math.min(ownMinTime, own); 304 ownMaxTime = Math.max(ownMaxTime, own); 305 totalMinTime = Math.min(totalMinTime, total); 306 totalMaxTime = Math.max(totalMaxTime, total); 307 } 308 } 309 finally { 310 lock.unlock(); 311 } 312 } 313 314 /** 315 * Return the own accumulated computing time by the timer. 316 * 317 * @return own accumulated computing time by the timer. 318 */ 319 public long getOwn() { 320 return ownTime; 321 } 322 323 /** 324 * Return the total accumulated computing time by the timer. 325 * 326 * @return total accumulated computing time by the timer. 327 */ 328 public long getTotal() { 329 return totalTime; 330 } 331 332 /** 333 * Return the number of times a cron was added to the timer. 334 * 335 * @return the number of times a cron was added to the timer. 336 */ 337 public long getTicks() { 338 return ticks; 339 } 340 341 /** 342 * Return the sum of the square own times. <p/> It can be used to calculate the standard deviation. 343 * 344 * @return the sum of the square own timer. 345 */ 346 public long getOwnSquareSum() { 347 return ownSquareTime; 348 } 349 350 /** 351 * Return the sum of the square total times. <p/> It can be used to calculate the standard deviation. 352 * 353 * @return the sum of the square own timer. 354 */ 355 public long getTotalSquareSum() { 356 return totalSquareTime; 357 } 358 359 /** 360 * Returns the own minimum time. 361 * 362 * @return the own minimum time. 363 */ 364 public long getOwnMin() { 365 return ownMinTime; 366 } 367 368 /** 369 * Returns the own maximum time. 370 * 371 * @return the own maximum time. 372 */ 373 public long getOwnMax() { 374 return ownMaxTime; 375 } 376 377 /** 378 * Returns the total minimum time. 379 * 380 * @return the total minimum time. 381 */ 382 public long getTotalMin() { 383 return totalMinTime; 384 } 385 386 /** 387 * Returns the total maximum time. 388 * 389 * @return the total maximum time. 390 */ 391 public long getTotalMax() { 392 return totalMaxTime; 393 } 394 395 /** 396 * Returns the own average time. 397 * 398 * @return the own average time. 399 */ 400 public long getOwnAvg() { 401 return (ticks != 0) ? ownTime / ticks : 0; 402 } 403 404 /** 405 * Returns the total average time. 406 * 407 * @return the total average time. 408 */ 409 public long getTotalAvg() { 410 return (ticks != 0) ? totalTime / ticks : 0; 411 } 412 413 /** 414 * Returns the total time standard deviation. 415 * 416 * @return the total time standard deviation. 417 */ 418 public double getTotalStdDev() { 419 return evalStdDev(ticks, totalTime, totalSquareTime); 420 } 421 422 /** 423 * Returns the own time standard deviation. 424 * 425 * @return the own time standard deviation. 426 */ 427 public double getOwnStdDev() { 428 return evalStdDev(ticks, ownTime, ownSquareTime); 429 } 430 431 private double evalStdDev(long n, long sn, long ssn) { 432 return (n < 2) ? -1 : Math.sqrt((n * ssn - sn * sn) / (n * (n - 1))); 433 } 434 435 } 436 437 /** 438 * Add a cron to an instrumentation timer. The timer is created if it does not exists. <p/> This method is thread 439 * safe. 440 * 441 * @param group timer group. 442 * @param name timer name. 443 * @param cron cron to add to the timer. 444 */ 445 public void addCron(String group, String name, Cron cron) { 446 Map<String, Element<Timer>> map = timers.get(group); 447 if (map == null) { 448 try { 449 timerLock.lock(); 450 map = timers.get(group); 451 if (map == null) { 452 map = new HashMap<String, Element<Timer>>(); 453 timers.put(group, map); 454 } 455 } 456 finally { 457 timerLock.unlock(); 458 } 459 } 460 Timer timer = (Timer) map.get(name); 461 if (timer == null) { 462 try { 463 timerLock.lock(); 464 timer = (Timer) map.get(name); 465 if (timer == null) { 466 timer = new Timer(); 467 map.put(name, timer); 468 } 469 } 470 finally { 471 timerLock.unlock(); 472 } 473 } 474 timer.addCron(cron); 475 } 476 477 /** 478 * Increment an instrumentation counter. The counter is created if it does not exists. <p/> This method is thread 479 * safe. 480 * 481 * @param group counter group. 482 * @param name counter name. 483 * @param count increment to add to the counter. 484 */ 485 public void incr(String group, String name, long count) { 486 Map<String, Element<Long>> map = counters.get(group); 487 if (map == null) { 488 try { 489 counterLock.lock(); 490 map = counters.get(group); 491 if (map == null) { 492 map = new HashMap<String, Element<Long>>(); 493 counters.put(group, map); 494 } 495 } 496 finally { 497 counterLock.unlock(); 498 } 499 } 500 Counter counter = (Counter) map.get(name); 501 if (counter == null) { 502 try { 503 counterLock.lock(); 504 counter = (Counter) map.get(name); 505 if (counter == null) { 506 counter = new Counter(); 507 map.put(name, counter); 508 } 509 } 510 finally { 511 counterLock.unlock(); 512 } 513 } 514 counter.addAndGet(count); 515 } 516 517 /** 518 * Interface for instrumentation variables. <p/> For example a the database service could expose the number of 519 * currently active connections. 520 */ 521 public interface Variable<T> extends Element<T> { 522 } 523 524 /** 525 * Add an instrumentation variable. The variable must not exist. <p/> This method is thread safe. 526 * 527 * @param group counter group. 528 * @param name counter name. 529 * @param variable variable to add. 530 */ 531 @SuppressWarnings("unchecked") 532 public void addVariable(String group, String name, Variable variable) { 533 Map<String, Element<Variable>> map = variables.get(group); 534 if (map == null) { 535 try { 536 variableLock.lock(); 537 map = variables.get(group); 538 if (map == null) { 539 map = new HashMap<String, Element<Variable>>(); 540 variables.put(group, map); 541 } 542 } 543 finally { 544 variableLock.unlock(); 545 } 546 } 547 if (map.containsKey(name)) { 548 throw new RuntimeException(XLog.format("Variable group=[{0}] name=[{1}] already defined", group, name)); 549 } 550 map.put(name, variable); 551 } 552 553 /** 554 * Set the system configuration. 555 * 556 * @param configuration system configuration. 557 */ 558 public void setConfiguration(Configuration configuration) { 559 this.configuration = configuration; 560 } 561 562 /** 563 * Return the JVM system properties. 564 * 565 * @return JVM system properties. 566 */ 567 @SuppressWarnings("unchecked") 568 public Map<String, String> getJavaSystemProperties() { 569 return (Map<String, String>) (Object) System.getProperties(); 570 } 571 572 /** 573 * Return the OS environment used to start Oozie. 574 * 575 * @return the OS environment used to start Oozie. 576 */ 577 public Map<String, String> getOSEnv() { 578 return System.getenv(); 579 } 580 581 /** 582 * Return the current system configuration as a Map<String,String>. 583 * 584 * @return the current system configuration as a Map<String,String>. 585 */ 586 public Map<String, String> getConfiguration() { 587 final Configuration maskedConf = ConfigurationService.maskPasswords(configuration); 588 589 return new Map<String, String>() { 590 public int size() { 591 return maskedConf.size(); 592 } 593 594 public boolean isEmpty() { 595 return maskedConf.size() == 0; 596 } 597 598 public boolean containsKey(Object o) { 599 return maskedConf.get((String) o) != null; 600 } 601 602 public boolean containsValue(Object o) { 603 throw new UnsupportedOperationException(); 604 } 605 606 public String get(Object o) { 607 return maskedConf.get((String) o); 608 } 609 610 public String put(String s, String s1) { 611 throw new UnsupportedOperationException(); 612 } 613 614 public String remove(Object o) { 615 throw new UnsupportedOperationException(); 616 } 617 618 public void putAll(Map<? extends String, ? extends String> map) { 619 throw new UnsupportedOperationException(); 620 } 621 622 public void clear() { 623 throw new UnsupportedOperationException(); 624 } 625 626 public Set<String> keySet() { 627 Set<String> set = new LinkedHashSet<String>(); 628 for (Entry<String, String> entry : maskedConf) { 629 set.add(entry.getKey()); 630 } 631 return set; 632 } 633 634 public Collection<String> values() { 635 Set<String> set = new LinkedHashSet<String>(); 636 for (Entry<String, String> entry : maskedConf) { 637 set.add(entry.getValue()); 638 } 639 return set; 640 } 641 642 public Set<Entry<String, String>> entrySet() { 643 Set<Entry<String, String>> set = new LinkedHashSet<Entry<String, String>>(); 644 for (Entry<String, String> entry : maskedConf) { 645 set.add(entry); 646 } 647 return set; 648 } 649 }; 650 } 651 652 /** 653 * Return all the counters. <p/> This method is thread safe. <p/> The counters are live. The counter value is a 654 * snapshot at the time the {@link Instrumentation.Element#getValue()} is invoked. 655 * 656 * @return all counters. 657 */ 658 public Map<String, Map<String, Element<Long>>> getCounters() { 659 return counters; 660 } 661 662 /** 663 * Return all the timers. <p/> This method is thread safe. <p/> The timers are live. Once a timer is obtained, all 664 * its values are consistent (they are snapshot at the time the {@link Instrumentation.Element#getValue()} is 665 * invoked. 666 * 667 * @return all counters. 668 */ 669 public Map<String, Map<String, Element<Timer>>> getTimers() { 670 return timers; 671 } 672 673 /** 674 * Return all the variables. <p/> This method is thread safe. <p/> The variables are live. The variable value is a 675 * snapshot at the time the {@link Instrumentation.Element#getValue()} is invoked. 676 * 677 * @return all counters. 678 */ 679 public Map<String, Map<String, Element<Variable>>> getVariables() { 680 return variables; 681 } 682 683 /** 684 * Return a map containing all variables, counters and timers. 685 * 686 * @return a map containing all variables, counters and timers. 687 */ 688 public Map<String, Map<String, Map<String, Object>>> getAll() { 689 return all; 690 } 691 692 /** 693 * Return the string representation of the instrumentation. 694 * 695 * @return the string representation of the instrumentation. 696 */ 697 public String toString() { 698 String E = System.getProperty("line.separator"); 699 StringBuilder sb = new StringBuilder(4096); 700 for (String element : all.keySet()) { 701 sb.append(element).append(':').append(E); 702 List<String> groups = new ArrayList<String>(all.get(element).keySet()); 703 Collections.sort(groups); 704 for (String group : groups) { 705 sb.append(" ").append(group).append(':').append(E); 706 List<String> names = new ArrayList<String>(all.get(element).get(group).keySet()); 707 Collections.sort(names); 708 for (String name : names) { 709 sb.append(" ").append(name).append(": ").append(((Element) all.get(element). 710 get(group).get(name)).getValue()).append(E); 711 } 712 } 713 } 714 return sb.toString(); 715 } 716 717 private static class Sampler implements Element<Double>, Runnable { 718 private Lock lock = new ReentrantLock(); 719 private int samplingInterval; 720 private Variable<Long> variable; 721 private long[] values; 722 private int current; 723 private long valuesSum; 724 private double rate; 725 726 public Sampler(int samplingPeriod, int samplingInterval, Variable<Long> variable) { 727 this.samplingInterval = samplingInterval; 728 this.variable = variable; 729 values = new long[samplingPeriod / samplingInterval]; 730 valuesSum = 0; 731 current = -1; 732 } 733 734 public int getSamplingInterval() { 735 return samplingInterval; 736 } 737 738 public void run() { 739 try { 740 lock.lock(); 741 long newValue = variable.getValue(); 742 if (current == -1) { 743 valuesSum = newValue; 744 current = 0; 745 values[current] = newValue; 746 } 747 else { 748 current = (current + 1) % values.length; 749 valuesSum = valuesSum - values[current] + newValue; 750 values[current] = newValue; 751 } 752 rate = ((double) valuesSum) / values.length; 753 } 754 finally { 755 lock.unlock(); 756 } 757 } 758 759 public Double getValue() { 760 return rate; 761 } 762 } 763 764 /** 765 * Add a sampling variable. <p/> This method is thread safe. 766 * 767 * @param group timer group. 768 * @param name timer name. 769 * @param period sampling period to compute rate. 770 * @param interval sampling frequency, how often the variable is probed. 771 * @param variable variable to sample. 772 */ 773 public void addSampler(String group, String name, int period, int interval, Variable<Long> variable) { 774 if (scheduler == null) { 775 throw new IllegalStateException("scheduler not set, cannot sample"); 776 } 777 try { 778 samplerLock.lock(); 779 Map<String, Element<Double>> map = samplers.get(group); 780 if (map == null) { 781 map = samplers.get(group); 782 if (map == null) { 783 map = new HashMap<String, Element<Double>>(); 784 samplers.put(group, map); 785 } 786 } 787 if (map.containsKey(name)) { 788 throw new RuntimeException(XLog.format("Sampler group=[{0}] name=[{1}] already defined", group, name)); 789 } 790 Sampler sampler = new Sampler(period, interval, variable); 791 map.put(name, sampler); 792 scheduler.scheduleAtFixedRate(sampler, 0, sampler.getSamplingInterval(), TimeUnit.SECONDS); 793 } 794 finally { 795 samplerLock.unlock(); 796 } 797 } 798 799 /** 800 * Return all the samplers. <p/> This method is thread safe. <p/> The samplers are live. The sampler value is a 801 * snapshot at the time the {@link Instrumentation.Element#getValue()} is invoked. 802 * 803 * @return all counters. 804 */ 805 public Map<String, Map<String, Element<Double>>> getSamplers() { 806 return samplers; 807 } 808 809 }