/**
 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. See accompanying LICENSE file.
 */
package org.apache.oozie.action.hadoop;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.security.Permission;
import java.text.MessageFormat;
import java.util.Properties;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.oozie.service.HadoopAccessorException;
import org.apache.oozie.service.HadoopAccessorService;
import org.apache.oozie.service.Services;
import org.apache.oozie.util.XLog;

/**
 * Hadoop {@link Mapper} that invokes the <code>main()</code> method of the class configured under
 * {@link #CONF_OOZIE_ACTION_MAIN_CLASS} from within a map task.
 * <p/>
 * Results are reported through files written to the action directory (<code>output.properties</code>,
 * <code>newId.properties</code>, <code>error.properties</code>) and through counters in the
 * <code>oozie.launcher</code> counter group. The static methods configure the launcher job and inspect a
 * {@link RunningJob}; the instance methods run inside the map task.
 * <p/>
 * The class also implements {@link Runnable}: an instance created with a {@link Reporter} is scheduled as a
 * heart beat that calls {@link Reporter#progress()} every 30 seconds while the main class is running.
 */
public class LauncherMapper<K1, V1, K2, V2> implements Mapper<K1, V1, K2, V2>, Runnable {

    public static final String CONF_OOZIE_ACTION_MAIN_CLASS = "oozie.launcher.action.main.class";

    private static final String CONF_OOZIE_ACTION_MAIN_ARG_COUNT = "oozie.action.main.arg.count";
    private static final String CONF_OOZIE_ACTION_MAIN_ARG_PREFIX = "oozie.action.main.arg.";
    private static final String CONF_OOZIE_ACTION_MAX_OUTPUT_DATA = "oozie.action.max.output.data";

    /** Maximum output data size used when {@link #CONF_OOZIE_ACTION_MAX_OUTPUT_DATA} is not set. */
    private static final int DEFAULT_MAX_OUTPUT_DATA = 2 * 1024;

    private static final String COUNTER_GROUP = "oozie.launcher";
    private static final String COUNTER_DO_ID_SWAP = "oozie.do.id.swap";
    private static final String COUNTER_OUTPUT_DATA = "oozie.output.data";
    private static final String COUNTER_LAUNCHER_ERROR = "oozie.launcher.error";

    private static final String OOZIE_JOB_ID = "oozie.job.id";
    private static final String OOZIE_ACTION_ID = "oozie.action.id";

    private static final String OOZIE_ACTION_DIR_PATH = "oozie.action.dir.path";
    private static final String OOZIE_ACTION_RECOVERY_ID = "oozie.action.recovery.id";

    static final String ACTION_CONF_XML = "action.xml";
    private static final String ACTION_OUTPUT_PROPS = "output.properties";
    private static final String ACTION_NEW_ID_PROPS = "newId.properties";
    private static final String ACTION_ERROR_PROPS = "error.properties";

    /**
     * Record the current Hadoop job id (<code>mapred.job.id</code>) in the recovery file, or, if the recovery
     * file already exists, verify that it holds the current job id.
     *
     * @param launcherConf launcher job configuration.
     * @param actionDir action directory.
     * @param recoveryId name of the recovery file within the action directory.
     * @throws LauncherException thrown (via {@link #failLauncher}) on an IO error or on a job id mismatch.
     */
    private void setRecoveryId(Configuration launcherConf, Path actionDir, String recoveryId) throws LauncherException {
        try {
            FileSystem fs = FileSystem.get(launcherConf);
            String jobId = launcherConf.get("mapred.job.id");
            Path path = new Path(actionDir, recoveryId);
            if (!fs.exists(path)) {
                Writer writer = new OutputStreamWriter(fs.create(path));
                try {
                    writer.write(jobId);
                }
                finally {
                    // always release the stream, even if the write fails
                    writer.close();
                }
            }
            else {
                String id = readFirstLine(fs, path);
                if (!jobId.equals(id)) {
                    failLauncher(MessageFormat.format(
                            "Hadoop job Id mismatch, action file [{0}] declares Id [{1}] current Id [{2}]", path, id,
                            jobId), null);
                }
            }
        }
        catch (IOException ex) {
            failLauncher("IO error", ex);
        }
    }

    /**
     * Read the first line of a file.
     *
     * @param fs file system holding the file.
     * @param file file to read.
     * @return the first line of the file, <code>null</code> if the file is empty.
     * @throws IOException thrown if the file could not be read.
     */
    private static String readFirstLine(FileSystem fs, Path file) throws IOException {
        InputStream is = fs.open(file);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
        try {
            return reader.readLine();
        }
        finally {
            reader.close();
        }
    }

    /**
     * Return the Hadoop job id stored in the recovery file of an action.
     * <p/>
     * The file system is obtained through the {@link HadoopAccessorService} using the <code>user.name</code> and
     * <code>group.name</code> properties of the launcher configuration.
     *
     * @param launcherConf launcher job configuration.
     * @param actionDir action directory.
     * @param recoveryId name of the recovery file within the action directory.
     * @return the first line of the recovery file, <code>null</code> if the recovery file does not exist.
     * @throws HadoopAccessorException thrown if the file system could not be created.
     * @throws IOException thrown if the recovery file could not be read.
     */
    public static String getRecoveryId(Configuration launcherConf, Path actionDir, String recoveryId)
            throws HadoopAccessorException, IOException {
        String jobId = null;
        Path recoveryFile = new Path(actionDir, recoveryId);
        FileSystem fs = Services.get().get(HadoopAccessorService.class)
                .createFileSystem(launcherConf.get("user.name"),
                                  launcherConf.get("group.name"), launcherConf);

        if (fs.exists(recoveryFile)) {
            jobId = readFirstLine(fs, recoveryFile);
        }
        return jobId;
    }

    /**
     * Set the name of the main class the launcher has to invoke.
     *
     * @param launcherConf launcher job configuration.
     * @param javaMainClass name of the class whose <code>main()</code> method will be invoked.
     */
    public static void setupMainClass(Configuration launcherConf, String javaMainClass) {
        launcherConf.set(CONF_OOZIE_ACTION_MAIN_CLASS, javaMainClass);
    }

    /**
     * Store the arguments for the main class in the launcher configuration, one property per argument plus the
     * argument count. They are read back by {@link #getMainArguments(Configuration)}.
     *
     * @param launcherConf launcher job configuration.
     * @param args arguments for the main class.
     */
    public static void setupMainArguments(Configuration launcherConf, String[] args) {
        launcherConf.setInt(CONF_OOZIE_ACTION_MAIN_ARG_COUNT, args.length);
        for (int i = 0; i < args.length; i++) {
            launcherConf.set(CONF_OOZIE_ACTION_MAIN_ARG_PREFIX + i, args[i]);
        }
    }

    /**
     * Set the maximum size allowed for the output data file written by the main class.
     *
     * @param launcherConf launcher job configuration.
     * @param maxOutputData maximum size; it is compared against the length of the output data file.
     */
    public static void setupMaxOutputData(Configuration launcherConf, int maxOutputData) {
        launcherConf.setInt(CONF_OOZIE_ACTION_MAX_OUTPUT_DATA, maxOutputData);
    }

    /**
     * Configure a job to run this mapper as a single map task with no reducers, and prepare the action directory:
     * the action configuration is written to <code>action.xml</code> and a dummy input file is created under
     * <code>input/</code>.
     *
     * @param launcherConf launcher job configuration to set up.
     * @param jobId Oozie job id.
     * @param actionId Oozie action id.
     * @param actionDir action directory.
     * @param recoveryId name of the recovery file within the action directory.
     * @param actionConf action configuration, written to the action directory as XML.
     * @throws IOException thrown if the action directory could not be populated.
     * @throws HadoopAccessorException thrown if the file system could not be created.
     */
    public static void setupLauncherInfo(JobConf launcherConf, String jobId, String actionId, Path actionDir,
                                         String recoveryId, Configuration actionConf) throws IOException, HadoopAccessorException {

        launcherConf.setMapperClass(LauncherMapper.class);
        launcherConf.setSpeculativeExecution(false);
        launcherConf.setNumMapTasks(1);
        launcherConf.setNumReduceTasks(0);

        launcherConf.set(OOZIE_JOB_ID, jobId);
        launcherConf.set(OOZIE_ACTION_ID, actionId);
        launcherConf.set(OOZIE_ACTION_DIR_PATH, actionDir.toString());
        launcherConf.set(OOZIE_ACTION_RECOVERY_ID, recoveryId);

        FileSystem fs = Services.get().get(HadoopAccessorService.class).createFileSystem(launcherConf.get("user.name"),
                                                                                         launcherConf.get("group.name"), launcherConf);
        fs.mkdirs(actionDir);

        OutputStream os = fs.create(new Path(actionDir, ACTION_CONF_XML));
        try {
            actionConf.writeXml(os);
        }
        finally {
            os.close();
        }

        Path inputDir = new Path(actionDir, "input");
        fs.mkdirs(inputDir);
        Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "dummy.txt")));
        try {
            writer.write("dummy");
        }
        finally {
            writer.close();
        }

        launcherConf.set("mapred.input.dir", inputDir.toString());
        launcherConf.set("mapred.output.dir", new Path(actionDir, "output").toString());
    }

    /**
     * Return the value of a counter of the launcher counter group.
     *
     * @param counters job counters, may be <code>null</code>.
     * @param counterName name of the counter within the launcher counter group.
     * @return the counter value, <code>0</code> if the counters or the launcher counter group are not available.
     */
    private static long getLauncherCounter(Counters counters, String counterName) {
        if (counters == null) {
            return 0;
        }
        Counters.Group group = counters.getGroup(COUNTER_GROUP);
        if (group == null) {
            return 0;
        }
        return group.getCounter(counterName);
    }

    /**
     * Return if the launcher job has completed.
     *
     * @param runningJob launcher job.
     * @return the value of {@link RunningJob#isComplete()}.
     * @throws IOException thrown if the job status could not be obtained.
     */
    public static boolean isMainDone(RunningJob runningJob) throws IOException {
        return runningJob.isComplete();
    }

    /**
     * Return if the launcher job was successful and did not report a launcher error.
     *
     * @param runningJob launcher job.
     * @return <code>true</code> if the Hadoop job succeeded and the launcher error counter is zero or not
     *         available.
     * @throws IOException thrown if the job status or counters could not be obtained.
     */
    public static boolean isMainSuccessful(RunningJob runningJob) throws IOException {
        // counters are only consulted when Hadoop itself reports success
        return runningJob.isSuccessful()
               && getLauncherCounter(runningJob.getCounters(), COUNTER_LAUNCHER_ERROR) == 0;
    }

    /**
     * Return if the launcher job reported output data.
     *
     * @param runningJob launcher job.
     * @return <code>true</code> if the output data counter equals 1.
     * @throws IOException thrown if the counters could not be obtained.
     */
    public static boolean hasOutputData(RunningJob runningJob) throws IOException {
        return getLauncherCounter(runningJob.getCounters(), COUNTER_OUTPUT_DATA) == 1;
    }

    /**
     * Return if the launcher job reported a Hadoop job id swap.
     *
     * @param runningJob launcher job.
     * @return <code>true</code> if the id swap counter equals 1.
     * @throws IOException thrown if the counters could not be obtained.
     */
    public static boolean hasIdSwap(RunningJob runningJob) throws IOException {
        return getLauncherCounter(runningJob.getCounters(), COUNTER_DO_ID_SWAP) == 1;
    }

    /**
     * Return if the launcher job reported a Hadoop job id swap, falling back to the presence of the new id file
     * in the action directory when the job counters are not available.
     *
     * @param runningJob launcher job.
     * @param user user used to create the file system for the fallback check.
     * @param group group used to create the file system for the fallback check.
     * @param actionDir action directory.
     * @return <code>true</code> if the id swap counter equals 1 or, when the counters are <code>null</code>, if
     *         the new id file exists.
     * @throws IOException thrown if the counters or the file system could not be accessed.
     * @throws HadoopAccessorException thrown if the file system could not be created.
     */
    public static boolean hasIdSwap(RunningJob runningJob, String user, String group, Path actionDir)
            throws IOException, HadoopAccessorException {
        XLog log = XLog.getLog("org.apache.oozie.action.hadoop.LauncherMapper");

        Counters counters = runningJob.getCounters();
        if (counters != null) {
            return getLauncherCounter(counters, COUNTER_DO_ID_SWAP) == 1;
        }

        // additional check for swapped hadoop ID
        // Can't rely on hadoop counters existing
        // we'll check for the newID file in hdfs if the hadoop counters is null
        Path p = getIdSwapPath(actionDir);
        FileSystem fs = Services.get().get(HadoopAccessorService.class).createFileSystem(user, group, p.toUri(),
                                                                                         new Configuration());
        boolean swap = fs.exists(p);
        if (swap) {
            log.debug("Hadoop Counters is null, but found newID file.");
        }
        else {
            log.debug("Hadoop Counters is null, and newID file doesn't exist at: [{0}]", p);
        }
        return swap;
    }

    /**
     * Return the path of the output data file of an action.
     *
     * @param actionDir action directory.
     * @return path of <code>output.properties</code> within the action directory.
     */
    public static Path getOutputDataPath(Path actionDir) {
        return new Path(actionDir, ACTION_OUTPUT_PROPS);
    }

    /**
     * Return the path of the error file of an action.
     *
     * @param actionDir action directory.
     * @return path of <code>error.properties</code> within the action directory.
     */
    public static Path getErrorPath(Path actionDir) {
        return new Path(actionDir, ACTION_ERROR_PROPS);
    }

    /**
     * Return the path of the new id file of an action.
     *
     * @param actionDir action directory.
     * @return path of <code>newId.properties</code> within the action directory.
     */
    public static Path getIdSwapPath(Path actionDir) {
        return new Path(actionDir, ACTION_NEW_ID_PROPS);
    }

    private JobConf jobConf;
    private Path actionDir;
    private ScheduledThreadPoolExecutor timer;

    private boolean configFailure = false;

    public LauncherMapper() {
    }

    /**
     * Keep the job configuration, resolve the action directory and record/verify the recovery id.
     * <p/>
     * A launcher failure at this point is remembered and reported from {@link #map}.
     *
     * @param jobConf job configuration of the launcher job.
     */
    public void configure(JobConf jobConf) {
        System.out.println();
        System.out.println("Oozie Launcher starts");
        System.out.println();
        this.jobConf = jobConf;
        actionDir = new Path(getJobConf().get(OOZIE_ACTION_DIR_PATH));
        String recoveryId = jobConf.get(OOZIE_ACTION_RECOVERY_ID, null);
        try {
            setRecoveryId(jobConf, actionDir, recoveryId);
        }
        catch (LauncherException ex) {
            // failLauncher() has already written the error file; map() raises the failure
            configFailure = true;
        }
    }

    /**
     * Invoke the configured main class and propagate its results.
     * <p/>
     * A launcher failure does not fail the Hadoop job: the error is written to <code>error.properties</code> in
     * the action directory and the launcher error counter is incremented.
     *
     * @param key ignored.
     * @param value ignored.
     * @param collector ignored.
     * @param reporter reporter used for the heart beat and for the launcher counters.
     * @throws IOException declared by the {@link Mapper} interface.
     */
    public void map(K1 key, V1 value, OutputCollector<K2, V2> collector, Reporter reporter) throws IOException {
        try {
            if (configFailure) {
                throw new LauncherException();
            }

            String mainClass = getJobConf().get(CONF_OOZIE_ACTION_MAIN_CLASS);
            String msgPrefix = "Main class [" + mainClass + "], ";
            Throwable errorCause = null;
            String errorMessage = null;

            try {
                new LauncherSecurityManager();
            }
            catch (SecurityException ex) {
                errorMessage = "Could not set LauncherSecurityManager";
                errorCause = ex;
            }

            try {
                setupHeartBeater(reporter);

                setupMainConfiguration();

                String[] args = getMainArguments(getJobConf());

                printContentsOfCurrentDir();

                printLauncherConfiguration(mainClass, args);

                String exitError = invokeMainClass(msgPrefix, args);
                if (exitError != null) {
                    errorMessage = exitError;
                    errorCause = null;
                }

                if (errorMessage == null) {
                    captureOutputData(reporter);
                    propagateNewId(reporter);
                }
            }
            catch (LauncherException ex) {
                // failLauncher() has already written the error file with the real reason; rethrow as is
                // so the generic Throwable handler below does not overwrite it with a second failLauncher()
                throw ex;
            }
            catch (NoSuchMethodException ex) {
                errorMessage = msgPrefix + "main() method not found";
                errorCause = ex;
            }
            catch (InvocationTargetException ex) {
                errorMessage = msgPrefix + "main() threw exception";
                errorCause = ex.getTargetException();
            }
            catch (Throwable ex) {
                errorMessage = msgPrefix + "exception invoking main()";
                errorCause = ex;
            }
            finally {
                destroyHeartBeater();
                if (errorMessage != null) {
                    failLauncher(errorMessage, errorCause);
                }
            }
        }
        catch (LauncherException ex) {
            reporter.incrCounter(COUNTER_GROUP, COUNTER_LAUNCHER_ERROR, 1);
            System.out.println();
            System.out.println("Oozie Launcher failed, finishing Hadoop job gracefully");
            System.out.println();
        }
    }

    /**
     * Print the launcher job configuration and the Java System properties to the standard output.
     *
     * @param mainClass name of the main class.
     * @param args arguments for the main class.
     * @throws IOException thrown if the System properties could not be written.
     */
    private void printLauncherConfiguration(String mainClass, String[] args) throws IOException {
        System.out.println();
        System.out.println("Oozie Java/Map-Reduce/Pig action launcher-job configuration");
        System.out.println("=================================================================");
        System.out.println("Workflow job id : " + System.getProperty("oozie.job.id"));
        System.out.println("Workflow action id: " + System.getProperty("oozie.action.id"));
        System.out.println();
        System.out.println("Main class : " + mainClass);
        System.out.println();
        System.out.println("Maximum output : "
                           + getJobConf().getInt(CONF_OOZIE_ACTION_MAX_OUTPUT_DATA, DEFAULT_MAX_OUTPUT_DATA));
        System.out.println();
        System.out.println("Arguments :");
        for (String arg : args) {
            System.out.println(" " + arg);
        }

        System.out.println();
        System.out.println("Java System Properties:");
        System.out.println("------------------------");
        System.getProperties().store(System.out, "");
        System.out.flush();
        System.out.println("------------------------");
        System.out.println();

        System.out.println("=================================================================");
    }

    /**
     * Invoke the <code>main()</code> method of the configured main class via reflection.
     * <p/>
     * A <code>System.exit()</code> intercepted by the {@link LauncherSecurityManager} is not treated as an
     * exception: exit code 0 counts as a successful completion, any other exit code is reported as an error.
     *
     * @param msgPrefix prefix for error messages.
     * @param args arguments for the main class.
     * @return an error message if main exited with a non-zero exit code, <code>null</code> otherwise.
     * @throws NoSuchMethodException thrown if the main class does not have a <code>main(String[])</code> method.
     * @throws IllegalAccessException thrown if the main method is not accessible.
     * @throws InvocationTargetException thrown if main threw an exception other than an intercepted
     *         <code>System.exit()</code>.
     */
    private String invokeMainClass(String msgPrefix, String[] args) throws NoSuchMethodException,
            IllegalAccessException, InvocationTargetException {
        String exitError = null;

        System.out.println();
        System.out.println(">>> Invoking Main class now >>>");
        System.out.println();
        System.out.flush();

        try {
            Class<?> klass = getJobConf().getClass(CONF_OOZIE_ACTION_MAIN_CLASS, Object.class);
            Method mainMethod = klass.getMethod("main", String[].class);
            mainMethod.invoke(null, (Object) args);
        }
        catch (InvocationTargetException ex) {
            boolean exitIntercepted = (ex.getCause() instanceof SecurityException)
                                      && LauncherSecurityManager.getExitInvoked();
            if (!exitIntercepted) {
                // includes SecurityExceptions that do not come from an intercepted System.exit();
                // those are real failures of main() and must not be dropped
                throw ex;
            }
            System.out.println("Intercepting System.exit(" + LauncherSecurityManager.getExitCode()
                               + ")");
            System.err.println("Intercepting System.exit(" + LauncherSecurityManager.getExitCode()
                               + ")");
            // if 0 main() method finished successfully
            // ignoring
            if (LauncherSecurityManager.getExitCode() != 0) {
                exitError = msgPrefix + "exit code [" + LauncherSecurityManager.getExitCode()
                            + "]";
            }
        }
        finally {
            System.out.println();
            System.out.println("<<< Invocation of Main class completed <<<");
            System.out.println();
        }
        return exitError;
    }

    /**
     * Load a Java properties file from the local file system.
     *
     * @param file properties file.
     * @return the loaded properties.
     * @throws IOException thrown if the file could not be read.
     */
    private static Properties loadProperties(File file) throws IOException {
        Properties props = new Properties();
        FileReader reader = new FileReader(file);
        try {
            props.load(reader);
        }
        finally {
            reader.close();
        }
        return props;
    }

    /**
     * If the main class wrote an output data file, copy it to the action directory, increment the output data
     * counter and print its content. Does nothing if the file does not exist.
     *
     * @param reporter reporter used to increment the output data counter.
     * @throws IOException thrown if the output data could not be copied or read.
     * @throws LauncherException thrown if the output data file is bigger than the configured maximum.
     */
    private void captureOutputData(Reporter reporter) throws IOException, LauncherException {
        File outputData = new File(System.getProperty("oozie.action.output.properties"));
        if (!outputData.exists()) {
            return;
        }
        FileSystem fs = FileSystem.get(getJobConf());
        fs.copyFromLocalFile(new Path(outputData.toString()), new Path(actionDir,
                                                                       ACTION_OUTPUT_PROPS));
        reporter.incrCounter(COUNTER_GROUP, COUNTER_OUTPUT_DATA, 1);

        int maxOutputData = getJobConf().getInt(CONF_OOZIE_ACTION_MAX_OUTPUT_DATA, DEFAULT_MAX_OUTPUT_DATA);
        if (outputData.length() > maxOutputData) {
            String msg = MessageFormat.format("Output data size [{0}] exceeds maximum [{1}]",
                                              outputData.length(), maxOutputData);
            failLauncher(msg, null);
        }
        System.out.println();
        System.out.println("Oozie Launcher, capturing output data:");
        System.out.println("=======================");
        Properties props = loadProperties(outputData);
        props.store(System.out, "");
        System.out.println();
        System.out.println("=======================");
        System.out.println();
    }

    /**
     * If the main class wrote a new id file, copy it to the action directory and increment the id swap counter.
     * Does nothing if the file does not exist.
     *
     * @param reporter reporter used to increment the id swap counter.
     * @throws IOException thrown if the new id file could not be read or copied.
     * @throws IllegalStateException thrown if the new id file does not have an <code>id</code> property.
     */
    private void propagateNewId(Reporter reporter) throws IOException {
        File newId = new File(System.getProperty("oozie.action.newId.properties"));
        if (!newId.exists()) {
            return;
        }
        Properties props = loadProperties(newId);
        if (props.getProperty("id") == null) {
            throw new IllegalStateException("ID swap file does not have [id] property");
        }
        FileSystem fs = FileSystem.get(getJobConf());
        fs.copyFromLocalFile(new Path(newId.toString()), new Path(actionDir, ACTION_NEW_ID_PROPS));
        reporter.incrCounter(COUNTER_GROUP, COUNTER_DO_ID_SWAP, 1);

        System.out.println("Oozie Launcher, copying new Hadoop job id to file: "
                           + new Path(actionDir, ACTION_NEW_ID_PROPS).toUri());

        System.out.println();
        System.out.println("Oozie Launcher, propagating new Hadoop job id to Oozie");
        System.out.println("=======================");
        System.out.println("id: " + props.getProperty("id"));
        System.out.println("=======================");
        System.out.println();
    }

    public void close() throws IOException {
        System.out.println();
        System.out.println("Oozie Launcher ends");
        System.out.println();
    }

    /**
     * Return the job configuration received in {@link #configure(JobConf)}.
     *
     * @return the job configuration, <code>null</code> if the mapper has not been configured.
     */
    protected JobConf getJobConf() {
        return jobConf;
    }

    /**
     * Copy the action configuration file from the action directory to the current local directory and publish
     * the launcher information (job ids and local file locations) as Java System properties.
     *
     * @throws IOException thrown if the action configuration could not be copied.
     */
    private void setupMainConfiguration() throws IOException {
        FileSystem fs = FileSystem.get(getJobConf());
        fs.copyToLocalFile(new Path(getJobConf().get(OOZIE_ACTION_DIR_PATH), ACTION_CONF_XML), new Path(new File(
                ACTION_CONF_XML).getAbsolutePath()));

        System.setProperty("oozie.launcher.job.id", getJobConf().get("mapred.job.id"));
        System.setProperty("oozie.job.id", getJobConf().get(OOZIE_JOB_ID));
        System.setProperty("oozie.action.id", getJobConf().get(OOZIE_ACTION_ID));
        System.setProperty("oozie.action.conf.xml", new File(ACTION_CONF_XML).getAbsolutePath());
        System.setProperty("oozie.action.output.properties", new File(ACTION_OUTPUT_PROPS).getAbsolutePath());
        System.setProperty("oozie.action.newId.properties", new File(ACTION_NEW_ID_PROPS).getAbsolutePath());
    }

    /**
     * Return the main class arguments stored by {@link #setupMainArguments(Configuration, String[])}.
     *
     * @param conf launcher job configuration.
     * @return the arguments, an empty array if no argument count is set. An element is <code>null</code> if its
     *         property is missing.
     */
    public static String[] getMainArguments(Configuration conf) {
        String[] args = new String[conf.getInt(CONF_OOZIE_ACTION_MAIN_ARG_COUNT, 0)];
        for (int i = 0; i < args.length; i++) {
            args[i] = conf.get(CONF_OOZIE_ACTION_MAIN_ARG_PREFIX + i);
        }
        return args;
    }

    /**
     * Start a single-thread timer that reports progress every 30 seconds, starting immediately.
     *
     * @param reporter reporter to report progress to.
     */
    private void setupHeartBeater(Reporter reporter) {
        timer = new ScheduledThreadPoolExecutor(1);
        timer.scheduleAtFixedRate(new LauncherMapper(reporter), 0, 30, TimeUnit.SECONDS);
    }

    /**
     * Stop the heart beat timer, if it was started.
     */
    private void destroyHeartBeater() {
        // called from a finally block: the timer is null if the heart beat could not be set up
        if (timer != null) {
            timer.shutdownNow();
        }
    }

    private Reporter reporter;

    /**
     * Create the instance used as heart beat task, see {@link #run()}.
     *
     * @param reporter reporter to report progress to.
     */
    private LauncherMapper(Reporter reporter) {
        this.reporter = reporter;
    }

    /**
     * Heart beat: report progress to the reporter given at construction time.
     */
    public void run() {
        System.out.println("Heart beat");
        reporter.progress();
    }

    /**
     * Write the failure reason (and exception details, if any) to <code>error.properties</code> in the action
     * directory, print them to the standard output and error, and throw a {@link LauncherException}.
     *
     * @param reason failure reason.
     * @param ex exception that caused the failure, may be <code>null</code>.
     * @throws LauncherException always thrown, unless the error file could not be written.
     * @throws RuntimeException thrown if the error file could not be written.
     */
    private void failLauncher(String reason, Throwable ex) throws LauncherException {
        try {
            if (ex != null) {
                reason += ", " + ex.getMessage();
            }
            Properties errorProps = new Properties();
            errorProps.setProperty("error.reason", reason);
            if (ex != null) {
                if (ex.getMessage() != null) {
                    errorProps.setProperty("exception.message", ex.getMessage());
                }
                StringWriter sw = new StringWriter();
                PrintWriter pw = new PrintWriter(sw);
                ex.printStackTrace(pw);
                pw.close();
                errorProps.setProperty("exception.stacktrace", sw.toString());
            }
            FileSystem fs = FileSystem.get(getJobConf());
            OutputStream os = fs.create(new Path(actionDir, ACTION_ERROR_PROPS));
            try {
                errorProps.store(os, "");
            }
            finally {
                os.close();
            }

            System.out.print("Failing Oozie Launcher, " + reason + "\n");
            System.err.print("Failing Oozie Launcher, " + reason + "\n");
            if (ex != null) {
                ex.printStackTrace(System.out);
                ex.printStackTrace(System.err);
            }
            throw new LauncherException();
        }
        catch (IOException rex) {
            throw new RuntimeException("Error while failing launcher, " + rex.getMessage(), rex);
        }
    }

    /**
     * Print files and directories in current directory. Will list files in the sub-directory (only 1 level deep)
     */
    protected void printContentsOfCurrentDir() {
        File folder = new File(".");
        System.out.println();
        System.out.println("Files in current dir:" + folder.getAbsolutePath());
        System.out.println("======================");

        File[] listOfFiles = folder.listFiles();
        if (listOfFiles == null) {
            // listFiles() returns null if the directory cannot be read
            return;
        }
        for (File fileName : listOfFiles) {
            if (fileName.isFile()) {
                System.out.println("File: " + fileName.getName());
            }
            else if (fileName.isDirectory()) {
                System.out.println("Dir: " + fileName.getName());
                File[] moreFiles = fileName.listFiles();
                if (moreFiles == null) {
                    continue;
                }
                for (File subFileName : moreFiles) {
                    if (subFileName.isFile()) {
                        System.out.println(" File: " + subFileName.getName());
                    }
                    else if (subFileName.isDirectory()) {
                        System.out.println(" Dir: " + subFileName.getName());
                    }
                }
            }
        }
    }

}

/**
 * Security manager that intercepts <code>System.exit()</code>: the exit is recorded and vetoed with a
 * {@link SecurityException}. Permission checks are delegated to the security manager that was installed
 * before this one, if any.
 * <p/>
 * Creating an instance installs it as the System security manager and resets the recorded exit information.
 */
class LauncherSecurityManager extends SecurityManager {
    private static boolean exitInvoked;
    private static int exitCode;
    private SecurityManager securityManager;

    public LauncherSecurityManager() {
        reset();
        securityManager = System.getSecurityManager();
        System.setSecurityManager(this);
    }

    @Override
    public void checkPermission(Permission perm) {
        if (securityManager != null) {
            // check everything with the original SecurityManager
            securityManager.checkPermission(perm);
        }
    }

    /**
     * Record the exit request and veto it.
     *
     * @param status exit code given to <code>System.exit()</code>.
     * @throws SecurityException always thrown.
     */
    @Override
    public void checkExit(int status) throws SecurityException {
        exitInvoked = true;
        exitCode = status;
        throw new SecurityException("Intercepted System.exit(" + status + ")");
    }

    /**
     * @return <code>true</code> if an exit has been intercepted since the last {@link #reset()}.
     */
    public static boolean getExitInvoked() {
        return exitInvoked;
    }

    /**
     * @return exit code of the last intercepted exit, <code>0</code> if none since the last {@link #reset()}.
     */
    public static int getExitCode() {
        return exitCode;
    }

    /**
     * Clear the recorded exit information.
     */
    public static void reset() {
        exitInvoked = false;
        exitCode = 0;
    }
}

/**
 * Signals that the launcher failed. It carries no information; the failure details are written to the error
 * file by <code>LauncherMapper.failLauncher()</code> before it is thrown.
 */
class LauncherException extends Exception {
}