/**
 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. See accompanying LICENSE file.
 */
package org.apache.oozie.action.hadoop;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.ConnectException;
import java.net.URI;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.AccessControlException;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.util.DiskChecker;
import org.apache.oozie.action.ActionExecutor;
import org.apache.oozie.action.ActionExecutorException;
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.client.WorkflowAction;
import org.apache.oozie.service.HadoopAccessorException;
import org.apache.oozie.service.HadoopAccessorService;
import org.apache.oozie.service.Services;
import org.apache.oozie.service.WorkflowAppService;
import org.apache.oozie.servlet.CallbackServlet;
import org.apache.oozie.util.IOUtils;
import org.apache.oozie.util.PropertiesUtils;
import org.apache.oozie.util.XConfiguration;
import org.apache.oozie.util.XLog;
import org.apache.oozie.util.XmlUtils;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;

/**
 * ActionExecutor for the "java" workflow action (and the base class for other
 * Hadoop-based action types that override it). It runs the user's main class
 * indirectly: a single-task Hadoop "launcher" map job is submitted, and the
 * launcher's mapper ({@link LauncherMapper}) executes the actual action code.
 * <p>
 * Lifecycle as implemented here: {@link #start} prepares the action directory
 * on HDFS and submits the launcher; {@link #check} polls the Hadoop job and
 * harvests output/error data written by the launcher into the action dir;
 * {@link #kill} kills the Hadoop job; {@link #end} maps the external status to
 * the workflow action status and cleans up the action dir.
 */
public class JavaActionExecutor extends ActionExecutor {

    // Hadoop configuration keys that Oozie itself controls (see
    // DISALLOWED_PROPERTIES below — users may not set these in action conf).
    private static final String HADOOP_USER = "user.name";
    private static final String HADOOP_UGI = "hadoop.job.ugi";
    private static final String HADOOP_JOB_TRACKER = "mapred.job.tracker";
    private static final String HADOOP_NAME_NODE = "fs.default.name";

    // Properties rejected by checkForDisallowedProps() when found in user-supplied
    // configuration (inline <configuration>, job-xml, launcher conf).
    private static final Set<String> DISALLOWED_PROPERTIES = new HashSet<String>();

    // Max bytes of captured action output propagated back to Oozie; initialized
    // from the CallbackServlet config in initActionType().
    private static int maxActionOutputLen;

    // External (Hadoop-side) status values used for this action type.
    private static final String SUCCEEDED = "SUCCEEDED";
    private static final String KILLED = "KILLED";
    private static final String FAILED = "FAILED";
    private static final String FAILED_KILLED = "FAILED/KILLED";
    private static final String RUNNING = "RUNNING";
    private XLog log = XLog.getLog(getClass());

    static {
        DISALLOWED_PROPERTIES.add(HADOOP_USER);
        DISALLOWED_PROPERTIES.add(HADOOP_UGI);
        DISALLOWED_PROPERTIES.add(HADOOP_JOB_TRACKER);
        DISALLOWED_PROPERTIES.add(HADOOP_NAME_NODE);
        DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_JT_KERBEROS_NAME);
        DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_NN_KERBEROS_NAME);
    }

    public JavaActionExecutor() {
        this("java");
    }

    protected JavaActionExecutor(String type) {
        super(type);
    }

    /**
     * Name of the launcher jar created at init time, e.g. "java-launcher.jar".
     * Subclasses get a per-type jar because the name is derived from the type.
     */
    protected String getLauncherJarName() {
        return getType() + "-launcher.jar";
    }

    /**
     * Classes bundled into the launcher jar. Subclasses typically add their
     * own main class to this list.
     */
    protected List<Class> getLauncherClasses() {
        List<Class> classes = new ArrayList<Class>();
        classes.add(LauncherMapper.class);
        classes.add(LauncherSecurityManager.class);
        classes.add(LauncherException.class);
        return classes;
    }

    /**
     * One-time initialization: builds the launcher jar in the Oozie runtime
     * dir and registers Hadoop/JDOM exception types against Oozie error codes
     * (JA001-JA009) with their transient/non-transient classification.
     */
    @Override
    public void initActionType() {
        super.initActionType();
        maxActionOutputLen = getOozieConf().getInt(CallbackServlet.CONF_MAX_DATA_LEN, 2 * 1024);
        try {
            List<Class> classes = getLauncherClasses();
            Class[] launcherClasses = classes.toArray(new Class[classes.size()]);
            IOUtils.createJar(new File(getOozieRuntimeDir()), getLauncherJarName(), launcherClasses);

            registerError(UnknownHostException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA001");
            registerError(AccessControlException.class.getName(), ActionExecutorException.ErrorType.NON_TRANSIENT,
                    "JA002");
            registerError(DiskChecker.DiskOutOfSpaceException.class.getName(),
                    ActionExecutorException.ErrorType.NON_TRANSIENT, "JA003");
            registerError(org.apache.hadoop.hdfs.protocol.QuotaExceededException.class.getName(),
                    ActionExecutorException.ErrorType.NON_TRANSIENT, "JA004");
            registerError(org.apache.hadoop.hdfs.server.namenode.SafeModeException.class.getName(),
                    ActionExecutorException.ErrorType.NON_TRANSIENT, "JA005");
            registerError(ConnectException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA006");
            registerError(JDOMException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA007");
            registerError(FileNotFoundException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA008");
            registerError(IOException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA009");
        }
        catch (IOException ex) {
            throw new RuntimeException(ex);
        }
    }

    /**
     * Fails the action with JA010 if the given configuration contains any
     * Oozie-controlled property (see DISALLOWED_PROPERTIES).
     *
     * @param conf configuration to scan
     * @param confName label used in the error message (e.g. "job-xml")
     */
    void checkForDisallowedProps(Configuration conf, String confName) throws ActionExecutorException {
        for (String prop : DISALLOWED_PROPERTIES) {
            if (conf.get(prop) != null) {
                throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA010",
                        "Property [{0}] not allowed in action [{1}] configuration", prop, confName);
            }
        }
    }

    /**
     * Builds the base Hadoop configuration for this action: user/group and
     * (when present) Kerberos principals from the workflow's proto conf, plus
     * job-tracker and name-node taken from the action XML.
     */
    Configuration createBaseHadoopConf(Context context, Element actionXml) {
        Configuration conf = new XConfiguration();
        conf.set(HADOOP_USER, context.getProtoActionConf().get(WorkflowAppService.HADOOP_USER));
        conf.set(HADOOP_UGI, context.getProtoActionConf().get(WorkflowAppService.HADOOP_UGI));
        if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME) != null) {
            conf.set(WorkflowAppService.HADOOP_JT_KERBEROS_NAME, context.getProtoActionConf().get(
                    WorkflowAppService.HADOOP_JT_KERBEROS_NAME));
        }
        if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME) != null) {
            conf.set(WorkflowAppService.HADOOP_NN_KERBEROS_NAME, context.getProtoActionConf().get(
                    WorkflowAppService.HADOOP_NN_KERBEROS_NAME));
        }
        conf.set(OozieClient.GROUP_NAME, context.getProtoActionConf().get(OozieClient.GROUP_NAME));
        Namespace ns = actionXml.getNamespace();
        String jobTracker = actionXml.getChild("job-tracker", ns).getTextTrim();
        String nameNode = actionXml.getChild("name-node", ns).getTextTrim();
        conf.set(HADOOP_JOB_TRACKER, jobTracker);
        conf.set(HADOOP_NAME_NODE, nameNode);
        conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "true");
        return conf;
    }

    /**
     * Copies "oozie.launcher."-prefixed properties from the action's inline
     * <configuration> element into the launcher job configuration. Each
     * property is set twice: under its original prefixed key and under the
     * un-prefixed key (so it takes effect as a regular Hadoop job property on
     * the launcher job). Disallowed properties cause a JA010 failure.
     *
     * @return the same {@code conf} instance, updated in place
     */
    Configuration setupLauncherConf(Configuration conf, Element actionXml, Path appPath, Context context) throws ActionExecutorException {
        try {
            Namespace ns = actionXml.getNamespace();
            Element e = actionXml.getChild("configuration", ns);
            if (e != null) {
                // round-trip through XML text to parse the element as an XConfiguration
                String strConf = XmlUtils.prettyPrint(e).toString();
                XConfiguration inlineConf = new XConfiguration(new StringReader(strConf));

                XConfiguration launcherConf = new XConfiguration();
                for (Map.Entry<String, String> entry : inlineConf) {
                    if (entry.getKey().startsWith("oozie.launcher.")) {
                        String name = entry.getKey().substring("oozie.launcher.".length());
                        String value = entry.getValue();
                        // setting original KEY
                        launcherConf.set(entry.getKey(), value);
                        // setting un-prefixed key (to allow Hadoop job config
                        // for the launcher job
                        launcherConf.set(name, value);
                    }
                }
                checkForDisallowedProps(launcherConf, "inline launcher configuration");
                XConfiguration.copy(launcherConf, conf);
            }
            return conf;
        }
        catch (IOException ex) {
            throw convertException(ex);
        }
    }

    /**
     * Resolves the action's file system by parsing the action configuration
     * XML and delegating to {@link #getActionFileSystem(Context, Element)}.
     */
    protected FileSystem getActionFileSystem(Context context, WorkflowAction action) throws ActionExecutorException {
        try {
            Element actionXml = XmlUtils.parseXml(action.getConf());
            return getActionFileSystem(context, actionXml);
        }
        catch (JDOMException ex) {
            throw convertException(ex);
        }
    }

    /**
     * Returns the workflow application's file system. The actionXml parameter
     * is unused here; presumably kept so subclasses can resolve a different
     * file system from the action definition — TODO confirm against overrides.
     */
    protected FileSystem getActionFileSystem(Context context, Element actionXml) throws ActionExecutorException {
        try {
            return context.getAppFileSystem();
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }

    /**
     * Populates the action job configuration from (1) an optional job-xml file
     * referenced by the action (resolved relative to the app path) and (2) the
     * action's inline <configuration> element, in that order — inline values
     * therefore override job-xml values. Both sources are checked for
     * disallowed properties (JA010).
     *
     * @return the same {@code actionConf} instance, updated in place
     */
    Configuration setupActionConf(Configuration actionConf, Context context, Element actionXml, Path appPath)
            throws ActionExecutorException {
        try {
            Namespace ns = actionXml.getNamespace();
            Element e = actionXml.getChild("job-xml", ns);
            if (e != null) {
                String jobXml = e.getTextTrim();
                Path path = new Path(appPath, jobXml);
                FileSystem fs = getActionFileSystem(context, actionXml);
                Configuration jobXmlConf = new XConfiguration(fs.open(path));
                checkForDisallowedProps(jobXmlConf, "job-xml");
                XConfiguration.copy(jobXmlConf, actionConf);
            }
            e = actionXml.getChild("configuration", ns);
            if (e != null) {
                String strConf = XmlUtils.prettyPrint(e).toString();
                XConfiguration inlineConf = new XConfiguration(new StringReader(strConf));
                checkForDisallowedProps(inlineConf, "inline configuration");
                XConfiguration.copy(inlineConf, actionConf);
            }
            return actionConf;
        }
        catch (IOException ex) {
            throw convertException(ex);
        }
    }

    /**
     * Adds a file or archive to the job's DistributedCache and enables
     * symlink creation. Relative paths are resolved against the app path.
     * Special handling for files:
     * <ul>
     * <li>.so / .so.N files: cached under a fragment truncated at ".so" so the
     * symlink carries the plain library name;</li>
     * <li>.jar files without an explicit "#" fragment: added to the job
     * classpath via HadoopAccessorService instead of a plain cache file;</li>
     * <li>other files without a fragment: cached with "#basename" so a symlink
     * with the original file name is created in the task working dir.</li>
     * </ul>
     *
     * @param conf job configuration to update (returned for chaining)
     * @param appPath workflow application path, base for relative entries
     * @param filePath absolute or app-relative path of the file/archive
     * @param archive true to add as a cache archive, false as a cache file
     */
    Configuration addToCache(Configuration conf, Path appPath, String filePath, boolean archive)
            throws ActionExecutorException {
        Path path = null;
        try {
            if (filePath.startsWith("/")) {
                path = new Path(filePath);
            }
            else {
                path = new Path(appPath, filePath);
            }
            // NOTE(review): URI is rebuilt from the path component only, which
            // drops any scheme/authority — presumably intentional so the cache
            // entry is fs-relative; confirm before changing.
            URI uri = new URI(path.toUri().getPath());
            if (archive) {
                DistributedCache.addCacheArchive(uri, conf);
            }
            else {
                String fileName = filePath.substring(filePath.lastIndexOf("/") + 1);
                if (fileName.endsWith(".so") || fileName.contains(".so.")) { // .so files
                    if (!fileName.endsWith(".so")) {
                        // versioned library (libx.so.N) — symlink as plain libx.so
                        int extAt = fileName.indexOf(".so.");
                        fileName = fileName.substring(0, extAt + 3);
                    }
                    uri = new Path(path.toString() + "#" + fileName).toUri();
                    uri = new URI(uri.getPath());
                }
                else if (fileName.endsWith(".jar")) { // .jar files
                    if (!fileName.contains("#")) {
                        path = new Path(uri.toString());

                        String user = conf.get("user.name");
                        String group = conf.get("group.name");
                        Services.get().get(HadoopAccessorService.class).addFileToClassPath(user, group, path, conf);
                    }
                }
                else { // regular files
                    if (!fileName.contains("#")) {
                        uri = new Path(path.toString() + "#" + fileName).toUri();
                        uri = new URI(uri.getPath());
                    }
                }
                DistributedCache.addCacheFile(uri, conf);
            }
            DistributedCache.createSymlink(conf);
            return conf;
        }
        catch (Exception ex) {
            XLog.getLog(getClass()).debug(
                    "Errors when add to DistributedCache. Path=" + path + ", archive=" + archive + ", conf="
                            + XmlUtils.prettyPrint(conf).toString());
            throw convertException(ex);
        }
    }

    /**
     * Path of the launcher jar inside this action's HDFS action directory
     * (it is copied there by {@link #prepareActionDir}).
     */
    String getOozieLauncherJar(Context context) throws ActionExecutorException {
        try {
            return new Path(context.getActionDir(), getLauncherJarName()).toString();
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }

    /**
     * Creates the action directory on HDFS (if absent) containing the launcher
     * jar. The jar is first copied into a ".tmp" sibling dir which is then
     * renamed into place, so a partially-copied dir is never observed; on any
     * copy/rename failure both dirs are deleted before rethrowing.
     */
    void prepareActionDir(FileSystem actionFs, Context context) throws ActionExecutorException {
        try {
            Path actionDir = context.getActionDir();
            Path tempActionDir = new Path(actionDir.getParent(), actionDir.getName() + ".tmp");
            if (!actionFs.exists(actionDir)) {
                try {
                    actionFs.copyFromLocalFile(new Path(getOozieRuntimeDir(), getLauncherJarName()), new Path(
                            tempActionDir, getLauncherJarName()));
                    actionFs.rename(tempActionDir, actionDir);
                }
                catch (IOException ex) {
                    actionFs.delete(tempActionDir, true);
                    actionFs.delete(actionDir, true);
                    throw ex;
                }
            }
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }

    /**
     * Recursively deletes the action directory, unless the workflow asked to
     * keep it via the "oozie.action.keep.action.dir" property (debug aid).
     */
    void cleanUpActionDir(FileSystem actionFs, Context context) throws ActionExecutorException {
        try {
            Path actionDir = context.getActionDir();
            if (!context.getProtoActionConf().getBoolean("oozie.action.keep.action.dir", false)
                    && actionFs.exists(actionDir)) {
                actionFs.delete(actionDir, true);
            }
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }

    /**
     * Registers everything the job needs in the DistributedCache: the launcher
     * jar, the workflow app's lib path entries (from the proto conf), and any
     * <file>/<archive> elements declared in the action XML.
     */
    @SuppressWarnings("unchecked")
    void setLibFilesArchives(Context context, Element actionXml, Path appPath, Configuration conf)
            throws ActionExecutorException {
        Configuration proto = context.getProtoActionConf();

        addToCache(conf, appPath, getOozieLauncherJar(context), false);

        String[] paths = proto.getStrings(WorkflowAppService.APP_LIB_PATH_LIST);
        if (paths != null) {
            for (String path : paths) {
                addToCache(conf, appPath, path, false);
            }
        }

        for (Element eProp : (List<Element>) actionXml.getChildren()) {
            if (eProp.getName().equals("file")) {
                String path = eProp.getTextTrim();
                addToCache(conf, appPath, path, false);
            }
            else {
                if (eProp.getName().equals("archive")) {
                    String path = eProp.getTextTrim();
                    addToCache(conf, appPath, path, true);
                }
            }
        }
    }

    /**
     * Main class the launcher should run, taken from the action's
     * <main-class> element. Subclasses override for other action types.
     */
    protected String getLauncherMain(Configuration launcherConf, Element actionXml) {
        Namespace ns = actionXml.getNamespace();
        Element e = actionXml.getChild("main-class", ns);
        return e.getTextTrim();
    }

    // Action-conf properties mirrored onto the launcher job (queue name and
    // Kerberos principals) so the launcher can be submitted at all.
    private static final Set<String> SPECIAL_PROPERTIES = new HashSet<String>();

    static {
        SPECIAL_PROPERTIES.add("mapred.job.queue.name");
        SPECIAL_PROPERTIES.add("mapreduce.jobtracker.kerberos.principal");
        SPECIAL_PROPERTIES.add("dfs.namenode.kerberos.principal");
    }

    /**
     * Builds the JobConf for the launcher map job: base Hadoop conf + launcher
     * overrides, DistributedCache entries, LauncherMapper bookkeeping (job id,
     * action id, action dir, recovery id, embedded action conf), main class,
     * output-size cap, <arg> values, optional <java-opts> appended to
     * mapred.child.java.opts, and the SPECIAL_PROPERTIES carried over from the
     * action conf. Delegation-token cancellation on job end is disabled and
     * the job's modify-ACL is set to the workflow group.
     */
    @SuppressWarnings("unchecked")
    JobConf createLauncherConf(Context context, WorkflowAction action, Element actionXml, Configuration actionConf)
            throws ActionExecutorException {
        try {
            Path appPath = new Path(context.getWorkflow().getAppPath());

            // launcher job configuration
            Configuration launcherConf = createBaseHadoopConf(context, actionXml);
            setupLauncherConf(launcherConf, actionXml, appPath, context);

            // we are doing init+copy because if not we are getting 'hdfs'
            // scheme not known
            // its seems that new JobConf(Conf) does not load defaults, it
            // assumes parameter Conf does.
            JobConf launcherJobConf = new JobConf();
            XConfiguration.copy(launcherConf, launcherJobConf);
            setLibFilesArchives(context, actionXml, appPath, launcherJobConf);
            String jobName = XLog.format("oozie:launcher:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow()
                    .getAppName(), action.getName(), context.getWorkflow().getId());
            launcherJobConf.setJobName(jobName);

            String jobId = context.getWorkflow().getId();
            String actionId = action.getId();
            Path actionDir = context.getActionDir();
            String recoveryId = context.getRecoveryId();

            LauncherMapper.setupLauncherInfo(launcherJobConf, jobId, actionId, actionDir, recoveryId, actionConf);

            LauncherMapper.setupMainClass(launcherJobConf, getLauncherMain(launcherConf, actionXml));

            LauncherMapper.setupMaxOutputData(launcherJobConf, maxActionOutputLen);

            Namespace ns = actionXml.getNamespace();
            List<Element> list = actionXml.getChildren("arg", ns);
            String[] args = new String[list.size()];
            for (int i = 0; i < list.size(); i++) {
                args[i] = list.get(i).getTextTrim();
            }
            LauncherMapper.setupMainArguments(launcherJobConf, args);

            Element opt = actionXml.getChild("java-opts", ns);
            if (opt != null) {
                String opts = launcherConf.get("mapred.child.java.opts", "");
                opts = opts + " " + opt.getTextTrim();
                opts = opts.trim();
                launcherJobConf.set("mapred.child.java.opts", opts);
            }

            // properties from action that are needed by the launcher (QUEUE
            // NAME)
            // maybe we should add queue to the WF schema, below job-tracker
            for (String name : SPECIAL_PROPERTIES) {
                String value = actionConf.get(name);
                if (value != null) {
                    launcherJobConf.set(name, value);
                }
            }

            // to disable cancelation of delegation token on launcher job end
            launcherJobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);

            // setting the group owning the Oozie job to allow anybody in that
            // group to kill the jobs.
            launcherJobConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup());

            return launcherJobConf;
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }

    /**
     * Sets the Hadoop job-end notification URL to this action's callback URL.
     * "$jobStatus" is a placeholder Hadoop substitutes with the final status.
     * An existing user-supplied URL is logged as overridden.
     */
    private void injectCallback(Context context, Configuration conf) {
        String callback = context.getCallbackUrl("$jobStatus");
        if (conf.get("job.end.notification.url") != null) {
            XLog.getLog(getClass()).warn("Overriding the action job end notification URI");
        }
        conf.set("job.end.notification.url", callback);
    }

    // Hook points so subclasses can customize callback injection separately
    // for the action job and the launcher job.
    void injectActionCallback(Context context, Configuration actionConf) {
        injectCallback(context, actionConf);
    }

    void injectLauncherCallback(Context context, Configuration launcherConf) {
        injectCallback(context, launcherConf);
    }

    /**
     * Builds the action and launcher configurations and submits the launcher
     * job. If a recovery id maps to an already-submitted launcher, the job is
     * looked up instead of re-submitted (failing with JA017 if it cannot be
     * found). On first submission the <prepare> operations are run first and
     * delegation-token propagation is set up via AuthHelper. Finally records
     * start data (launcher id, job tracker, console URL) on the context.
     */
    void submitLauncher(Context context, WorkflowAction action) throws ActionExecutorException {
        JobClient jobClient = null;
        boolean exception = false;
        try {
            Path appPath = new Path(context.getWorkflow().getAppPath());
            Element actionXml = XmlUtils.parseXml(action.getConf());

            // action job configuration
            Configuration actionConf = createBaseHadoopConf(context, actionXml);
            setupActionConf(actionConf, context, actionXml, appPath);
            XLog.getLog(getClass()).debug("Setting LibFilesArchives ");
            setLibFilesArchives(context, actionXml, appPath, actionConf);
            String jobName = XLog.format("oozie:action:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow()
                    .getAppName(), action.getName(), context.getWorkflow().getId());
            actionConf.set("mapred.job.name", jobName);
            injectActionCallback(context, actionConf);

            // setting the group owning the Oozie job to allow anybody in that
            // group to kill the jobs.
            actionConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup());

            JobConf launcherJobConf = createLauncherConf(context, action, actionXml, actionConf);
            injectLauncherCallback(context, launcherJobConf);
            XLog.getLog(getClass()).debug("Creating Job Client for action " + action.getId());
            jobClient = createJobClient(context, launcherJobConf);
            String launcherId = LauncherMapper.getRecoveryId(launcherJobConf, context.getActionDir(), context
                    .getRecoveryId());
            boolean alreadyRunning = launcherId != null;
            RunningJob runningJob;

            if (alreadyRunning) {
                // recovery path: re-attach to the previously submitted launcher
                runningJob = jobClient.getJob(JobID.forName(launcherId));
                if (runningJob == null) {
                    String jobTracker = launcherJobConf.get("mapred.job.tracker");
                    throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017",
                            "unknown job [{0}@{1}], cannot recover", launcherId, jobTracker);
                }
            }
            else {
                prepare(context, actionXml);
                XLog.getLog(getClass()).debug("Submitting the job through Job Client for action " + action.getId());

                // setting up propagation of the delegation token.
                AuthHelper.get().set(jobClient, launcherJobConf);
                log.debug(WorkflowAppService.HADOOP_JT_KERBEROS_NAME + " = "
                        + launcherJobConf.get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME));
                log.debug(WorkflowAppService.HADOOP_NN_KERBEROS_NAME + " = "
                        + launcherJobConf.get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME));
                runningJob = jobClient.submitJob(launcherJobConf);
                if (runningJob == null) {
                    throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017",
                            "Error submitting launcher for action [{0}]", action.getId());
                }
                launcherId = runningJob.getID().toString();
                XLog.getLog(getClass()).debug("After submission get the launcherId " + launcherId);
            }

            String jobTracker = launcherJobConf.get(HADOOP_JOB_TRACKER);
            String consoleUrl = runningJob.getTrackingURL();
            context.setStartData(launcherId, jobTracker, consoleUrl);
        }
        catch (Exception ex) {
            exception = true;
            throw convertException(ex);
        }
        finally {
            if (jobClient != null) {
                try {
                    jobClient.close();
                }
                catch (Exception e) {
                    // don't mask an exception already in flight with a close failure
                    if (exception) {
                        log.error("JobClient error: ", e);
                    }
                    else {
                        throw convertException(e);
                    }
                }
            }
        }
    }

    /**
     * Executes the action's optional <prepare> file-system operations
     * (delegated to FsActionExecutor) before first submission.
     */
    void prepare(Context context, Element actionXml) throws ActionExecutorException {
        Namespace ns = actionXml.getNamespace();
        Element prepare = actionXml.getChild("prepare", ns);
        if (prepare != null) {
            XLog.getLog(getClass()).debug("Preparing the action with FileSystem operation");
            FsActionExecutor fsAe = new FsActionExecutor();
            fsAe.doOperations(context, prepare);
            XLog.getLog(getClass()).debug("FS Operation is completed");
        }
    }

    /**
     * Starts the action: prepares the HDFS action dir, submits the launcher,
     * then immediately runs a first {@link #check} so a fast-finishing
     * launcher is observed right away.
     */
    @Override
    public void start(Context context, WorkflowAction action) throws ActionExecutorException {
        try {
            XLog.getLog(getClass()).debug("Starting action " + action.getId() + " getting Action File System");
            FileSystem actionFs = getActionFileSystem(context, action);
            XLog.getLog(getClass()).debug("Preparing action Dir through copying " + context.getActionDir());
            prepareActionDir(actionFs, context);
            XLog.getLog(getClass()).debug("Action Dir is ready. Submitting the action ");
            submitLauncher(context, action);
            XLog.getLog(getClass()).debug("Action submit completed. Performing check ");
            check(context, action);
            XLog.getLog(getClass()).debug("Action check is done after submission");
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }

    /**
     * Ends the action: maps external status SUCCEEDED to OK (anything else to
     * ERROR) and records the end data; the action dir is always cleaned up in
     * the finally block, even when setting end data fails.
     */
    @Override
    public void end(Context context, WorkflowAction action) throws ActionExecutorException {
        try {
            String externalStatus = action.getExternalStatus();
            WorkflowAction.Status status = externalStatus.equals(SUCCEEDED) ? WorkflowAction.Status.OK
                    : WorkflowAction.Status.ERROR;
            context.setEndData(status, getActionSignal(status));
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
        finally {
            try {
                FileSystem actionFs = getActionFileSystem(context, action);
                cleanUpActionDir(actionFs, context);
            }
            catch (Exception ex) {
                throw convertException(ex);
            }
        }
    }

    /**
     * Create job client object
     *
     * @param context executor context, provides the workflow's user and group
     * @param jobConf configuration of the job the client will talk to
     * @return a JobClient obtained from the HadoopAccessorService for that user/group
     * @throws HadoopAccessorException if the client cannot be created
     */
    protected JobClient createJobClient(Context context, JobConf jobConf) throws HadoopAccessorException {
        String user = context.getWorkflow().getUser();
        String group = context.getWorkflow().getGroup();
        return Services.get().get(HadoopAccessorService.class).createJobClient(user, group, jobConf);
    }

    /**
     * Polls the Hadoop job bound to this action. Handles the launcher's
     * "id swap" protocol: when the completed launcher has written a swap file,
     * the external id is replaced with the real job id and that job is checked
     * instead. On completion, reads output/error property files written by the
     * launcher into the action dir and records SUCCEEDED or FAILED/KILLED
     * execution data; otherwise the external status stays RUNNING. A job that
     * can no longer be found on the cluster fails the action with JA017.
     */
    @Override
    public void check(Context context, WorkflowAction action) throws ActionExecutorException {
        JobClient jobClient = null;
        boolean exception = false;
        try {
            Element actionXml = XmlUtils.parseXml(action.getConf());
            FileSystem actionFs = getActionFileSystem(context, actionXml);
            Configuration conf = createBaseHadoopConf(context, actionXml);
            JobConf jobConf = new JobConf();
            XConfiguration.copy(conf, jobConf);
            jobClient = createJobClient(context, jobConf);
            RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId()));
            if (runningJob == null) {
                context.setExternalStatus(FAILED);
                context.setExecutionData(FAILED, null);
                throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017",
                        "Unknown hadoop job [{0}] associated with action [{1}]. Failing this action!", action
                                .getExternalId(), action.getId());
            }
            if (runningJob.isComplete()) {
                Path actionDir = context.getActionDir();

                String user = context.getWorkflow().getUser();
                String group = context.getWorkflow().getGroup();
                if (LauncherMapper.hasIdSwap(runningJob, user, group, actionDir)) {
                    // the launcher started the real job and recorded its id;
                    // switch tracking to that job.
                    String launcherId = action.getExternalId();
                    Path idSwapPath = LauncherMapper.getIdSwapPath(context.getActionDir());
                    InputStream is = actionFs.open(idSwapPath);
                    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
                    Properties props = PropertiesUtils.readProperties(reader, maxActionOutputLen);
                    reader.close();
                    String newId = props.getProperty("id");
                    runningJob = jobClient.getJob(JobID.forName(newId));
                    if (runningJob == null) {
                        context.setExternalStatus(FAILED);
                        throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017",
                                "Unknown hadoop job [{0}] associated with action [{1}]. Failing this action!", newId,
                                action.getId());
                    }

                    context.setStartData(newId, action.getTrackerUri(), runningJob.getTrackingURL());
                    XLog.getLog(getClass()).info(XLog.STD, "External ID swap, old ID [{0}] new ID [{1}]", launcherId,
                            newId);
                }
                // re-check: runningJob may now be the swapped-in job, which can
                // still be running even though the launcher completed.
                if (runningJob.isComplete()) {
                    XLog.getLog(getClass()).info(XLog.STD, "action completed, external ID [{0}]",
                            action.getExternalId());
                    if (runningJob.isSuccessful() && LauncherMapper.isMainSuccessful(runningJob)) {
                        Properties props = null;
                        if (getCaptureOutput(action)) {
                            props = new Properties();
                            if (LauncherMapper.hasOutputData(runningJob)) {
                                Path actionOutput = LauncherMapper.getOutputDataPath(context.getActionDir());
                                InputStream is = actionFs.open(actionOutput);
                                BufferedReader reader = new BufferedReader(new InputStreamReader(is));
                                props = PropertiesUtils.readProperties(reader, maxActionOutputLen);
                                reader.close();
                            }
                        }
                        context.setExecutionData(SUCCEEDED, props);
                        XLog.getLog(getClass()).info(XLog.STD, "action produced output");
                    }
                    else {
                        XLog log = XLog.getLog(getClass());
                        String errorReason;
                        Path actionError = LauncherMapper.getErrorPath(context.getActionDir());
                        if (actionFs.exists(actionError)) {
                            // launcher reported the failure details in a properties file
                            InputStream is = actionFs.open(actionError);
                            BufferedReader reader = new BufferedReader(new InputStreamReader(is));
                            Properties props = PropertiesUtils.readProperties(reader, -1);
                            reader.close();
                            errorReason = props.getProperty("error.reason");
                            log.warn("Launcher ERROR, reason: {0}", errorReason);
                            String exMsg = props.getProperty("exception.message");
                            String errorInfo = (exMsg != null) ? exMsg : errorReason;
                            context.setErrorInfo("JA018", errorInfo);
                            String exStackTrace = props.getProperty("exception.stacktrace");
                            if (exMsg != null) {
                                log.warn("Launcher exception: {0}{E}{1}", exMsg, exStackTrace);
                            }
                        }
                        else {
                            // launcher died without writing an error file
                            errorReason = XLog.format("LauncherMapper died, check Hadoop log for job [{0}:{1}]", action
                                    .getTrackerUri(), action.getExternalId());
                            log.warn(errorReason);
                        }
                        context.setExecutionData(FAILED_KILLED, null);
                    }
                }
                else {
                    context.setExternalStatus(RUNNING);
                    XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]",
                            action.getExternalId(), action.getExternalStatus());
                }
            }
            else {
                context.setExternalStatus(RUNNING);
                XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]",
                        action.getExternalId(), action.getExternalStatus());
            }
        }
        catch (Exception ex) {
            XLog.getLog(getClass()).warn("Exception in check(). Message[{0}]", ex.getMessage(), ex);
            exception = true;
            throw convertException(ex);
        }
        finally {
            if (jobClient != null) {
                try {
                    jobClient.close();
                }
                catch (Exception e) {
                    // don't mask an exception already in flight with a close failure
                    if (exception) {
                        log.error("JobClient error: ", e);
                    }
                    else {
                        throw convertException(e);
                    }
                }
            }
        }
    }

    /**
     * Whether the action declared a <capture-output> element, i.e. its output
     * properties should be collected by {@link #check}.
     */
    protected boolean getCaptureOutput(WorkflowAction action) throws JDOMException {
        Element eConf = XmlUtils.parseXml(action.getConf());
        Namespace ns = eConf.getNamespace();
        Element captureOutput = eConf.getChild("capture-output", ns);
        return captureOutput != null;
    }

    /**
     * Kills the Hadoop job bound to this action (if it is still known to the
     * cluster), marks the action KILLED, and cleans up the action dir.
     */
    @Override
    public void kill(Context context, WorkflowAction action) throws ActionExecutorException {
        JobClient jobClient = null;
        boolean exception = false;
        try {
            Element actionXml = XmlUtils.parseXml(action.getConf());
            Configuration conf = createBaseHadoopConf(context, actionXml);
            JobConf jobConf = new JobConf();
            XConfiguration.copy(conf, jobConf);
            jobClient = createJobClient(context, jobConf);
            RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId()));
            if (runningJob != null) {
                runningJob.killJob();
            }
            context.setExternalStatus(KILLED);
            context.setExecutionData(KILLED, null);
        }
        catch (Exception ex) {
            exception = true;
            throw convertException(ex);
        }
        finally {
            try {
                FileSystem actionFs = getActionFileSystem(context, action);
                cleanUpActionDir(actionFs, context);
                if (jobClient != null) {
                    jobClient.close();
                }
            }
            catch (Exception ex) {
                // don't mask an exception already in flight with a cleanup failure
                if (exception) {
                    log.error("Error: ", ex);
                }
                else {
                    throw convertException(ex);
                }
            }
        }
    }

    // External statuses that mean the action has reached a terminal state.
    private static Set<String> FINAL_STATUS = new HashSet<String>();

    static {
        FINAL_STATUS.add(SUCCEEDED);
        FINAL_STATUS.add(KILLED);
        FINAL_STATUS.add(FAILED);
        FINAL_STATUS.add(FAILED_KILLED);
    }

    /**
     * True when the given external status is terminal (see FINAL_STATUS).
     */
    @Override
    public boolean isCompleted(String externalStatus) {
        return FINAL_STATUS.contains(externalStatus);
    }

}