001 /** 002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved. 003 * Licensed under the Apache License, Version 2.0 (the "License"); 004 * you may not use this file except in compliance with the License. 005 * You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software 010 * distributed under the License is distributed on an "AS IS" BASIS, 011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 * See the License for the specific language governing permissions and 013 * limitations under the License. See accompanying LICENSE file. 014 */ 015 package org.apache.oozie.action.hadoop; 016 017 import java.io.BufferedReader; 018 import java.io.File; 019 import java.io.FileNotFoundException; 020 import java.io.IOException; 021 import java.io.InputStream; 022 import java.io.InputStreamReader; 023 import java.io.StringReader; 024 import java.net.ConnectException; 025 import java.net.URI; 026 import java.net.UnknownHostException; 027 import java.util.ArrayList; 028 import java.util.HashSet; 029 import java.util.List; 030 import java.util.Map; 031 import java.util.Properties; 032 import java.util.Set; 033 034 import org.apache.hadoop.conf.Configuration; 035 import org.apache.hadoop.filecache.DistributedCache; 036 import org.apache.hadoop.fs.FileSystem; 037 import org.apache.hadoop.fs.Path; 038 import org.apache.hadoop.fs.permission.AccessControlException; 039 import org.apache.hadoop.mapred.JobClient; 040 import org.apache.hadoop.mapred.JobConf; 041 import org.apache.hadoop.mapred.JobID; 042 import org.apache.hadoop.mapred.RunningJob; 043 import org.apache.hadoop.util.DiskChecker; 044 import org.apache.oozie.action.ActionExecutor; 045 import org.apache.oozie.action.ActionExecutorException; 046 import org.apache.oozie.client.OozieClient; 047 import org.apache.oozie.client.WorkflowAction; 048 import org.apache.oozie.service.HadoopAccessorException; 049 import org.apache.oozie.service.HadoopAccessorService; 050 import org.apache.oozie.service.Services; 051 import org.apache.oozie.service.WorkflowAppService; 052 import org.apache.oozie.servlet.CallbackServlet; 053 import org.apache.oozie.util.IOUtils; 054 import org.apache.oozie.util.PropertiesUtils; 055 import org.apache.oozie.util.XConfiguration; 056 import org.apache.oozie.util.XLog; 057 import org.apache.oozie.util.XmlUtils; 058 import org.jdom.Element; 059 import org.jdom.JDOMException; 060 import org.jdom.Namespace; 061 062 public class JavaActionExecutor extends ActionExecutor { 063 064 private static final String HADOOP_USER = "user.name"; 065 private static final String HADOOP_UGI = "hadoop.job.ugi"; 066 private static final String HADOOP_JOB_TRACKER = "mapred.job.tracker"; 067 private static final String HADOOP_NAME_NODE = "fs.default.name"; 068 069 private static final Set<String> DISALLOWED_PROPERTIES = new HashSet<String>(); 070 071 private static int maxActionOutputLen; 072 073 private static final String SUCCEEDED = "SUCCEEDED"; 074 private static final String KILLED = "KILLED"; 075 private static final String FAILED = "FAILED"; 076 private static final String FAILED_KILLED = "FAILED/KILLED"; 077 private static final String RUNNING = "RUNNING"; 078 private XLog log = XLog.getLog(getClass()); 079 080 static { 081 DISALLOWED_PROPERTIES.add(HADOOP_USER); 082 DISALLOWED_PROPERTIES.add(HADOOP_UGI); 083 DISALLOWED_PROPERTIES.add(HADOOP_JOB_TRACKER); 084 DISALLOWED_PROPERTIES.add(HADOOP_NAME_NODE); 085 DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_JT_KERBEROS_NAME); 086 DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_NN_KERBEROS_NAME); 087 } 088 089 public JavaActionExecutor() { 090 this("java"); 091 } 092 093 protected JavaActionExecutor(String type) { 094 super(type); 095 } 096 097 protected String getLauncherJarName() { 098 return getType() + "-launcher.jar"; 099 } 100 101 protected List<Class> getLauncherClasses() { 102 List<Class> classes = new ArrayList<Class>(); 103 classes.add(LauncherMapper.class); 104 classes.add(LauncherSecurityManager.class); 105 classes.add(LauncherException.class); 106 classes.add(LauncherMainException.class); 107 return classes; 108 } 109 110 @Override 111 public void initActionType() { 112 super.initActionType(); 113 maxActionOutputLen = getOozieConf().getInt(CallbackServlet.CONF_MAX_DATA_LEN, 2 * 1024); 114 try { 115 List<Class> classes = getLauncherClasses(); 116 Class[] launcherClasses = classes.toArray(new Class[classes.size()]); 117 IOUtils.createJar(new File(getOozieRuntimeDir()), getLauncherJarName(), launcherClasses); 118 119 registerError(UnknownHostException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA001"); 120 registerError(AccessControlException.class.getName(), ActionExecutorException.ErrorType.NON_TRANSIENT, 121 "JA002"); 122 registerError(DiskChecker.DiskOutOfSpaceException.class.getName(), 123 ActionExecutorException.ErrorType.NON_TRANSIENT, "JA003"); 124 registerError(org.apache.hadoop.hdfs.protocol.QuotaExceededException.class.getName(), 125 ActionExecutorException.ErrorType.NON_TRANSIENT, "JA004"); 126 registerError(org.apache.hadoop.hdfs.server.namenode.SafeModeException.class.getName(), 127 ActionExecutorException.ErrorType.NON_TRANSIENT, "JA005"); 128 registerError(ConnectException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA006"); 129 registerError(JDOMException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA007"); 130 registerError(FileNotFoundException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA008"); 131 registerError(IOException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA009"); 132 } 133 catch (IOException ex) { 134 throw new RuntimeException(ex); 135 } 136 } 137 138 void checkForDisallowedProps(Configuration conf, String confName) throws ActionExecutorException { 139 for (String prop : DISALLOWED_PROPERTIES) { 140 if (conf.get(prop) != null) { 141 throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA010", 142 "Property [{0}] not allowed in action [{1}] configuration", prop, confName); 143 } 144 } 145 } 146 147 Configuration createBaseHadoopConf(Context context, Element actionXml) { 148 Configuration conf = new XConfiguration(); 149 conf.set(HADOOP_USER, context.getProtoActionConf().get(WorkflowAppService.HADOOP_USER)); 150 conf.set(HADOOP_UGI, context.getProtoActionConf().get(WorkflowAppService.HADOOP_UGI)); 151 if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME) != null) { 152 conf.set(WorkflowAppService.HADOOP_JT_KERBEROS_NAME, context.getProtoActionConf().get( 153 WorkflowAppService.HADOOP_JT_KERBEROS_NAME)); 154 } 155 if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME) != null) { 156 conf.set(WorkflowAppService.HADOOP_NN_KERBEROS_NAME, context.getProtoActionConf().get( 157 WorkflowAppService.HADOOP_NN_KERBEROS_NAME)); 158 } 159 conf.set(OozieClient.GROUP_NAME, context.getProtoActionConf().get(OozieClient.GROUP_NAME)); 160 Namespace ns = actionXml.getNamespace(); 161 String jobTracker = actionXml.getChild("job-tracker", ns).getTextTrim(); 162 String nameNode = actionXml.getChild("name-node", ns).getTextTrim(); 163 conf.set(HADOOP_JOB_TRACKER, jobTracker); 164 conf.set(HADOOP_NAME_NODE, nameNode); 165 conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "true"); 166 return conf; 167 } 168 169 Configuration setupLauncherConf(Configuration conf, Element actionXml, Path appPath, Context context) throws ActionExecutorException { 170 try { 171 Namespace ns = actionXml.getNamespace(); 172 Element e = actionXml.getChild("configuration", ns); 173 if (e != null) { 174 String strConf = XmlUtils.prettyPrint(e).toString(); 175 XConfiguration inlineConf = new XConfiguration(new StringReader(strConf)); 176 177 XConfiguration launcherConf = new XConfiguration(); 178 for (Map.Entry<String, String> entry : inlineConf) { 179 if (entry.getKey().startsWith("oozie.launcher.")) { 180 String name = entry.getKey().substring("oozie.launcher.".length()); 181 String value = entry.getValue(); 182 // setting original KEY 183 launcherConf.set(entry.getKey(), value); 184 // setting un-prefixed key (to allow Hadoop job config 185 // for the launcher job 186 launcherConf.set(name, value); 187 } 188 } 189 checkForDisallowedProps(launcherConf, "inline launcher configuration"); 190 XConfiguration.copy(launcherConf, conf); 191 } 192 return conf; 193 } 194 catch (IOException ex) { 195 throw convertException(ex); 196 } 197 } 198 199 protected FileSystem getActionFileSystem(Context context, WorkflowAction action) throws ActionExecutorException { 200 try { 201 Element actionXml = XmlUtils.parseXml(action.getConf()); 202 return getActionFileSystem(context, actionXml); 203 } 204 catch (JDOMException ex) { 205 throw convertException(ex); 206 } 207 } 208 209 protected FileSystem getActionFileSystem(Context context, Element actionXml) throws ActionExecutorException { 210 try { 211 return context.getAppFileSystem(); 212 } 213 catch (Exception ex) { 214 throw convertException(ex); 215 } 216 } 217 218 Configuration setupActionConf(Configuration actionConf, Context context, Element actionXml, Path appPath) 219 throws ActionExecutorException { 220 try { 221 Namespace ns = actionXml.getNamespace(); 222 Element e = actionXml.getChild("job-xml", ns); 223 if (e != null) { 224 String jobXml = e.getTextTrim(); 225 Path path = new Path(appPath, jobXml); 226 FileSystem fs = getActionFileSystem(context, actionXml); 227 Configuration jobXmlConf = new XConfiguration(fs.open(path)); 228 checkForDisallowedProps(jobXmlConf, "job-xml"); 229 XConfiguration.copy(jobXmlConf, actionConf); 230 } 231 e = actionXml.getChild("configuration", ns); 232 if (e != null) { 233 String strConf = XmlUtils.prettyPrint(e).toString(); 234 XConfiguration inlineConf = new XConfiguration(new StringReader(strConf)); 235 checkForDisallowedProps(inlineConf, "inline configuration"); 236 XConfiguration.copy(inlineConf, actionConf); 237 } 238 return actionConf; 239 } 240 catch (IOException ex) { 241 throw convertException(ex); 242 } 243 } 244 245 Configuration addToCache(Configuration conf, Path appPath, String filePath, boolean archive) 246 throws ActionExecutorException { 247 Path path = null; 248 try { 249 if (filePath.startsWith("/")) { 250 path = new Path(filePath); 251 } 252 else { 253 path = new Path(appPath, filePath); 254 } 255 URI uri = new URI(path.toUri().getPath()); 256 if (archive) { 257 DistributedCache.addCacheArchive(uri, conf); 258 } 259 else { 260 String fileName = filePath.substring(filePath.lastIndexOf("/") + 1); 261 if (fileName.endsWith(".so") || fileName.contains(".so.")) { // .so files 262 if (!fileName.endsWith(".so")) { 263 int extAt = fileName.indexOf(".so."); 264 fileName = fileName.substring(0, extAt + 3); 265 } 266 uri = new Path(path.toString() + "#" + fileName).toUri(); 267 uri = new URI(uri.getPath()); 268 DistributedCache.addCacheFile(uri, conf); 269 } 270 else if (fileName.endsWith(".jar")){ // .jar files 271 if (!fileName.contains("#")) { 272 path = new Path(uri.toString()); 273 274 String user = conf.get("user.name"); 275 String group = conf.get("group.name"); 276 Services.get().get(HadoopAccessorService.class).addFileToClassPath(user, group, path, conf); 277 } 278 else { 279 DistributedCache.addCacheFile(uri, conf); 280 } 281 } 282 else { // regular files 283 if (!fileName.contains("#")) { 284 uri = new Path(path.toString() + "#" + fileName).toUri(); 285 uri = new URI(uri.getPath()); 286 } 287 DistributedCache.addCacheFile(uri, conf); 288 } 289 } 290 DistributedCache.createSymlink(conf); 291 return conf; 292 } 293 catch (Exception ex) { 294 XLog.getLog(getClass()).debug( 295 "Errors when add to DistributedCache. Path=" + path + ", archive=" + archive + ", conf=" 296 + XmlUtils.prettyPrint(conf).toString()); 297 throw convertException(ex); 298 } 299 } 300 301 String getOozieLauncherJar(Context context) throws ActionExecutorException { 302 try { 303 return new Path(context.getActionDir(), getLauncherJarName()).toString(); 304 } 305 catch (Exception ex) { 306 throw convertException(ex); 307 } 308 } 309 310 void prepareActionDir(FileSystem actionFs, Context context) throws ActionExecutorException { 311 try { 312 Path actionDir = context.getActionDir(); 313 Path tempActionDir = new Path(actionDir.getParent(), actionDir.getName() + ".tmp"); 314 if (!actionFs.exists(actionDir)) { 315 try { 316 actionFs.copyFromLocalFile(new Path(getOozieRuntimeDir(), getLauncherJarName()), new Path( 317 tempActionDir, getLauncherJarName())); 318 actionFs.rename(tempActionDir, actionDir); 319 } 320 catch (IOException ex) { 321 actionFs.delete(tempActionDir, true); 322 actionFs.delete(actionDir, true); 323 throw ex; 324 } 325 } 326 } 327 catch (Exception ex) { 328 throw convertException(ex); 329 } 330 } 331 332 void cleanUpActionDir(FileSystem actionFs, Context context) throws ActionExecutorException { 333 try { 334 Path actionDir = context.getActionDir(); 335 if (!context.getProtoActionConf().getBoolean("oozie.action.keep.action.dir", false) 336 && actionFs.exists(actionDir)) { 337 actionFs.delete(actionDir, true); 338 } 339 } 340 catch (Exception ex) { 341 throw convertException(ex); 342 } 343 } 344 345 @SuppressWarnings("unchecked") 346 void setLibFilesArchives(Context context, Element actionXml, Path appPath, Configuration conf) 347 throws ActionExecutorException { 348 Configuration proto = context.getProtoActionConf(); 349 350 addToCache(conf, appPath, getOozieLauncherJar(context), false); 351 352 String[] paths = proto.getStrings(WorkflowAppService.APP_LIB_PATH_LIST); 353 if (paths != null) { 354 for (String path : paths) { 355 addToCache(conf, appPath, path, false); 356 } 357 } 358 359 for (Element eProp : (List<Element>) actionXml.getChildren()) { 360 if (eProp.getName().equals("file")) { 361 String path = eProp.getTextTrim(); 362 addToCache(conf, appPath, path, false); 363 } 364 else { 365 if (eProp.getName().equals("archive")) { 366 String path = eProp.getTextTrim(); 367 addToCache(conf, appPath, path, true); 368 } 369 } 370 } 371 } 372 373 protected String getLauncherMain(Configuration launcherConf, Element actionXml) { 374 Namespace ns = actionXml.getNamespace(); 375 Element e = actionXml.getChild("main-class", ns); 376 return e.getTextTrim(); 377 } 378 379 private static final String QUEUE_NAME = "mapred.job.queue.name"; 380 private static final String OOZIE_LAUNCHER_QUEUE_NAME = "oozie.launcher.mapred.job.queue.name"; 381 382 private static final Set<String> SPECIAL_PROPERTIES = new HashSet<String>(); 383 384 static { 385 SPECIAL_PROPERTIES.add(QUEUE_NAME); 386 SPECIAL_PROPERTIES.add("mapreduce.jobtracker.kerberos.principal"); 387 SPECIAL_PROPERTIES.add("dfs.namenode.kerberos.principal"); 388 } 389 390 @SuppressWarnings("unchecked") 391 JobConf createLauncherConf(Context context, WorkflowAction action, Element actionXml, Configuration actionConf) 392 throws ActionExecutorException { 393 try { 394 Path appPathRoot = new Path(context.getWorkflow().getAppPath()).getParent(); 395 396 // launcher job configuration 397 Configuration launcherConf = createBaseHadoopConf(context, actionXml); 398 setupLauncherConf(launcherConf, actionXml, appPathRoot, context); 399 400 // we are doing init+copy because if not we are getting 'hdfs' 401 // scheme not known 402 // its seems that new JobConf(Conf) does not load defaults, it 403 // assumes parameter Conf does. 404 JobConf launcherJobConf = new JobConf(); 405 XConfiguration.copy(launcherConf, launcherJobConf); 406 setLibFilesArchives(context, actionXml, appPathRoot, launcherJobConf); 407 String jobName = XLog.format("oozie:launcher:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow() 408 .getAppName(), action.getName(), context.getWorkflow().getId()); 409 launcherJobConf.setJobName(jobName); 410 411 String jobId = context.getWorkflow().getId(); 412 String actionId = action.getId(); 413 Path actionDir = context.getActionDir(); 414 String recoveryId = context.getRecoveryId(); 415 416 LauncherMapper.setupLauncherInfo(launcherJobConf, jobId, actionId, actionDir, recoveryId, actionConf); 417 418 LauncherMapper.setupMainClass(launcherJobConf, getLauncherMain(launcherConf, actionXml)); 419 420 LauncherMapper.setupMaxOutputData(launcherJobConf, maxActionOutputLen); 421 422 Namespace ns = actionXml.getNamespace(); 423 List<Element> list = actionXml.getChildren("arg", ns); 424 String[] args = new String[list.size()]; 425 for (int i = 0; i < list.size(); i++) { 426 args[i] = list.get(i).getTextTrim(); 427 } 428 LauncherMapper.setupMainArguments(launcherJobConf, args); 429 430 Element opt = actionXml.getChild("java-opts", ns); 431 if (opt != null) { 432 String opts = launcherConf.get("mapred.child.java.opts", ""); 433 opts = opts + " " + opt.getTextTrim(); 434 opts = opts.trim(); 435 launcherJobConf.set("mapred.child.java.opts", opts); 436 } 437 438 // properties from action that are needed by the launcher (QUEUE 439 // NAME) 440 // maybe we should add queue to the WF schema, below job-tracker 441 for (String name : SPECIAL_PROPERTIES) { 442 String value = actionConf.get(name); 443 if (value != null) { 444 if (!name.equals(QUEUE_NAME) || 445 (name.equals(QUEUE_NAME) && launcherJobConf.get(OOZIE_LAUNCHER_QUEUE_NAME) == null)) { 446 launcherJobConf.set(name, value); 447 } 448 } 449 } 450 451 // to disable cancelation of delegation token on launcher job end 452 launcherJobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false); 453 454 // setting the group owning the Oozie job to allow anybody in that 455 // group to kill the jobs. 456 launcherJobConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup()); 457 458 return launcherJobConf; 459 } 460 catch (Exception ex) { 461 throw convertException(ex); 462 } 463 } 464 465 private void injectCallback(Context context, Configuration conf) { 466 String callback = context.getCallbackUrl("$jobStatus"); 467 if (conf.get("job.end.notification.url") != null) { 468 XLog.getLog(getClass()).warn("Overriding the action job end notification URI"); 469 } 470 conf.set("job.end.notification.url", callback); 471 } 472 473 void injectActionCallback(Context context, Configuration actionConf) { 474 injectCallback(context, actionConf); 475 } 476 477 void injectLauncherCallback(Context context, Configuration launcherConf) { 478 injectCallback(context, launcherConf); 479 } 480 481 void submitLauncher(Context context, WorkflowAction action) throws ActionExecutorException { 482 JobClient jobClient = null; 483 boolean exception = false; 484 try { 485 Path appPathRoot = new Path(context.getWorkflow().getAppPath()).getParent(); 486 Element actionXml = XmlUtils.parseXml(action.getConf()); 487 488 // action job configuration 489 Configuration actionConf = createBaseHadoopConf(context, actionXml); 490 setupActionConf(actionConf, context, actionXml, appPathRoot); 491 XLog.getLog(getClass()).debug("Setting LibFilesArchives "); 492 setLibFilesArchives(context, actionXml, appPathRoot, actionConf); 493 String jobName = XLog.format("oozie:action:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow() 494 .getAppName(), action.getName(), context.getWorkflow().getId()); 495 actionConf.set("mapred.job.name", jobName); 496 injectActionCallback(context, actionConf); 497 498 // setting the group owning the Oozie job to allow anybody in that 499 // group to kill the jobs. 500 actionConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup()); 501 502 JobConf launcherJobConf = createLauncherConf(context, action, actionXml, actionConf); 503 injectLauncherCallback(context, launcherJobConf); 504 XLog.getLog(getClass()).debug("Creating Job Client for action " + action.getId()); 505 jobClient = createJobClient(context, launcherJobConf); 506 String launcherId = LauncherMapper.getRecoveryId(launcherJobConf, context.getActionDir(), context 507 .getRecoveryId()); 508 boolean alreadyRunning = launcherId != null; 509 RunningJob runningJob; 510 511 if (alreadyRunning) { 512 runningJob = jobClient.getJob(JobID.forName(launcherId)); 513 if (runningJob == null) { 514 String jobTracker = launcherJobConf.get("mapred.job.tracker"); 515 throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017", 516 "unknown job [{0}@{1}], cannot recover", launcherId, jobTracker); 517 } 518 } 519 else { 520 prepare(context, actionXml); 521 XLog.getLog(getClass()).debug("Submitting the job through Job Client for action " + action.getId()); 522 523 // setting up propagation of the delegation token. 524 AuthHelper.get().set(jobClient, launcherJobConf); 525 log.debug(WorkflowAppService.HADOOP_JT_KERBEROS_NAME + " = " 526 + launcherJobConf.get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME)); 527 log.debug(WorkflowAppService.HADOOP_NN_KERBEROS_NAME + " = " 528 + launcherJobConf.get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME)); 529 runningJob = jobClient.submitJob(launcherJobConf); 530 if (runningJob == null) { 531 throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017", 532 "Error submitting launcher for action [{0}]", action.getId()); 533 } 534 launcherId = runningJob.getID().toString(); 535 XLog.getLog(getClass()).debug("After submission get the launcherId " + launcherId); 536 } 537 538 String jobTracker = launcherJobConf.get(HADOOP_JOB_TRACKER); 539 String consoleUrl = runningJob.getTrackingURL(); 540 context.setStartData(launcherId, jobTracker, consoleUrl); 541 } 542 catch (Exception ex) { 543 exception = true; 544 throw convertException(ex); 545 } 546 finally { 547 if (jobClient != null) { 548 try { 549 jobClient.close(); 550 } 551 catch (Exception e) { 552 if (exception) { 553 log.error("JobClient error: ", e); 554 } 555 else { 556 throw convertException(e); 557 } 558 } 559 } 560 } 561 } 562 563 void prepare(Context context, Element actionXml) throws ActionExecutorException { 564 Namespace ns = actionXml.getNamespace(); 565 Element prepare = actionXml.getChild("prepare", ns); 566 if (prepare != null) { 567 XLog.getLog(getClass()).debug("Preparing the action with FileSystem operation"); 568 FsActionExecutor fsAe = new FsActionExecutor(); 569 fsAe.doOperations(context, prepare); 570 XLog.getLog(getClass()).debug("FS Operation is completed"); 571 } 572 } 573 574 @Override 575 public void start(Context context, WorkflowAction action) throws ActionExecutorException { 576 try { 577 XLog.getLog(getClass()).debug("Starting action " + action.getId() + " getting Action File System"); 578 FileSystem actionFs = getActionFileSystem(context, action); 579 XLog.getLog(getClass()).debug("Preparing action Dir through copying " + context.getActionDir()); 580 prepareActionDir(actionFs, context); 581 XLog.getLog(getClass()).debug("Action Dir is ready. Submitting the action "); 582 submitLauncher(context, action); 583 XLog.getLog(getClass()).debug("Action submit completed. Performing check "); 584 check(context, action); 585 XLog.getLog(getClass()).debug("Action check is done after submission"); 586 } 587 catch (Exception ex) { 588 throw convertException(ex); 589 } 590 } 591 592 @Override 593 public void end(Context context, WorkflowAction action) throws ActionExecutorException { 594 try { 595 String externalStatus = action.getExternalStatus(); 596 WorkflowAction.Status status = externalStatus.equals(SUCCEEDED) ? WorkflowAction.Status.OK 597 : WorkflowAction.Status.ERROR; 598 context.setEndData(status, getActionSignal(status)); 599 } 600 catch (Exception ex) { 601 throw convertException(ex); 602 } 603 finally { 604 try { 605 FileSystem actionFs = getActionFileSystem(context, action); 606 cleanUpActionDir(actionFs, context); 607 } 608 catch (Exception ex) { 609 throw convertException(ex); 610 } 611 } 612 } 613 614 /** 615 * Create job client object 616 * @param context 617 * @param jobConf 618 * @return 619 * @throws HadoopAccessorException 620 */ 621 protected JobClient createJobClient(Context context, JobConf jobConf) throws HadoopAccessorException { 622 String user = context.getWorkflow().getUser(); 623 String group = context.getWorkflow().getGroup(); 624 return Services.get().get(HadoopAccessorService.class).createJobClient(user, group, jobConf); 625 } 626 627 @Override 628 public void check(Context context, WorkflowAction action) throws ActionExecutorException { 629 JobClient jobClient = null; 630 boolean exception = false; 631 try { 632 Element actionXml = XmlUtils.parseXml(action.getConf()); 633 FileSystem actionFs = getActionFileSystem(context, actionXml); 634 Configuration conf = createBaseHadoopConf(context, actionXml); 635 JobConf jobConf = new JobConf(); 636 XConfiguration.copy(conf, jobConf); 637 jobClient = createJobClient(context, jobConf); 638 RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId())); 639 if (runningJob == null) { 640 context.setExternalStatus(FAILED); 641 context.setExecutionData(FAILED, null); 642 throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017", 643 "Unknown hadoop job [{0}] associated with action [{1}]. Failing this action!", action 644 .getExternalId(), action.getId()); 645 } 646 if (runningJob.isComplete()) { 647 Path actionDir = context.getActionDir(); 648 649 String user = context.getWorkflow().getUser(); 650 String group = context.getWorkflow().getGroup(); 651 if (LauncherMapper.hasIdSwap(runningJob, user, group, actionDir)) { 652 String launcherId = action.getExternalId(); 653 Path idSwapPath = LauncherMapper.getIdSwapPath(context.getActionDir()); 654 InputStream is = actionFs.open(idSwapPath); 655 BufferedReader reader = new BufferedReader(new InputStreamReader(is)); 656 Properties props = PropertiesUtils.readProperties(reader, maxActionOutputLen); 657 reader.close(); 658 String newId = props.getProperty("id"); 659 runningJob = jobClient.getJob(JobID.forName(newId)); 660 if (runningJob == null) { 661 context.setExternalStatus(FAILED); 662 throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017", 663 "Unknown hadoop job [{0}] associated with action [{1}]. Failing this action!", newId, 664 action.getId()); 665 } 666 667 context.setStartData(newId, action.getTrackerUri(), runningJob.getTrackingURL()); 668 XLog.getLog(getClass()).info(XLog.STD, "External ID swap, old ID [{0}] new ID [{1}]", launcherId, 669 newId); 670 } 671 if (runningJob.isComplete()) { 672 XLog.getLog(getClass()).info(XLog.STD, "action completed, external ID [{0}]", 673 action.getExternalId()); 674 if (runningJob.isSuccessful() && LauncherMapper.isMainSuccessful(runningJob)) { 675 Properties props = null; 676 if (getCaptureOutput(action)) { 677 props = new Properties(); 678 if (LauncherMapper.hasOutputData(runningJob)) { 679 Path actionOutput = LauncherMapper.getOutputDataPath(context.getActionDir()); 680 InputStream is = actionFs.open(actionOutput); 681 BufferedReader reader = new BufferedReader(new InputStreamReader(is)); 682 props = PropertiesUtils.readProperties(reader, maxActionOutputLen); 683 reader.close(); 684 } 685 } 686 context.setExecutionData(SUCCEEDED, props); 687 XLog.getLog(getClass()).info(XLog.STD, "action produced output"); 688 } 689 else { 690 XLog log = XLog.getLog(getClass()); 691 String errorReason; 692 Path actionError = LauncherMapper.getErrorPath(context.getActionDir()); 693 if (actionFs.exists(actionError)) { 694 InputStream is = actionFs.open(actionError); 695 BufferedReader reader = new BufferedReader(new InputStreamReader(is)); 696 Properties props = PropertiesUtils.readProperties(reader, -1); 697 reader.close(); 698 String errorCode = props.getProperty("error.code"); 699 if (errorCode.equals("0")) { 700 errorCode = "JA018"; 701 } 702 errorReason = props.getProperty("error.reason"); 703 log.warn("Launcher ERROR, reason: {0}", errorReason); 704 String exMsg = props.getProperty("exception.message"); 705 String errorInfo = (exMsg != null) ? exMsg : errorReason; 706 context.setErrorInfo(errorCode, errorInfo); 707 String exStackTrace = props.getProperty("exception.stacktrace"); 708 if (exMsg != null) { 709 log.warn("Launcher exception: {0}{E}{1}", exMsg, exStackTrace); 710 } 711 } 712 else { 713 errorReason = XLog.format("LauncherMapper died, check Hadoop log for job [{0}:{1}]", action 714 .getTrackerUri(), action.getExternalId()); 715 log.warn(errorReason); 716 } 717 context.setExecutionData(FAILED_KILLED, null); 718 } 719 } 720 else { 721 context.setExternalStatus(RUNNING); 722 XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]", 723 action.getExternalId(), action.getExternalStatus()); 724 } 725 } 726 else { 727 context.setExternalStatus(RUNNING); 728 XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]", 729 action.getExternalId(), action.getExternalStatus()); 730 } 731 } 732 catch (Exception ex) { 733 XLog.getLog(getClass()).warn("Exception in check(). Message[{0}]", ex.getMessage(), ex); 734 exception = true; 735 throw convertException(ex); 736 } 737 finally { 738 if (jobClient != null) { 739 try { 740 jobClient.close(); 741 } 742 catch (Exception e) { 743 if (exception) { 744 log.error("JobClient error: ", e); 745 } 746 else { 747 throw convertException(e); 748 } 749 } 750 } 751 } 752 } 753 754 protected boolean getCaptureOutput(WorkflowAction action) throws JDOMException { 755 Element eConf = XmlUtils.parseXml(action.getConf()); 756 Namespace ns = eConf.getNamespace(); 757 Element captureOutput = eConf.getChild("capture-output", ns); 758 return captureOutput != null; 759 } 760 761 @Override 762 public void kill(Context context, WorkflowAction action) throws ActionExecutorException { 763 JobClient jobClient = null; 764 boolean exception = false; 765 try { 766 Element actionXml = XmlUtils.parseXml(action.getConf()); 767 Configuration conf = createBaseHadoopConf(context, actionXml); 768 JobConf jobConf = new JobConf(); 769 XConfiguration.copy(conf, jobConf); 770 jobClient = createJobClient(context, jobConf); 771 RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId())); 772 if (runningJob != null) { 773 runningJob.killJob(); 774 } 775 context.setExternalStatus(KILLED); 776 context.setExecutionData(KILLED, null); 777 } 778 catch (Exception ex) { 779 exception = true; 780 throw convertException(ex); 781 } 782 finally { 783 try { 784 FileSystem actionFs = getActionFileSystem(context, action); 785 cleanUpActionDir(actionFs, context); 786 if (jobClient != null) { 787 jobClient.close(); 788 } 789 } 790 catch (Exception ex) { 791 if (exception) { 792 log.error("Error: ", ex); 793 } 794 else { 795 throw convertException(ex); 796 } 797 } 798 } 799 } 800 801 private static Set<String> FINAL_STATUS = new HashSet<String>(); 802 803 static { 804 FINAL_STATUS.add(SUCCEEDED); 805 FINAL_STATUS.add(KILLED); 806 FINAL_STATUS.add(FAILED); 807 FINAL_STATUS.add(FAILED_KILLED); 808 } 809 810 @Override 811 public boolean isCompleted(String externalStatus) { 812 return FINAL_STATUS.contains(externalStatus); 813 } 814 815 }