/**
 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License. See accompanying LICENSE file.
 */
package org.apache.oozie.action.hadoop;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.ConnectException;
import java.net.URI;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.AccessControlException;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.util.DiskChecker;
import org.apache.oozie.action.ActionExecutor;
import org.apache.oozie.action.ActionExecutorException;
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.client.WorkflowAction;
import org.apache.oozie.service.HadoopAccessorException;
import org.apache.oozie.service.HadoopAccessorService;
import org.apache.oozie.service.Services;
import org.apache.oozie.service.WorkflowAppService;
import org.apache.oozie.servlet.CallbackServlet;
import org.apache.oozie.util.IOUtils;
import org.apache.oozie.util.PropertiesUtils;
import org.apache.oozie.util.XConfiguration;
import org.apache.oozie.util.XLog;
import org.apache.oozie.util.XmlUtils;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;

/**
 * Action executor for the <code>java</code> workflow action.
 * <p/>
 * The executor builds two Hadoop configurations from the action XML (one for the action itself and one
 * for the launcher job), submits the launcher job through a {@link JobClient}, and later polls that job
 * in {@link #check} to derive the action's external status.
 * <p/>
 * Subclasses can reuse the machinery by passing their own action type to
 * {@link #JavaActionExecutor(String)} and overriding the <code>protected</code> hooks.
 */
public class JavaActionExecutor extends ActionExecutor {

    private static final String HADOOP_USER = "user.name";
    private static final String HADOOP_UGI = "hadoop.job.ugi";
    private static final String HADOOP_JOB_TRACKER = "mapred.job.tracker";
    private static final String HADOOP_NAME_NODE = "fs.default.name";

    /** Properties that job-xml / inline configurations are not allowed to define. */
    private static final Set<String> DISALLOWED_PROPERTIES = new HashSet<String>();

    /** Maximum size of the data read back from the launcher; set in {@link #initActionType()}. */
    private static int maxActionOutputLen;

    private static final String SUCCEEDED = "SUCCEEDED";
    private static final String KILLED = "KILLED";
    private static final String FAILED = "FAILED";
    private static final String FAILED_KILLED = "FAILED/KILLED";
    private static final String RUNNING = "RUNNING";
    protected XLog log = XLog.getLog(getClass());

    static {
        DISALLOWED_PROPERTIES.add(HADOOP_USER);
        DISALLOWED_PROPERTIES.add(HADOOP_UGI);
        DISALLOWED_PROPERTIES.add(HADOOP_JOB_TRACKER);
        DISALLOWED_PROPERTIES.add(HADOOP_NAME_NODE);
        DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_JT_KERBEROS_NAME);
        DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_NN_KERBEROS_NAME);
    }

    /**
     * Creates an executor for the <code>java</code> action type.
     */
    public JavaActionExecutor() {
        this("java");
    }

    /**
     * Creates an executor for the given action type.
     *
     * @param type action type name handed to the {@link ActionExecutor} constructor.
     */
    protected JavaActionExecutor(String type) {
        super(type);
    }

    /**
     * Returns the file name of the launcher JAR for this action type.
     *
     * @return the action type followed by <code>-launcher.jar</code>.
     */
    protected String getLauncherJarName() {
        return getType() + "-launcher.jar";
    }

    /**
     * Returns the classes that are packaged into the launcher JAR by {@link #initActionType()}.
     *
     * @return a new, mutable list with the launcher classes.
     */
    protected List<Class> getLauncherClasses() {
        List<Class> classes = new ArrayList<Class>();
        classes.add(LauncherMapper.class);
        classes.add(LauncherSecurityManager.class);
        classes.add(LauncherException.class);
        classes.add(LauncherMainException.class);
        return classes;
    }

    /**
     * Initializes the action type: reads the maximum output length from the Oozie configuration,
     * creates the launcher JAR in the Oozie runtime directory and registers the exception class to
     * error type/code mappings (JA001 - JA009).
     *
     * @throws RuntimeException wrapping the {@link IOException} if the launcher JAR cannot be created.
     */
    @Override
    public void initActionType() {
        super.initActionType();
        maxActionOutputLen = getOozieConf().getInt(CallbackServlet.CONF_MAX_DATA_LEN, 2 * 1024);
        try {
            List<Class> classes = getLauncherClasses();
            Class[] launcherClasses = classes.toArray(new Class[classes.size()]);
            IOUtils.createJar(new File(getOozieRuntimeDir()), getLauncherJarName(), launcherClasses);

            registerError(UnknownHostException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA001");
            registerError(AccessControlException.class.getName(), ActionExecutorException.ErrorType.NON_TRANSIENT,
                          "JA002");
            registerError(DiskChecker.DiskOutOfSpaceException.class.getName(),
                          ActionExecutorException.ErrorType.NON_TRANSIENT, "JA003");
            registerError(org.apache.hadoop.hdfs.protocol.QuotaExceededException.class.getName(),
                          ActionExecutorException.ErrorType.NON_TRANSIENT, "JA004");
            registerError(org.apache.hadoop.hdfs.server.namenode.SafeModeException.class.getName(),
                          ActionExecutorException.ErrorType.NON_TRANSIENT, "JA005");
            registerError(ConnectException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA006");
            registerError(JDOMException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA007");
            registerError(FileNotFoundException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA008");
            registerError(IOException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA009");
        }
        catch (IOException ex) {
            throw new RuntimeException(ex);
        }
    }

    /**
     * Verifies that a configuration does not define any of the {@link #DISALLOWED_PROPERTIES}.
     *
     * @param conf configuration to inspect.
     * @param confName human readable name of the configuration, used in the error message.
     * @throws ActionExecutorException (FAILED, JA010) on the first disallowed property found.
     */
    void checkForDisallowedProps(Configuration conf, String confName) throws ActionExecutorException {
        for (String prop : DISALLOWED_PROPERTIES) {
            if (conf.get(prop) != null) {
                throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA010",
                                                  "Property [{0}] not allowed in action [{1}] configuration", prop, confName);
            }
        }
    }

    /**
     * Creates the configuration shared by the action and the launcher: user, UGI, optional Kerberos
     * names and group taken from the proto action configuration, plus job tracker and name node taken
     * from the action XML.
     *
     * @param context executor context.
     * @param actionXml action XML element; its <code>job-tracker</code> and <code>name-node</code>
     *        children are dereferenced without a null check.
     * @return a new configuration with the base Hadoop properties set.
     */
    Configuration createBaseHadoopConf(Context context, Element actionXml) {
        Configuration proto = context.getProtoActionConf();
        Configuration conf = new XConfiguration();
        conf.set(HADOOP_USER, proto.get(WorkflowAppService.HADOOP_USER));
        conf.set(HADOOP_UGI, proto.get(WorkflowAppService.HADOOP_UGI));
        if (proto.get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME) != null) {
            conf.set(WorkflowAppService.HADOOP_JT_KERBEROS_NAME,
                     proto.get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME));
        }
        if (proto.get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME) != null) {
            conf.set(WorkflowAppService.HADOOP_NN_KERBEROS_NAME,
                     proto.get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME));
        }
        conf.set(OozieClient.GROUP_NAME, proto.get(OozieClient.GROUP_NAME));
        Namespace ns = actionXml.getNamespace();
        String jobTracker = actionXml.getChild("job-tracker", ns).getTextTrim();
        String nameNode = actionXml.getChild("name-node", ns).getTextTrim();
        conf.set(HADOOP_JOB_TRACKER, jobTracker);
        conf.set(HADOOP_NAME_NODE, nameNode);
        conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "true");
        return conf;
    }

    /**
     * Copies the <code>oozie.launcher.</code> prefixed properties of the action's inline
     * <code>configuration</code> element into the launcher configuration, both under their original
     * key and under the key with the prefix removed.
     *
     * @param conf launcher configuration to update.
     * @param actionXml action XML element.
     * @param appPath not used by this implementation.
     * @param context not used by this implementation.
     * @return the <code>conf</code> argument.
     * @throws ActionExecutorException if a disallowed property is set or the inline configuration
     *         cannot be read.
     */
    Configuration setupLauncherConf(Configuration conf, Element actionXml, Path appPath, Context context)
            throws ActionExecutorException {
        try {
            Namespace ns = actionXml.getNamespace();
            Element e = actionXml.getChild("configuration", ns);
            if (e != null) {
                String strConf = XmlUtils.prettyPrint(e).toString();
                XConfiguration inlineConf = new XConfiguration(new StringReader(strConf));

                XConfiguration launcherConf = new XConfiguration();
                for (Map.Entry<String, String> entry : inlineConf) {
                    if (entry.getKey().startsWith("oozie.launcher.")) {
                        String name = entry.getKey().substring("oozie.launcher.".length());
                        String value = entry.getValue();
                        // setting original KEY
                        launcherConf.set(entry.getKey(), value);
                        // setting un-prefixed key (to allow Hadoop job config
                        // for the launcher job
                        launcherConf.set(name, value);
                    }
                }
                checkForDisallowedProps(launcherConf, "inline launcher configuration");
                XConfiguration.copy(launcherConf, conf);
            }
            return conf;
        }
        catch (IOException ex) {
            throw convertException(ex);
        }
    }

    /**
     * Returns the file system for an action, parsing the action configuration first.
     *
     * @param context executor context.
     * @param action workflow action whose configuration XML is parsed.
     * @return the file system returned by {@link #getActionFileSystem(Context, Element)}.
     * @throws ActionExecutorException if the action XML cannot be parsed or the file system cannot be
     *         obtained.
     */
    protected FileSystem getActionFileSystem(Context context, WorkflowAction action) throws ActionExecutorException {
        try {
            Element actionXml = XmlUtils.parseXml(action.getConf());
            return getActionFileSystem(context, actionXml);
        }
        catch (JDOMException ex) {
            throw convertException(ex);
        }
    }

    /**
     * Returns the file system for an action. This implementation returns the application file system
     * of the context and ignores <code>actionXml</code>.
     *
     * @param context executor context.
     * @param actionXml action XML element (unused here).
     * @return the application file system.
     * @throws ActionExecutorException if the file system cannot be obtained.
     */
    protected FileSystem getActionFileSystem(Context context, Element actionXml) throws ActionExecutorException {
        try {
            return context.getAppFileSystem();
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }

    /**
     * Merges the optional <code>job-xml</code> file and the optional inline <code>configuration</code>
     * element of the action into the action configuration, in that order. Both are rejected if they
     * define a disallowed property.
     *
     * @param actionConf action configuration to update.
     * @param context executor context.
     * @param actionXml action XML element.
     * @param appPath path the <code>job-xml</code> value is resolved against.
     * @return the <code>actionConf</code> argument.
     * @throws ActionExecutorException if a disallowed property is set or a configuration cannot be read.
     */
    Configuration setupActionConf(Configuration actionConf, Context context, Element actionXml, Path appPath)
            throws ActionExecutorException {
        try {
            Namespace ns = actionXml.getNamespace();
            Element e = actionXml.getChild("job-xml", ns);
            if (e != null) {
                String jobXml = e.getTextTrim();
                Path path = new Path(appPath, jobXml);
                FileSystem fs = getActionFileSystem(context, actionXml);
                InputStream jobXmlStream = fs.open(path);
                Configuration jobXmlConf;
                try {
                    jobXmlConf = new XConfiguration(jobXmlStream);
                }
                finally {
                    // the stream was opened here, so it is released here whether or not parsing worked
                    jobXmlStream.close();
                }
                checkForDisallowedProps(jobXmlConf, "job-xml");
                XConfiguration.copy(jobXmlConf, actionConf);
            }
            e = actionXml.getChild("configuration", ns);
            if (e != null) {
                String strConf = XmlUtils.prettyPrint(e).toString();
                XConfiguration inlineConf = new XConfiguration(new StringReader(strConf));
                checkForDisallowedProps(inlineConf, "inline configuration");
                XConfiguration.copy(inlineConf, actionConf);
            }
            return actionConf;
        }
        catch (IOException ex) {
            throw convertException(ex);
        }
    }

    /**
     * Registers a file or archive with the {@link DistributedCache} of the given configuration.
     * <p/>
     * Paths starting with <code>/</code> are used as they are, all others are resolved against
     * <code>appPath</code>. For non-archives the handling depends on the file name:
     * <ul>
     * <li><code>.so</code> files (including versioned <code>.so.N</code> names) are added as cache
     * files with a <code>#name.so</code> fragment;</li>
     * <li><code>.jar</code> files without a <code>#</code> are added to the class path, otherwise they
     * are added as cache files;</li>
     * <li>any other file is added as a cache file, with a <code>#fileName</code> fragment appended if
     * it has none.</li>
     * </ul>
     *
     * @param conf configuration to update.
     * @param appPath base path for relative file paths.
     * @param filePath path of the file or archive.
     * @param archive <code>true</code> to add the path as a cache archive.
     * @return the <code>conf</code> argument.
     * @throws ActionExecutorException if the path cannot be added.
     */
    Configuration addToCache(Configuration conf, Path appPath, String filePath, boolean archive)
            throws ActionExecutorException {
        Path path = null;
        try {
            if (filePath.startsWith("/")) {
                path = new Path(filePath);
            }
            else {
                path = new Path(appPath, filePath);
            }
            URI uri = new URI(path.toUri().getPath());
            if (archive) {
                DistributedCache.addCacheArchive(uri, conf);
            }
            else {
                String fileName = filePath.substring(filePath.lastIndexOf("/") + 1);
                if (fileName.endsWith(".so") || fileName.contains(".so.")) { // .so files
                    if (!fileName.endsWith(".so")) {
                        // drop whatever follows ".so" so the fragment is the plain library name
                        int extAt = fileName.indexOf(".so.");
                        fileName = fileName.substring(0, extAt + 3);
                    }
                    uri = new Path(path.toString() + "#" + fileName).toUri();
                    uri = new URI(uri.getPath());
                    DistributedCache.addCacheFile(uri, conf);
                }
                else if (fileName.endsWith(".jar")) { // .jar files
                    if (!fileName.contains("#")) {
                        path = new Path(uri.toString());

                        String user = conf.get(HADOOP_USER);
                        String group = conf.get("group.name");
                        FileSystem fs =
                                Services.get().get(HadoopAccessorService.class).createFileSystem(user, group, conf);
                        DistributedCache.addFileToClassPath(path, conf, fs);
                    }
                    else {
                        DistributedCache.addCacheFile(uri, conf);
                    }
                }
                else { // regular files
                    if (!fileName.contains("#")) {
                        uri = new Path(path.toString() + "#" + fileName).toUri();
                        uri = new URI(uri.getPath());
                    }
                    DistributedCache.addCacheFile(uri, conf);
                }
            }
            DistributedCache.createSymlink(conf);
            return conf;
        }
        catch (Exception ex) {
            XLog.getLog(getClass()).debug(
                    "Errors when add to DistributedCache. Path=" + path + ", archive=" + archive + ", conf="
                            + XmlUtils.prettyPrint(conf).toString());
            throw convertException(ex);
        }
    }

    /**
     * Returns the location of the launcher JAR inside the action directory.
     *
     * @param context executor context.
     * @return string form of <code>actionDir/launcherJarName</code>.
     * @throws ActionExecutorException if the action directory cannot be determined.
     */
    String getOozieLauncherJar(Context context) throws ActionExecutorException {
        try {
            return new Path(context.getActionDir(), getLauncherJarName()).toString();
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }

    /**
     * Creates the action directory if it does not exist yet. The launcher JAR is copied into a
     * sibling <code>.tmp</code> directory which is then renamed to the action directory; if that
     * fails with an {@link IOException} both directories are deleted before the error is reported.
     *
     * @param actionFs file system holding the action directory.
     * @param context executor context.
     * @throws ActionExecutorException if the directory cannot be prepared.
     */
    void prepareActionDir(FileSystem actionFs, Context context) throws ActionExecutorException {
        try {
            Path actionDir = context.getActionDir();
            Path tempActionDir = new Path(actionDir.getParent(), actionDir.getName() + ".tmp");
            if (!actionFs.exists(actionDir)) {
                try {
                    actionFs.copyFromLocalFile(new Path(getOozieRuntimeDir(), getLauncherJarName()), new Path(
                            tempActionDir, getLauncherJarName()));
                    actionFs.rename(tempActionDir, actionDir);
                }
                catch (IOException ex) {
                    actionFs.delete(tempActionDir, true);
                    actionFs.delete(actionDir, true);
                    throw ex;
                }
            }
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }

    /**
     * Deletes the action directory, unless <code>oozie.action.keep.action.dir</code> is set to
     * <code>true</code> in the proto action configuration or the directory does not exist.
     *
     * @param actionFs file system holding the action directory.
     * @param context executor context.
     * @throws ActionExecutorException if the directory cannot be checked or deleted.
     */
    void cleanUpActionDir(FileSystem actionFs, Context context) throws ActionExecutorException {
        try {
            Path actionDir = context.getActionDir();
            if (!context.getProtoActionConf().getBoolean("oozie.action.keep.action.dir", false)
                    && actionFs.exists(actionDir)) {
                actionFs.delete(actionDir, true);
            }
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }

    /**
     * Adds to the distributed cache of <code>conf</code>, in this order: the launcher JAR, the
     * application library paths listed in the proto action configuration, and every
     * <code>file</code> / <code>archive</code> child element of the action XML.
     *
     * @param context executor context.
     * @param actionXml action XML element.
     * @param appPath base path for relative file paths.
     * @param conf configuration to update.
     * @throws ActionExecutorException if any entry cannot be added.
     */
    @SuppressWarnings("unchecked")
    void setLibFilesArchives(Context context, Element actionXml, Path appPath, Configuration conf)
            throws ActionExecutorException {
        Configuration proto = context.getProtoActionConf();

        addToCache(conf, appPath, getOozieLauncherJar(context), false);

        String[] paths = proto.getStrings(WorkflowAppService.APP_LIB_PATH_LIST);
        if (paths != null) {
            for (String path : paths) {
                addToCache(conf, appPath, path, false);
            }
        }

        for (Element eProp : (List<Element>) actionXml.getChildren()) {
            if (eProp.getName().equals("file")) {
                addToCache(conf, appPath, eProp.getTextTrim(), false);
            }
            else if (eProp.getName().equals("archive")) {
                addToCache(conf, appPath, eProp.getTextTrim(), true);
            }
        }
    }

    /**
     * Returns the main class the launcher has to run, read from the <code>main-class</code> element
     * of the action XML. The element is dereferenced without a null check.
     *
     * @param launcherConf launcher configuration (unused here).
     * @param actionXml action XML element.
     * @return trimmed text of the <code>main-class</code> element.
     */
    protected String getLauncherMain(Configuration launcherConf, Element actionXml) {
        Namespace ns = actionXml.getNamespace();
        Element e = actionXml.getChild("main-class", ns);
        return e.getTextTrim();
    }

    private static final String QUEUE_NAME = "mapred.job.queue.name";
    private static final String OOZIE_LAUNCHER_QUEUE_NAME = "oozie.launcher.mapred.job.queue.name";

    /** Action properties that are propagated to the launcher job configuration. */
    private static final Set<String> SPECIAL_PROPERTIES = new HashSet<String>();

    static {
        SPECIAL_PROPERTIES.add(QUEUE_NAME);
        SPECIAL_PROPERTIES.add("mapreduce.jobtracker.kerberos.principal");
        SPECIAL_PROPERTIES.add("dfs.namenode.kerberos.principal");
    }

    /**
     * Builds the {@link JobConf} used to submit the launcher job: base Hadoop properties, launcher
     * overrides, distributed cache entries, job name, launcher info, main class, arguments,
     * optional <code>java-opts</code> and the {@link #SPECIAL_PROPERTIES} taken from the action
     * configuration.
     *
     * @param context executor context.
     * @param action workflow action.
     * @param actionXml parsed action XML.
     * @param actionConf action configuration handed to the launcher.
     * @return the launcher job configuration.
     * @throws ActionExecutorException if the configuration cannot be created.
     */
    @SuppressWarnings("unchecked")
    JobConf createLauncherConf(Context context, WorkflowAction action, Element actionXml, Configuration actionConf)
            throws ActionExecutorException {
        try {
            Path appPathRoot = new Path(context.getWorkflow().getAppPath()).getParent();

            // launcher job configuration
            Configuration launcherConf = createBaseHadoopConf(context, actionXml);
            setupLauncherConf(launcherConf, actionXml, appPathRoot, context);

            // we are doing init+copy because if not we are getting 'hdfs'
            // scheme not known
            // its seems that new JobConf(Conf) does not load defaults, it
            // assumes parameter Conf does.
            JobConf launcherJobConf = new JobConf();
            XConfiguration.copy(launcherConf, launcherJobConf);
            setLibFilesArchives(context, actionXml, appPathRoot, launcherJobConf);
            String jobName = XLog.format("oozie:launcher:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow()
                    .getAppName(), action.getName(), context.getWorkflow().getId());
            launcherJobConf.setJobName(jobName);

            String jobId = context.getWorkflow().getId();
            String actionId = action.getId();
            Path actionDir = context.getActionDir();
            String recoveryId = context.getRecoveryId();

            LauncherMapper.setupLauncherInfo(launcherJobConf, jobId, actionId, actionDir, recoveryId, actionConf);

            LauncherMapper.setupMainClass(launcherJobConf, getLauncherMain(launcherConf, actionXml));

            LauncherMapper.setupMaxOutputData(launcherJobConf, maxActionOutputLen);

            Namespace ns = actionXml.getNamespace();
            List<Element> list = actionXml.getChildren("arg", ns);
            String[] args = new String[list.size()];
            for (int i = 0; i < list.size(); i++) {
                args[i] = list.get(i).getTextTrim();
            }
            LauncherMapper.setupMainArguments(launcherJobConf, args);

            Element opt = actionXml.getChild("java-opts", ns);
            if (opt != null) {
                String opts = launcherConf.get("mapred.child.java.opts", "");
                opts = opts + " " + opt.getTextTrim();
                opts = opts.trim();
                launcherJobConf.set("mapred.child.java.opts", opts);
            }

            // properties from action that are needed by the launcher (QUEUE
            // NAME)
            // maybe we should add queue to the WF schema, below job-tracker
            for (String name : SPECIAL_PROPERTIES) {
                String value = actionConf.get(name);
                if (value != null) {
                    // the action queue name is only used when no launcher specific queue was given
                    if (!name.equals(QUEUE_NAME) || launcherJobConf.get(OOZIE_LAUNCHER_QUEUE_NAME) == null) {
                        launcherJobConf.set(name, value);
                    }
                }
            }

            // to disable cancelation of delegation token on launcher job end
            launcherJobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);

            // setting the group owning the Oozie job to allow anybody in that
            // group to kill the jobs.
            launcherJobConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup());

            return launcherJobConf;
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }

    /**
     * Sets <code>job.end.notification.url</code> to the Oozie callback URL, logging a warning if the
     * property was already defined.
     *
     * @param context executor context providing the callback URL.
     * @param conf configuration to update.
     */
    private void injectCallback(Context context, Configuration conf) {
        String callback = context.getCallbackUrl("$jobStatus");
        if (conf.get("job.end.notification.url") != null) {
            XLog.getLog(getClass()).warn("Overriding the action job end notification URI");
        }
        conf.set("job.end.notification.url", callback);
    }

    /**
     * Injects the Oozie callback URL into the action configuration.
     *
     * @param context executor context.
     * @param actionConf action configuration to update.
     */
    void injectActionCallback(Context context, Configuration actionConf) {
        injectCallback(context, actionConf);
    }

    /**
     * Injects the Oozie callback URL into the launcher configuration.
     *
     * @param context executor context.
     * @param launcherConf launcher configuration to update.
     */
    void injectLauncherCallback(Context context, Configuration launcherConf) {
        injectCallback(context, launcherConf);
    }

    /**
     * Builds the action and launcher configurations and submits the launcher job, or re-attaches to
     * it when a recovery ID shows that it was already submitted. On success the launcher ID, job
     * tracker and console URL are stored through {@link Context#setStartData}.
     *
     * @param context executor context.
     * @param action workflow action to start.
     * @throws ActionExecutorException if the launcher cannot be submitted or recovered.
     */
    void submitLauncher(Context context, WorkflowAction action) throws ActionExecutorException {
        JobClient jobClient = null;
        boolean exception = false;
        try {
            Path appPathRoot = new Path(context.getWorkflow().getAppPath()).getParent();
            Element actionXml = XmlUtils.parseXml(action.getConf());

            // action job configuration
            Configuration actionConf = createBaseHadoopConf(context, actionXml);
            setupActionConf(actionConf, context, actionXml, appPathRoot);
            XLog.getLog(getClass()).debug("Setting LibFilesArchives ");
            setLibFilesArchives(context, actionXml, appPathRoot, actionConf);
            String jobName = XLog.format("oozie:action:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow()
                    .getAppName(), action.getName(), context.getWorkflow().getId());
            actionConf.set("mapred.job.name", jobName);
            injectActionCallback(context, actionConf);

            // setting the group owning the Oozie job to allow anybody in that
            // group to kill the jobs.
            actionConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup());

            JobConf launcherJobConf = createLauncherConf(context, action, actionXml, actionConf);
            injectLauncherCallback(context, launcherJobConf);
            XLog.getLog(getClass()).debug("Creating Job Client for action " + action.getId());
            jobClient = createJobClient(context, launcherJobConf);
            String launcherId = LauncherMapper.getRecoveryId(launcherJobConf, context.getActionDir(), context
                    .getRecoveryId());
            boolean alreadyRunning = launcherId != null;
            RunningJob runningJob;

            if (alreadyRunning) {
                runningJob = jobClient.getJob(JobID.forName(launcherId));
                if (runningJob == null) {
                    String jobTracker = launcherJobConf.get(HADOOP_JOB_TRACKER);
                    throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017",
                                                      "unknown job [{0}@{1}], cannot recover", launcherId, jobTracker);
                }
            }
            else {
                prepare(context, actionXml);
                XLog.getLog(getClass()).debug("Submitting the job through Job Client for action " + action.getId());

                // setting up propagation of the delegation token.
                AuthHelper.get().set(jobClient, launcherJobConf);
                log.debug(WorkflowAppService.HADOOP_JT_KERBEROS_NAME + " = "
                        + launcherJobConf.get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME));
                log.debug(WorkflowAppService.HADOOP_NN_KERBEROS_NAME + " = "
                        + launcherJobConf.get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME));
                runningJob = jobClient.submitJob(launcherJobConf);
                if (runningJob == null) {
                    throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017",
                                                      "Error submitting launcher for action [{0}]", action.getId());
                }
                launcherId = runningJob.getID().toString();
                XLog.getLog(getClass()).debug("After submission get the launcherId " + launcherId);
            }

            String jobTracker = launcherJobConf.get(HADOOP_JOB_TRACKER);
            String consoleUrl = runningJob.getTrackingURL();
            context.setStartData(launcherId, jobTracker, consoleUrl);
        }
        catch (Exception ex) {
            exception = true;
            throw convertException(ex);
        }
        finally {
            if (jobClient != null) {
                try {
                    jobClient.close();
                }
                catch (Exception e) {
                    // never let a close failure hide the exception that is already propagating
                    if (exception) {
                        log.error("JobClient error: ", e);
                    }
                    else {
                        throw convertException(e);
                    }
                }
            }
        }
    }

    /**
     * Runs the file system operations of the optional <code>prepare</code> element of the action
     * through a {@link FsActionExecutor}.
     *
     * @param context executor context.
     * @param actionXml action XML element.
     * @throws ActionExecutorException if a prepare operation fails.
     */
    void prepare(Context context, Element actionXml) throws ActionExecutorException {
        Namespace ns = actionXml.getNamespace();
        Element prepare = actionXml.getChild("prepare", ns);
        if (prepare != null) {
            XLog.getLog(getClass()).debug("Preparing the action with FileSystem operation");
            FsActionExecutor fsAe = new FsActionExecutor();
            fsAe.doOperations(context, prepare);
            XLog.getLog(getClass()).debug("FS Operation is completed");
        }
    }

    /**
     * Starts the action: prepares the action directory, submits the launcher and performs a first
     * {@link #check}.
     *
     * @param context executor context.
     * @param action workflow action to start.
     * @throws ActionExecutorException if any of the steps fails.
     */
    @Override
    public void start(Context context, WorkflowAction action) throws ActionExecutorException {
        try {
            XLog.getLog(getClass()).debug("Starting action " + action.getId() + " getting Action File System");
            FileSystem actionFs = getActionFileSystem(context, action);
            XLog.getLog(getClass()).debug("Preparing action Dir through copying " + context.getActionDir());
            prepareActionDir(actionFs, context);
            XLog.getLog(getClass()).debug("Action Dir is ready. Submitting the action ");
            submitLauncher(context, action);
            XLog.getLog(getClass()).debug("Action submit completed. Performing check ");
            check(context, action);
            XLog.getLog(getClass()).debug("Action check is done after submission");
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }

    /**
     * Ends the action: the status is <code>OK</code> if the external status is
     * <code>SUCCEEDED</code> and <code>ERROR</code> otherwise (including a <code>null</code>
     * external status). The action directory is cleaned up in all cases.
     *
     * @param context executor context.
     * @param action workflow action to end.
     * @throws ActionExecutorException if the end data cannot be set, or if only the clean up fails.
     */
    @Override
    public void end(Context context, WorkflowAction action) throws ActionExecutorException {
        boolean exception = false;
        try {
            String externalStatus = action.getExternalStatus();
            // constant first, so a missing external status ends the action as ERROR instead of an NPE
            WorkflowAction.Status status = SUCCEEDED.equals(externalStatus) ? WorkflowAction.Status.OK
                    : WorkflowAction.Status.ERROR;
            context.setEndData(status, getActionSignal(status));
        }
        catch (Exception ex) {
            exception = true;
            throw convertException(ex);
        }
        finally {
            try {
                FileSystem actionFs = getActionFileSystem(context, action);
                cleanUpActionDir(actionFs, context);
            }
            catch (Exception ex) {
                // a clean up failure must not replace the exception that is already propagating
                if (exception) {
                    log.error("Error: ", ex);
                }
                else {
                    throw convertException(ex);
                }
            }
        }
    }

    /**
     * Creates a {@link JobClient} on behalf of the user and group owning the workflow job.
     *
     * @param context executor context providing the workflow user and group.
     * @param jobConf job configuration the client is created with.
     * @return the job client created by the {@link HadoopAccessorService}.
     * @throws HadoopAccessorException if the job client cannot be created.
     */
    protected JobClient createJobClient(Context context, JobConf jobConf) throws HadoopAccessorException {
        String user = context.getWorkflow().getUser();
        String group = context.getWorkflow().getGroup();
        return Services.get().get(HadoopAccessorService.class).createJobClient(user, group, jobConf);
    }

    /**
     * Reads a properties file from the given file system, always closing the underlying stream.
     *
     * @param fs file system to read from.
     * @param path file to read.
     * @param maxLen maximum length handed to {@link PropertiesUtils#readProperties}.
     * @return the properties read.
     * @throws IOException if the file cannot be opened or read.
     */
    private static Properties readProperties(FileSystem fs, Path path, int maxLen) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(path)));
        try {
            return PropertiesUtils.readProperties(reader, maxLen);
        }
        finally {
            reader.close();
        }
    }

    /**
     * Checks the Hadoop job associated with the action and updates the context accordingly.
     * <ul>
     * <li>Unknown job: external status and execution data are set to <code>FAILED</code> and a
     * FAILED/JA017 exception is thrown.</li>
     * <li>Job not complete: external status is set to <code>RUNNING</code>.</li>
     * <li>Job complete with an ID swap: the new job ID is read from the swap file, stored as start
     * data and that job is inspected instead.</li>
     * <li>Job complete and successful: execution data is set to <code>SUCCEEDED</code>, with the
     * launcher output data if the action captures output.</li>
     * <li>Job complete and not successful: error information is read from the launcher error file,
     * if there is one, and execution data is set to <code>FAILED/KILLED</code>.</li>
     * </ul>
     *
     * @param context executor context.
     * @param action workflow action to check.
     * @throws ActionExecutorException if the check fails.
     */
    @Override
    public void check(Context context, WorkflowAction action) throws ActionExecutorException {
        JobClient jobClient = null;
        boolean exception = false;
        try {
            Element actionXml = XmlUtils.parseXml(action.getConf());
            FileSystem actionFs = getActionFileSystem(context, actionXml);
            Configuration conf = createBaseHadoopConf(context, actionXml);
            JobConf jobConf = new JobConf();
            XConfiguration.copy(conf, jobConf);
            jobClient = createJobClient(context, jobConf);
            RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId()));
            if (runningJob == null) {
                context.setExternalStatus(FAILED);
                context.setExecutionData(FAILED, null);
                throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017",
                                                  "Unknown hadoop job [{0}] associated with action [{1}]. Failing this action!", action
                        .getExternalId(), action.getId());
            }
            if (runningJob.isComplete()) {
                Path actionDir = context.getActionDir();

                String user = context.getWorkflow().getUser();
                String group = context.getWorkflow().getGroup();
                if (LauncherMapper.hasIdSwap(runningJob, user, group, actionDir)) {
                    String launcherId = action.getExternalId();
                    Path idSwapPath = LauncherMapper.getIdSwapPath(context.getActionDir());
                    Properties props = readProperties(actionFs, idSwapPath, maxActionOutputLen);
                    String newId = props.getProperty("id");
                    runningJob = jobClient.getJob(JobID.forName(newId));
                    if (runningJob == null) {
                        context.setExternalStatus(FAILED);
                        throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017",
                                                          "Unknown hadoop job [{0}] associated with action [{1}]. Failing this action!", newId,
                                                          action.getId());
                    }

                    context.setStartData(newId, action.getTrackerUri(), runningJob.getTrackingURL());
                    XLog.getLog(getClass()).info(XLog.STD, "External ID swap, old ID [{0}] new ID [{1}]", launcherId,
                                                 newId);
                }
                if (runningJob.isComplete()) {
                    XLog.getLog(getClass()).info(XLog.STD, "action completed, external ID [{0}]",
                                                 action.getExternalId());
                    if (runningJob.isSuccessful() && LauncherMapper.isMainSuccessful(runningJob)) {
                        Properties props = null;
                        if (getCaptureOutput(action)) {
                            props = new Properties();
                            if (LauncherMapper.hasOutputData(runningJob)) {
                                Path actionOutput = LauncherMapper.getOutputDataPath(context.getActionDir());
                                props = readProperties(actionFs, actionOutput, maxActionOutputLen);
                            }
                        }
                        context.setExecutionData(SUCCEEDED, props);
                        XLog.getLog(getClass()).info(XLog.STD, "action produced output");
                    }
                    else {
                        XLog failureLog = XLog.getLog(getClass());
                        String errorReason;
                        Path actionError = LauncherMapper.getErrorPath(context.getActionDir());
                        if (actionFs.exists(actionError)) {
                            Properties props = readProperties(actionFs, actionError, -1);
                            String errorCode = props.getProperty("error.code");
                            // a missing error code is handled like "0" instead of failing with an NPE
                            if (errorCode == null || errorCode.equals("0")) {
                                errorCode = "JA018";
                            }
                            errorReason = props.getProperty("error.reason");
                            failureLog.warn("Launcher ERROR, reason: {0}", errorReason);
                            String exMsg = props.getProperty("exception.message");
                            String errorInfo = (exMsg != null) ? exMsg : errorReason;
                            context.setErrorInfo(errorCode, errorInfo);
                            String exStackTrace = props.getProperty("exception.stacktrace");
                            if (exMsg != null) {
                                failureLog.warn("Launcher exception: {0}{E}{1}", exMsg, exStackTrace);
                            }
                        }
                        else {
                            errorReason = XLog.format("LauncherMapper died, check Hadoop log for job [{0}:{1}]", action
                                    .getTrackerUri(), action.getExternalId());
                            failureLog.warn(errorReason);
                        }
                        context.setExecutionData(FAILED_KILLED, null);
                    }
                }
                else {
                    context.setExternalStatus(RUNNING);
                    XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]",
                                                 action.getExternalId(), action.getExternalStatus());
                }
            }
            else {
                context.setExternalStatus(RUNNING);
                XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]",
                                             action.getExternalId(), action.getExternalStatus());
            }
        }
        catch (Exception ex) {
            XLog.getLog(getClass()).warn("Exception in check(). Message[{0}]", ex.getMessage(), ex);
            exception = true;
            throw convertException(ex);
        }
        finally {
            if (jobClient != null) {
                try {
                    jobClient.close();
                }
                catch (Exception e) {
                    // never let a close failure hide the exception that is already propagating
                    if (exception) {
                        log.error("JobClient error: ", e);
                    }
                    else {
                        throw convertException(e);
                    }
                }
            }
        }
    }

    /**
     * Tells whether the action declares a <code>capture-output</code> element.
     *
     * @param action workflow action whose configuration XML is parsed.
     * @return <code>true</code> if the element is present.
     * @throws JDOMException if the action XML cannot be parsed.
     */
    protected boolean getCaptureOutput(WorkflowAction action) throws JDOMException {
        Element eConf = XmlUtils.parseXml(action.getConf());
        Namespace ns = eConf.getNamespace();
        Element captureOutput = eConf.getChild("capture-output", ns);
        return captureOutput != null;
    }

    /**
     * Kills the Hadoop job associated with the action, if the job tracker still knows it, and sets
     * external status and execution data to <code>KILLED</code>. The action directory is cleaned up
     * and the job client is closed in all cases.
     *
     * @param context executor context.
     * @param action workflow action to kill.
     * @throws ActionExecutorException if the kill fails, or if only the clean up / close fails.
     */
    @Override
    public void kill(Context context, WorkflowAction action) throws ActionExecutorException {
        JobClient jobClient = null;
        boolean exception = false;
        try {
            Element actionXml = XmlUtils.parseXml(action.getConf());
            Configuration conf = createBaseHadoopConf(context, actionXml);
            JobConf jobConf = new JobConf();
            XConfiguration.copy(conf, jobConf);
            jobClient = createJobClient(context, jobConf);
            RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId()));
            if (runningJob != null) {
                runningJob.killJob();
            }
            context.setExternalStatus(KILLED);
            context.setExecutionData(KILLED, null);
        }
        catch (Exception ex) {
            exception = true;
            throw convertException(ex);
        }
        finally {
            try {
                try {
                    FileSystem actionFs = getActionFileSystem(context, action);
                    cleanUpActionDir(actionFs, context);
                }
                finally {
                    // closed in its own finally so a clean up failure cannot leak the client
                    if (jobClient != null) {
                        jobClient.close();
                    }
                }
            }
            catch (Exception ex) {
                if (exception) {
                    log.error("Error: ", ex);
                }
                else {
                    throw convertException(ex);
                }
            }
        }
    }

    /** External statuses for which {@link #isCompleted(String)} returns <code>true</code>. */
    private static final Set<String> FINAL_STATUS = new HashSet<String>();

    static {
        FINAL_STATUS.add(SUCCEEDED);
        FINAL_STATUS.add(KILLED);
        FINAL_STATUS.add(FAILED);
        FINAL_STATUS.add(FAILED_KILLED);
    }

    /**
     * Tells whether an external status is a final one.
     *
     * @param externalStatus external status to test.
     * @return <code>true</code> for <code>SUCCEEDED</code>, <code>KILLED</code>, <code>FAILED</code>
     *         and <code>FAILED/KILLED</code>.
     */
    @Override
    public boolean isCompleted(String externalStatus) {
        return FINAL_STATUS.contains(externalStatus);
    }

}