/**
 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License. See accompanying LICENSE file.
 */
package org.apache.oozie.workflow.lite;

import org.apache.oozie.service.XLogService;
import org.apache.oozie.service.DagXLogInfoService;
import org.apache.oozie.client.OozieClient;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.oozie.workflow.WorkflowApp;
import org.apache.oozie.workflow.WorkflowException;
import org.apache.oozie.workflow.WorkflowInstance;
import org.apache.oozie.util.ParamChecker;
import org.apache.oozie.util.XLog;
import org.apache.oozie.util.XConfiguration;
import org.apache.oozie.ErrorCode;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.ByteArrayOutputStream;
import java.io.ByteArrayInputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Runtime state of one execution of a {@link LiteWorkflowApp}.
 * <p/>
 * The instance keeps:
 * <ul>
 * <li>its id, configuration, workflow definition and {@link Status};</li>
 * <li>a map from execution path to the node currently sitting on that path;</li>
 * <li>persistent variables, which are serialized by {@link #write(DataOutput)};</li>
 * <li>transient variables, which are <b>not</b> serialized.</li>
 * </ul>
 * Execution paths are strings built with {@code "/"} as separator and always end with the separator
 * (the root path is {@code "/"}). A "full transition" is an execution path and a target node name joined
 * by {@code "#"}.
 * <p/>
 * State-changing lifecycle methods ({@code start}, {@code signal}, {@code fail}, {@code kill},
 * {@code suspend}, {@code resume}) are {@code synchronized}; the variable accessors and
 * {@link #setStatus(Status)} are not.
 */
public class LiteWorkflowInstance implements Writable, WorkflowInstance {
    /** Suffix of the persistent variable that records the last transition taken out of a node. */
    private static final String TRANSITION_TO = "transition.to";

    private XLog log;

    private static final String PATH_SEPARATOR = "/";
    private static final String ROOT = PATH_SEPARATOR;
    private static final String TRANSITION_SEPARATOR = "#";

    /**
     * Node currently associated with an execution path, plus whether its handler has already been entered.
     */
    private static class NodeInstance {
        final String nodeName;
        boolean started = false;

        private NodeInstance(String nodeName) {
            this.nodeName = nodeName;
        }
    }

    /**
     * {@link NodeHandler.Context} implementation handed to node handlers.
     * <p/>
     * It is an inner (non-static) class on purpose: it reads and mutates the enclosing instance's
     * execution paths and variables. The context carries its own {@link Status} (initially
     * {@code RUNNING}) which handlers change through {@link #failJob()}, {@link #killJob()} and
     * {@link #completeJob()}; {@code signal} inspects it after the handler's exit.
     */
    private class Context implements NodeHandler.Context {
        private final NodeDef nodeDef;
        private String executionPath;
        private final String exitState;
        private Status status = Status.RUNNING;

        private Context(NodeDef nodeDef, String executionPath, String exitState) {
            this.nodeDef = nodeDef;
            this.executionPath = executionPath;
            this.exitState = exitState;
        }

        public NodeDef getNodeDef() {
            return nodeDef;
        }

        public String getExecutionPath() {
            return executionPath;
        }

        public String getParentExecutionPath(String executionPath) {
            return LiteWorkflowInstance.getParentPath(executionPath);
        }

        public String getSignalValue() {
            return exitState;
        }

        public String createExecutionPath(String name) {
            return LiteWorkflowInstance.createChildPath(executionPath, name);
        }

        public String createFullTransition(String executionPath, String transition) {
            return LiteWorkflowInstance.createFullTransition(executionPath, transition);
        }

        /**
         * Removes this context's execution path from the instance and moves the context to the parent path.
         *
         * @throws IllegalStateException if the current execution path is not registered in the instance.
         */
        public void deleteExecutionPath() {
            if (!executionPaths.containsKey(executionPath)) {
                throw new IllegalStateException();
            }
            executionPaths.remove(executionPath);
            executionPath = LiteWorkflowInstance.getParentPath(executionPath);
        }

        public void failJob() {
            status = Status.FAILED;
        }

        public void killJob() {
            status = Status.KILLED;
        }

        public void completeJob() {
            status = Status.SUCCEEDED;
        }

        @Override
        public Object getTransientVar(String name) {
            return LiteWorkflowInstance.this.getTransientVar(name);
        }

        @Override
        public String getVar(String name) {
            return LiteWorkflowInstance.this.getVar(name);
        }

        @Override
        public void setTransientVar(String name, Object value) {
            LiteWorkflowInstance.this.setTransientVar(name, value);
        }

        @Override
        public void setVar(String name, String value) {
            LiteWorkflowInstance.this.setVar(name, value);
        }

        @Override
        public LiteWorkflowInstance getProcessInstance() {
            return LiteWorkflowInstance.this;
        }

    }

    private LiteWorkflowApp def;
    private Configuration conf;
    private String instanceId;
    private Status status;
    private final Map<String, NodeInstance> executionPaths = new HashMap<String, NodeInstance>();
    private final Map<String, String> persistentVars = new HashMap<String, String>();
    private final Map<String, Object> transientVars = new HashMap<String, Object>();

    /**
     * Creates an empty instance; its state is expected to be filled in afterwards, e.g. by
     * {@link #readFields(DataInput)}.
     */
    protected LiteWorkflowInstance() {
        log = XLog.getLog(getClass());
    }

    /**
     * Creates a new workflow instance in {@code PREP} status.
     *
     * @param def workflow definition, checked with {@code ParamChecker.notNull}.
     * @param conf job configuration, checked with {@code ParamChecker.notNull}.
     * @param instanceId instance id, checked with {@code ParamChecker.notNull}.
     */
    public LiteWorkflowInstance(LiteWorkflowApp def, Configuration conf, String instanceId) {
        this();
        this.def = ParamChecker.notNull(def, "def");
        this.instanceId = ParamChecker.notNull(instanceId, "instanceId");
        this.conf = ParamChecker.notNull(conf, "conf");
        refreshLog();
        status = Status.PREP;
    }

    /**
     * Starts the instance: moves it to {@code RUNNING}, places the start node on the root execution path
     * and signals that path.
     *
     * @return the value returned by {@link #signal(String, String)}, i.e. whether the instance reached an
     *         end state.
     * @throws WorkflowException with {@code E0719} if the instance is not in {@code PREP} status, or any
     *         exception thrown while signaling.
     */
    public synchronized boolean start() throws WorkflowException {
        if (status != Status.PREP) {
            throw new WorkflowException(ErrorCode.E0719);
        }
        log.debug(XLog.STD, "Starting job");
        status = Status.RUNNING;
        executionPaths.put(ROOT, new NodeInstance(StartNodeDef.START));
        return signal(ROOT, StartNodeDef.START);
    }

    //todo if suspended store signal and use when resuming

    /**
     * Signals the node sitting on the given execution path.
     * <p/>
     * If the node has not been entered yet, its handler's {@code loopDetection} and {@code enter} are
     * invoked. When the node is exiting, the handler's {@code multiExit} provides the full transitions to
     * follow; the target of the last one is recorded in a persistent variable. Depending on the status the
     * handler left in the context the instance is then killed, failed or completed, or the transitions are
     * registered as execution paths and each newly registered path is signaled recursively with the
     * {@code "::synch::"} signal value.
     * <p/>
     * If the instance ends in an end state, the started nodes are terminated via their handlers.
     *
     * @param executionPath execution path to signal, must not be empty.
     * @param signalValue signal value passed to the node handler, must not be {@code null}.
     * @return <code>true</code> if the instance is in an end state after processing the signal.
     * @throws WorkflowException with {@code E0716} if the instance is not {@code RUNNING}, or rethrown
     *         from the node handler (in which case the status is set to {@code FAILED} first).
     */
    public synchronized boolean signal(String executionPath, String signalValue) throws WorkflowException {
        ParamChecker.notEmpty(executionPath, "executionPath");
        ParamChecker.notNull(signalValue, "signalValue");
        log.debug(XLog.STD, "Signaling job execution path [{0}] signal value [{1}]", executionPath, signalValue);
        if (status != Status.RUNNING) {
            throw new WorkflowException(ErrorCode.E0716);
        }
        NodeInstance nodeJob = executionPaths.get(executionPath);
        if (nodeJob == null) {
            status = Status.FAILED;
            log.error("invalid execution path [{0}]", executionPath);
        }
        NodeDef nodeDef = null;
        // The isEndState() guards below are what keep a null nodeJob/nodeDef from being dereferenced;
        // this relies on FAILED being reported as an end state.
        if (!status.isEndState()) {
            nodeDef = def.getNode(nodeJob.nodeName);
            if (nodeDef == null) {
                status = Status.FAILED;
                log.error("invalid transition [{0}]", nodeJob.nodeName);
            }
        }
        if (!status.isEndState()) {
            NodeHandler nodeHandler = newInstance(nodeDef.getHandlerClass());
            boolean exiting = true;

            Context context = new Context(nodeDef, executionPath, signalValue);
            if (!nodeJob.started) {
                try {
                    nodeHandler.loopDetection(context);
                    exiting = nodeHandler.enter(context);
                    nodeJob.started = true;
                }
                catch (WorkflowException ex) {
                    status = Status.FAILED;
                    throw ex;
                }
            }

            if (exiting) {
                List<String> pathsToStart = new ArrayList<String>();
                List<String> fullTransitions;
                try {
                    fullTransitions = nodeHandler.multiExit(context);
                    int last = fullTransitions.size() - 1;
                    // TEST THIS
                    if (last >= 0) {
                        // Remember the target of the last transition; exposed via getTransition(node).
                        String transitionTo = getTransitionNode(fullTransitions.get(last));

                        persistentVars.put(nodeDef.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + TRANSITION_TO,
                                           transitionTo);
                    }
                }
                catch (WorkflowException ex) {
                    status = Status.FAILED;
                    throw ex;
                }

                if (context.status == Status.KILLED) {
                    status = Status.KILLED;
                    log.debug(XLog.STD, "Completing job, kill node [{0}]", nodeJob.nodeName);
                }
                else if (context.status == Status.FAILED) {
                    status = Status.FAILED;
                    log.debug(XLog.STD, "Completing job, fail node [{0}]", nodeJob.nodeName);
                }
                else if (context.status == Status.SUCCEEDED) {
                    status = Status.SUCCEEDED;
                    log.debug(XLog.STD, "Completing job, end node [{0}]", nodeJob.nodeName);
                }
                // NOTE: a SUSPENDED context status has no dedicated branch (one existed but was disabled);
                // it falls through to the transition handling below.
                else {
                    for (String fullTransition : fullTransitions) {
                        // this is the whole trick for forking, we need the
                        // executionpath and the transition
                        // in the case of no forking last element of
                        // executionpath is different from transition
                        // in the case of forking they are the same

                        log.debug(XLog.STD, "Exiting node [{0}] with transition[{1}]", nodeJob.nodeName,
                                  fullTransition);

                        String execPathFromTransition = getExecutionPath(fullTransition);
                        String transition = getTransitionNode(fullTransition);
                        def.validateTransition(nodeJob.nodeName, transition);

                        NodeInstance nodeJobInPath = executionPaths.get(execPathFromTransition);
                        if ((nodeJobInPath == null) || (!transition.equals(nodeJobInPath.nodeName))) {
                            // TODO explain this IF better
                            // If the WfJob is signaled with the parent
                            // execution executionPath again
                            // The Fork node will execute again.. and replace
                            // the Node WorkflowJobBean
                            // so this is required to prevent that..
                            // Question : Should we throw an error in this case
                            // ??
                            executionPaths.put(execPathFromTransition, new NodeInstance(transition));
                            pathsToStart.add(execPathFromTransition);
                        }

                    }
                    // signal all new synch transitions
                    for (String pathToStart : pathsToStart) {
                        signal(pathToStart, "::synch::");
                    }
                }
            }
        }
        if (status.isEndState()) {
            if (status == Status.FAILED) {
                List<String> failedNodes = terminateNodes(status);
                log.warn(XLog.STD, "Workflow completed [{0}], failing [{1}] running nodes", status, failedNodes
                        .size());
            }
            else {
                List<String> killedNodes = terminateNodes(Status.KILLED);
                // NOTE(review): the threshold is "> 1", so a single killed node is not logged - confirm intent.
                if (killedNodes.size() > 1) {
                    log.warn(XLog.STD, "Workflow completed [{0}], killing [{1}] running nodes", status, killedNodes
                            .size());
                }
            }
        }
        return status.isEndState();
    }

    /**
     * Fails the instance: fails the given node, kills every started node and sets the status to
     * {@code FAILED}.
     *
     * @param nodeName name of the node that caused the failure.
     * @throws WorkflowException with {@code E0718} if the instance is already in an end state.
     */
    public synchronized void fail(String nodeName) throws WorkflowException {
        if (status.isEndState()) {
            throw new WorkflowException(ErrorCode.E0718);
        }
        String failedNode = failNode(nodeName);
        if (failedNode != null) {
            log.warn(XLog.STD, "Workflow Failed. Failing node [{0}]", failedNode);
        }
        else {
            //TODO failed attempting to fail the action. EXCEPTION
        }
        List<String> killedNodes = killNodes();
        // NOTE(review): the threshold is "> 1", so a single killed node is not logged - confirm intent.
        if (killedNodes.size() > 1) {
            log.warn(XLog.STD, "Workflow Failed, killing [{0}] nodes", killedNodes.size());
        }
        status = Status.FAILED;
    }

    /**
     * Kills the instance: kills every started node and sets the status to {@code KILLED}.
     *
     * @throws WorkflowException with {@code E0718} if the instance is already in an end state.
     */
    public synchronized void kill() throws WorkflowException {
        if (status.isEndState()) {
            throw new WorkflowException(ErrorCode.E0718);
        }
        log.debug(XLog.STD, "Killing job");
        List<String> killedNodes = killNodes();
        // NOTE(review): the threshold is "> 1", so a single killed node is not logged - confirm intent.
        if (killedNodes.size() > 1) {
            log.warn(XLog.STD, "workflow killed, killing [{0}] nodes", killedNodes.size());
        }
        status = Status.KILLED;
    }

    /**
     * Suspends a running instance.
     *
     * @throws WorkflowException with {@code E0716} if the instance is not {@code RUNNING}.
     */
    public synchronized void suspend() throws WorkflowException {
        if (status != Status.RUNNING) {
            throw new WorkflowException(ErrorCode.E0716);
        }
        log.debug(XLog.STD, "Suspending job");
        this.status = Status.SUSPENDED;
    }

    /**
     * @return <code>true</code> if the instance status is {@code SUSPENDED}.
     */
    public boolean isSuspended() {
        return (status == Status.SUSPENDED);
    }

    /**
     * Resumes a suspended instance by setting its status back to {@code RUNNING}.
     *
     * @throws WorkflowException with {@code E0717} if the instance is not {@code SUSPENDED}.
     */
    public synchronized void resume() throws WorkflowException {
        if (status != Status.SUSPENDED) {
            throw new WorkflowException(ErrorCode.E0717);
        }
        log.debug(XLog.STD, "Resuming job");
        status = Status.RUNNING;
    }

    /**
     * Sets a persistent variable; a {@code null} value removes the variable.
     *
     * @param name variable name.
     * @param value variable value, or {@code null} to remove it.
     */
    public void setVar(String name, String value) {
        if (value != null) {
            persistentVars.put(name, value);
        }
        else {
            persistentVars.remove(name);
        }
    }

    /**
     * @return the live internal map of persistent variables (not a copy).
     */
    @Override
    public Map<String, String> getAllVars() {
        return persistentVars;
    }

    /**
     * Adds all given entries to the persistent variables; existing variables not present in the given map
     * are kept.
     *
     * @param varMap variables to add.
     */
    @Override
    public void setAllVars(Map<String, String> varMap) {
        persistentVars.putAll(varMap);
    }

    /**
     * @param name variable name.
     * @return the persistent variable value, or {@code null} if not set.
     */
    public String getVar(String name) {
        return persistentVars.get(name);
    }


    /**
     * Sets a transient variable; a {@code null} value removes the variable. Transient variables are not
     * written by {@link #write(DataOutput)}.
     *
     * @param name variable name.
     * @param value variable value, or {@code null} to remove it.
     */
    public void setTransientVar(String name, Object value) {
        if (value != null) {
            transientVars.put(name, value);
        }
        else {
            transientVars.remove(name);
        }
    }

    /**
     * @param name variable name.
     * @return <code>true</code> if a transient variable with that name is set.
     */
    public boolean hasTransientVar(String name) {
        return transientVars.containsKey(name);
    }

    /**
     * @param name variable name.
     * @return the transient variable value, or {@code null} if not set.
     */
    public Object getTransientVar(String name) {
        return transientVars.get(name);
    }

    /**
     * @return <code>true</code> if the instance status is an end state.
     */
    public boolean hasEnded() {
        return status.isEndState();
    }

    /**
     * Invokes the handler's {@code kill} (for {@code KILLED}) or {@code fail} (for {@code FAILED}) on every
     * started node. Handler exceptions are logged and the node is skipped.
     *
     * @param endStatus status the nodes are being moved to.
     * @return names of the nodes whose handler call did not throw.
     */
    private List<String> terminateNodes(Status endStatus) {
        List<String> endNodes = new ArrayList<String>();
        for (Map.Entry<String, NodeInstance> entry : executionPaths.entrySet()) {
            if (entry.getValue().started) {
                NodeDef nodeDef = def.getNode(entry.getValue().nodeName);
                NodeHandler nodeHandler = newInstance(nodeDef.getHandlerClass());
                try {
                    if (endStatus == Status.KILLED) {
                        nodeHandler.kill(new Context(nodeDef, entry.getKey(), null));
                    }
                    else if (endStatus == Status.FAILED) {
                        nodeHandler.fail(new Context(nodeDef, entry.getKey(), null));
                    }
                    endNodes.add(nodeDef.getName());
                }
                catch (Exception ex) {
                    log.warn(XLog.STD, "Error Changing node state to [{0}] for Node [{1}]", endStatus.toString(),
                             nodeDef.getName(), ex);
                }
            }
        }
        return endNodes;
    }

    /**
     * Fails the first started node found with the given name by invoking its handler's {@code fail} and
     * marking it as not started. Only the first match is processed.
     *
     * @param nodeName name of the node to fail.
     * @return the failed node name, or {@code null} if no started node matched or the handler threw.
     */
    private String failNode(String nodeName) {
        String failedNode = null;
        for (Map.Entry<String, NodeInstance> entry : executionPaths.entrySet()) {
            String node = entry.getKey();
            NodeInstance nodeInstance = entry.getValue();
            if (nodeInstance.started && nodeInstance.nodeName.equals(nodeName)) {
                NodeDef nodeDef = def.getNode(nodeInstance.nodeName);
                NodeHandler nodeHandler = newInstance(nodeDef.getHandlerClass());
                try {
                    nodeHandler.fail(new Context(nodeDef, node, null));
                    failedNode = nodeDef.getName();
                    nodeInstance.started = false;
                }
                catch (Exception ex) {
                    log.warn(XLog.STD, "Error failing node [{0}]", nodeDef.getName(), ex);
                }
                return failedNode;
            }
        }
        return failedNode;
    }

    /**
     * Invokes the handler's {@code kill} on every started node. Handler exceptions are logged and the node
     * is skipped.
     *
     * @return names of the nodes whose handler call did not throw.
     */
    private List<String> killNodes() {
        List<String> killedNodes = new ArrayList<String>();
        for (Map.Entry<String, NodeInstance> entry : executionPaths.entrySet()) {
            String node = entry.getKey();
            NodeInstance nodeInstance = entry.getValue();
            if (nodeInstance.started) {
                NodeDef nodeDef = def.getNode(nodeInstance.nodeName);
                NodeHandler nodeHandler = newInstance(nodeDef.getHandlerClass());
                try {
                    nodeHandler.kill(new Context(nodeDef, node, null));
                    killedNodes.add(nodeDef.getName());
                }
                catch (Exception ex) {
                    log.warn(XLog.STD, "Error killing node [{0}]", nodeDef.getName(), ex);
                }
            }
        }
        return killedNodes;
    }

    /**
     * @return the workflow definition of this instance.
     */
    public LiteWorkflowApp getProcessDefinition() {
        return def;
    }

    /**
     * Builds a child path, e.g. {@code "/" + "a"} gives {@code "/a/"}.
     */
    private static String createChildPath(String path, String child) {
        return path + child + PATH_SEPARATOR;
    }

    /**
     * Returns the parent of a path that ends with the separator, e.g. {@code "/a/b/"} gives {@code "/a/"}
     * and {@code "/a/"} gives {@code "/"}; the root path {@code "/"} has no parent and gives {@code null}.
     */
    private static String getParentPath(String path) {
        // Drop the trailing separator before looking for the previous one.
        path = path.substring(0, path.length() - 1);
        return (path.length() == 0) ? null : path.substring(0, path.lastIndexOf(PATH_SEPARATOR) + 1);
    }

    /**
     * Joins an execution path and a transition target with the transition separator.
     */
    private static String createFullTransition(String executionPath, String transition) {
        return executionPath + TRANSITION_SEPARATOR + transition;
    }

    /**
     * Returns the execution-path part of a full transition (everything before the first separator).
     *
     * @throws IllegalArgumentException if the separator is missing.
     */
    private static String getExecutionPath(String fullTransition) {
        int index = fullTransition.indexOf(TRANSITION_SEPARATOR);
        if (index == -1) {
            throw new IllegalArgumentException("Invalid fullTransition");
        }
        return fullTransition.substring(0, index);
    }

    /**
     * Returns the target-node part of a full transition (everything after the first separator).
     *
     * @throws IllegalArgumentException if the separator is missing.
     */
    private static String getTransitionNode(String fullTransition) {
        int index = fullTransition.indexOf(TRANSITION_SEPARATOR);
        if (index == -1) {
            throw new IllegalArgumentException("Invalid fullTransition");
        }
        return fullTransition.substring(index + 1);
    }

    /**
     * Instantiates a node handler through Hadoop's {@code ReflectionUtils}, passing no configuration.
     */
    private NodeHandler newInstance(Class<? extends NodeHandler> handler) {
        return (NodeHandler) ReflectionUtils.newInstance(handler, null);
    }

    /**
     * Sets the log info parameters (user, group, app, token, job) from this instance and re-obtains the
     * logger.
     */
    private void refreshLog() {
        XLog.Info.get().setParameter(XLogService.USER, conf.get(OozieClient.USER_NAME));
        XLog.Info.get().setParameter(XLogService.GROUP, conf.get(OozieClient.GROUP_NAME));
        XLog.Info.get().setParameter(DagXLogInfoService.APP, def.getName());
        XLog.Info.get().setParameter(DagXLogInfoService.TOKEN, conf.get(OozieClient.LOG_TOKEN, ""));
        XLog.Info.get().setParameter(DagXLogInfoService.JOB, instanceId);
        log = XLog.getLog(getClass());
    }

    /**
     * @return the current status of the instance.
     */
    public Status getStatus() {
        return status;
    }

    /**
     * Sets the status directly, without any of the checks done by the lifecycle methods.
     *
     * @param status new status.
     */
    public void setStatus(Status status) {
        this.status = status;
    }

    /**
     * Serializes the instance: id, configuration (as length-prefixed XML), definition, status, execution
     * paths and persistent variables, in that order. Transient variables are not written.
     * <p/>
     * NOTE: strings are written with {@code writeUTF}, which rejects values whose encoded form exceeds
     * 65535 bytes.
     *
     * @param dOut output to write to.
     * @throws IOException if the output cannot be written.
     */
    @Override
    public void write(DataOutput dOut) throws IOException {
        dOut.writeUTF(instanceId);

        //Hadoop Configuration has to get its act right
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        conf.writeXml(baos);
        baos.close();
        byte[] array = baos.toByteArray();
        dOut.writeInt(array.length);
        dOut.write(array);

        def.write(dOut);
        dOut.writeUTF(status.toString());
        dOut.writeInt(executionPaths.size());
        for (Map.Entry<String, NodeInstance> entry : executionPaths.entrySet()) {
            dOut.writeUTF(entry.getKey());
            dOut.writeUTF(entry.getValue().nodeName);
            dOut.writeBoolean(entry.getValue().started);
        }
        dOut.writeInt(persistentVars.size());
        for (Map.Entry<String, String> entry : persistentVars.entrySet()) {
            dOut.writeUTF(entry.getKey());
            dOut.writeUTF(entry.getValue());
        }
    }

    /**
     * Restores the instance from the format produced by {@link #write(DataOutput)}.
     * <p/>
     * Execution paths and persistent variables are replaced, not merged, so an instance that is reused
     * for deserialization does not keep entries from its previous state. Transient variables are left
     * untouched.
     *
     * @param dIn input to read from.
     * @throws IOException if the input cannot be read.
     */
    @Override
    public void readFields(DataInput dIn) throws IOException {
        instanceId = dIn.readUTF();

        //Hadoop Configuration has to get its act right
        int len = dIn.readInt();
        byte[] array = new byte[len];
        dIn.readFully(array);
        ByteArrayInputStream bais = new ByteArrayInputStream(array);
        conf = new XConfiguration(bais);

        def = new LiteWorkflowApp();
        def.readFields(dIn);
        status = Status.valueOf(dIn.readUTF());

        // Drop any previously held state before loading the serialized one.
        executionPaths.clear();
        int numExPaths = dIn.readInt();
        for (int x = 0; x < numExPaths; x++) {
            String path = dIn.readUTF();
            String nodeName = dIn.readUTF();
            boolean isStarted = dIn.readBoolean();
            NodeInstance nodeInstance = new NodeInstance(nodeName);
            nodeInstance.started = isStarted;
            executionPaths.put(path, nodeInstance);
        }

        persistentVars.clear();
        int numVars = dIn.readInt();
        for (int x = 0; x < numVars; x++) {
            String vName = dIn.readUTF();
            String vVal = dIn.readUTF();
            persistentVars.put(vName, vVal);
        }
        refreshLog();
    }

    @Override
    public Configuration getConf() {
        return conf;
    }

    @Override
    public WorkflowApp getApp() {
        return def;
    }

    @Override
    public String getId() {
        return instanceId;
    }

    /**
     * @param node node name.
     * @return the target recorded by {@code signal} for the last transition out of the node, or
     *         {@code null} if none was recorded.
     */
    @Override
    public String getTransition(String node) {
        return persistentVars.get(node + WorkflowInstance.NODE_VAR_SEPARATOR + TRANSITION_TO);
    }

    /**
     * Two instances are equal if the other object is an instance of this object's class and both have the
     * same id.
     */
    @Override
    public boolean equals(Object o) {
        return (o != null) && (getClass().isInstance(o)) && ((WorkflowInstance) o).getId().equals(instanceId);
    }

    @Override
    public int hashCode() {
        return instanceId.hashCode();
    }

}