001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.workflow.lite;
016    
017    import org.apache.oozie.service.XLogService;
018    import org.apache.oozie.service.DagXLogInfoService;
019    import org.apache.oozie.client.OozieClient;
020    import org.apache.hadoop.io.Writable;
021    import org.apache.hadoop.util.ReflectionUtils;
022    import org.apache.hadoop.conf.Configuration;
023    import org.apache.oozie.workflow.WorkflowApp;
024    import org.apache.oozie.workflow.WorkflowException;
025    import org.apache.oozie.workflow.WorkflowInstance;
026    import org.apache.oozie.util.ParamChecker;
027    import org.apache.oozie.util.XLog;
028    import org.apache.oozie.util.XConfiguration;
029    import org.apache.oozie.ErrorCode;
030    
031    import java.io.DataInput;
032    import java.io.DataOutput;
033    import java.io.IOException;
034    import java.io.ByteArrayOutputStream;
035    import java.io.ByteArrayInputStream;
036    import java.util.ArrayList;
037    import java.util.HashMap;
038    import java.util.List;
039    import java.util.Map;
040    
041    //TODO javadoc
042    public class LiteWorkflowInstance implements Writable, WorkflowInstance {
043        private static final String TRANSITION_TO = "transition.to";
044    
045        private XLog log;
046    
047        private static String PATH_SEPARATOR = "/";
048        private static String ROOT = PATH_SEPARATOR;
049        private static String TRANSITION_SEPARATOR = "#";
050    
051        private static class NodeInstance {
052            String nodeName;
053            boolean started = false;
054    
055            private NodeInstance(String nodeName) {
056                this.nodeName = nodeName;
057            }
058        }
059    
060        private class Context implements NodeHandler.Context {
061            private NodeDef nodeDef;
062            private String executionPath;
063            private String exitState;
064            private Status status = Status.RUNNING;
065    
066            private Context(NodeDef nodeDef, String executionPath, String exitState) {
067                this.nodeDef = nodeDef;
068                this.executionPath = executionPath;
069                this.exitState = exitState;
070            }
071    
072            public NodeDef getNodeDef() {
073                return nodeDef;
074            }
075    
076            public String getExecutionPath() {
077                return executionPath;
078            }
079    
080            public String getParentExecutionPath(String executionPath) {
081                return LiteWorkflowInstance.getParentPath(executionPath);
082            }
083    
084            public String getSignalValue() {
085                return exitState;
086            }
087    
088            public String createExecutionPath(String name) {
089                return LiteWorkflowInstance.createChildPath(executionPath, name);
090            }
091    
092            public String createFullTransition(String executionPath, String transition) {
093                return LiteWorkflowInstance.createFullTransition(executionPath, transition);
094            }
095    
096            public void deleteExecutionPath() {
097                if (!executionPaths.containsKey(executionPath)) {
098                    throw new IllegalStateException();
099                }
100                executionPaths.remove(executionPath);
101                executionPath = LiteWorkflowInstance.getParentPath(executionPath);
102            }
103    
104            public void failJob() {
105                status = Status.FAILED;
106            }
107    
108            public void killJob() {
109                status = Status.KILLED;
110            }
111    
112            public void completeJob() {
113                status = Status.SUCCEEDED;
114            }
115    
116            @Override
117            public Object getTransientVar(String name) {
118                return LiteWorkflowInstance.this.getTransientVar(name);
119            }
120    
121            @Override
122            public String getVar(String name) {
123                return LiteWorkflowInstance.this.getVar(name);
124            }
125    
126            @Override
127            public void setTransientVar(String name, Object value) {
128                LiteWorkflowInstance.this.setTransientVar(name, value);
129            }
130    
131            @Override
132            public void setVar(String name, String value) {
133                LiteWorkflowInstance.this.setVar(name, value);
134            }
135    
136            @Override
137            public LiteWorkflowInstance getProcessInstance() {
138                return LiteWorkflowInstance.this;
139            }
140    
141        }
142    
143        private LiteWorkflowApp def;
144        private Configuration conf;
145        private String instanceId;
146        private Status status;
147        private Map<String, NodeInstance> executionPaths = new HashMap<String, NodeInstance>();
148        private Map<String, String> persistentVars = new HashMap<String, String>();
149        private Map<String, Object> transientVars = new HashMap<String, Object>();
150    
151        protected LiteWorkflowInstance() {
152            log = XLog.getLog(getClass());
153        }
154    
155        public LiteWorkflowInstance(LiteWorkflowApp def, Configuration conf, String instanceId) {
156            this();
157            this.def = ParamChecker.notNull(def, "def");
158            this.instanceId = ParamChecker.notNull(instanceId, "instanceId");
159            this.conf = ParamChecker.notNull(conf, "conf");
160            refreshLog();
161            status = Status.PREP;
162        }
163    
164        public synchronized boolean start() throws WorkflowException {
165            if (status != Status.PREP) {
166                throw new WorkflowException(ErrorCode.E0719);
167            }
168            log.debug(XLog.STD, "Starting job");
169            status = Status.RUNNING;
170            executionPaths.put(ROOT, new NodeInstance(StartNodeDef.START));
171            return signal(ROOT, StartNodeDef.START);
172        }
173    
174        //todo if suspended store signal and use when resuming
175    
176        public synchronized boolean signal(String executionPath, String signalValue) throws WorkflowException {
177            ParamChecker.notEmpty(executionPath, "executionPath");
178            ParamChecker.notNull(signalValue, "signalValue");
179            log.debug(XLog.STD, "Signaling job execution path [{0}] signal value [{1}]", executionPath, signalValue);
180            if (status != Status.RUNNING) {
181                throw new WorkflowException(ErrorCode.E0716);
182            }
183            NodeInstance nodeJob = executionPaths.get(executionPath);
184            if (nodeJob == null) {
185                status = Status.FAILED;
186                log.error("invalid execution path [{0}]", executionPath);
187            }
188            NodeDef nodeDef = null;
189            if (!status.isEndState()) {
190                nodeDef = def.getNode(nodeJob.nodeName);
191                if (nodeDef == null) {
192                    status = Status.FAILED;
193                    log.error("invalid transition [{0}]", nodeJob.nodeName);
194                }
195            }
196            if (!status.isEndState()) {
197                NodeHandler nodeHandler = newInstance(nodeDef.getHandlerClass());
198                boolean exiting = true;
199    
200                Context context = new Context(nodeDef, executionPath, signalValue);
201                if (!nodeJob.started) {
202                    try {
203                        nodeHandler.loopDetection(context);
204                        exiting = nodeHandler.enter(context);
205                        nodeJob.started = true;
206                    }
207                    catch (WorkflowException ex) {
208                        status = Status.FAILED;
209                        throw ex;
210                    }
211                }
212    
213                if (exiting) {
214                    List<String> pathsToStart = new ArrayList<String>();
215                    List<String> fullTransitions;
216                    try {
217                        fullTransitions = nodeHandler.multiExit(context);
218                        int last = fullTransitions.size() - 1;
219                        // TEST THIS
220                        if (last >= 0) {
221                            String transitionTo = getTransitionNode(fullTransitions.get(last));
222    
223                            persistentVars.put(nodeDef.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + TRANSITION_TO,
224                                               transitionTo);
225                        }
226                    }
227                    catch (WorkflowException ex) {
228                        status = Status.FAILED;
229                        throw ex;
230                    }
231    
232                    if (context.status == Status.KILLED) {
233                        status = Status.KILLED;
234                        log.debug(XLog.STD, "Completing job, kill node [{0}]", nodeJob.nodeName);
235                    }
236                    else {
237                        if (context.status == Status.FAILED) {
238                            status = Status.FAILED;
239                            log.debug(XLog.STD, "Completing job, fail node [{0}]", nodeJob.nodeName);
240                        }
241                        else {
242                            if (context.status == Status.SUCCEEDED) {
243                                status = Status.SUCCEEDED;
244                                log.debug(XLog.STD, "Completing job, end node [{0}]", nodeJob.nodeName);
245                            }
246    /*
247                    else if (context.status == Status.SUSPENDED) {
248                        status = Status.SUSPENDED;
249                        log.debug(XLog.STD, "Completing job, end node [{0}]", nodeJob.nodeName);
250                    }
251    */
252                            else {
253                                for (String fullTransition : fullTransitions) {
254                                    // this is the whole trick for forking, we need the
255                                    // executionpath and the transition
256                                    // in the case of no forking last element of
257                                    // executionpath is different from transition
258                                    // in the case of forking they are the same
259    
260                                    log.debug(XLog.STD, "Exiting node [{0}] with transition[{1}]", nodeJob.nodeName,
261                                              fullTransition);
262    
263                                    String execPathFromTransition = getExecutionPath(fullTransition);
264                                    String transition = getTransitionNode(fullTransition);
265                                    def.validateTransition(nodeJob.nodeName, transition);
266    
267                                    NodeInstance nodeJobInPath = executionPaths.get(execPathFromTransition);
268                                    if ((nodeJobInPath == null) || (!transition.equals(nodeJobInPath.nodeName))) {
269                                        // TODO explain this IF better
270                                        // If the WfJob is signaled with the parent
271                                        // execution executionPath again
272                                        // The Fork node will execute again.. and replace
273                                        // the Node WorkflowJobBean
274                                        // so this is required to prevent that..
275                                        // Question : Should we throw an error in this case
276                                        // ??
277                                        executionPaths.put(execPathFromTransition, new NodeInstance(transition));
278                                        pathsToStart.add(execPathFromTransition);
279                                    }
280    
281                                }
282                                // signal all new synch transitions
283                                for (String pathToStart : pathsToStart) {
284                                    signal(pathToStart, "::synch::");
285                                }
286                            }
287                        }
288                    }
289                }
290            }
291            if (status.isEndState()) {
292                if (status == Status.FAILED) {
293                    List<String> failedNodes = terminateNodes(status);
294                    log.warn(XLog.STD, "Workflow completed [{0}], failing [{1}] running nodes", status, failedNodes
295                            .size());
296                }
297                else {
298                    List<String> killedNodes = terminateNodes(Status.KILLED);
299                    if (killedNodes.size() > 1) {
300                        log.warn(XLog.STD, "Workflow completed [{0}], killing [{1}] running nodes", status, killedNodes
301                                .size());
302                    }
303                }
304            }
305            return status.isEndState();
306        }
307    
308        public synchronized void fail(String nodeName) throws WorkflowException {
309            if (status.isEndState()) {
310                throw new WorkflowException(ErrorCode.E0718);
311            }
312            String failedNode = failNode(nodeName);
313            if (failedNode != null) {
314                log.warn(XLog.STD, "Workflow Failed. Failing node [{0}]", failedNode);
315            }
316            else {
317                //TODO failed attempting to fail the action. EXCEPTION
318            }
319            List<String> killedNodes = killNodes();
320            if (killedNodes.size() > 1) {
321                log.warn(XLog.STD, "Workflow Failed, killing [{0}] nodes", killedNodes.size());
322            }
323            status = Status.FAILED;
324        }
325    
326        public synchronized void kill() throws WorkflowException {
327            if (status.isEndState()) {
328                throw new WorkflowException(ErrorCode.E0718);
329            }
330            log.debug(XLog.STD, "Killing job");
331            List<String> killedNodes = killNodes();
332            if (killedNodes.size() > 1) {
333                log.warn(XLog.STD, "workflow killed, killing [{0}] nodes", killedNodes.size());
334            }
335            status = Status.KILLED;
336        }
337    
338        public synchronized void suspend() throws WorkflowException {
339            if (status != Status.RUNNING) {
340                throw new WorkflowException(ErrorCode.E0716);
341            }
342            log.debug(XLog.STD, "Suspending job");
343            this.status = Status.SUSPENDED;
344        }
345    
346        public boolean isSuspended() {
347            return (status == Status.SUSPENDED);
348        }
349    
350        public synchronized void resume() throws WorkflowException {
351            if (status != Status.SUSPENDED) {
352                throw new WorkflowException(ErrorCode.E0717);
353            }
354            log.debug(XLog.STD, "Resuming job");
355            status = Status.RUNNING;
356        }
357    
358        public void setVar(String name, String value) {
359            if (value != null) {
360                persistentVars.put(name, value);
361            }
362            else {
363                persistentVars.remove(name);
364            }
365        }
366    
367        @Override
368        public Map<String, String> getAllVars() {
369            return persistentVars;
370        }
371    
372        @Override
373        public void setAllVars(Map<String, String> varMap) {
374            persistentVars.putAll(varMap);
375        }
376    
377        public String getVar(String name) {
378            return persistentVars.get(name);
379        }
380    
381    
382        public void setTransientVar(String name, Object value) {
383            if (value != null) {
384                transientVars.put(name, value);
385            }
386            else {
387                transientVars.remove(name);
388            }
389        }
390    
391        public boolean hasTransientVar(String name) {
392            return transientVars.containsKey(name);
393        }
394    
395        public Object getTransientVar(String name) {
396            return transientVars.get(name);
397        }
398    
399        public boolean hasEnded() {
400            return status.isEndState();
401        }
402    
403        private List<String> terminateNodes(Status endStatus) {
404            List<String> endNodes = new ArrayList<String>();
405            for (Map.Entry<String, NodeInstance> entry : executionPaths.entrySet()) {
406                if (entry.getValue().started) {
407                    NodeDef nodeDef = def.getNode(entry.getValue().nodeName);
408                    NodeHandler nodeHandler = newInstance(nodeDef.getHandlerClass());
409                    try {
410                        if (endStatus == Status.KILLED) {
411                            nodeHandler.kill(new Context(nodeDef, entry.getKey(), null));
412                        }
413                        else {
414                            if (endStatus == Status.FAILED) {
415                                nodeHandler.fail(new Context(nodeDef, entry.getKey(), null));
416                            }
417                        }
418                        endNodes.add(nodeDef.getName());
419                    }
420                    catch (Exception ex) {
421                        log.warn(XLog.STD, "Error Changing node state to [{0}] for Node [{1}]", endStatus.toString(),
422                                 nodeDef.getName(), ex);
423                    }
424                }
425            }
426            return endNodes;
427        }
428    
429        private String failNode(String nodeName) {
430            String failedNode = null;
431            for (Map.Entry<String, NodeInstance> entry : executionPaths.entrySet()) {
432                String node = entry.getKey();
433                NodeInstance nodeInstance = entry.getValue();
434                if (nodeInstance.started && nodeInstance.nodeName.equals(nodeName)) {
435                    NodeDef nodeDef = def.getNode(nodeInstance.nodeName);
436                    NodeHandler nodeHandler = newInstance(nodeDef.getHandlerClass());
437                    try {
438                        nodeHandler.fail(new Context(nodeDef, node, null));
439                        failedNode = nodeDef.getName();
440                        nodeInstance.started = false;
441                    }
442                    catch (Exception ex) {
443                        log.warn(XLog.STD, "Error failing node [{0}]", nodeDef.getName(), ex);
444                    }
445                    return failedNode;
446                }
447            }
448            return failedNode;
449        }
450    
451        private List<String> killNodes() {
452            List<String> killedNodes = new ArrayList<String>();
453            for (Map.Entry<String, NodeInstance> entry : executionPaths.entrySet()) {
454                String node = entry.getKey();
455                NodeInstance nodeInstance = entry.getValue();
456                if (nodeInstance.started) {
457                    NodeDef nodeDef = def.getNode(nodeInstance.nodeName);
458                    NodeHandler nodeHandler = newInstance(nodeDef.getHandlerClass());
459                    try {
460                        nodeHandler.kill(new Context(nodeDef, node, null));
461                        killedNodes.add(nodeDef.getName());
462                    }
463                    catch (Exception ex) {
464                        log.warn(XLog.STD, "Error killing node [{0}]", nodeDef.getName(), ex);
465                    }
466                }
467            }
468            return killedNodes;
469        }
470    
471        public LiteWorkflowApp getProcessDefinition() {
472            return def;
473        }
474    
475        private static String createChildPath(String path, String child) {
476            return path + child + PATH_SEPARATOR;
477        }
478    
479        private static String getParentPath(String path) {
480            path = path.substring(0, path.length() - 1);
481            return (path.length() == 0) ? null : path.substring(0, path.lastIndexOf(PATH_SEPARATOR) + 1);
482        }
483    
484        private static String createFullTransition(String executionPath, String transition) {
485            return executionPath + TRANSITION_SEPARATOR + transition;
486        }
487    
488        private static String getExecutionPath(String fullTransition) {
489            int index = fullTransition.indexOf(TRANSITION_SEPARATOR);
490            if (index == -1) {
491                throw new IllegalArgumentException("Invalid fullTransition");
492            }
493            return fullTransition.substring(0, index);
494        }
495    
496        private static String getTransitionNode(String fullTransition) {
497            int index = fullTransition.indexOf(TRANSITION_SEPARATOR);
498            if (index == -1) {
499                throw new IllegalArgumentException("Invalid fullTransition");
500            }
501            return fullTransition.substring(index + 1);
502        }
503    
504        private NodeHandler newInstance(Class<? extends NodeHandler> handler) {
505            return (NodeHandler) ReflectionUtils.newInstance(handler, null);
506        }
507    
508        private void refreshLog() {
509            XLog.Info.get().setParameter(XLogService.USER, conf.get(OozieClient.USER_NAME));
510            XLog.Info.get().setParameter(XLogService.GROUP, conf.get(OozieClient.GROUP_NAME));
511            XLog.Info.get().setParameter(DagXLogInfoService.APP, def.getName());
512            XLog.Info.get().setParameter(DagXLogInfoService.TOKEN, conf.get(OozieClient.LOG_TOKEN, ""));
513            XLog.Info.get().setParameter(DagXLogInfoService.JOB, instanceId);
514            log = XLog.getLog(getClass());
515        }
516    
517        public Status getStatus() {
518            return status;
519        }
520    
521        public void setStatus(Status status) {
522            this.status = status;
523        }
524    
525        @Override
526        public void write(DataOutput dOut) throws IOException {
527            dOut.writeUTF(instanceId);
528    
529            //Hadoop Configuration has to get its act right
530            ByteArrayOutputStream baos = new ByteArrayOutputStream();
531            conf.writeXml(baos);
532            baos.close();
533            byte[] array = baos.toByteArray();
534            dOut.writeInt(array.length);
535            dOut.write(array);
536    
537            def.write(dOut);
538            dOut.writeUTF(status.toString());
539            dOut.writeInt(executionPaths.size());
540            for (Map.Entry<String, NodeInstance> entry : executionPaths.entrySet()) {
541                dOut.writeUTF(entry.getKey());
542                dOut.writeUTF(entry.getValue().nodeName);
543                dOut.writeBoolean(entry.getValue().started);
544            }
545            dOut.writeInt(persistentVars.size());
546            for (Map.Entry<String, String> entry : persistentVars.entrySet()) {
547                dOut.writeUTF(entry.getKey());
548                dOut.writeUTF(entry.getValue());
549            }
550        }
551    
552        @Override
553        public void readFields(DataInput dIn) throws IOException {
554            instanceId = dIn.readUTF();
555    
556            //Hadoop Configuration has to get its act right
557            int len = dIn.readInt();
558            byte[] array = new byte[len];
559            dIn.readFully(array);
560            ByteArrayInputStream bais = new ByteArrayInputStream(array);
561            conf = new XConfiguration(bais);
562    
563            def = new LiteWorkflowApp();
564            def.readFields(dIn);
565            status = Status.valueOf(dIn.readUTF());
566            int numExPaths = dIn.readInt();
567            for (int x = 0; x < numExPaths; x++) {
568                String path = dIn.readUTF();
569                String nodeName = dIn.readUTF();
570                boolean isStarted = dIn.readBoolean();
571                NodeInstance nodeInstance = new NodeInstance(nodeName);
572                nodeInstance.started = isStarted;
573                executionPaths.put(path, nodeInstance);
574            }
575            int numVars = dIn.readInt();
576            for (int x = 0; x < numVars; x++) {
577                String vName = dIn.readUTF();
578                String vVal = dIn.readUTF();
579                persistentVars.put(vName, vVal);
580            }
581            refreshLog();
582        }
583    
584        @Override
585        public Configuration getConf() {
586            return conf;
587        }
588    
589        @Override
590        public WorkflowApp getApp() {
591            return def;
592        }
593    
594        @Override
595        public String getId() {
596            return instanceId;
597        }
598    
599        @Override
600        public String getTransition(String node) {
601            return persistentVars.get(node + WorkflowInstance.NODE_VAR_SEPARATOR + TRANSITION_TO);
602        }
603    
604        public boolean equals(Object o) {
605            return (o != null) && (getClass().isInstance(o)) && ((WorkflowInstance) o).getId().equals(instanceId);
606        }
607    
608        public int hashCode() {
609            return instanceId.hashCode();
610        }
611    
612    }