001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.command.wf;
016    
017    import java.util.Date;
018    
019    import org.apache.hadoop.conf.Configuration;
020    import org.apache.oozie.DagELFunctions;
021    import org.apache.oozie.ErrorCode;
022    import org.apache.oozie.WorkflowActionBean;
023    import org.apache.oozie.WorkflowJobBean;
024    import org.apache.oozie.action.ActionExecutor;
025    import org.apache.oozie.action.ActionExecutorException;
026    import org.apache.oozie.client.OozieClient;
027    import org.apache.oozie.client.WorkflowAction;
028    import org.apache.oozie.client.WorkflowJob;
029    import org.apache.oozie.client.SLAEvent.SlaAppType;
030    import org.apache.oozie.client.SLAEvent.Status;
031    import org.apache.oozie.command.CommandException;
032    import org.apache.oozie.service.ActionService;
033    import org.apache.oozie.service.Services;
034    import org.apache.oozie.service.UUIDService;
035    import org.apache.oozie.store.StoreException;
036    import org.apache.oozie.store.WorkflowStore;
037    import org.apache.oozie.util.Instrumentation;
038    import org.apache.oozie.util.XLog;
039    import org.apache.oozie.util.db.SLADbOperations;
040    import org.apache.oozie.workflow.WorkflowInstance;
041    
042    public class ActionEndCommand extends ActionCommand<Void> {
043        public static final String COULD_NOT_END = "COULD_NOT_END";
044        public static final String END_DATA_MISSING = "END_DATA_MISSING";
045    
046        private String id;
047        private String jobId = null;
048    
049        public ActionEndCommand(String id, String type) {
050            super("action.end", type, 0);
051            this.id = id;
052        }
053    
054        @Override
055        protected Void call(WorkflowStore store) throws StoreException, CommandException {
056            WorkflowJobBean workflow = store.getWorkflow(jobId, false);
057            setLogInfo(workflow);
058            WorkflowActionBean action = store.getAction(id, false);
059            setLogInfo(action);
060            if (action.isPending()
061                    && (action.getStatus() == WorkflowActionBean.Status.DONE
062                    || action.getStatus() == WorkflowActionBean.Status.END_RETRY || action.getStatus() == WorkflowActionBean.Status.END_MANUAL)) {
063                if (workflow.getStatus() == WorkflowJob.Status.RUNNING) {
064    
065                    ActionExecutor executor = Services.get().get(ActionService.class).getExecutor(action.getType());
066                    Configuration conf = workflow.getWorkflowInstance().getConf();
067                    int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries());
068                    long retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval());
069                    executor.setMaxRetries(maxRetries);
070                    executor.setRetryInterval(retryInterval);
071    
072                    if (executor != null) {
073                        boolean isRetry = false;
074                        if (action.getStatus() == WorkflowActionBean.Status.END_RETRY
075                                || action.getStatus() == WorkflowActionBean.Status.END_MANUAL) {
076                            isRetry = true;
077                        }
078                        ActionExecutorContext context = new ActionCommand.ActionExecutorContext(workflow, action, isRetry);
079                        try {
080    
081                            XLog.getLog(getClass()).debug(
082                                    "End, name [{0}] type [{1}] status[{2}] external status [{3}] signal value [{4}]",
083                                    action.getName(), action.getType(), action.getStatus(), action.getExternalStatus(),
084                                    action.getSignalValue());
085                            WorkflowInstance wfInstance = workflow.getWorkflowInstance();
086                            DagELFunctions.setActionInfo(wfInstance, action);
087                            workflow.setWorkflowInstance(wfInstance);
088                            incrActionCounter(action.getType(), 1);
089    
090                            Instrumentation.Cron cron = new Instrumentation.Cron();
091                            cron.start();
092                            executor.end(context, action);
093                            cron.stop();
094                            addActionCron(action.getType(), cron);
095    
096                            if (!context.isEnded()) {
097                                XLog.getLog(getClass()).warn(XLog.OPS,
098                                                             "Action Ended, ActionExecutor [{0}] must call setEndData()", executor.getType());
099                                action.setErrorInfo(END_DATA_MISSING, "Execution Ended, but End Data Missing from Action");
100                                failJob(context);
101                                store.updateAction(action);
102                                store.updateWorkflow(workflow);
103                                return null;
104                            }
105                            action.setRetries(0);
106                            action.setEndTime(new Date());
107                            store.updateAction(action);
108                            store.updateWorkflow(workflow);
109                            Status slaStatus = null;
110                            switch (action.getStatus()) {
111                                case OK:
112                                    slaStatus = Status.SUCCEEDED;
113                                    break;
114                                case KILLED:
115                                    slaStatus = Status.KILLED;
116                                    break;
117                                case FAILED:
118                                    slaStatus = Status.FAILED;
119                                    break;
120                                case ERROR:
121                                    XLog.getLog(getClass()).info("ERROR is considered as FAILED for SLA");
122                                    slaStatus = Status.KILLED;
123                                    break;
124                                default: // TODO: What will happen for other Action
125                                    // status
126                                    slaStatus = Status.FAILED;
127                                    break;
128                            }
129                            SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store, slaStatus,
130                                                            SlaAppType.WORKFLOW_ACTION);
131                            queueCallable(new NotificationCommand(workflow, action));
132                            XLog.getLog(getClass()).debug(
133                                    "Queuing commands for action=" + id + ", status=" + action.getStatus()
134                                            + ", Set pending=" + action.getPending());
135                            queueCallable(new SignalCommand(workflow.getId(), id));
136                        }
137                        catch (ActionExecutorException ex) {
138                            XLog.getLog(getClass()).warn(
139                                    "Error ending action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]",
140                                    action.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage());
141                            action.setErrorInfo(ex.getErrorCode(), ex.getMessage());
142                            action.setEndTime(null);
143                            switch (ex.getErrorType()) {
144                                case TRANSIENT:
145                                    if (!handleTransient(context, executor, WorkflowAction.Status.END_RETRY)) {
146                                        handleNonTransient(store, context, executor, WorkflowAction.Status.END_MANUAL);
147                                        action.setPendingAge(new Date());
148                                        action.setRetries(0);
149                                    }
150                                    action.setEndTime(null);
151                                    break;
152                                case NON_TRANSIENT:
153                                    handleNonTransient(store, context, executor, WorkflowAction.Status.END_MANUAL);
154                                    action.setEndTime(null);
155                                    break;
156                                case ERROR:
157                                    handleError(context, executor, COULD_NOT_END, false, WorkflowAction.Status.ERROR);
158                                    queueCallable(new SignalCommand(workflow.getId(), id));
159                                    break;
160                                case FAILED:
161                                    failJob(context);
162                                    break;
163                            }
164                            store.updateAction(action);
165                            store.updateWorkflow(workflow);
166                        }
167                    }
168                    else {
169                        throw new CommandException(ErrorCode.E0802, action.getType());
170                    }
171                }
172                else {
173                    XLog.getLog(getClass()).warn("Job state is not {0}. Skipping ActionEnd Execution",
174                                                 WorkflowJob.Status.RUNNING.toString());
175                }
176            }
177            else {
178                XLog.getLog(getClass()).debug("Action pending={0}, status={1}. Skipping ActionEnd Execution",
179                                              action.getPending(), action.getStatusStr());
180            }
181            return null;
182        }
183    
184        @Override
185        protected Void execute(WorkflowStore store) throws CommandException, StoreException {
186            XLog.getLog(getClass()).debug("STARTED ActionEndCommand for action " + id);
187            try {
188                jobId = Services.get().get(UUIDService.class).getId(id);
189                if (lock(jobId)) {
190                    call(store);
191                }
192                else {
193                    queueCallable(new ActionEndCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
194                    XLog.getLog(getClass()).warn("ActionEnd lock was not acquired - failed {0}", id);
195                }
196            }
197            catch (InterruptedException e) {
198                queueCallable(new ActionEndCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
199                XLog.getLog(getClass()).warn("ActionEnd lock was not acquired - interrupted exception failed {0}", id);
200            }
201            finally {
202                XLog.getLog(getClass()).debug("ENDED ActionEndCommand for action " + id);
203            }
204            return null;
205        }
206    }