001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.command.wf;
016    
017    import java.util.Date;
018    
019    import org.apache.hadoop.conf.Configuration;
020    import org.apache.oozie.DagELFunctions;
021    import org.apache.oozie.ErrorCode;
022    import org.apache.oozie.WorkflowActionBean;
023    import org.apache.oozie.WorkflowJobBean;
024    import org.apache.oozie.action.ActionExecutor;
025    import org.apache.oozie.action.ActionExecutorException;
026    import org.apache.oozie.client.OozieClient;
027    import org.apache.oozie.client.WorkflowAction;
028    import org.apache.oozie.client.WorkflowJob;
029    import org.apache.oozie.client.SLAEvent.SlaAppType;
030    import org.apache.oozie.client.SLAEvent.Status;
031    import org.apache.oozie.command.CommandException;
032    import org.apache.oozie.service.ActionService;
033    import org.apache.oozie.service.Services;
034    import org.apache.oozie.service.UUIDService;
035    import org.apache.oozie.store.StoreException;
036    import org.apache.oozie.store.WorkflowStore;
037    import org.apache.oozie.util.Instrumentation;
038    import org.apache.oozie.util.XLog;
039    import org.apache.oozie.util.db.SLADbOperations;
040    import org.apache.oozie.workflow.WorkflowInstance;
041    
042    public class ActionEndCommand extends ActionCommand<Void> {
043        public static final String COULD_NOT_END = "COULD_NOT_END";
044        public static final String END_DATA_MISSING = "END_DATA_MISSING";
045    
046        private String id;
047        private String jobId = null;
048    
049        public ActionEndCommand(String id, String type) {
050            super("action.end", type, 0);
051            this.id = id;
052        }
053    
054        @Override
055        protected Void call(WorkflowStore store) throws StoreException, CommandException {
056            WorkflowJobBean workflow = store.getWorkflow(jobId, false);
057            setLogInfo(workflow);
058            WorkflowActionBean action = store.getAction(id, false);
059            setLogInfo(action);
060            if (action.isPending()
061                    && (action.getStatus() == WorkflowActionBean.Status.DONE
062                    || action.getStatus() == WorkflowActionBean.Status.END_RETRY || action.getStatus() == WorkflowActionBean.Status.END_MANUAL)) {
063                if (workflow.getStatus() == WorkflowJob.Status.RUNNING) {
064    
065                    ActionExecutor executor = Services.get().get(ActionService.class).getExecutor(action.getType());
066                    Configuration conf = workflow.getWorkflowInstance().getConf();
067                    int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries());
068                    long retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval());
069                    executor.setMaxRetries(maxRetries);
070                    executor.setRetryInterval(retryInterval);
071    
072                    if (executor != null) {
073                        boolean isRetry = false;
074                        if (action.getStatus() == WorkflowActionBean.Status.END_RETRY
075                                || action.getStatus() == WorkflowActionBean.Status.END_MANUAL) {
076                            isRetry = true;
077                        }
078                        ActionExecutorContext context = new ActionCommand.ActionExecutorContext(workflow, action, isRetry);
079                        try {
080    
081                            XLog.getLog(getClass()).debug(
082                                    "End, name [{0}] type [{1}] status[{2}] external status [{3}] signal value [{4}]",
083                                    action.getName(), action.getType(), action.getStatus(), action.getExternalStatus(),
084                                    action.getSignalValue());
085    
086                            Instrumentation.Cron cron = new Instrumentation.Cron();
087                            cron.start();
088                            executor.end(context, action);
089                            cron.stop();
090                            addActionCron(action.getType(), cron);
091    
092                            WorkflowInstance wfInstance = workflow.getWorkflowInstance();
093                            DagELFunctions.setActionInfo(wfInstance, action);
094                            workflow.setWorkflowInstance(wfInstance);
095                            incrActionCounter(action.getType(), 1);
096    
097                            if (!context.isEnded()) {
098                                XLog.getLog(getClass()).warn(XLog.OPS,
099                                                             "Action Ended, ActionExecutor [{0}] must call setEndData()", executor.getType());
100                                action.setErrorInfo(END_DATA_MISSING, "Execution Ended, but End Data Missing from Action");
101                                failJob(context);
102                                store.updateAction(action);
103                                store.updateWorkflow(workflow);
104                                return null;
105                            }
106                            action.setRetries(0);
107                            action.setEndTime(new Date());
108                            store.updateAction(action);
109                            store.updateWorkflow(workflow);
110                            Status slaStatus = null;
111                            switch (action.getStatus()) {
112                                case OK:
113                                    slaStatus = Status.SUCCEEDED;
114                                    break;
115                                case KILLED:
116                                    slaStatus = Status.KILLED;
117                                    break;
118                                case FAILED:
119                                    slaStatus = Status.FAILED;
120                                    break;
121                                case ERROR:
122                                    XLog.getLog(getClass()).info("ERROR is considered as FAILED for SLA");
123                                    slaStatus = Status.KILLED;
124                                    break;
125                                default: // TODO: What will happen for other Action
126                                    // status
127                                    slaStatus = Status.FAILED;
128                                    break;
129                            }
130                            SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store, slaStatus,
131                                                            SlaAppType.WORKFLOW_ACTION);
132                            queueCallable(new NotificationCommand(workflow, action));
133                            XLog.getLog(getClass()).debug(
134                                    "Queuing commands for action=" + id + ", status=" + action.getStatus()
135                                            + ", Set pending=" + action.getPending());
136                            queueCallable(new SignalCommand(workflow.getId(), id));
137                        }
138                        catch (ActionExecutorException ex) {
139                            XLog.getLog(getClass()).warn(
140                                    "Error ending action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]",
141                                    action.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage());
142                            action.setErrorInfo(ex.getErrorCode(), ex.getMessage());
143                            action.setEndTime(null);
144                            switch (ex.getErrorType()) {
145                                case TRANSIENT:
146                                    if (!handleTransient(context, executor, WorkflowAction.Status.END_RETRY)) {
147                                        handleNonTransient(store, context, executor, WorkflowAction.Status.END_MANUAL);
148                                        action.setPendingAge(new Date());
149                                        action.setRetries(0);
150                                    }
151                                    action.setEndTime(null);
152                                    break;
153                                case NON_TRANSIENT:
154                                    handleNonTransient(store, context, executor, WorkflowAction.Status.END_MANUAL);
155                                    action.setEndTime(null);
156                                    break;
157                                case ERROR:
158                                    handleError(context, executor, COULD_NOT_END, false, WorkflowAction.Status.ERROR);
159                                    queueCallable(new SignalCommand(workflow.getId(), id));
160                                    break;
161                                case FAILED:
162                                    failJob(context);
163                                    break;
164                            }
165                            store.updateAction(action);
166                            store.updateWorkflow(workflow);
167                        }
168                    }
169                    else {
170                        throw new CommandException(ErrorCode.E0802, action.getType());
171                    }
172                }
173                else {
174                    XLog.getLog(getClass()).warn("Job state is not {0}. Skipping ActionEnd Execution",
175                                                 WorkflowJob.Status.RUNNING.toString());
176                }
177            }
178            else {
179                XLog.getLog(getClass()).debug("Action pending={0}, status={1}. Skipping ActionEnd Execution",
180                                              action.getPending(), action.getStatusStr());
181            }
182            return null;
183        }
184    
185        @Override
186        protected Void execute(WorkflowStore store) throws CommandException, StoreException {
187            XLog.getLog(getClass()).debug("STARTED ActionEndCommand for action " + id);
188            try {
189                jobId = Services.get().get(UUIDService.class).getId(id);
190                if (lock(jobId)) {
191                    call(store);
192                }
193                else {
194                    queueCallable(new ActionEndCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
195                    XLog.getLog(getClass()).warn("ActionEnd lock was not acquired - failed {0}", id);
196                }
197            }
198            catch (InterruptedException e) {
199                queueCallable(new ActionEndCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
200                XLog.getLog(getClass()).warn("ActionEnd lock was not acquired - interrupted exception failed {0}", id);
201            }
202            finally {
203                XLog.getLog(getClass()).debug("ENDED ActionEndCommand for action " + id);
204            }
205            return null;
206        }
207    }