001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.command.wf;
016    
017    import java.util.Date;
018    
019    import javax.servlet.jsp.el.ELException;
020    
021    import org.apache.hadoop.conf.Configuration;
022    import org.apache.oozie.ErrorCode;
023    import org.apache.oozie.FaultInjection;
024    import org.apache.oozie.WorkflowActionBean;
025    import org.apache.oozie.WorkflowJobBean;
026    import org.apache.oozie.XException;
027    import org.apache.oozie.action.ActionExecutor;
028    import org.apache.oozie.action.ActionExecutorException;
029    import org.apache.oozie.client.OozieClient;
030    import org.apache.oozie.client.WorkflowAction;
031    import org.apache.oozie.client.WorkflowJob;
032    import org.apache.oozie.client.SLAEvent.SlaAppType;
033    import org.apache.oozie.client.SLAEvent.Status;
034    import org.apache.oozie.command.CommandException;
035    import org.apache.oozie.command.coord.CoordActionUpdateCommand;
036    import org.apache.oozie.service.ActionService;
037    import org.apache.oozie.service.Services;
038    import org.apache.oozie.service.UUIDService;
039    import org.apache.oozie.store.StoreException;
040    import org.apache.oozie.store.WorkflowStore;
041    import org.apache.oozie.util.ELEvaluationException;
042    import org.apache.oozie.util.Instrumentation;
043    import org.apache.oozie.util.XLog;
044    import org.apache.oozie.util.XmlUtils;
045    import org.apache.oozie.util.db.SLADbOperations;
046    
047    public class ActionStartCommand extends ActionCommand<Void> {
048        public static final String EL_ERROR = "EL_ERROR";
049        public static final String EL_EVAL_ERROR = "EL_EVAL_ERROR";
050        public static final String COULD_NOT_START = "COULD_NOT_START";
051        public static final String START_DATA_MISSING = "START_DATA_MISSING";
052        public static final String EXEC_DATA_MISSING = "EXEC_DATA_MISSING";
053    
054        private String id;
055        private String jobId;
056    
057        public ActionStartCommand(String id, String type) {
058            super("action.start", type, 0);
059            this.id = id;
060        }
061    
062        @Override
063        protected Void call(WorkflowStore store) throws StoreException, CommandException {
064            WorkflowJobBean workflow = store.getWorkflow(jobId, false);
065            setLogInfo(workflow);
066            WorkflowActionBean action = store.getAction(id, false);
067            XLog.getLog(getClass()).warn(XLog.STD,
068                                         "[***" + action.getId() + "***]" + "In call()....status=" + action.getStatusStr());
069            setLogInfo(action);
070            if (action.isPending()
071                    && (action.getStatus() == WorkflowActionBean.Status.PREP
072                    || action.getStatus() == WorkflowActionBean.Status.START_RETRY || action.getStatus() == WorkflowActionBean.Status.START_MANUAL)) {
073                if (workflow.getStatus() == WorkflowJob.Status.RUNNING) {
074    
075                    ActionExecutor executor = Services.get().get(ActionService.class).getExecutor(action.getType());
076                    Configuration conf = workflow.getWorkflowInstance().getConf();
077    
078                    int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries());
079                    long retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval());
080                    executor.setMaxRetries(maxRetries);
081                    executor.setRetryInterval(retryInterval);
082    
083                    if (executor != null) {
084                        ActionExecutorContext context = null;
085                        try {
086                            boolean isRetry = false;
087                            if (action.getStatus() == WorkflowActionBean.Status.START_RETRY
088                                    || action.getStatus() == WorkflowActionBean.Status.START_MANUAL) {
089                                isRetry = true;
090                            }
091                            context = new ActionCommand.ActionExecutorContext(workflow, action, isRetry);
092                            try {
093                                String tmpActionConf = XmlUtils.removeComments(action.getConf());
094                                String actionConf = context.getELEvaluator().evaluate(tmpActionConf, String.class);
095                                action.setConf(actionConf);
096    
097                                XLog.getLog(getClass()).debug("Start, name [{0}] type [{1}] configuration{E}{E}{2}{E}",
098                                                              action.getName(), action.getType(), actionConf);
099    
100                            }
101                            catch (ELEvaluationException ex) {
102                                throw new ActionExecutorException(ActionExecutorException.ErrorType.TRANSIENT,
103                                                                  EL_EVAL_ERROR, ex.getMessage(), ex);
104                            }
105                            catch (ELException ex) {
106                                context.setErrorInfo(EL_ERROR, ex.getMessage());
107                                XLog.getLog(getClass()).warn("ELException in ActionStartCommand ", ex.getMessage(), ex);
108                                handleError(context, store, workflow, action);
109                                return null;
110                            }
111                            catch (org.jdom.JDOMException je) {
112                                context.setErrorInfo("ParsingError", je.getMessage());
113                                XLog.getLog(getClass()).warn("JDOMException in ActionStartCommand ", je.getMessage(), je);
114                                handleError(context, store, workflow, action);
115                                return null;
116                            }
117                            catch (Exception ex) {
118                                context.setErrorInfo(EL_ERROR, ex.getMessage());
119                                XLog.getLog(getClass()).warn("Exception in ActionStartCommand ", ex.getMessage(), ex);
120                                handleError(context, store, workflow, action);
121                                return null;
122                            }
123                            action.setErrorInfo(null, null);
124                            incrActionCounter(action.getType(), 1);
125    
126                            Instrumentation.Cron cron = new Instrumentation.Cron();
127                            cron.start();
128                            executor.start(context, action);
129                            cron.stop();
130                            FaultInjection.activate("org.apache.oozie.command.SkipCommitFaultInjection");
131                            addActionCron(action.getType(), cron);
132    
133                            action.setRetries(0);
134                            if (action.isExecutionComplete()) {
135                                if (!context.isExecuted()) {
136                                    XLog.getLog(getClass()).warn(XLog.OPS,
137                                                                 "Action Completed, ActionExecutor [{0}] must call setExecutionData()",
138                                                                 executor.getType());
139                                    action.setErrorInfo(EXEC_DATA_MISSING,
140                                                        "Execution Complete, but Execution Data Missing from Action");
141                                    failJob(context);
142                                    store.updateAction(action);
143                                    store.updateWorkflow(workflow);
144                                    return null;
145                                }
146                                action.setPending();
147                                queueCallable(new ActionEndCommand(action.getId(), action.getType()));
148                            }
149                            else {
150                                if (!context.isStarted()) {
151                                    XLog.getLog(getClass()).warn(XLog.OPS,
152                                                                 "Action Started, ActionExecutor [{0}] must call setStartData()",
153                                                                 executor.getType());
154                                    action.setErrorInfo(START_DATA_MISSING,
155                                                        "Execution Started, but Start Data Missing from Action");
156                                    failJob(context);
157                                    store.updateAction(action);
158                                    store.updateWorkflow(workflow);
159                                    return null;
160                                }
161                                queueCallable(new NotificationCommand(workflow, action));
162                            }
163    
164                            XLog.getLog(getClass()).warn(XLog.STD,
165                                                         "[***" + action.getId() + "***]" + "Action status=" + action.getStatusStr());
166    
167                            store.updateAction(action);
168                            store.updateWorkflow(workflow);
169                            // Add SLA status event (STARTED) for WF_ACTION
170                            // SLADbOperations.writeSlaStatusEvent(eSla,
171                            // action.getId(), Status.STARTED, store);
172                            SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store, Status.STARTED,
173                                                            SlaAppType.WORKFLOW_ACTION);
174                            XLog.getLog(getClass()).warn(XLog.STD,
175                                                         "[***" + action.getId() + "***]" + "Action updated in DB!");
176    
177                        }
178                        catch (ActionExecutorException ex) {
179                            XLog.getLog(getClass()).warn(
180                                    "Error starting action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]",
181                                    action.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage(), ex);
182                            action.setErrorInfo(ex.getErrorCode(), ex.getMessage());
183                            switch (ex.getErrorType()) {
184                                case TRANSIENT:
185                                    if (!handleTransient(context, executor, WorkflowAction.Status.START_RETRY)) {
186                                        handleNonTransient(store, context, executor, WorkflowAction.Status.START_MANUAL);
187                                        action.setPendingAge(new Date());
188                                        action.setRetries(0);
189                                        action.setStartTime(null);
190                                    }
191                                    break;
192                                case NON_TRANSIENT:
193                                    handleNonTransient(store, context, executor, WorkflowAction.Status.START_MANUAL);
194                                    break;
195                                case ERROR:
196                                    handleError(context, executor, WorkflowAction.Status.ERROR.toString(), true,
197                                                WorkflowAction.Status.DONE);
198                                    break;
199                                case FAILED:
200                                    try {
201                                        failJob(context);
202                                        queueCallable(new CoordActionUpdateCommand(workflow));
203                                        new WfEndCommand(jobId).call(); //To delete the WF temp dir
204                                        SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store,
205                                                                        Status.FAILED, SlaAppType.WORKFLOW_ACTION);
206                                        SLADbOperations.writeStausEvent(workflow.getSlaXml(), workflow.getId(), store,
207                                                                        Status.FAILED, SlaAppType.WORKFLOW_JOB);
208                                    }
209                                    catch (XException x) {
210                                        XLog.getLog(getClass()).warn("ActionStartCommand - case:FAILED ", x.getMessage());
211                                    }
212                                    break;
213                            }
214                            store.updateAction(action);
215                            store.updateWorkflow(workflow);
216                        }
217                    }
218                    else {
219                        throw new CommandException(ErrorCode.E0802, action.getType());
220                    }
221    
222                }
223                else {
224                    XLog.getLog(getClass()).warn("Job state is not {0}. Skipping Action Execution",
225                                                 WorkflowJob.Status.RUNNING.toString());
226                }
227            }
228            return null;
229        }
230    
231        private void handleError(ActionExecutorContext context, WorkflowStore store, WorkflowJobBean workflow,
232                                 WorkflowActionBean action) throws CommandException, StoreException {
233            failJob(context);
234            store.updateAction(action);
235            store.updateWorkflow(workflow);
236            SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store, Status.FAILED,
237                                            SlaAppType.WORKFLOW_ACTION);
238            SLADbOperations.writeStausEvent(workflow.getSlaXml(), workflow.getId(), store, Status.FAILED,
239                                            SlaAppType.WORKFLOW_JOB);
240            queueCallable(new CoordActionUpdateCommand(workflow));
241            new WfEndCommand(jobId).call(); //To delete the WF temp dir
242            return;
243        }
244    
245        @Override
246        protected Void execute(WorkflowStore store) throws CommandException, StoreException {
247            try {
248                XLog.getLog(getClass()).debug("STARTED ActionStartCommand for wf actionId=" + id);
249                jobId = Services.get().get(UUIDService.class).getId(id);
250                if (lock(jobId)) {
251                    call(store);
252                }
253                else {
254                    queueCallable(new ActionStartCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
255                    XLog.getLog(getClass()).warn("ActionStartCommand lock was not acquired - failed {0}", id);
256                }
257            }
258            catch (InterruptedException e) {
259                queueCallable(new ActionStartCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
260                XLog.getLog(getClass()).warn("ActionStartCommand lock was not acquired - interrupted exception failed {0}",
261                                             id);
262            }
263            XLog.getLog(getClass()).debug("ENDED ActionStartCommand for wf actionId=" + id + ", jobId=" + jobId);
264            return null;
265        }
266    
267    }