001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.command.wf;
016    
017    import java.util.Date;
018    
019    import javax.servlet.jsp.el.ELException;
020    
021    import org.apache.hadoop.conf.Configuration;
022    import org.apache.oozie.ErrorCode;
023    import org.apache.oozie.FaultInjection;
024    import org.apache.oozie.WorkflowActionBean;
025    import org.apache.oozie.WorkflowJobBean;
026    import org.apache.oozie.XException;
027    import org.apache.oozie.action.ActionExecutor;
028    import org.apache.oozie.action.ActionExecutorException;
029    import org.apache.oozie.client.OozieClient;
030    import org.apache.oozie.client.WorkflowAction;
031    import org.apache.oozie.client.WorkflowJob;
032    import org.apache.oozie.client.SLAEvent.SlaAppType;
033    import org.apache.oozie.client.SLAEvent.Status;
034    import org.apache.oozie.command.CommandException;
035    import org.apache.oozie.command.coord.CoordActionUpdateCommand;
036    import org.apache.oozie.service.ActionService;
037    import org.apache.oozie.service.Services;
038    import org.apache.oozie.service.UUIDService;
039    import org.apache.oozie.store.StoreException;
040    import org.apache.oozie.store.WorkflowStore;
041    import org.apache.oozie.util.ELEvaluationException;
042    import org.apache.oozie.util.Instrumentation;
043    import org.apache.oozie.util.XLog;
044    import org.apache.oozie.util.XmlUtils;
045    import org.apache.oozie.util.db.SLADbOperations;
046    
047    public class ActionStartCommand extends ActionCommand<Void> {
048        public static final String EL_ERROR = "EL_ERROR";
049        public static final String EL_EVAL_ERROR = "EL_EVAL_ERROR";
050        public static final String COULD_NOT_START = "COULD_NOT_START";
051        public static final String START_DATA_MISSING = "START_DATA_MISSING";
052        public static final String EXEC_DATA_MISSING = "EXEC_DATA_MISSING";
053    
054        private String id;
055        private String jobId;
056    
057        public ActionStartCommand(String id, String type) {
058            super("action.start", type, 0);
059            this.id = id;
060        }
061    
062        @Override
063        protected Void call(WorkflowStore store) throws StoreException, CommandException {
064            WorkflowJobBean workflow = store.getWorkflow(jobId, false);
065            setLogInfo(workflow);
066            WorkflowActionBean action = store.getAction(id, false);
067            XLog.getLog(getClass()).warn(XLog.STD,
068                                         "[***" + action.getId() + "***]" + "In call()....status=" + action.getStatusStr());
069            setLogInfo(action);
070            if (action.isPending()
071                    && (action.getStatus() == WorkflowActionBean.Status.PREP
072                    || action.getStatus() == WorkflowActionBean.Status.START_RETRY || action.getStatus() == WorkflowActionBean.Status.START_MANUAL)) {
073                if (workflow.getStatus() == WorkflowJob.Status.RUNNING) {
074    
075                    ActionExecutor executor = Services.get().get(ActionService.class).getExecutor(action.getType());
076                    Configuration conf = workflow.getWorkflowInstance().getConf();
077    
078                    int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries());
079                    long retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval());
080                    executor.setMaxRetries(maxRetries);
081                    executor.setRetryInterval(retryInterval);
082    
083                    if (executor != null) {
084                        ActionExecutorContext context = null;
085                        try {
086                            boolean isRetry = false;
087                            if (action.getStatus() == WorkflowActionBean.Status.START_RETRY
088                                    || action.getStatus() == WorkflowActionBean.Status.START_MANUAL) {
089                                isRetry = true;
090                            }
091                            context = new ActionCommand.ActionExecutorContext(workflow, action, isRetry);
092                            try {
093                                String tmpActionConf = XmlUtils.removeComments(action.getConf());
094                                String actionConf = context.getELEvaluator().evaluate(tmpActionConf, String.class);
095                                action.setConf(actionConf);
096    
097                                XLog.getLog(getClass()).debug("Start, name [{0}] type [{1}] configuration{E}{E}{2}{E}",
098                                                              action.getName(), action.getType(), actionConf);
099    
100                            }
101                            catch (ELEvaluationException ex) {
102                                throw new ActionExecutorException(ActionExecutorException.ErrorType.TRANSIENT,
103                                                                  EL_EVAL_ERROR, ex.getMessage(), ex);
104                            }
105                            catch (ELException ex) {
106                                context.setErrorInfo(EL_ERROR, ex.getMessage());
107                                XLog.getLog(getClass()).warn("ELException in ActionStartCommand ", ex.getMessage(), ex);
108                                handleError(context, store, workflow, action);
109                                return null;
110                            }
111                            catch (org.jdom.JDOMException je) {
112                                context.setErrorInfo("ParsingError", je.getMessage());
113                                XLog.getLog(getClass()).warn("JDOMException in ActionStartCommand ", je.getMessage(), je);
114                                handleError(context, store, workflow, action);
115                                return null;
116                            }
117                            catch (Exception ex) {
118                                context.setErrorInfo(EL_ERROR, ex.getMessage());
119                                XLog.getLog(getClass()).warn("Exception in ActionStartCommand ", ex.getMessage(), ex);
120                                handleError(context, store, workflow, action);
121                                return null;
122                            }
123                            action.setErrorInfo(null, null);
124                            incrActionCounter(action.getType(), 1);
125    
126                            Instrumentation.Cron cron = new Instrumentation.Cron();
127                            cron.start();
128                            executor.start(context, action);
129                            cron.stop();
130                            FaultInjection.activate("org.apache.oozie.command.SkipCommitFaultInjection");
131                            addActionCron(action.getType(), cron);
132    
133                            action.setRetries(0);
134                            if (action.isExecutionComplete()) {
135                                if (!context.isExecuted()) {
136                                    XLog.getLog(getClass()).warn(XLog.OPS,
137                                                                 "Action Completed, ActionExecutor [{0}] must call setExecutionData()",
138                                                                 executor.getType());
139                                    action.setErrorInfo(EXEC_DATA_MISSING,
140                                                        "Execution Complete, but Execution Data Missing from Action");
141                                    failJob(context);
142                                    store.updateAction(action);
143                                    store.updateWorkflow(workflow);
144                                    return null;
145                                }
146                                action.setPending();
147                                queueCallable(new ActionEndCommand(action.getId(), action.getType()));
148                            }
149                            else {
150                                if (!context.isStarted()) {
151                                    XLog.getLog(getClass()).warn(XLog.OPS,
152                                                                 "Action Started, ActionExecutor [{0}] must call setStartData()",
153                                                                 executor.getType());
154                                    action.setErrorInfo(START_DATA_MISSING,
155                                                        "Execution Started, but Start Data Missing from Action");
156                                    failJob(context);
157                                    store.updateAction(action);
158                                    store.updateWorkflow(workflow);
159                                    return null;
160                                }
161                                queueCallable(new NotificationCommand(workflow, action));
162                            }
163    
164                            XLog.getLog(getClass()).warn(XLog.STD,
165                                                         "[***" + action.getId() + "***]" + "Action status=" + action.getStatusStr());
166    
167                            store.updateAction(action);
168                            store.updateWorkflow(workflow);
169                            // Add SLA status event (STARTED) for WF_ACTION
170                            // SLADbOperations.writeSlaStatusEvent(eSla,
171                            // action.getId(), Status.STARTED, store);
172                            SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store, Status.STARTED,
173                                                            SlaAppType.WORKFLOW_ACTION);
174                            XLog.getLog(getClass()).warn(XLog.STD,
175                                                         "[***" + action.getId() + "***]" + "Action updated in DB!");
176    
177                        }
178                        catch (ActionExecutorException ex) {
179                            XLog.getLog(getClass()).warn(
180                                    "Error starting action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]",
181                                    action.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage(), ex);
182                            action.setErrorInfo(ex.getErrorCode(), ex.getMessage());
183                            switch (ex.getErrorType()) {
184                                case TRANSIENT:
185                                    if (!handleTransient(context, executor, WorkflowAction.Status.START_RETRY)) {
186                                        handleNonTransient(store, context, executor, WorkflowAction.Status.START_MANUAL);
187                                        action.setPendingAge(new Date());
188                                        action.setRetries(0);
189                                        action.setStartTime(null);
190                                    }
191                                    break;
192                                case NON_TRANSIENT:
193                                    handleNonTransient(store, context, executor, WorkflowAction.Status.START_MANUAL);
194                                    break;
195                                case ERROR:
196                                    handleError(context, executor, WorkflowAction.Status.ERROR.toString(), true,
197                                                WorkflowAction.Status.DONE);
198                                    break;
199                                case FAILED:
200                                    try {
201                                        failJob(context);
202                                        queueCallable(new CoordActionUpdateCommand(workflow));
203                                        SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store,
204                                                                        Status.FAILED, SlaAppType.WORKFLOW_ACTION);
205                                        SLADbOperations.writeStausEvent(workflow.getSlaXml(), workflow.getId(), store,
206                                                                        Status.FAILED, SlaAppType.WORKFLOW_JOB);
207                                    }
208                                    catch (XException x) {
209                                        XLog.getLog(getClass()).warn("ActionStartCommand - case:FAILED ", x.getMessage());
210                                    }
211                                    break;
212                            }
213                            store.updateAction(action);
214                            store.updateWorkflow(workflow);
215                        }
216                    }
217                    else {
218                        throw new CommandException(ErrorCode.E0802, action.getType());
219                    }
220    
221                }
222                else {
223                    XLog.getLog(getClass()).warn("Job state is not {0}. Skipping Action Execution",
224                                                 WorkflowJob.Status.RUNNING.toString());
225                }
226            }
227            return null;
228        }
229    
230        private void handleError(ActionExecutorContext context, WorkflowStore store, WorkflowJobBean workflow,
231                                 WorkflowActionBean action) throws CommandException, StoreException {
232            failJob(context);
233            store.updateAction(action);
234            store.updateWorkflow(workflow);
235            SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store, Status.FAILED,
236                                            SlaAppType.WORKFLOW_ACTION);
237            SLADbOperations.writeStausEvent(workflow.getSlaXml(), workflow.getId(), store, Status.FAILED,
238                                            SlaAppType.WORKFLOW_JOB);
239            queueCallable(new CoordActionUpdateCommand(workflow));
240            return;
241        }
242    
243        @Override
244        protected Void execute(WorkflowStore store) throws CommandException, StoreException {
245            try {
246                XLog.getLog(getClass()).debug("STARTED ActionStartCommand for wf actionId=" + id);
247                jobId = Services.get().get(UUIDService.class).getId(id);
248                if (lock(jobId)) {
249                    call(store);
250                }
251                else {
252                    queueCallable(new ActionStartCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
253                    XLog.getLog(getClass()).warn("ActionStartCommand lock was not acquired - failed {0}", id);
254                }
255            }
256            catch (InterruptedException e) {
257                queueCallable(new ActionStartCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
258                XLog.getLog(getClass()).warn("ActionStartCommand lock was not acquired - interrupted exception failed {0}",
259                                             id);
260            }
261            XLog.getLog(getClass()).debug("ENDED ActionStartCommand for wf actionId=" + id + ", jobId=" + jobId);
262            return null;
263        }
264    
265    }