001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.command.wf;
016    
017    import java.io.IOException;
018    import java.io.StringReader;
019    import java.net.URI;
020    import java.net.URISyntaxException;
021    import java.util.Date;
022    import java.util.Properties;
023    
024    import org.apache.hadoop.conf.Configuration;
025    import org.apache.hadoop.fs.FileSystem;
026    import org.apache.hadoop.fs.Path;
027    import org.apache.oozie.DagELFunctions;
028    import org.apache.oozie.WorkflowActionBean;
029    import org.apache.oozie.WorkflowJobBean;
030    import org.apache.oozie.action.ActionExecutor;
031    import org.apache.oozie.client.WorkflowAction;
032    import org.apache.oozie.client.WorkflowJob;
033    import org.apache.oozie.command.CommandException;
034    import org.apache.oozie.service.CallbackService;
035    import org.apache.oozie.service.ELService;
036    import org.apache.oozie.service.HadoopAccessorException;
037    import org.apache.oozie.service.HadoopAccessorService;
038    import org.apache.oozie.service.Services;
039    import org.apache.oozie.store.StoreException;
040    import org.apache.oozie.store.WorkflowStore;
041    import org.apache.oozie.util.ELEvaluator;
042    import org.apache.oozie.util.Instrumentation;
043    import org.apache.oozie.util.XConfiguration;
044    import org.apache.oozie.util.XLog;
045    import org.apache.oozie.workflow.WorkflowException;
046    import org.apache.oozie.workflow.WorkflowInstance;
047    import org.apache.oozie.workflow.lite.LiteWorkflowInstance;
048    
049    /**
050     * Base class for Action execution commands. Provides common functionality to handle different types of errors while
051     * attempting to start or end an action.
052     */
053    public abstract class ActionCommand<T> extends WorkflowCommand<Void> {
054        private static final String INSTRUMENTATION_GROUP = "action.executors";
055    
056        protected static final String INSTR_FAILED_JOBS_COUNTER = "failed";
057    
058        protected static final String RECOVERY_ID_SEPARATOR = "@";
059    
060        public ActionCommand(String name, String type, int priority) {
061            super(name, type, priority, XLog.STD);
062        }
063    
064        /**
065         * Takes care of Transient failures. Sets the action status to retry and increments the retry count if not enough
066         * attempts have been made. Otherwise returns false.
067         *
068         * @param context the execution context.
069         * @param executor the executor instance being used.
070         * @param status the status to be set for the action.
071         * @return true if the action is scheduled for another retry. false if the number of retries has exceeded the
072         *         maximum number of configured retries.
073         * @throws StoreException
074         * @throws org.apache.oozie.command.CommandException
075         */
076        protected boolean handleTransient(ActionExecutor.Context context, ActionExecutor executor, WorkflowAction.Status status)
077                throws StoreException, CommandException {
078            XLog.getLog(getClass()).debug("Attempting to retry");
079            ActionExecutorContext aContext = (ActionExecutorContext) context;
080            WorkflowActionBean action = (WorkflowActionBean) aContext.getAction();
081            incrActionErrorCounter(action.getType(), "transient", 1);
082    
083            int actionRetryCount = action.getRetries();
084            if (actionRetryCount >= executor.getMaxRetries()) {
085                XLog.getLog(getClass()).warn("Exceeded max retry count [{0}]. Suspending Job", executor.getMaxRetries());
086                return false;
087            }
088            else {
089                action.setStatus(status);
090                action.setPending();
091                action.incRetries();
092                long retryDelayMillis = executor.getRetryInterval() * 1000;
093                action.setPendingAge(new Date(System.currentTimeMillis() + retryDelayMillis));
094                XLog.getLog(getClass()).info("Next Retry, Attempt Number [{0}] in [{1}] milliseconds",
095                                             actionRetryCount + 1, retryDelayMillis);
096                queueCallable(this, retryDelayMillis);
097                return true;
098            }
099        }
100    
101        /**
102         * Takes care of non transient failures. The job is suspended, and the state of the action is changed to *MANUAL
103         * and set pending flag of action to false
104         *
105         * @param store WorkflowStore
106         * @param context the execution context.
107         * @param executor the executor instance being used.
108         * @param status the status to be set for the action.
109         * @throws StoreException
110         * @throws CommandException
111         */
112        protected void handleNonTransient(WorkflowStore store, ActionExecutor.Context context, ActionExecutor executor,
113                WorkflowAction.Status status)
114                throws StoreException, CommandException {
115            ActionExecutorContext aContext = (ActionExecutorContext) context;
116            WorkflowActionBean action = (WorkflowActionBean) aContext.getAction();
117            incrActionErrorCounter(action.getType(), "nontransient", 1);
118            WorkflowJobBean workflow = (WorkflowJobBean) context.getWorkflow();
119            String id = workflow.getId();
120            action.setStatus(status);
121            action.resetPendingOnly();
122            XLog.getLog(getClass()).warn("Suspending Workflow Job id=" + id);
123            try {
124                SuspendCommand.suspendJob(store, workflow, id, action.getId());
125            }
126            catch (WorkflowException e) {
127                throw new CommandException(e);
128            }
129        }
130    
131        /**
132         * Takes care of errors. </p> For errors while attempting to start the action, the job state is updated and an
133         * {@link ActionEndCommand} is queued. </p> For errors while attempting to end the action, the job state is updated.
134         * </p>
135         *
136         * @param context the execution context.
137         * @param executor the executor instance being used.
138         * @param message
139         * @param isStart whether the error was generated while starting or ending an action.
140         * @param status the status to be set for the action.
141         * @throws org.apache.oozie.command.CommandException
142         */
143        protected void handleError(ActionExecutor.Context context, ActionExecutor executor, String message,
144                                   boolean isStart, WorkflowAction.Status status) throws CommandException {
145            XLog.getLog(getClass()).warn("Setting Action Status to [{0}]", status);
146            ActionExecutorContext aContext = (ActionExecutorContext) context;
147            WorkflowActionBean action = (WorkflowActionBean) aContext.getAction();
148            incrActionErrorCounter(action.getType(), "error", 1);
149            action.setPending();
150            if (isStart) {
151                action.setExecutionData(message, null);
152                queueCallable(new ActionEndCommand(action.getId(), action.getType()));
153            }
154            else {
155                action.setEndData(status, WorkflowAction.Status.ERROR.toString());
156            }
157        }
158    
159        public void failJob(ActionExecutor.Context context) throws CommandException {
160            ActionExecutorContext aContext = (ActionExecutorContext) context;
161            WorkflowActionBean action = (WorkflowActionBean) aContext.getAction();
162            incrActionErrorCounter(action.getType(), "failed", 1);
163            WorkflowJobBean workflow = (WorkflowJobBean) context.getWorkflow();
164            XLog.getLog(getClass()).warn("Failing Job due to failed action [{0}]", action.getName());
165            try {
166                workflow.getWorkflowInstance().fail(action.getName());
167                WorkflowInstance wfInstance = workflow.getWorkflowInstance();
168                ((LiteWorkflowInstance) wfInstance).setStatus(WorkflowInstance.Status.FAILED);
169                workflow.setWorkflowInstance(wfInstance);
170                workflow.setStatus(WorkflowJob.Status.FAILED);
171                action.setStatus(WorkflowAction.Status.FAILED);
172                action.resetPending();
173                queueCallable(new NotificationCommand(workflow, action));
174                queueCallable(new KillCommand(workflow.getId()));
175                incrJobCounter(INSTR_FAILED_JOBS_COUNTER, 1);
176            }
177            catch (WorkflowException ex) {
178                throw new CommandException(ex);
179            }
180        }
181    
182        private void incrActionErrorCounter(String type, String error, int count) {
183            getInstrumentation().incr(INSTRUMENTATION_GROUP, type + "#ex." + error, count);
184        }
185    
186        protected void incrActionCounter(String type, int count) {
187            getInstrumentation().incr(INSTRUMENTATION_GROUP, type + "#" + getName(), count);
188        }
189    
190        protected void addActionCron(String type, Instrumentation.Cron cron) {
191            getInstrumentation().addCron(INSTRUMENTATION_GROUP, type + "#" + getName(), cron);
192        }
193    
194        public static class ActionExecutorContext implements ActionExecutor.Context {
195            private WorkflowJobBean workflow;
196            private Configuration protoConf;
197            private WorkflowActionBean action;
198            private boolean isRetry;
199            private boolean started;
200            private boolean ended;
201            private boolean executed;
202    
203            public ActionExecutorContext(WorkflowJobBean workflow, WorkflowActionBean action, boolean isRetry) {
204                this.workflow = workflow;
205                this.action = action;
206                this.isRetry = isRetry;
207                try {
208                    protoConf = new XConfiguration(new StringReader(workflow.getProtoActionConf()));
209                }
210                catch (IOException ex) {
211                    throw new RuntimeException("It should not happen", ex);
212                }
213            }
214    
215            public String getCallbackUrl(String externalStatusVar) {
216                return Services.get().get(CallbackService.class).createCallBackUrl(action.getId(), externalStatusVar);
217            }
218    
219            public Configuration getProtoActionConf() {
220                return protoConf;
221            }
222    
223            public WorkflowJob getWorkflow() {
224                return workflow;
225            }
226    
227            public WorkflowAction getAction() {
228                return action;
229            }
230    
231            public ELEvaluator getELEvaluator() {
232                ELEvaluator evaluator = Services.get().get(ELService.class).createEvaluator("workflow");
233                DagELFunctions.configureEvaluator(evaluator, workflow, action);
234                return evaluator;
235            }
236    
237            public void setVar(String name, String value) {
238                name = action.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + name;
239                WorkflowInstance wfInstance = workflow.getWorkflowInstance();
240                wfInstance.setVar(name, value);
241                //workflow.getWorkflowInstance().setVar(name, value);
242                workflow.setWorkflowInstance(wfInstance);
243            }
244    
245            public String getVar(String name) {
246                name = action.getName() + WorkflowInstance.NODE_VAR_SEPARATOR + name;
247                return workflow.getWorkflowInstance().getVar(name);
248            }
249    
250            public void setStartData(String externalId, String trackerUri, String consoleUrl) {
251                action.setStartData(externalId, trackerUri, consoleUrl);
252                started = true;
253            }
254    
255            public void setExecutionData(String externalStatus, Properties actionData) {
256                action.setExecutionData(externalStatus, actionData);
257                executed = true;
258            }
259    
260            public void setEndData(WorkflowAction.Status status, String signalValue) {
261                action.setEndData(status, signalValue);
262                ended = true;
263            }
264    
265            public boolean isRetry() {
266                return isRetry;
267            }
268    
269            /**
270             * Returns whether setStartData has been called or not.
271             *
272             * @return true if start completion info has been set.
273             */
274            public boolean isStarted() {
275                return started;
276            }
277    
278            /**
279             * Returns whether setExecutionData has been called or not.
280             *
281             * @return true if execution completion info has been set, otherwise false.
282             */
283            public boolean isExecuted() {
284                return executed;
285            }
286    
287    
288            /**
289             * Returns whether setEndData has been called or not.
290             *
291             * @return true if end completion info has been set.
292             */
293            public boolean isEnded() {
294                return ended;
295            }
296    
297            public void setExternalStatus(String externalStatus) {
298                action.setExternalStatus(externalStatus);
299            }
300    
301            @Override
302            public String getRecoveryId() {
303                return action.getId() + RECOVERY_ID_SEPARATOR + workflow.getRun();
304            }
305    
306            /* (non-Javadoc)
307             * @see org.apache.oozie.action.ActionExecutor.Context#getActionDir()
308             */
309            public Path getActionDir() throws HadoopAccessorException, IOException, URISyntaxException {
310                String name = getWorkflow().getId() + "/" + action.getName() + "--" + action.getType();
311                FileSystem fs = getAppFileSystem();
312                String actionDirPath = Services.get().getSystemId() + "/" + name;
313                Path fqActionDir = new Path(fs.getHomeDirectory(), actionDirPath);
314                return fqActionDir;
315            }
316    
317            /* (non-Javadoc)
318             * @see org.apache.oozie.action.ActionExecutor.Context#getAppFileSystem()
319             */
320            public FileSystem getAppFileSystem() throws HadoopAccessorException, IOException, URISyntaxException {
321                WorkflowJob workflow = getWorkflow();
322                XConfiguration jobConf = new XConfiguration(new StringReader(workflow.getConf()));
323                Configuration fsConf = new Configuration();
324                XConfiguration.copy(jobConf, fsConf);
325                return Services.get().get(HadoopAccessorService.class).createFileSystem(workflow.getUser(),
326                        workflow.getGroup(), new URI(getWorkflow().getAppPath()), fsConf);
327    
328            }
329    
330            @Override
331            public void setErrorInfo(String str, String exMsg) {
332                action.setErrorInfo(str, exMsg);
333            }
334        }
335    
336    }