001 /** 002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved. 003 * Licensed under the Apache License, Version 2.0 (the "License"); 004 * you may not use this file except in compliance with the License. 005 * You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software 010 * distributed under the License is distributed on an "AS IS" BASIS, 011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 * See the License for the specific language governing permissions and 013 * limitations under the License. See accompanying LICENSE file. 014 */ 015 package org.apache.oozie.command.wf; 016 017 import java.util.Date; 018 019 import org.apache.hadoop.conf.Configuration; 020 import org.apache.oozie.DagELFunctions; 021 import org.apache.oozie.ErrorCode; 022 import org.apache.oozie.WorkflowActionBean; 023 import org.apache.oozie.WorkflowJobBean; 024 import org.apache.oozie.action.ActionExecutor; 025 import org.apache.oozie.action.ActionExecutorException; 026 import org.apache.oozie.client.OozieClient; 027 import org.apache.oozie.client.WorkflowAction; 028 import org.apache.oozie.client.WorkflowJob; 029 import org.apache.oozie.client.SLAEvent.SlaAppType; 030 import org.apache.oozie.client.SLAEvent.Status; 031 import org.apache.oozie.command.CommandException; 032 import org.apache.oozie.service.ActionService; 033 import org.apache.oozie.service.Services; 034 import org.apache.oozie.service.UUIDService; 035 import org.apache.oozie.store.StoreException; 036 import org.apache.oozie.store.WorkflowStore; 037 import org.apache.oozie.util.Instrumentation; 038 import org.apache.oozie.util.XLog; 039 import org.apache.oozie.util.db.SLADbOperations; 040 import org.apache.oozie.workflow.WorkflowInstance; 041 042 public class ActionEndCommand extends ActionCommand<Void> { 043 public static final String COULD_NOT_END = "COULD_NOT_END"; 044 public static final String END_DATA_MISSING = "END_DATA_MISSING"; 045 046 private String id; 047 private String jobId = null; 048 049 public ActionEndCommand(String id, String type) { 050 super("action.end", type, 0); 051 this.id = id; 052 } 053 054 @Override 055 protected Void call(WorkflowStore store) throws StoreException, CommandException { 056 WorkflowJobBean workflow = store.getWorkflow(jobId, false); 057 setLogInfo(workflow); 058 WorkflowActionBean action = store.getAction(id, false); 059 setLogInfo(action); 060 if (action.isPending() 061 && (action.getStatus() == WorkflowActionBean.Status.DONE 062 || action.getStatus() == WorkflowActionBean.Status.END_RETRY || action.getStatus() == WorkflowActionBean.Status.END_MANUAL)) { 063 if (workflow.getStatus() == WorkflowJob.Status.RUNNING) { 064 065 ActionExecutor executor = Services.get().get(ActionService.class).getExecutor(action.getType()); 066 Configuration conf = workflow.getWorkflowInstance().getConf(); 067 int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries()); 068 long retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval()); 069 executor.setMaxRetries(maxRetries); 070 executor.setRetryInterval(retryInterval); 071 072 if (executor != null) { 073 boolean isRetry = false; 074 if (action.getStatus() == WorkflowActionBean.Status.END_RETRY 075 || action.getStatus() == WorkflowActionBean.Status.END_MANUAL) { 076 isRetry = true; 077 } 078 ActionExecutorContext context = new ActionCommand.ActionExecutorContext(workflow, action, isRetry); 079 try { 080 081 XLog.getLog(getClass()).debug( 082 "End, name [{0}] type [{1}] status[{2}] external status [{3}] signal value [{4}]", 083 action.getName(), action.getType(), action.getStatus(), action.getExternalStatus(), 084 action.getSignalValue()); 085 WorkflowInstance wfInstance = workflow.getWorkflowInstance(); 086 DagELFunctions.setActionInfo(wfInstance, action); 087 workflow.setWorkflowInstance(wfInstance); 088 incrActionCounter(action.getType(), 1); 089 090 Instrumentation.Cron cron = new Instrumentation.Cron(); 091 cron.start(); 092 executor.end(context, action); 093 cron.stop(); 094 addActionCron(action.getType(), cron); 095 096 if (!context.isEnded()) { 097 XLog.getLog(getClass()).warn(XLog.OPS, 098 "Action Ended, ActionExecutor [{0}] must call setEndData()", executor.getType()); 099 action.setErrorInfo(END_DATA_MISSING, "Execution Ended, but End Data Missing from Action"); 100 failJob(context); 101 store.updateAction(action); 102 store.updateWorkflow(workflow); 103 return null; 104 } 105 action.setRetries(0); 106 action.setEndTime(new Date()); 107 store.updateAction(action); 108 store.updateWorkflow(workflow); 109 Status slaStatus = null; 110 switch (action.getStatus()) { 111 case OK: 112 slaStatus = Status.SUCCEEDED; 113 break; 114 case KILLED: 115 slaStatus = Status.KILLED; 116 break; 117 case FAILED: 118 slaStatus = Status.FAILED; 119 break; 120 case ERROR: 121 XLog.getLog(getClass()).info("ERROR is considered as FAILED for SLA"); 122 slaStatus = Status.KILLED; 123 break; 124 default: // TODO: What will happen for other Action 125 // status 126 slaStatus = Status.FAILED; 127 break; 128 } 129 SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store, slaStatus, 130 SlaAppType.WORKFLOW_ACTION); 131 queueCallable(new NotificationCommand(workflow, action)); 132 XLog.getLog(getClass()).debug( 133 "Queuing commands for action=" + id + ", status=" + action.getStatus() 134 + ", Set pending=" + action.getPending()); 135 queueCallable(new SignalCommand(workflow.getId(), id)); 136 } 137 catch (ActionExecutorException ex) { 138 XLog.getLog(getClass()).warn( 139 "Error ending action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]", 140 action.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage()); 141 action.setErrorInfo(ex.getErrorCode(), ex.getMessage()); 142 action.setEndTime(null); 143 switch (ex.getErrorType()) { 144 case TRANSIENT: 145 if (!handleTransient(context, executor, WorkflowAction.Status.END_RETRY)) { 146 handleNonTransient(store, context, executor, WorkflowAction.Status.END_MANUAL); 147 action.setPendingAge(new Date()); 148 action.setRetries(0); 149 } 150 action.setEndTime(null); 151 break; 152 case NON_TRANSIENT: 153 handleNonTransient(store, context, executor, WorkflowAction.Status.END_MANUAL); 154 action.setEndTime(null); 155 break; 156 case ERROR: 157 handleError(context, executor, COULD_NOT_END, false, WorkflowAction.Status.ERROR); 158 queueCallable(new SignalCommand(workflow.getId(), id)); 159 break; 160 case FAILED: 161 failJob(context); 162 break; 163 } 164 store.updateAction(action); 165 store.updateWorkflow(workflow); 166 } 167 } 168 else { 169 throw new CommandException(ErrorCode.E0802, action.getType()); 170 } 171 } 172 else { 173 XLog.getLog(getClass()).warn("Job state is not {0}. Skipping ActionEnd Execution", 174 WorkflowJob.Status.RUNNING.toString()); 175 } 176 } 177 else { 178 XLog.getLog(getClass()).debug("Action pending={0}, status={1}. Skipping ActionEnd Execution", 179 action.getPending(), action.getStatusStr()); 180 } 181 return null; 182 } 183 184 @Override 185 protected Void execute(WorkflowStore store) throws CommandException, StoreException { 186 XLog.getLog(getClass()).debug("STARTED ActionEndCommand for action " + id); 187 try { 188 jobId = Services.get().get(UUIDService.class).getId(id); 189 if (lock(jobId)) { 190 call(store); 191 } 192 else { 193 queueCallable(new ActionEndCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL); 194 XLog.getLog(getClass()).warn("ActionEnd lock was not acquired - failed {0}", id); 195 } 196 } 197 catch (InterruptedException e) { 198 queueCallable(new ActionEndCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL); 199 XLog.getLog(getClass()).warn("ActionEnd lock was not acquired - interrupted exception failed {0}", id); 200 } 201 finally { 202 XLog.getLog(getClass()).debug("ENDED ActionEndCommand for action " + id); 203 } 204 return null; 205 } 206 }