/**
 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License. See accompanying LICENSE file.
 */
package org.apache.oozie.command.wf;

import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.oozie.DagELFunctions;
import org.apache.oozie.ErrorCode;
import org.apache.oozie.WorkflowActionBean;
import org.apache.oozie.WorkflowJobBean;
import org.apache.oozie.action.ActionExecutor;
import org.apache.oozie.action.ActionExecutorException;
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.client.WorkflowAction;
import org.apache.oozie.client.WorkflowJob;
import org.apache.oozie.client.SLAEvent.SlaAppType;
import org.apache.oozie.client.SLAEvent.Status;
import org.apache.oozie.command.CommandException;
import org.apache.oozie.service.ActionService;
import org.apache.oozie.service.Services;
import org.apache.oozie.service.UUIDService;
import org.apache.oozie.store.StoreException;
import org.apache.oozie.store.WorkflowStore;
import org.apache.oozie.util.Instrumentation;
import org.apache.oozie.util.XLog;
import org.apache.oozie.util.db.SLADbOperations;
import org.apache.oozie.workflow.WorkflowInstance;

/**
 * Command that runs the "end" phase of a workflow action.
 * <p>
 * It loads the action and its workflow from the store, invokes
 * {@link ActionExecutor#end} on the executor registered for the action type,
 * persists the resulting action/workflow state, writes an SLA status event,
 * and queues a {@link NotificationCommand} and a {@link SignalCommand}.
 * Executor failures are dispatched on the error type of the
 * {@link ActionExecutorException}.
 */
public class ActionEndCommand extends ActionCommand<Void> {
    public static final String COULD_NOT_END = "COULD_NOT_END";
    public static final String END_DATA_MISSING = "END_DATA_MISSING";

    private String id;
    private String jobId = null;

    /**
     * Creates a command that ends the given action.
     *
     * @param id id of the action to end.
     * @param type command type, passed through to the parent command.
     */
    public ActionEndCommand(String id, String type) {
        super("action.end", type, 0);
        this.id = id;
    }

    /**
     * Ends the action if it is pending in an endable status and its workflow is RUNNING.
     * <p>
     * When either precondition fails the method only logs and returns. {@link #jobId}
     * must have been set before this method is invoked ({@link #execute} does so).
     *
     * @param store workflow store used to load and persist the workflow and the action.
     * @return always <code>null</code>.
     * @throws StoreException propagated from the store operations.
     * @throws CommandException with {@link ErrorCode#E0802} if no executor is registered
     *         for the action type.
     */
    @Override
    protected Void call(WorkflowStore store) throws StoreException, CommandException {
        WorkflowJobBean workflow = store.getWorkflow(jobId, false);
        setLogInfo(workflow);
        WorkflowActionBean action = store.getAction(id, false);
        setLogInfo(action);

        if (!isEndable(action)) {
            XLog.getLog(getClass()).debug("Action pending={0}, status={1}. Skipping ActionEnd Execution",
                    action.getPending(), action.getStatusStr());
            return null;
        }
        if (workflow.getStatus() != WorkflowJob.Status.RUNNING) {
            XLog.getLog(getClass()).warn("Job state is not {0}. Skipping ActionEnd Execution",
                    WorkflowJob.Status.RUNNING.toString());
            return null;
        }

        ActionExecutor executor = Services.get().get(ActionService.class).getExecutor(action.getType());
        // The executor must be validated BEFORE it is dereferenced. Previously the retry
        // settings were read from it first, so a missing executor surfaced as a
        // NullPointerException and the E0802 error below could never be raised.
        if (executor == null) {
            throw new CommandException(ErrorCode.E0802, action.getType());
        }

        // Job configuration may override the executor's retry defaults.
        Configuration conf = workflow.getWorkflowInstance().getConf();
        int maxRetries = conf.getInt(OozieClient.ACTION_MAX_RETRIES, executor.getMaxRetries());
        long retryInterval = conf.getLong(OozieClient.ACTION_RETRY_INTERVAL, executor.getRetryInterval());
        executor.setMaxRetries(maxRetries);
        executor.setRetryInterval(retryInterval);

        // An action coming back from END_RETRY or END_MANUAL is a re-attempt of the end phase.
        boolean isRetry = action.getStatus() == WorkflowActionBean.Status.END_RETRY
                || action.getStatus() == WorkflowActionBean.Status.END_MANUAL;
        ActionExecutorContext context = new ActionCommand.ActionExecutorContext(workflow, action, isRetry);
        try {
            XLog.getLog(getClass()).debug(
                    "End, name [{0}] type [{1}] status[{2}] external status [{3}] signal value [{4}]",
                    action.getName(), action.getType(), action.getStatus(), action.getExternalStatus(),
                    action.getSignalValue());

            Instrumentation.Cron cron = new Instrumentation.Cron();
            cron.start();
            executor.end(context, action);
            cron.stop();
            addActionCron(action.getType(), cron);

            WorkflowInstance wfInstance = workflow.getWorkflowInstance();
            DagELFunctions.setActionInfo(wfInstance, action);
            workflow.setWorkflowInstance(wfInstance);
            incrActionCounter(action.getType(), 1);

            if (!context.isEnded()) {
                // The executor returned without reporting end data: fail the job.
                XLog.getLog(getClass()).warn(XLog.OPS,
                        "Action Ended, ActionExecutor [{0}] must call setEndData()", executor.getType());
                action.setErrorInfo(END_DATA_MISSING, "Execution Ended, but End Data Missing from Action");
                failJob(context);
                store.updateAction(action);
                store.updateWorkflow(workflow);
                return null;
            }

            action.setRetries(0);
            action.setEndTime(new Date());
            store.updateAction(action);
            store.updateWorkflow(workflow);

            Status slaStatus = toSlaStatus(action);
            SLADbOperations.writeStausEvent(action.getSlaXml(), action.getId(), store, slaStatus,
                    SlaAppType.WORKFLOW_ACTION);
            queueCallable(new NotificationCommand(workflow, action));
            XLog.getLog(getClass()).debug(
                    "Queuing commands for action=" + id + ", status=" + action.getStatus()
                            + ", Set pending=" + action.getPending());
            queueCallable(new SignalCommand(workflow.getId(), id));
        }
        catch (ActionExecutorException ex) {
            XLog.getLog(getClass()).warn(
                    "Error ending action [{0}]. ErrorType [{1}], ErrorCode [{2}], Message [{3}]",
                    action.getName(), ex.getErrorType(), ex.getErrorCode(), ex.getMessage());
            action.setErrorInfo(ex.getErrorCode(), ex.getMessage());
            action.setEndTime(null);
            switch (ex.getErrorType()) {
                case TRANSIENT:
                    // Fall back to the non-transient handling when the transient handler
                    // reports false.
                    if (!handleTransient(context, executor, WorkflowAction.Status.END_RETRY)) {
                        handleNonTransient(store, context, executor, WorkflowAction.Status.END_MANUAL);
                        action.setPendingAge(new Date());
                        action.setRetries(0);
                    }
                    action.setEndTime(null);
                    break;
                case NON_TRANSIENT:
                    handleNonTransient(store, context, executor, WorkflowAction.Status.END_MANUAL);
                    action.setEndTime(null);
                    break;
                case ERROR:
                    handleError(context, executor, COULD_NOT_END, false, WorkflowAction.Status.ERROR);
                    queueCallable(new SignalCommand(workflow.getId(), id));
                    break;
                case FAILED:
                    failJob(context);
                    break;
            }
            store.updateAction(action);
            store.updateWorkflow(workflow);
        }
        return null;
    }

    /**
     * Resolves the job id from the action id, acquires the job lock and runs {@link #call}.
     * <p>
     * If the lock is not acquired, or the attempt is interrupted, a new
     * {@link ActionEndCommand} for the same action is queued with
     * <code>LOCK_FAILURE_REQUEUE_INTERVAL</code>.
     *
     * @param store workflow store handed on to {@link #call}.
     * @return always <code>null</code>.
     * @throws CommandException propagated from {@link #call}.
     * @throws StoreException propagated from {@link #call}.
     */
    @Override
    protected Void execute(WorkflowStore store) throws CommandException, StoreException {
        XLog.getLog(getClass()).debug("STARTED ActionEndCommand for action " + id);
        try {
            jobId = Services.get().get(UUIDService.class).getId(id);
            if (lock(jobId)) {
                call(store);
            }
            else {
                queueCallable(new ActionEndCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
                XLog.getLog(getClass()).warn("ActionEnd lock was not acquired - failed {0}", id);
            }
        }
        catch (InterruptedException e) {
            // NOTE(review): the thread's interrupt flag is not restored here; confirm
            // whether swallowing the interrupt is intended before changing it.
            queueCallable(new ActionEndCommand(id, type), LOCK_FAILURE_REQUEUE_INTERVAL);
            XLog.getLog(getClass()).warn("ActionEnd lock was not acquired - interrupted exception failed {0}", id);
        }
        finally {
            XLog.getLog(getClass()).debug("ENDED ActionEndCommand for action " + id);
        }
        return null;
    }

    /**
     * Tells whether the action is pending and in a status from which it can be ended
     * (DONE, END_RETRY or END_MANUAL).
     */
    private boolean isEndable(WorkflowActionBean action) {
        return action.isPending()
                && (action.getStatus() == WorkflowActionBean.Status.DONE
                        || action.getStatus() == WorkflowActionBean.Status.END_RETRY
                        || action.getStatus() == WorkflowActionBean.Status.END_MANUAL);
    }

    /**
     * Maps the action's final status to the SLA event status recorded for it.
     */
    private Status toSlaStatus(WorkflowActionBean action) {
        Status slaStatus = null;
        switch (action.getStatus()) {
            case OK:
                slaStatus = Status.SUCCEEDED;
                break;
            case KILLED:
                slaStatus = Status.KILLED;
                break;
            case FAILED:
                slaStatus = Status.FAILED;
                break;
            case ERROR:
                // NOTE(review): the message says FAILED but the status recorded is KILLED;
                // left unchanged because the intended mapping cannot be told from here.
                XLog.getLog(getClass()).info("ERROR is considered as FAILED for SLA");
                slaStatus = Status.KILLED;
                break;
            default: // TODO: What will happen for other Action
                // status
                slaStatus = Status.FAILED;
                break;
        }
        return slaStatus;
    }
}