/**
 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License. See accompanying LICENSE file.
 */
package org.apache.oozie.command.wf;

import org.apache.hadoop.conf.Configuration;
import org.apache.oozie.client.CoordinatorAction;
import org.apache.oozie.client.WorkflowJob;
import org.apache.oozie.client.SLAEvent.SlaAppType;
import org.apache.oozie.client.SLAEvent.Status;
import org.apache.oozie.CoordinatorActionBean;
import org.apache.oozie.WorkflowActionBean;
import org.apache.oozie.WorkflowJobBean;
import org.apache.oozie.ErrorCode;
import org.apache.oozie.XException;
import org.apache.oozie.command.CommandException;
import org.apache.oozie.command.coord.CoordActionReadyCommand;
import org.apache.oozie.command.coord.CoordActionUpdateCommand;
import org.apache.oozie.coord.CoordELFunctions;
import org.apache.oozie.coord.CoordinatorJobException;
import org.apache.oozie.service.ELService;
import org.apache.oozie.service.SchemaService;
import org.apache.oozie.service.Services;
import org.apache.oozie.service.StoreService;
import org.apache.oozie.service.UUIDService;
import org.apache.oozie.service.WorkflowStoreService;
import org.apache.oozie.store.CoordinatorStore;
import org.apache.oozie.store.StoreException;
import org.apache.oozie.store.WorkflowStore;
import org.apache.oozie.workflow.WorkflowException;
import org.apache.oozie.workflow.WorkflowInstance;
import org.apache.oozie.util.ELEvaluator;
import org.apache.oozie.util.XConfiguration;
import org.apache.oozie.util.XLog;
import org.apache.oozie.util.ParamChecker;
import org.apache.oozie.util.XmlUtils;
import org.apache.oozie.util.db.SLADbOperations;
import org.apache.openjpa.lib.log.Log;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;

import java.io.StringReader;
import java.util.Date;
import java.util.List;
import java.util.Map;

/**
 * Command that advances a workflow job: it either starts a job that is in
 * {@code PREP} status (when no action id is set) or signals the workflow
 * instance that a given action has finished, then persists the resulting
 * job/action state and queues the follow-up commands.
 */
public class SignalCommand extends WorkflowCommand<Void> {

    protected static final String INSTR_SUCCEEDED_JOBS_COUNTER_NAME = "succeeded";

    private final String jobId;
    /** Id of the action that triggered the signal; {@code null} when the command starts the job. */
    private final String actionId;

    /**
     * Creates a command without an action id; {@link #call(WorkflowStore)} then
     * treats the invocation as a job start.
     *
     * @param name command name, also passed to the superclass as its second argument.
     * @param priority command priority.
     * @param jobId workflow job id, must not be empty.
     */
    protected SignalCommand(String name, int priority, String jobId) {
        super(name, name, priority, XLog.STD);
        this.jobId = ParamChecker.notEmpty(jobId, "jobId");
        this.actionId = null;
    }

    /**
     * Creates a command that signals the workflow on behalf of an action.
     *
     * @param jobId workflow job id, must not be empty.
     * @param actionId workflow action id, must not be empty.
     */
    public SignalCommand(String jobId, String actionId) {
        super("signal", "signal", 1, XLog.STD);
        this.jobId = ParamChecker.notEmpty(jobId, "jobId");
        this.actionId = ParamChecker.notEmpty(actionId, "actionId");
    }

    /**
     * Starts or signals the workflow and stores the outcome.
     * <p>
     * Nothing is done (only a warning is logged) when the referenced action is
     * not both complete and pending, or when the job is neither {@code RUNNING}
     * nor {@code PREP}.
     *
     * @param store workflow store used for all reads and updates.
     * @return always {@code null}.
     * @throws CommandException if the job has to be started but is not in {@code PREP}
     *         status, or if the workflow library reports a {@link WorkflowException}.
     * @throws StoreException propagated from the store.
     */
    @Override
    protected Void call(WorkflowStore store) throws CommandException, StoreException {
        WorkflowJobBean workflow = store.getWorkflow(jobId, false);
        setLogInfo(workflow);

        WorkflowActionBean action = null;
        if (actionId != null) {
            action = store.getAction(actionId, false);
            setLogInfo(action);
        }

        // An action may only signal while it is complete and still flagged as pending.
        if (action != null && !(action.isComplete() && action.isPending())) {
            XLog.getLog(getClass()).warn(
                    "SignalCommand for action id :" + actionId + " is already processed. status=" + action.getStatus()
                            + ", Pending=" + action.isPending());
            return null;
        }

        if (workflow.getStatus() != WorkflowJob.Status.RUNNING && workflow.getStatus() != WorkflowJob.Status.PREP) {
            XLog.getLog(getClass()).warn("Workflow not RUNNING, current status [{0}]", workflow.getStatus());
            return null;
        }

        try {
            WorkflowInstance workflowInstance = workflow.getWorkflowInstance();
            workflowInstance.setTransientVar(WorkflowStoreService.WORKFLOW_BEAN, workflow);

            boolean completed = (action == null)
                    ? startWorkflow(workflow, workflowInstance, store)
                    : signalAction(workflow, workflowInstance, action, store);

            if (completed) {
                completeWorkflow(workflow, workflowInstance, store);
            }
            else {
                startNewActions(workflow, workflowInstance, store);
            }

            store.updateWorkflow(workflow);
            XLog.getLog(getClass()).debug(
                    "Updated the workflow status to " + workflow.getId() + "  status ="
                            + workflow.getStatusStr());
            if (workflow.getStatus() != WorkflowJob.Status.RUNNING
                    && workflow.getStatus() != WorkflowJob.Status.SUSPENDED) {
                queueCallable(new CoordActionUpdateCommand(workflow));
                queueCallable(new WfEndCommand(jobId)); // To delete the WF temp dir
            }
        }
        catch (WorkflowException ex) {
            throw new CommandException(ex);
        }
        return null;
    }

    /**
     * Starts a job that is in {@code PREP} status: marks it {@code RUNNING}, writes the
     * STARTED SLA status event for the job and the SLA registration events for its
     * actions, and queues a notification.
     *
     * @return the value returned by {@link WorkflowInstance#start()}.
     * @throws CommandException with {@link ErrorCode#E0801} if the job is not in {@code PREP} status.
     */
    private boolean startWorkflow(WorkflowJobBean workflow, WorkflowInstance workflowInstance, WorkflowStore store)
            throws CommandException, StoreException, WorkflowException {
        if (workflow.getStatus() != WorkflowJob.Status.PREP) {
            throw new CommandException(ErrorCode.E0801, workflow.getId());
        }
        boolean completed = workflowInstance.start();
        workflow.setStatus(WorkflowJob.Status.RUNNING);
        workflow.setStartTime(new Date());
        workflow.setWorkflowInstance(workflowInstance);
        // 1. Add SLA status event for WF-JOB with status STARTED
        // 2. Add SLA registration events for all WF_ACTIONS
        SLADbOperations.writeStausEvent(workflow.getSlaXml(), jobId, store, Status.STARTED,
                SlaAppType.WORKFLOW_JOB);
        writeSLARegistrationForAllActions(workflowInstance.getApp().getDefinition(), workflow.getUser(),
                workflow.getGroup(), workflow.getConf(), store);
        queueCallable(new NotificationCommand(workflow));
        return completed;
    }

    /**
     * Signals the workflow instance for a finished action, clears the action's pending
     * flag, records its transition (unless the action is marked to be skipped) and
     * updates the action in the store.
     *
     * @return the value returned by {@link WorkflowInstance#signal(String, String)}.
     */
    private boolean signalAction(WorkflowJobBean workflow, WorkflowInstance workflowInstance,
            WorkflowActionBean action, WorkflowStore store) throws StoreException, WorkflowException {
        // The skip flag is read before signalling, as the original code did.
        boolean skipAction = isMarkedToSkip(workflowInstance, action.getName());
        boolean completed = workflowInstance.signal(action.getExecutionPath(), action.getSignalValue());
        workflow.setWorkflowInstance(workflowInstance);
        action.resetPending();
        if (!skipAction) {
            action.setTransition(workflowInstance.getTransition(action.getName()));
        }
        store.updateAction(action);
        return completed;
    }

    /**
     * Handles a workflow instance that reported completion: kills/fails the leftover
     * actions, copies the final status onto the job bean, writes the job SLA status
     * event, queues a notification and bumps the "succeeded" counter when applicable.
     */
    private void completeWorkflow(WorkflowJobBean workflow, WorkflowInstance workflowInstance, WorkflowStore store)
            throws CommandException, StoreException, WorkflowException {
        for (String actionToKillId : WorkflowStoreService.getActionsToKill(workflowInstance)) {
            WorkflowActionBean actionToKill = store.getAction(actionToKillId, false);
            actionToKill.setPending();
            actionToKill.setStatus(WorkflowActionBean.Status.KILLED);
            store.updateAction(actionToKill);
            queueCallable(new ActionKillCommand(actionToKill.getId(), actionToKill.getType()));
        }

        for (String actionToFailId : WorkflowStoreService.getActionsToFail(workflowInstance)) {
            WorkflowActionBean actionToFail = store.getAction(actionToFailId, false);
            actionToFail.resetPending();
            actionToFail.setStatus(WorkflowActionBean.Status.FAILED);
            // The FAILED event belongs to the action being failed. The previous code used the
            // signalling action here, which recorded the event against the wrong action and
            // dereferenced null when the job completed straight from start().
            SLADbOperations.writeStausEvent(actionToFail.getSlaXml(), actionToFail.getId(), store, Status.FAILED,
                    SlaAppType.WORKFLOW_ACTION);
            store.updateAction(actionToFail);
        }

        workflow.setStatus(WorkflowJob.Status.valueOf(workflowInstance.getStatus().toString()));
        workflow.setEndTime(new Date());
        workflow.setWorkflowInstance(workflowInstance);
        SLADbOperations.writeStausEvent(workflow.getSlaXml(), jobId, store, toSlaStatus(workflow.getStatus()),
                SlaAppType.WORKFLOW_JOB);
        queueCallable(new NotificationCommand(workflow));
        if (workflow.getStatus() == WorkflowJob.Status.SUCCEEDED) {
            incrJobCounter(INSTR_SUCCEEDED_JOBS_COUNTER_NAME, 1);
        }
    }

    /**
     * Handles a workflow instance that is still running: every action reported by
     * {@link WorkflowStoreService#getStartedActions} is either re-signalled right away
     * (when marked to be skipped) or inserted into the store and started.
     */
    private void startNewActions(WorkflowJobBean workflow, WorkflowInstance workflowInstance, WorkflowStore store)
            throws CommandException, StoreException, WorkflowException {
        for (WorkflowActionBean newAction : WorkflowStoreService.getStartedActions(workflowInstance)) {
            if (isMarkedToSkip(workflowInstance, newAction.getName())) {
                // Skipped action: reuse the stored action and signal it again instead of starting it.
                WorkflowActionBean oldAction = store.getAction(newAction.getId(), false);
                oldAction.setPending();
                store.updateAction(oldAction);
                queueCallable(new SignalCommand(jobId, oldAction.getId()));
            }
            else {
                newAction.setPending();
                String actionSlaXml = getActionSLAXml(newAction.getName(), workflowInstance.getApp()
                        .getDefinition(), workflow.getConf());
                newAction.setSlaXml(actionSlaXml);
                store.insertAction(newAction);
                queueCallable(new ActionStartCommand(newAction.getId(), newAction.getType()));
            }
        }
    }

    /**
     * Tells whether the workflow instance variable
     * {@code <nodeName><NODE_VAR_SEPARATOR><ReRunCommand.TO_SKIP>} is set to {@code "true"}.
     */
    private static boolean isMarkedToSkip(WorkflowInstance workflowInstance, String nodeName) {
        String skipVar = workflowInstance.getVar(nodeName + WorkflowInstance.NODE_VAR_SEPARATOR
                + ReRunCommand.TO_SKIP);
        return "true".equals(skipVar);
    }

    /**
     * Maps the final job status to the SLA status written for the job. Statuses other
     * than KILLED and FAILED map to SUCCEEDED, exactly as before.
     */
    private static Status toSlaStatus(WorkflowJob.Status jobStatus) {
        switch (jobStatus) {
            case KILLED:
                return Status.KILLED;
            case FAILED:
                return Status.FAILED;
            case SUCCEEDED:
            default:
                // TODO about SUSPENDED
                return Status.SUCCEEDED;
        }
    }

    /**
     * Creates an EL evaluator for the given group and exposes every entry of the
     * configuration as an EL variable.
     *
     * @param conf configuration whose entries become evaluator variables.
     * @param group EL group name handed to the {@link ELService}.
     * @return the populated evaluator.
     */
    public static ELEvaluator createELEvaluatorForGroup(Configuration conf, String group) {
        ELEvaluator eval = Services.get().get(ELService.class).createEvaluator(group);
        for (Map.Entry<String, String> entry : conf) {
            eval.setVariable(entry.getKey(), entry.getValue());
        }
        return eval;
    }

    /**
     * Looks up the SLA {@code info} element of the named action in the workflow
     * definition and returns it pretty-printed, without resolving it.
     *
     * @param actionName name of the action to look for.
     * @param wfXml workflow definition XML.
     * @param wfConf workflow configuration; currently unused.
     * @return the SLA XML of the first matching action that has one, or {@code null} if none does.
     * @throws CommandException with {@link ErrorCode#E1004} on any failure while parsing or searching.
     */
    private String getActionSLAXml(String actionName, String wfXml, String wfConf) throws CommandException {
        String slaXml = null;
        // TODO need to fill-out the code
        // Get the appropriate action:slaXml and resolve that.
        try {
            Element eWfJob = XmlUtils.parseXml(wfXml);
            @SuppressWarnings("unchecked") // JDOM 1.x returns a raw List of Element
            List<Element> actions = (List<Element>) eWfJob.getChildren("action", eWfJob.getNamespace());
            for (Element action : actions) {
                if (!action.getAttributeValue("name").equals(actionName)) {
                    continue;
                }
                Element eSla = action.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI));
                if (eSla != null) {
                    slaXml = XmlUtils.prettyPrint(eSla).toString(); // Could use any non-null string
                    break;
                }
            }
        }
        catch (Exception e) {
            throw new CommandException(ErrorCode.E1004, e.getMessage(), e);
        }
        return slaXml;
    }

    /**
     * Resolves an SLA element with an evaluator of the {@code wf-sla-submit} group,
     * delegating to {@link SubmitCommand}.
     *
     * @param eSla SLA element to resolve.
     * @param conf configuration supplying the evaluator variables.
     * @return the resolved SLA XML.
     * @throws CommandException with {@link ErrorCode#E1004} on any failure.
     */
    private String resolveSla(Element eSla, Configuration conf) throws CommandException {
        String slaXml = null;
        try {
            ELEvaluator evalSla = SubmitCommand.createELEvaluatorForGroup(conf, "wf-sla-submit");
            slaXml = SubmitCommand.resolveSla(eSla, evalSla);
        }
        catch (Exception e) {
            throw new CommandException(ErrorCode.E1004, e.getMessage(), e);
        }
        return slaXml;
    }

    /**
     * Writes an SLA registration event for every action of the workflow definition
     * that carries an SLA {@code info} element.
     *
     * @param wfXml workflow definition XML.
     * @param user user passed to the registration event.
     * @param group group passed to the registration event.
     * @param strConf workflow configuration as an XML string.
     * @param store store passed to the registration event writer.
     * @throws CommandException with {@link ErrorCode#E1007} on any failure.
     */
    private void writeSLARegistrationForAllActions(String wfXml, String user, String group, String strConf,
            WorkflowStore store) throws CommandException {
        try {
            Element eWfJob = XmlUtils.parseXml(wfXml);
            Configuration conf = new XConfiguration(new StringReader(strConf));
            @SuppressWarnings("unchecked") // JDOM 1.x returns a raw List of Element
            List<Element> actions = (List<Element>) eWfJob.getChildren("action", eWfJob.getNamespace());
            for (Element action : actions) {
                Element eSla = action.getChild("info", Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI));
                if (eSla != null) {
                    String slaXml = resolveSla(eSla, conf);
                    eSla = XmlUtils.parseXml(slaXml);
                    // Named childActionId so it does not shadow the actionId field.
                    String childActionId = Services.get().get(UUIDService.class).generateChildId(jobId,
                            action.getAttributeValue("name") + "");
                    SLADbOperations.writeSlaRegistrationEvent(eSla, store, childActionId,
                            SlaAppType.WORKFLOW_ACTION, user, group);
                }
            }
        }
        catch (Exception e) {
            throw new CommandException(ErrorCode.E1007, "workflow:Actions " + jobId, e);
        }
    }

    /**
     * Runs {@link #call(WorkflowStore)} if the lock on the job id is obtained; otherwise
     * (lock not obtained, or interrupted while waiting) re-queues an equivalent command
     * after {@code LOCK_FAILURE_REQUEUE_INTERVAL}.
     *
     * @param store workflow store handed to {@link #call(WorkflowStore)}.
     * @return always {@code null}.
     * @throws CommandException propagated from {@link #call(WorkflowStore)}.
     * @throws StoreException propagated from {@link #call(WorkflowStore)}.
     */
    @Override
    protected Void execute(WorkflowStore store) throws CommandException, StoreException {
        XLog.getLog(getClass()).debug("STARTED SignalCommand for jobid=" + jobId + ", actionId=" + actionId);
        try {
            if (lock(jobId)) {
                call(store);
            }
            else {
                queueCallable(new SignalCommand(jobId, actionId), LOCK_FAILURE_REQUEUE_INTERVAL);
                XLog.getLog(getClass()).warn("SignalCommand lock was not acquired - failed {0}", jobId);
            }
        }
        catch (InterruptedException e) {
            // NOTE(review): the thread's interrupt flag is not restored here; the command is only re-queued.
            queueCallable(new SignalCommand(jobId, actionId), LOCK_FAILURE_REQUEUE_INTERVAL);
            XLog.getLog(getClass()).warn("SignalCommand lock not acquired - interrupted exception failed {0}", jobId);
        }
        XLog.getLog(getClass()).debug("ENDED SignalCommand for jobid=" + jobId + ", actionId=" + actionId);
        return null;
    }
}