001 /** 002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved. 003 * Licensed under the Apache License, Version 2.0 (the "License"); 004 * you may not use this file except in compliance with the License. 005 * You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software 010 * distributed under the License is distributed on an "AS IS" BASIS, 011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 * See the License for the specific language governing permissions and 013 * limitations under the License. See accompanying LICENSE file. 014 */ 015 package org.apache.oozie.service; 016 017 import java.util.ArrayList; 018 import java.util.List; 019 020 import org.apache.hadoop.conf.Configuration; 021 import org.apache.oozie.CoordinatorActionBean; 022 import org.apache.oozie.WorkflowActionBean; 023 import org.apache.oozie.command.coord.CoordActionCheckCommand; 024 import org.apache.oozie.command.wf.ActionCheckCommand; 025 import org.apache.oozie.store.CoordinatorStore; 026 import org.apache.oozie.store.Store; 027 import org.apache.oozie.store.StoreException; 028 import org.apache.oozie.store.WorkflowStore; 029 import org.apache.oozie.util.XCallable; 030 import org.apache.oozie.util.XLog; 031 032 /** 033 * The Action Checker Service queue ActionCheckCommands to check the status of running actions and 034 * CoordActionCheckCommands to check the status of coordinator actions. The delay between checks on the same action can 035 * be configured. 036 */ 037 public class ActionCheckerService implements Service { 038 039 public static final String CONF_PREFIX = Service.CONF_PREFIX + "ActionCheckerService."; 040 /** 041 * The frequency at which the ActionCheckService will run. 042 */ 043 public static final String CONF_ACTION_CHECK_INTERVAL = CONF_PREFIX + "action.check.interval"; 044 /** 045 * The time, in seconds, between an ActionCheck for the same action. 046 */ 047 public static final String CONF_ACTION_CHECK_DELAY = CONF_PREFIX + "action.check.delay"; 048 049 /** 050 * The number of callables to be queued in a batch. 051 */ 052 public static final String CONF_CALLABLE_BATCH_SIZE = CONF_PREFIX + "callable.batch.size"; 053 054 protected static final String INSTRUMENTATION_GROUP = "actionchecker"; 055 protected static final String INSTR_CHECK_ACTIONS_COUNTER = "checks_wf_actions"; 056 protected static final String INSTR_CHECK_COORD_ACTIONS_COUNTER = "checks_coord_actions"; 057 058 /** 059 * {@link ActionCheckRunnable} is the runnable which is scheduled to run and queue Action checks. 060 */ 061 static class ActionCheckRunnable<S extends Store> implements Runnable { 062 private int actionCheckDelay; 063 private List<XCallable<Void>> callables; 064 private StringBuilder msg = null; 065 066 public ActionCheckRunnable(int actionCheckDelay) { 067 this.actionCheckDelay = actionCheckDelay; 068 } 069 070 public void run() { 071 XLog.Info.get().clear(); 072 XLog log = XLog.getLog(getClass()); 073 msg = new StringBuilder(); 074 runWFActionCheck(); 075 runCoordActionCheck(); 076 log.debug("QUEUING [{0}] for potential checking", msg.toString()); 077 if (null != callables) { 078 boolean ret = Services.get().get(CallableQueueService.class).queueSerial(callables); 079 if (ret == false) { 080 log.warn("Unable to queue the callables commands for CheckerService. " 081 + "Most possibly command queue is full. Queue size is :" 082 + Services.get().get(CallableQueueService.class).queueSize()); 083 } 084 callables = null; 085 } 086 } 087 088 /** 089 * check workflow actions 090 */ 091 private void runWFActionCheck() { 092 XLog.Info.get().clear(); 093 XLog log = XLog.getLog(getClass()); 094 095 WorkflowStore store = null; 096 try { 097 store = (WorkflowStore) Services.get().get(StoreService.class).getStore(WorkflowStore.class); 098 store.beginTrx(); 099 List<WorkflowActionBean> actions = store.getRunningActions(actionCheckDelay); 100 msg.append(" WF_ACTIONS : " + actions.size()); 101 for (WorkflowActionBean action : actions) { 102 Services.get().get(InstrumentationService.class).get().incr(INSTRUMENTATION_GROUP, 103 INSTR_CHECK_ACTIONS_COUNTER, 1); 104 queueCallable(new ActionCheckCommand(action.getId())); 105 } 106 store.commitTrx(); 107 } 108 catch (StoreException ex) { 109 if (store != null) { 110 store.rollbackTrx(); 111 } 112 log.warn("Exception while accessing the store", ex); 113 } 114 catch (Exception ex) { 115 log.error("Exception, {0}", ex.getMessage(), ex); 116 if (store != null && store.isActive()) { 117 try { 118 store.rollbackTrx(); 119 } 120 catch (RuntimeException rex) { 121 log.warn("openjpa error, {0}", rex.getMessage(), rex); 122 } 123 } 124 } 125 finally { 126 if (store != null) { 127 if (!store.isActive()) { 128 try { 129 store.closeTrx(); 130 } 131 catch (RuntimeException rex) { 132 log.warn("Exception while attempting to close store", rex); 133 } 134 } 135 else { 136 log.warn("transaction is not committed or rolled back before closing entitymanager."); 137 } 138 } 139 } 140 } 141 142 /** 143 * check coordinator actions 144 */ 145 private void runCoordActionCheck() { 146 XLog.Info.get().clear(); 147 XLog log = XLog.getLog(getClass()); 148 149 CoordinatorStore store = null; 150 try { 151 store = Services.get().get(StoreService.class).getStore(CoordinatorStore.class); 152 store.beginTrx(); 153 List<CoordinatorActionBean> cactions = store.getRunningActionsOlderThan(actionCheckDelay, false); 154 msg.append(" COORD_ACTIONS : " + cactions.size()); 155 for (CoordinatorActionBean caction : cactions) { 156 Services.get().get(InstrumentationService.class).get().incr(INSTRUMENTATION_GROUP, 157 INSTR_CHECK_COORD_ACTIONS_COUNTER, 1); 158 queueCallable(new CoordActionCheckCommand(caction.getId(), actionCheckDelay)); 159 } 160 store.commitTrx(); 161 } 162 catch (StoreException ex) { 163 if (store != null) { 164 store.rollbackTrx(); 165 } 166 log.warn("Exception while accessing the store", ex); 167 } 168 catch (Exception ex) { 169 log.error("Exception, {0}", ex.getMessage(), ex); 170 if (store != null && store.isActive()) { 171 try { 172 store.rollbackTrx(); 173 } 174 catch (RuntimeException rex) { 175 log.warn("openjpa error, {0}", rex.getMessage(), rex); 176 } 177 } 178 } 179 finally { 180 if (store != null) { 181 if (!store.isActive()) { 182 try { 183 store.closeTrx(); 184 } 185 catch (RuntimeException rex) { 186 log.warn("Exception while attempting to close store", rex); 187 } 188 } 189 else { 190 log.warn("transaction is not committed or rolled back before closing entitymanager."); 191 } 192 } 193 } 194 } 195 196 /** 197 * Adds callables to a list. If the number of callables in the list reaches {@link 198 * ActionCheckerService#CONF_CALLABLE_BATCH_SIZE}, the entire batch is queued and the callables list is reset. 199 * 200 * @param callable the callable to queue. 201 */ 202 private void queueCallable(XCallable<Void> callable) { 203 if (callables == null) { 204 callables = new ArrayList<XCallable<Void>>(); 205 } 206 callables.add(callable); 207 if (callables.size() == Services.get().getConf().getInt(CONF_CALLABLE_BATCH_SIZE, 10)) { 208 boolean ret = Services.get().get(CallableQueueService.class).queueSerial(callables); 209 if (ret == false) { 210 XLog.getLog(getClass()).warn( 211 "Unable to queue the callables commands for CheckerService. " 212 + "Most possibly command queue is full. Queue size is :" 213 + Services.get().get(CallableQueueService.class).queueSize()); 214 } 215 callables = new ArrayList<XCallable<Void>>(); 216 } 217 } 218 } 219 220 /** 221 * Initializes the Action Check service. 222 * 223 * @param services services instance. 224 */ 225 @Override 226 public void init(Services services) { 227 Configuration conf = services.getConf(); 228 Runnable actionCheckRunnable = new ActionCheckRunnable(conf.getInt(CONF_ACTION_CHECK_DELAY, 600)); 229 services.get(SchedulerService.class).schedule(actionCheckRunnable, 10, 230 conf.getInt(CONF_ACTION_CHECK_INTERVAL, 60), SchedulerService.Unit.SEC); 231 } 232 233 /** 234 * Destroy the Action Checker Services. 235 */ 236 @Override 237 public void destroy() { 238 } 239 240 /** 241 * Return the public interface for the action checker service. 242 * 243 * @return {@link ActionCheckerService}. 244 */ 245 @Override 246 public Class<? extends Service> getInterface() { 247 return ActionCheckerService.class; 248 } 249 }