001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.service;
016    
017    import java.util.ArrayList;
018    import java.util.List;
019    
020    import org.apache.hadoop.conf.Configuration;
021    import org.apache.oozie.CoordinatorActionBean;
022    import org.apache.oozie.WorkflowActionBean;
023    import org.apache.oozie.command.coord.CoordActionCheckCommand;
024    import org.apache.oozie.command.wf.ActionCheckCommand;
025    import org.apache.oozie.store.CoordinatorStore;
026    import org.apache.oozie.store.Store;
027    import org.apache.oozie.store.StoreException;
028    import org.apache.oozie.store.WorkflowStore;
029    import org.apache.oozie.util.XCallable;
030    import org.apache.oozie.util.XLog;
031    
032    /**
033     * The Action Checker Service queue ActionCheckCommands to check the status of running actions and
034     * CoordActionCheckCommands to check the status of coordinator actions. The delay between checks on the same action can
035     * be configured.
036     */
037    public class ActionCheckerService implements Service {
038    
039        public static final String CONF_PREFIX = Service.CONF_PREFIX + "ActionCheckerService.";
040        /**
041         * The frequency at which the ActionCheckService will run.
042         */
043        public static final String CONF_ACTION_CHECK_INTERVAL = CONF_PREFIX + "action.check.interval";
044        /**
045         * The time, in seconds, between an ActionCheck for the same action.
046         */
047        public static final String CONF_ACTION_CHECK_DELAY = CONF_PREFIX + "action.check.delay";
048    
049        /**
050         * The number of callables to be queued in a batch.
051         */
052        public static final String CONF_CALLABLE_BATCH_SIZE = CONF_PREFIX + "callable.batch.size";
053    
054        protected static final String INSTRUMENTATION_GROUP = "actionchecker";
055        protected static final String INSTR_CHECK_ACTIONS_COUNTER = "checks_wf_actions";
056        protected static final String INSTR_CHECK_COORD_ACTIONS_COUNTER = "checks_coord_actions";
057    
058        /**
059         * {@link ActionCheckRunnable} is the runnable which is scheduled to run and queue Action checks.
060         */
061        static class ActionCheckRunnable<S extends Store> implements Runnable {
062            private int actionCheckDelay;
063            private List<XCallable<Void>> callables;
064            private StringBuilder msg = null;
065    
066            public ActionCheckRunnable(int actionCheckDelay) {
067                this.actionCheckDelay = actionCheckDelay;
068            }
069    
070            public void run() {
071                XLog.Info.get().clear();
072                XLog log = XLog.getLog(getClass());
073                msg = new StringBuilder();
074                runWFActionCheck();
075                runCoordActionCheck();
076                log.debug("QUEUING [{0}] for potential checking", msg.toString());
077                if (null != callables) {
078                    boolean ret = Services.get().get(CallableQueueService.class).queueSerial(callables);
079                    if (ret == false) {
080                        log.warn("Unable to queue the callables commands for CheckerService. "
081                                + "Most possibly command queue is full. Queue size is :"
082                                + Services.get().get(CallableQueueService.class).queueSize());
083                    }
084                    callables = null;
085                }
086            }
087    
088            /**
089             * check workflow actions
090             */
091            private void runWFActionCheck() {
092                XLog.Info.get().clear();
093                XLog log = XLog.getLog(getClass());
094    
095                WorkflowStore store = null;
096                try {
097                    store = (WorkflowStore) Services.get().get(StoreService.class).getStore(WorkflowStore.class);
098                    store.beginTrx();
099                    List<WorkflowActionBean> actions = store.getRunningActions(actionCheckDelay);
100                    msg.append(" WF_ACTIONS : " + actions.size());
101                    for (WorkflowActionBean action : actions) {
102                        Services.get().get(InstrumentationService.class).get().incr(INSTRUMENTATION_GROUP,
103                                                                                    INSTR_CHECK_ACTIONS_COUNTER, 1);
104                        queueCallable(new ActionCheckCommand(action.getId()));
105                    }
106                    store.commitTrx();
107                }
108                catch (StoreException ex) {
109                    if (store != null) {
110                        store.rollbackTrx();
111                    }
112                    log.warn("Exception while accessing the store", ex);
113                }
114                catch (Exception ex) {
115                    log.error("Exception, {0}", ex.getMessage(), ex);
116                    if (store != null && store.isActive()) {
117                        try {
118                            store.rollbackTrx();
119                        }
120                        catch (RuntimeException rex) {
121                            log.warn("openjpa error, {0}", rex.getMessage(), rex);
122                        }
123                    }
124                }
125                finally {
126                    if (store != null) {
127                        if (!store.isActive()) {
128                            try {
129                                store.closeTrx();
130                            }
131                            catch (RuntimeException rex) {
132                                log.warn("Exception while attempting to close store", rex);
133                            }
134                        }
135                        else {
136                            log.warn("transaction is not committed or rolled back before closing entitymanager.");
137                        }
138                    }
139                }
140            }
141    
142            /**
143             * check coordinator actions
144             */
145            private void runCoordActionCheck() {
146                XLog.Info.get().clear();
147                XLog log = XLog.getLog(getClass());
148    
149                CoordinatorStore store = null;
150                try {
151                    store = Services.get().get(StoreService.class).getStore(CoordinatorStore.class);
152                    store.beginTrx();
153                    List<CoordinatorActionBean> cactions = store.getRunningActionsOlderThan(actionCheckDelay, false);
154                    msg.append(" COORD_ACTIONS : " + cactions.size());
155                    for (CoordinatorActionBean caction : cactions) {
156                        Services.get().get(InstrumentationService.class).get().incr(INSTRUMENTATION_GROUP,
157                                                                                    INSTR_CHECK_COORD_ACTIONS_COUNTER, 1);
158                        queueCallable(new CoordActionCheckCommand(caction.getId(), actionCheckDelay));
159                    }
160                    store.commitTrx();
161                }
162                catch (StoreException ex) {
163                    if (store != null) {
164                        store.rollbackTrx();
165                    }
166                    log.warn("Exception while accessing the store", ex);
167                }
168                catch (Exception ex) {
169                    log.error("Exception, {0}", ex.getMessage(), ex);
170                    if (store != null && store.isActive()) {
171                        try {
172                            store.rollbackTrx();
173                        }
174                        catch (RuntimeException rex) {
175                            log.warn("openjpa error, {0}", rex.getMessage(), rex);
176                        }
177                    }
178                }
179                finally {
180                    if (store != null) {
181                        if (!store.isActive()) {
182                            try {
183                                store.closeTrx();
184                            }
185                            catch (RuntimeException rex) {
186                                log.warn("Exception while attempting to close store", rex);
187                            }
188                        }
189                        else {
190                            log.warn("transaction is not committed or rolled back before closing entitymanager.");
191                        }
192                    }
193                }
194            }
195    
196            /**
197             * Adds callables to a list. If the number of callables in the list reaches {@link
198             * ActionCheckerService#CONF_CALLABLE_BATCH_SIZE}, the entire batch is queued and the callables list is reset.
199             *
200             * @param callable the callable to queue.
201             */
202            private void queueCallable(XCallable<Void> callable) {
203                if (callables == null) {
204                    callables = new ArrayList<XCallable<Void>>();
205                }
206                callables.add(callable);
207                if (callables.size() == Services.get().getConf().getInt(CONF_CALLABLE_BATCH_SIZE, 10)) {
208                    boolean ret = Services.get().get(CallableQueueService.class).queueSerial(callables);
209                    if (ret == false) {
210                        XLog.getLog(getClass()).warn(
211                                "Unable to queue the callables commands for CheckerService. "
212                                        + "Most possibly command queue is full. Queue size is :"
213                                        + Services.get().get(CallableQueueService.class).queueSize());
214                    }
215                    callables = new ArrayList<XCallable<Void>>();
216                }
217            }
218        }
219    
220        /**
221         * Initializes the Action Check service.
222         *
223         * @param services services instance.
224         */
225        @Override
226        public void init(Services services) {
227            Configuration conf = services.getConf();
228            Runnable actionCheckRunnable = new ActionCheckRunnable(conf.getInt(CONF_ACTION_CHECK_DELAY, 600));
229            services.get(SchedulerService.class).schedule(actionCheckRunnable, 10,
230                                                          conf.getInt(CONF_ACTION_CHECK_INTERVAL, 60), SchedulerService.Unit.SEC);
231        }
232    
233        /**
234         * Destroy the Action Checker Services.
235         */
236        @Override
237        public void destroy() {
238        }
239    
240        /**
241         * Return the public interface for the action checker service.
242         *
243         * @return {@link ActionCheckerService}.
244         */
245        @Override
246        public Class<? extends Service> getInterface() {
247            return ActionCheckerService.class;
248        }
249    }