001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.command.coord;
016    
017    import java.io.IOException;
018    import java.io.StringReader;
019    import java.util.ArrayList;
020    import java.util.Date;
021    import java.util.HashSet;
022    import java.util.List;
023    import java.util.Set;
024    
025    import org.apache.hadoop.conf.Configuration;
026    import org.apache.hadoop.fs.Path;
027    import org.apache.oozie.CoordinatorActionBean;
028    import org.apache.oozie.CoordinatorActionInfo;
029    import org.apache.oozie.CoordinatorJobBean;
030    import org.apache.oozie.ErrorCode;
031    import org.apache.oozie.XException;
032    import org.apache.oozie.action.ActionExecutorException;
033    import org.apache.oozie.action.hadoop.FsActionExecutor;
034    import org.apache.oozie.client.CoordinatorAction;
035    import org.apache.oozie.client.CoordinatorJob;
036    import org.apache.oozie.client.SLAEvent.SlaAppType;
037    import org.apache.oozie.client.rest.RestConstants;
038    import org.apache.oozie.command.CommandException;
039    import org.apache.oozie.coord.CoordELFunctions;
040    import org.apache.oozie.store.CoordinatorStore;
041    import org.apache.oozie.store.StoreException;
042    import org.apache.oozie.util.DateUtils;
043    import org.apache.oozie.util.ParamChecker;
044    import org.apache.oozie.util.XConfiguration;
045    import org.apache.oozie.util.XLog;
046    import org.apache.oozie.util.XmlUtils;
047    import org.apache.oozie.util.db.SLADbOperations;
048    import org.jdom.Element;
049    import org.jdom.JDOMException;
050    
051    public class CoordRerunCommand extends CoordinatorCommand<CoordinatorActionInfo> {
052    
053        private String jobId;
054        private String rerunType;
055        private String scope;
056        private boolean refresh;
057        private boolean noCleanup;
058        private final XLog log = XLog.getLog(getClass());
059    
060        public CoordRerunCommand(String jobId, String rerunType, String scope, boolean refresh, boolean noCleanup) {
061            super("coord_rerun", "coord_rerun", 1, XLog.STD);
062            this.jobId = ParamChecker.notEmpty(jobId, "jobId");
063            this.rerunType = ParamChecker.notEmpty(rerunType, "rerunType");
064            this.scope = ParamChecker.notEmpty(scope, "scope");
065            this.refresh = refresh;
066            this.noCleanup = noCleanup;
067        }
068    
069        @Override
070        protected CoordinatorActionInfo call(CoordinatorStore store) throws StoreException, CommandException {
071            try {
072                CoordinatorJobBean coordJob = store.getCoordinatorJob(jobId, false);
073                CoordinatorActionInfo coordInfo = null;
074                setLogInfo(coordJob);
075                if (coordJob.getStatus() != CoordinatorJob.Status.KILLED
076                        && coordJob.getStatus() != CoordinatorJob.Status.FAILED) {
077                    incrJobCounter(1);
078    
079                    List<CoordinatorActionBean> coordActions;
080                    if (rerunType.equals(RestConstants.JOB_COORD_RERUN_DATE)) {
081                        coordActions = getCoordActionsFromDates(jobId, scope, store);
082                    }
083                    else if (rerunType.equals(RestConstants.JOB_COORD_RERUN_ACTION)) {
084                        coordActions = getCoordActionsFromIds(jobId, scope, store);
085                    }
086                    else {
087                        throw new CommandException(ErrorCode.E1018, "date or action expected.");
088                    }
089                    if (checkAllActionsRunnable(coordActions)) {
090                        for (CoordinatorActionBean coordAction : coordActions) {
091                            String actionXml = coordAction.getActionXml();
092                            if (!noCleanup) {
093                                Element eAction = XmlUtils.parseXml(actionXml);
094                                cleanupOutputEvents(eAction, coordJob.getUser(), coordJob.getGroup());
095                            }
096                            if (refresh) {
097                                refreshAction(coordJob, coordAction, store);
098                            }
099                            updateAction(coordJob, coordAction, actionXml, store);
100    
101                            // TODO: time 100s should be configurable
102                            queueCallable(new CoordActionNotification(coordAction), 100);
103                            queueCallable(new CoordActionInputCheckCommand(coordAction.getId()), 100);
104                        }
105                    }
106                    else {
107                        throw new CommandException(ErrorCode.E1018, "part or all actions are not eligible to rerun!");
108                    }
109                    coordInfo = new CoordinatorActionInfo(coordActions);
110                }
111                else {
112                    log.info("CoordRerunCommand is not able to run, job status=" + coordJob.getStatus() + ", jobid="
113                            + jobId);
114                    throw new CommandException(ErrorCode.E1018,
115                            "coordinator job is killed or failed so all actions are not eligible to rerun!");
116                }
117                return coordInfo;
118            }
119            catch (XException xex) {
120                throw new CommandException(xex);
121            }
122            catch (JDOMException jex) {
123                throw new CommandException(ErrorCode.E0700, jex);
124            }
125            catch (Exception ex) {
126                throw new CommandException(ErrorCode.E1018, ex);
127            }
128        }
129    
130        /**
131         * Get the list of actions for given id ranges
132         *
133         * @param jobId
134         * @param scope
135         * @param store
136         * @return the list of all actions to rerun
137         * @throws CommandException
138         * @throws StoreException
139         */
140        private List<CoordinatorActionBean> getCoordActionsFromIds(String jobId, String scope, CoordinatorStore store)
141                throws CommandException, StoreException {
142            ParamChecker.notEmpty(jobId, "jobId");
143            ParamChecker.notEmpty(scope, "scope");
144    
145            Set<String> actions = new HashSet<String>();
146            String[] list = scope.split(",");
147            for (String s : list) {
148                s = s.trim();
149                if (s.contains("-")) {
150                    String[] range = s.split("-");
151                    if (range.length != 2) {
152                        throw new CommandException(ErrorCode.E0302, "format is wrong for action's range '" + s + "'");
153                    }
154                    int start;
155                    int end;
156                    try {
157                        start = Integer.parseInt(range[0].trim());
158                        end = Integer.parseInt(range[1].trim());
159                        if (start > end) {
160                            throw new CommandException(ErrorCode.E0302, "format is wrong for action's range '" + s + "'");
161                        }
162                    }
163                    catch (NumberFormatException ne) {
164                        throw new CommandException(ErrorCode.E0302, ne);
165                    }
166                    for (int i = start; i <= end; i++) {
167                        actions.add(jobId + "@" + i);
168                    }
169                }
170                else {
171                    try {
172                        Integer.parseInt(s);
173                    }
174                    catch (NumberFormatException ne) {
175                        throw new CommandException(ErrorCode.E0302, "format is wrong for action id'" + s
176                                + "'. Integer only.");
177                    }
178                    actions.add(jobId + "@" + s);
179                }
180            }
181    
182            List<CoordinatorActionBean> coordActions = new ArrayList<CoordinatorActionBean>();
183            for (String id : actions) {
184                CoordinatorActionBean coordAction = store.getCoordinatorAction(id, false);
185                coordActions.add(coordAction);
186                log.debug("Rerun coordinator for actionId='" + id + "'");
187            }
188            return coordActions;
189        }
190    
191        /**
192         * Get the list of actions for given date ranges
193         *
194         * @param jobId
195         * @param scope
196         * @param store
197         * @return the list of dates to rerun
198         * @throws CommandException
199         * @throws StoreException
200         */
201        private List<CoordinatorActionBean> getCoordActionsFromDates(String jobId, String scope, CoordinatorStore store)
202                throws CommandException, StoreException {
203            ParamChecker.notEmpty(jobId, "jobId");
204            ParamChecker.notEmpty(scope, "scope");
205    
206            Set<CoordinatorActionBean> actionSet = new HashSet<CoordinatorActionBean>();
207            String[] list = scope.split(",");
208            for (String s : list) {
209                s = s.trim();
210                if (s.contains("::")) {
211                    String[] dateRange = s.split("::");
212                    if (dateRange.length != 2) {
213                        throw new CommandException(ErrorCode.E0302, "format is wrong for date's range '" + s + "'");
214                    }
215                    Date start;
216                    Date end;
217                    try {
218                        start = DateUtils.parseDateUTC(dateRange[0].trim());
219                        end = DateUtils.parseDateUTC(dateRange[1].trim());
220                        if (start.after(end)) {
221                            throw new CommandException(ErrorCode.E0302, "start date is older than end date: '" + s + "'");
222                        }
223                    }
224                    catch (Exception e) {
225                        throw new CommandException(ErrorCode.E0302, e);
226                    }
227    
228                    List<CoordinatorActionBean> listOfActions = getActionIdsFromDateRange(jobId, start, end, store);
229                    actionSet.addAll(listOfActions);
230                }
231                else {
232                    Date date;
233                    try {
234                        date = DateUtils.parseDateUTC(s.trim());
235                    }
236                    catch (Exception e) {
237                        throw new CommandException(ErrorCode.E0302, e);
238                    }
239    
240                    CoordinatorActionBean coordAction = store.getCoordActionForNominalTime(jobId, date);
241                    actionSet.add(coordAction);
242                }
243            }
244    
245            List<CoordinatorActionBean> coordActions = new ArrayList<CoordinatorActionBean>();
246            for (CoordinatorActionBean coordAction : actionSet) {
247                coordActions.add(coordAction);
248                log.debug("Rerun coordinator for actionId='" + coordAction.getId() + "'");
249            }
250            return coordActions;
251        }
252    
253        private List<CoordinatorActionBean> getActionIdsFromDateRange(String jobId, Date start, Date end,
254                CoordinatorStore store)
255                throws StoreException {
256            List<CoordinatorActionBean> list = store.getCoordActionsForDates(jobId, start, end);
257            return list;
258        }
259    
260        /**
261         * Check if all given actions are eligible to rerun.
262         *
263         * @param actions list of CoordinatorActionBean
264         * @return true if all actions are eligible to rerun
265         */
266        private boolean checkAllActionsRunnable(List<CoordinatorActionBean> coordActions) {
267            for (CoordinatorActionBean coordAction : coordActions) {
268                if (!coordAction.isTerminalStatus()) {
269                    return false;
270                }
271            }
272            return true;
273        }
274    
275        /**
276         * Cleanup output-events directories
277         *
278         * @param eAction
279         * @param workflow
280         * @param action
281         */
282        @SuppressWarnings("unchecked")
283        private void cleanupOutputEvents(Element eAction, String user, String group) {
284            Element outputList = eAction.getChild("output-events", eAction.getNamespace());
285            for (Element data : (List<Element>) outputList.getChildren("data-out", eAction.getNamespace())) {
286                if (data.getChild("uris", data.getNamespace()) != null) {
287                    String uris = data.getChild("uris", data.getNamespace()).getTextTrim();
288                    if (uris != null) {
289                        String[] uriArr = uris.split(CoordELFunctions.INSTANCE_SEPARATOR);
290                        FsActionExecutor fsAe = new FsActionExecutor();
291                        for (String uri : uriArr) {
292                            Path path = new Path(uri);
293                            try {
294                                fsAe.delete(user, group, path);
295                                log.debug("Cleanup the output dir " + path);
296                            }
297                            catch (ActionExecutorException ae) {
298                                log.warn("Failed to cleanup the output dir " + uri, ae);
299                            }
300                        }
301                    }
302    
303                }
304            }
305        }
306    
307        /**
308         * Refresh an Action
309         *
310         * @param coordJob
311         * @param coordAction
312         * @param store
313         * @throws Exception
314         */
315        private void refreshAction(CoordinatorJobBean coordJob, CoordinatorActionBean coordAction, CoordinatorStore store)
316                throws Exception {
317            Configuration jobConf = null;
318            try {
319                jobConf = new XConfiguration(new StringReader(coordJob.getConf()));
320            }
321            catch (IOException ioe) {
322                log.warn("Configuration parse error. read from DB :" + coordJob.getConf(), ioe);
323                throw new CommandException(ErrorCode.E1005, ioe);
324            }
325            String jobXml = coordJob.getJobXml();
326            Element eJob = XmlUtils.parseXml(jobXml);
327            String actionXml = CoordCommandUtils.materializeOneInstance(jobId, dryrun, (Element) eJob.clone(), coordAction
328                    .getNominalTime(), coordAction.getActionNumber(), jobConf, coordAction);
329            log.debug("Refresh Action actionId=" + coordAction.getId() + ", actionXml="
330                    + XmlUtils.prettyPrint(actionXml).toString());
331            coordAction.setActionXml(actionXml);
332        }
333    
334        /**
335         * Update an Action into database table
336         *
337         * @param coordJob
338         * @param coordAction
339         * @param actionXml
340         * @param store
341         * @throws Exception
342         */
343        private void updateAction(CoordinatorJobBean coordJob, CoordinatorActionBean coordAction, String actionXml,
344                CoordinatorStore store) throws Exception {
345            log.debug("updateAction for actionId=" + coordAction.getId());
346            coordAction.setStatus(CoordinatorAction.Status.WAITING);
347            coordAction.setExternalId("");
348            coordAction.setExternalStatus("");
349            coordAction.setRerunTime(new Date());
350            store.updateCoordinatorAction(coordAction);
351            writeActionRegistration(coordAction.getActionXml(), coordAction, store, coordJob.getUser(), coordJob.getGroup());
352        }
353    
354        /**
355         * Create SLA RegistrationEvent
356         *
357         * @param actionXml
358         * @param actionBean
359         * @param store
360         * @param user
361         * @param group
362         * @throws Exception
363         */
364        private void writeActionRegistration(String actionXml, CoordinatorActionBean actionBean, CoordinatorStore store,
365                String user, String group)
366                throws Exception {
367            Element eAction = XmlUtils.parseXml(actionXml);
368            Element eSla = eAction.getChild("action", eAction.getNamespace()).getChild("info", eAction.getNamespace("sla"));
369            SLADbOperations.writeSlaRegistrationEvent(eSla, store, actionBean.getId(), SlaAppType.COORDINATOR_ACTION, user,
370                    group);
371        }
372    
373        @Override
374        protected CoordinatorActionInfo execute(CoordinatorStore store) throws StoreException, CommandException {
375            log.info("STARTED CoordRerunCommand for jobId=" + jobId + ", scope=" + scope);
376            CoordinatorActionInfo coordInfo = null;
377            try {
378                if (lock(jobId)) {
379                    coordInfo = call(store);
380                }
381                else {
382                    queueCallable(new CoordResumeCommand(jobId), LOCK_FAILURE_REQUEUE_INTERVAL);
383                    log.warn("CoordRerunCommand lock was not acquired - " + " failed " + jobId + ". Requeing the same.");
384                }
385            }
386            catch (InterruptedException e) {
387                queueCallable(new CoordResumeCommand(jobId), LOCK_FAILURE_REQUEUE_INTERVAL);
388                log.warn("CoordRerunCommand lock acquiring failed " + " with exception " + e.getMessage() + " for job id "
389                        + jobId + ". Requeing the same.");
390            }
391            finally {
392                log.info("ENDED CoordRerunCommand for jobId=" + jobId + ", scope=" + scope);
393            }
394            return coordInfo;
395        }
396    
397    }