001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.command.coord;
016    
017    import java.io.IOException;
018    import java.io.StringReader;
019    import java.util.ArrayList;
020    import java.util.Date;
021    import java.util.HashSet;
022    import java.util.List;
023    import java.util.Set;
024    
025    import org.apache.hadoop.conf.Configuration;
026    import org.apache.hadoop.fs.FileSystem;
027    import org.apache.hadoop.fs.Path;
028    import org.apache.oozie.CoordinatorActionBean;
029    import org.apache.oozie.CoordinatorActionInfo;
030    import org.apache.oozie.CoordinatorJobBean;
031    import org.apache.oozie.ErrorCode;
032    import org.apache.oozie.XException;
033    import org.apache.oozie.client.CoordinatorAction;
034    import org.apache.oozie.client.CoordinatorJob;
035    import org.apache.oozie.client.SLAEvent.SlaAppType;
036    import org.apache.oozie.client.rest.RestConstants;
037    import org.apache.oozie.command.CommandException;
038    import org.apache.oozie.coord.CoordELFunctions;
039    import org.apache.oozie.service.HadoopAccessorService;
040    import org.apache.oozie.service.Services;
041    import org.apache.oozie.store.CoordinatorStore;
042    import org.apache.oozie.store.StoreException;
043    import org.apache.oozie.util.DateUtils;
044    import org.apache.oozie.util.ParamChecker;
045    import org.apache.oozie.util.XConfiguration;
046    import org.apache.oozie.util.XLog;
047    import org.apache.oozie.util.XmlUtils;
048    import org.apache.oozie.util.db.SLADbOperations;
049    import org.jdom.Element;
050    import org.jdom.JDOMException;
051    
052    public class CoordRerunCommand extends CoordinatorCommand<CoordinatorActionInfo> {
053    
054        private String jobId;
055        private String rerunType;
056        private String scope;
057        private boolean refresh;
058        private boolean noCleanup;
059        private final XLog log = XLog.getLog(getClass());
060    
061        public CoordRerunCommand(String jobId, String rerunType, String scope, boolean refresh, boolean noCleanup) {
062            super("coord_rerun", "coord_rerun", 1, XLog.STD);
063            this.jobId = ParamChecker.notEmpty(jobId, "jobId");
064            this.rerunType = ParamChecker.notEmpty(rerunType, "rerunType");
065            this.scope = ParamChecker.notEmpty(scope, "scope");
066            this.refresh = refresh;
067            this.noCleanup = noCleanup;
068        }
069    
070        @Override
071        protected CoordinatorActionInfo call(CoordinatorStore store) throws StoreException, CommandException {
072            try {
073                CoordinatorJobBean coordJob = store.getCoordinatorJob(jobId, false);
074                CoordinatorActionInfo coordInfo = null;
075                setLogInfo(coordJob);
076                if (coordJob.getStatus() != CoordinatorJob.Status.KILLED
077                        && coordJob.getStatus() != CoordinatorJob.Status.FAILED) {
078                    incrJobCounter(1);
079    
080                    List<CoordinatorActionBean> coordActions;
081                    if (rerunType.equals(RestConstants.JOB_COORD_RERUN_DATE)) {
082                        coordActions = getCoordActionsFromDates(jobId, scope, store);
083                    }
084                    else if (rerunType.equals(RestConstants.JOB_COORD_RERUN_ACTION)) {
085                        coordActions = getCoordActionsFromIds(jobId, scope, store);
086                    }
087                    else {
088                        throw new CommandException(ErrorCode.E1018, "date or action expected.");
089                    }
090                    if (checkAllActionsRunnable(coordActions)) {
091                        Configuration conf = new XConfiguration(new StringReader(coordJob.getConf()));
092                        for (CoordinatorActionBean coordAction : coordActions) {
093                            String actionXml = coordAction.getActionXml();
094                            if (!noCleanup) {
095                                Element eAction = XmlUtils.parseXml(actionXml);
096                                cleanupOutputEvents(eAction, coordJob.getUser(), coordJob.getGroup(), conf);
097                            }
098                            if (refresh) {
099                                refreshAction(coordJob, coordAction, store);
100                            }
101                            updateAction(coordJob, coordAction, actionXml, store);
102    
103                            // TODO: time 100s should be configurable
104                            queueCallable(new CoordActionNotification(coordAction), 100);
105                            CoordActionInputCheckCommand.queue(new CoordActionInputCheckCommand(coordAction.getId()), 100);
106                        }
107                    }
108                    else {
109                        throw new CommandException(ErrorCode.E1018, "part or all actions are not eligible to rerun!");
110                    }
111                    coordInfo = new CoordinatorActionInfo(coordActions);
112                }
113                else {
114                    log.info("CoordRerunCommand is not able to run, job status=" + coordJob.getStatus() + ", jobid="
115                            + jobId);
116                    throw new CommandException(ErrorCode.E1018,
117                            "coordinator job is killed or failed so all actions are not eligible to rerun!");
118                }
119                return coordInfo;
120            }
121            catch (XException xex) {
122                throw new CommandException(xex);
123            }
124            catch (JDOMException jex) {
125                throw new CommandException(ErrorCode.E0700, jex);
126            }
127            catch (Exception ex) {
128                throw new CommandException(ErrorCode.E1018, ex);
129            }
130        }
131    
132        /**
133         * Get the list of actions for given id ranges
134         *
135         * @param jobId
136         * @param scope
137         * @param store
138         * @return the list of all actions to rerun
139         * @throws CommandException
140         * @throws StoreException
141         */
142        private List<CoordinatorActionBean> getCoordActionsFromIds(String jobId, String scope, CoordinatorStore store)
143                throws CommandException, StoreException {
144            ParamChecker.notEmpty(jobId, "jobId");
145            ParamChecker.notEmpty(scope, "scope");
146    
147            Set<String> actions = new HashSet<String>();
148            String[] list = scope.split(",");
149            for (String s : list) {
150                s = s.trim();
151                if (s.contains("-")) {
152                    String[] range = s.split("-");
153                    if (range.length != 2) {
154                        throw new CommandException(ErrorCode.E0302, "format is wrong for action's range '" + s + "'");
155                    }
156                    int start;
157                    int end;
158                    try {
159                        start = Integer.parseInt(range[0].trim());
160                        end = Integer.parseInt(range[1].trim());
161                        if (start > end) {
162                            throw new CommandException(ErrorCode.E0302, "format is wrong for action's range '" + s + "'");
163                        }
164                    }
165                    catch (NumberFormatException ne) {
166                        throw new CommandException(ErrorCode.E0302, ne);
167                    }
168                    for (int i = start; i <= end; i++) {
169                        actions.add(jobId + "@" + i);
170                    }
171                }
172                else {
173                    try {
174                        Integer.parseInt(s);
175                    }
176                    catch (NumberFormatException ne) {
177                        throw new CommandException(ErrorCode.E0302, "format is wrong for action id'" + s
178                                + "'. Integer only.");
179                    }
180                    actions.add(jobId + "@" + s);
181                }
182            }
183    
184            List<CoordinatorActionBean> coordActions = new ArrayList<CoordinatorActionBean>();
185            for (String id : actions) {
186                CoordinatorActionBean coordAction = store.getCoordinatorAction(id, false);
187                coordActions.add(coordAction);
188                log.debug("Rerun coordinator for actionId='" + id + "'");
189            }
190            return coordActions;
191        }
192    
193        /**
194         * Get the list of actions for given date ranges
195         *
196         * @param jobId
197         * @param scope
198         * @param store
199         * @return the list of dates to rerun
200         * @throws CommandException
201         * @throws StoreException
202         */
203        private List<CoordinatorActionBean> getCoordActionsFromDates(String jobId, String scope, CoordinatorStore store)
204                throws CommandException, StoreException {
205            ParamChecker.notEmpty(jobId, "jobId");
206            ParamChecker.notEmpty(scope, "scope");
207    
208            Set<CoordinatorActionBean> actionSet = new HashSet<CoordinatorActionBean>();
209            String[] list = scope.split(",");
210            for (String s : list) {
211                s = s.trim();
212                if (s.contains("::")) {
213                    String[] dateRange = s.split("::");
214                    if (dateRange.length != 2) {
215                        throw new CommandException(ErrorCode.E0302, "format is wrong for date's range '" + s + "'");
216                    }
217                    Date start;
218                    Date end;
219                    try {
220                        start = DateUtils.parseDateUTC(dateRange[0].trim());
221                        end = DateUtils.parseDateUTC(dateRange[1].trim());
222                        if (start.after(end)) {
223                            throw new CommandException(ErrorCode.E0302, "start date is older than end date: '" + s + "'");
224                        }
225                    }
226                    catch (Exception e) {
227                        throw new CommandException(ErrorCode.E0302, e);
228                    }
229    
230                    List<CoordinatorActionBean> listOfActions = getActionIdsFromDateRange(jobId, start, end, store);
231                    actionSet.addAll(listOfActions);
232                }
233                else {
234                    Date date;
235                    try {
236                        date = DateUtils.parseDateUTC(s.trim());
237                    }
238                    catch (Exception e) {
239                        throw new CommandException(ErrorCode.E0302, e);
240                    }
241    
242                    CoordinatorActionBean coordAction = store.getCoordActionForNominalTime(jobId, date);
243                    actionSet.add(coordAction);
244                }
245            }
246    
247            List<CoordinatorActionBean> coordActions = new ArrayList<CoordinatorActionBean>();
248            for (CoordinatorActionBean coordAction : actionSet) {
249                coordActions.add(coordAction);
250                log.debug("Rerun coordinator for actionId='" + coordAction.getId() + "'");
251            }
252            return coordActions;
253        }
254    
255        private List<CoordinatorActionBean> getActionIdsFromDateRange(String jobId, Date start, Date end,
256                CoordinatorStore store)
257                throws StoreException {
258            List<CoordinatorActionBean> list = store.getCoordActionsForDates(jobId, start, end);
259            return list;
260        }
261    
262        /**
263         * Check if all given actions are eligible to rerun.
264         *
265         * @param actions list of CoordinatorActionBean
266         * @return true if all actions are eligible to rerun
267         */
268        private boolean checkAllActionsRunnable(List<CoordinatorActionBean> coordActions) {
269            for (CoordinatorActionBean coordAction : coordActions) {
270                if (!coordAction.isTerminalStatus()) {
271                    return false;
272                }
273            }
274            return true;
275        }
276    
277        /**
278         * Cleanup output-events directories
279         *
280         * @param eAction
281         * @param workflow
282         * @param action
283         */
284        @SuppressWarnings("unchecked")
285        private void cleanupOutputEvents(Element eAction, String user, String group, Configuration conf) {
286            Element outputList = eAction.getChild("output-events", eAction.getNamespace());
287            if (outputList != null) {
288                for (Element data : (List<Element>) outputList.getChildren("data-out", eAction.getNamespace())) {
289                    if (data.getChild("uris", data.getNamespace()) != null) {
290                        String uris = data.getChild("uris", data.getNamespace()).getTextTrim();
291                        if (uris != null) {
292                            String[] uriArr = uris.split(CoordELFunctions.INSTANCE_SEPARATOR);
293                            for (String uri : uriArr) {
294                                Path path = new Path(uri);
295                                try {
296                                    FileSystem fs = Services.get().get(HadoopAccessorService.class).
297                                            createFileSystem(user, group, path.toUri(), conf);
298                                    if (fs.exists(path)) {
299                                        if (!fs.delete(path, true)) {
300                                            throw new IOException();
301                                        }
302                                    }
303                                    log.debug("Cleanup the output dir " + path);
304                                }
305                                catch (Exception ex) {
306                                    log.warn("Failed to cleanup the output dir " + uri, ex);
307                                }
308                            }
309                        }
310    
311                    }
312                }
313            }
314            else {
315                log.info("No output-events defined in coordinator xml. Therefore nothing to cleanup");
316            }
317        }
318    
319        /**
320         * Refresh an Action
321         *
322         * @param coordJob
323         * @param coordAction
324         * @param store
325         * @throws Exception
326         */
327        private void refreshAction(CoordinatorJobBean coordJob, CoordinatorActionBean coordAction, CoordinatorStore store)
328                throws Exception {
329            Configuration jobConf = null;
330            try {
331                jobConf = new XConfiguration(new StringReader(coordJob.getConf()));
332            }
333            catch (IOException ioe) {
334                log.warn("Configuration parse error. read from DB :" + coordJob.getConf(), ioe);
335                throw new CommandException(ErrorCode.E1005, ioe);
336            }
337            String jobXml = coordJob.getJobXml();
338            Element eJob = XmlUtils.parseXml(jobXml);
339            String actionXml = CoordCommandUtils.materializeOneInstance(jobId, dryrun, (Element) eJob.clone(), coordAction
340                    .getNominalTime(), coordAction.getActionNumber(), jobConf, coordAction);
341            log.debug("Refresh Action actionId=" + coordAction.getId() + ", actionXml="
342                    + XmlUtils.prettyPrint(actionXml).toString());
343            coordAction.setActionXml(actionXml);
344        }
345    
346        /**
347         * Update an Action into database table
348         *
349         * @param coordJob
350         * @param coordAction
351         * @param actionXml
352         * @param store
353         * @throws Exception
354         */
355        private void updateAction(CoordinatorJobBean coordJob, CoordinatorActionBean coordAction, String actionXml,
356                CoordinatorStore store) throws Exception {
357            log.debug("updateAction for actionId=" + coordAction.getId());
358            coordAction.setStatus(CoordinatorAction.Status.WAITING);
359            coordAction.setExternalId("");
360            coordAction.setExternalStatus("");
361            coordAction.setRerunTime(new Date());
362            store.updateCoordinatorAction(coordAction);
363            writeActionRegistration(coordAction.getActionXml(), coordAction, store, coordJob.getUser(), coordJob.getGroup());
364        }
365    
366        /**
367         * Create SLA RegistrationEvent
368         *
369         * @param actionXml
370         * @param actionBean
371         * @param store
372         * @param user
373         * @param group
374         * @throws Exception
375         */
376        private void writeActionRegistration(String actionXml, CoordinatorActionBean actionBean, CoordinatorStore store,
377                String user, String group)
378                throws Exception {
379            Element eAction = XmlUtils.parseXml(actionXml);
380            Element eSla = eAction.getChild("action", eAction.getNamespace()).getChild("info", eAction.getNamespace("sla"));
381            SLADbOperations.writeSlaRegistrationEvent(eSla, store, actionBean.getId(), SlaAppType.COORDINATOR_ACTION, user,
382                    group);
383        }
384    
385        @Override
386        protected CoordinatorActionInfo execute(CoordinatorStore store) throws StoreException, CommandException {
387            log.info("STARTED CoordRerunCommand for jobId=" + jobId + ", scope=" + scope);
388            CoordinatorActionInfo coordInfo = null;
389            try {
390                if (lock(jobId)) {
391                    coordInfo = call(store);
392                }
393                else {
394                    queueCallable(new CoordResumeCommand(jobId), LOCK_FAILURE_REQUEUE_INTERVAL);
395                    log.warn("CoordRerunCommand lock was not acquired - " + " failed " + jobId + ". Requeing the same.");
396                }
397            }
398            catch (InterruptedException e) {
399                queueCallable(new CoordResumeCommand(jobId), LOCK_FAILURE_REQUEUE_INTERVAL);
400                log.warn("CoordRerunCommand lock acquiring failed " + " with exception " + e.getMessage() + " for job id "
401                        + jobId + ". Requeing the same.");
402            }
403            finally {
404                log.info("ENDED CoordRerunCommand for jobId=" + jobId + ", scope=" + scope);
405            }
406            return coordInfo;
407        }
408    
409    }