001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.command.coord;
016    
017    import java.io.IOException;
018    import java.io.InputStreamReader;
019    import java.io.Reader;
020    import java.io.StringReader;
021    import java.io.StringWriter;
022    import java.net.URI;
023    import java.net.URISyntaxException;
024    import java.util.ArrayList;
025    import java.util.Date;
026    import java.util.HashMap;
027    import java.util.HashSet;
028    import java.util.List;
029    import java.util.Set;
030    import java.util.TreeSet;
031    
032    import javax.xml.transform.stream.StreamSource;
033    import javax.xml.validation.Validator;
034    
035    import org.apache.hadoop.conf.Configuration;
036    import org.apache.hadoop.fs.FileSystem;
037    import org.apache.hadoop.fs.Path;
038    import org.apache.oozie.CoordinatorJobBean;
039    import org.apache.oozie.ErrorCode;
040    import org.apache.oozie.client.CoordinatorJob;
041    import org.apache.oozie.client.OozieClient;
042    import org.apache.oozie.client.CoordinatorJob.Execution;
043    import org.apache.oozie.command.CommandException;
044    import org.apache.oozie.coord.CoordELEvaluator;
045    import org.apache.oozie.coord.CoordELFunctions;
046    import org.apache.oozie.coord.CoordUtils;
047    import org.apache.oozie.coord.CoordinatorJobException;
048    import org.apache.oozie.coord.TimeUnit;
049    import org.apache.oozie.service.DagXLogInfoService;
050    import org.apache.oozie.service.HadoopAccessorException;
051    import org.apache.oozie.service.SchemaService;
052    import org.apache.oozie.service.Service;
053    import org.apache.oozie.service.Services;
054    import org.apache.oozie.service.UUIDService;
055    import org.apache.oozie.service.HadoopAccessorService;
056    import org.apache.oozie.service.WorkflowAppService;
057    import org.apache.oozie.service.SchemaService.SchemaName;
058    import org.apache.oozie.service.UUIDService.ApplicationType;
059    import org.apache.oozie.store.CoordinatorStore;
060    import org.apache.oozie.store.StoreException;
061    import org.apache.oozie.util.DateUtils;
062    import org.apache.oozie.util.ELEvaluator;
063    import org.apache.oozie.util.IOUtils;
064    import org.apache.oozie.util.ParamChecker;
065    import org.apache.oozie.util.PropertiesUtils;
066    import org.apache.oozie.util.XConfiguration;
067    import org.apache.oozie.util.XLog;
068    import org.apache.oozie.util.XmlUtils;
069    import org.apache.oozie.workflow.WorkflowException;
070    import org.jdom.Attribute;
071    import org.jdom.Element;
072    import org.jdom.JDOMException;
073    import org.jdom.Namespace;
074    import org.xml.sax.SAXException;
075    
076    /**
077     * This class resolves a coordinator job XML and writes the job information into a DB table. <p/>
078     * Specifically it performs the following functions: 1. Resolve all the variables or properties using the job
079     * configuration. 2. Insert the dataset definitions into the {@code <data-in>} and {@code <data-out>} tags.
080     * 3. Validate the XML at runtime.
081     */
082    public class CoordSubmitCommand extends CoordinatorCommand<String> {
083    
084        private Configuration conf; // job configuration given to the constructor
085        private String authToken; // authentication token given to the constructor
086        private boolean dryrun; // when true, call() returns a preview instead of queueing materialization
087    
088        public static final String CONFIG_DEFAULT = "coord-config-default.xml"; // looked up in the parent of the app path
089        public static final String COORDINATOR_XML_FILE = "coordinator.xml"; // not referenced in the code shown here
090    
091        private static final Set<String> DISALLOWED_USER_PROPERTIES = new HashSet<String>(); // checked against conf
092        private static final Set<String> DISALLOWED_DEFAULT_PROPERTIES = new HashSet<String>(); // checked against defaults
093        /**
094         * Key of the default timeout for normal jobs, in minutes, after which the coordinator input check times out.
095         */
096        public static final String CONF_DEFAULT_TIMEOUT_NORMAL = Service.CONF_PREFIX + "coord.normal.default.timeout";
097    
098        private XLog log = XLog.getLog(getClass()); // re-fetched in call() after setLogInfo()
099        private ELEvaluator evalFreq = null; // "coord-job-submit-freq" group, set in initEvaluators()
100        private ELEvaluator evalNofuncs = null; // "coord-job-submit-nofuncs" group, set in initEvaluators()
101        private ELEvaluator evalData = null; // data-echo evaluator, set in resolveInitial()
102        private ELEvaluator evalInst = null; // "coord-job-submit-instances" group, set in initEvaluators()
103        private ELEvaluator evalSla = null; // "coord-sla-submit" group, set in initEvaluators()
104    
105        static {
106            // The first list is barred from user configs; the default config is barred from both lists.
107            String[] reservedForUsers = {PropertiesUtils.YEAR, PropertiesUtils.MONTH, PropertiesUtils.DAY,
108                    PropertiesUtils.HOUR, PropertiesUtils.MINUTE, PropertiesUtils.DAYS, PropertiesUtils.HOURS,
109                    PropertiesUtils.MINUTES, PropertiesUtils.KB, PropertiesUtils.MB, PropertiesUtils.GB,
110                    PropertiesUtils.TB, PropertiesUtils.PB, PropertiesUtils.RECORDS, PropertiesUtils.MAP_IN,
111                    PropertiesUtils.MAP_OUT, PropertiesUtils.REDUCE_IN, PropertiesUtils.REDUCE_OUT, PropertiesUtils.GROUPS};
112            String[] reservedForDefaults = {PropertiesUtils.HADOOP_USER, PropertiesUtils.HADOOP_UGI,
113                    WorkflowAppService.HADOOP_JT_KERBEROS_NAME, WorkflowAppService.HADOOP_NN_KERBEROS_NAME};
114            PropertiesUtils.createPropertySet(reservedForUsers, DISALLOWED_USER_PROPERTIES);
115            PropertiesUtils.createPropertySet(reservedForUsers, DISALLOWED_DEFAULT_PROPERTIES);
116            PropertiesUtils.createPropertySet(reservedForDefaults, DISALLOWED_DEFAULT_PROPERTIES);
117        }
118    
119        /**
120         * Creates a submit command; the {@code dryrun} flag is left at its default of false.
121         *
122         * @param conf configuration of the coordinator job, checked with {@code ParamChecker.notNull}
123         * @param authToken token to be used for authentication, checked with {@code ParamChecker.notEmpty}
124         */
125        public CoordSubmitCommand(Configuration conf, String authToken) {
126            super("coord_submit", "coord_submit", 1, XLog.STD);
127            this.conf = ParamChecker.notNull(conf, "conf");
128            this.authToken = ParamChecker.notEmpty(authToken, "authToken");
129        }
130        /** Same checks as the two-argument constructor; {@code dryrun} is stored and also passed to the superclass. */
131        public CoordSubmitCommand(boolean dryrun, Configuration conf, String authToken) {
132            super("coord_submit", "coord_submit", 1, XLog.STD, dryrun);
133            this.conf = ParamChecker.notNull(conf, "conf");
134            this.authToken = ParamChecker.notEmpty(authToken, "authToken");
135            this.dryrun = dryrun;
136            // when dryrun is true, call() materializes a preview instead of queueing work
137        }
138    
139        /**
140         * Runs the submission: merges the default config, reads and validates the application XML, resolves it and
141         * stores the job. A normal run then queues a materialization lookup; a dry run returns a preview instead.
142         * @param store coordinator store handed to {@code storeToDB}
143         * @return the job id, or for a dry run the job XML followed by the materialized actions
144         * @throws StoreException declared by the signature
145         * @throws CommandException on failure; one raised by a step is rethrown with its own error code
146         */
147        @Override
148        protected String call(CoordinatorStore store) throws StoreException, CommandException {
149            String jobId = null;
150            log.info("STARTED Coordinator Submit");
151            incrJobCounter(1);
152            CoordinatorJobBean coordJob = new CoordinatorJobBean();
153            try {
154                XLog.Info.get().setParameter(DagXLogInfoService.TOKEN, conf.get(OozieClient.LOG_TOKEN));
155                mergeDefaultConfig();
156                String appXml = readAndValidateXml();
157                coordJob.setOrigJobXml(appXml);
158                log.debug("jobXml after initial validation " + XmlUtils.prettyPrint(appXml).toString());
159                appXml = XmlUtils.removeComments(appXml);
160                initEvaluators();
161                Element eJob = basicResolveAndIncludeDS(appXml, conf, coordJob);
162                log.debug("jobXml after all validation " + XmlUtils.prettyPrint(eJob).toString());
163                jobId = storeToDB(eJob, store, coordJob);
164                setLogInfo(coordJob);
165                log = XLog.getLog(getClass());
166                if (!dryrun) {
167                    // Materialize the next hour (3600 secs) right away instead of waiting for the periodic service.
168                    queueCallable(new CoordJobMatLookupCommand(jobId, 3600), 100);
169                }
170                else {
171                    Date startTime = coordJob.getStartTime();
172                    Date endTime = new Date(startTime.getTime() + (3600 * 1000));
173                    Date jobEndTime = coordJob.getEndTime();
174                    if (endTime.compareTo(jobEndTime) > 0) {
175                        endTime = jobEndTime;
176                    }
177                    jobId = coordJob.getId();
178                    log.info("[" + jobId + "]: Update status to PREMATER");
179                    coordJob.setStatus(CoordinatorJob.Status.PREMATER);
180                    CoordActionMaterializeCommand matCommand = new CoordActionMaterializeCommand(jobId, startTime, endTime);
181                    Configuration jobConf = null;
182                    try {
183                        jobConf = new XConfiguration(new StringReader(coordJob.getConf()));
184                    }
185                    catch (IOException e1) {
186                        // Best effort: materialization is still attempted, with a null configuration.
187                        log.warn("Configuration parse error. read from DB :" + coordJob.getConf(), e1);
188                    }
189                    String action = matCommand.materializeJobs(true, coordJob, jobConf, null);
190                    return coordJob.getJobXml() + System.getProperty("line.separator")
191                            + "***actions for instance***" + action;
192                }
193            }
194            catch (CoordinatorJobException ex) {
195                log.warn("ERROR:  ", ex);
196                throw new CommandException(ex);
197            }
198            catch (CommandException ex) {
199                // Already carries a specific error code; do not let the generic handler re-wrap it as E0803.
200                log.warn("ERROR:  ", ex);
201                throw ex;
202            }
203            catch (IllegalArgumentException iex) {
204                log.warn("ERROR:  ", iex);
205                throw new CommandException(ErrorCode.E1003, iex);
206            }
207            catch (Exception ex) {
208                log.warn("ERROR:  ", ex);
209                throw new CommandException(ErrorCode.E0803, ex);
210            }
211            log.info("ENDED Coordinator Submit jobId=" + jobId);
212            return jobId;
213        }
214    
215        /**
216         * Read the application XML and validate against coordinator Schema
217         *
218         * @return validated coordinator XML
219         * @throws CoordinatorJobException
220         */
221        private String readAndValidateXml() throws CoordinatorJobException {
222            String appPath = ParamChecker.notEmpty(conf.get(OozieClient.COORDINATOR_APP_PATH),
223                                                   OozieClient.COORDINATOR_APP_PATH);// TODO: COORDINATOR_APP_PATH
224            String coordXml = readDefinition(appPath);
225            validateXml(coordXml);
226            return coordXml;
227        }
228    
229        /**
230         * Checks the given XML text against the coordinator schema obtained from the {@code SchemaService}.
231         *
232         * @param xmlContent coordinator XML to check
233         * @throws CoordinatorJobException with E0701 when the validator raises a SAXException, or with E0702 when
234         *         it raises an IOException
235         */
236        private void validateXml(String xmlContent) throws CoordinatorJobException {
237            SchemaService schemaService = Services.get().get(SchemaService.class);
238            Validator coordValidator = schemaService.getSchema(SchemaName.COORDINATOR).newValidator();
239            StreamSource xmlSource = new StreamSource(new StringReader(xmlContent));
240            try {
241                coordValidator.validate(xmlSource);
242            }
243            catch (SAXException saxEx) {
244                log.warn("SAXException :", saxEx);
245                throw new CoordinatorJobException(ErrorCode.E0701, saxEx.getMessage(), saxEx);
246            }
247            catch (IOException ioEx) {
248                log.warn("IOException :", ioEx);
249                throw new CoordinatorJobException(ErrorCode.E0702, ioEx.getMessage(), ioEx);
250            }
251        }
252    
253        /**
254         * Merges coord-config-default.xml (if it exists) into conf as defaults and checks for disallowed properties.
255         * @throws CommandException E0702 on an IOException; also wraps a HadoopAccessorException
256         */
257        protected void mergeDefaultConfig() throws CommandException {
258            Path coordAppDir = new Path(conf.get(OozieClient.COORDINATOR_APP_PATH)).getParent();
259            Path configDefault = new Path(coordAppDir, CONFIG_DEFAULT);
260            Configuration fsConfig = CoordUtils.getHadoopConf(conf); // NOTE(review): never used below; call kept as-is
261            try {
262                String user = ParamChecker.notEmpty(conf.get(OozieClient.USER_NAME), OozieClient.USER_NAME);
263                String group = ParamChecker.notEmpty(conf.get(OozieClient.GROUP_NAME), OozieClient.GROUP_NAME);
264                FileSystem fs = Services.get().get(HadoopAccessorService.class).createFileSystem(user, group,
265                                                                                                 configDefault.toUri(), conf);
266                if (fs.exists(configDefault)) {
267                    java.io.InputStream defaultStream = fs.open(configDefault); // closed below, even on failure
268                    try {
269                        Configuration defaultConf = new XConfiguration(defaultStream);
270                        PropertiesUtils.checkDisallowedProperties(defaultConf, DISALLOWED_DEFAULT_PROPERTIES);
271                        XConfiguration.injectDefaults(defaultConf, conf);
272                    }
273                    finally {
274                        defaultStream.close();
275                    }
276                }
277                else {
278                    log.info("configDefault Doesn't exist " + configDefault);
279                }
280                PropertiesUtils.checkDisallowedProperties(conf, DISALLOWED_USER_PROPERTIES);
281            }
282            catch (IOException e) {
283                throw new CommandException(ErrorCode.E0702, e.getMessage() + " : Problem reading default config " + configDefault, e);
284            }
285            catch (HadoopAccessorException e) {
286                throw new CommandException(e);
287            }
288            log.debug("Merged CONF :" + XmlUtils.prettyPrint(conf).toString());
289        }
290    
291        /**
292         * Resolves the job XML via {@code resolveInitial} and then applies {@code includeDataSets} to the result.
293         *
294         * @param appXml original job XML
295         * @param conf job configuration
296         * @param coordJob coordinator job bean, handed to {@code resolveInitial} to be populated
297         * @return the resolved job XML element after {@code includeDataSets} has run on it
298         * @throws CoordinatorJobException propagated from either step
299         * @throws Exception propagated from either step
300         */
301        public Element basicResolveAndIncludeDS(String appXml, Configuration conf, CoordinatorJobBean coordJob)
302                throws CoordinatorJobException, Exception {
303            final Element resolvedJob = resolveInitial(conf, appXml, coordJob);
304            includeDataSets(resolvedJob, conf);
305            return resolvedJob;
306        }
307    
308        /**
309         * Insert data set into data-in and data-out tags.
310         *
311         * @param eAppXml : coordinator application XML
312         * @param eDatasets : DataSet XML
313         * @return updated application
314         */
315        private void insertDataSet(Element eAppXml, Element eDatasets) {
316            // Adding DS definition in the coordinator XML
317            Element inputList = eAppXml.getChild("input-events", eAppXml.getNamespace());
318            if (inputList != null) {
319                for (Element dataIn : (List<Element>) inputList.getChildren("data-in", eAppXml.getNamespace())) {
320                    Element eDataset = findDataSet(eDatasets, dataIn.getAttributeValue("dataset"));
321                    dataIn.getContent().add(0, eDataset);
322                }
323            }
324            Element outputList = eAppXml.getChild("output-events", eAppXml.getNamespace());
325            if (outputList != null) {
326                for (Element dataOut : (List<Element>) outputList.getChildren("data-out", eAppXml.getNamespace())) {
327                    Element eDataset = findDataSet(eDatasets, dataOut.getAttributeValue("dataset"));
328                    dataOut.getContent().add(0, eDataset);
329                }
330            }
331        }
332    
333        /**
334         * Returns a detached copy of the "dataset" child named {@code name}; throws RuntimeException if there is none.
335         * @param eDatasets element whose "dataset" children are searched
336         * @param name queried dataset name
337         */
338        private static Element findDataSet(Element eDatasets, String name) {
339            List<Element> candidates = (List<Element>) eDatasets.getChildren("dataset", eDatasets.getNamespace());
340            for (Element candidate : candidates) {
341                if (!candidate.getAttributeValue("name").equals(name)) {
342                    continue;
343                }
344                Element copy = (Element) candidate.clone();
345                copy.detach();
346                return copy;
347            }
348            throw new RuntimeException("undefined dataset: " + name);
349        }
350    
351        /**
352         * Creates the frequency, no-function, instance and SLA evaluators from conf; {@code evalData} is not set here.
353         */
354        protected void initEvaluators() {
355            this.evalFreq = CoordELEvaluator.createELEvaluatorForGroup(this.conf, "coord-job-submit-freq");
356            this.evalNofuncs = CoordELEvaluator.createELEvaluatorForGroup(this.conf, "coord-job-submit-nofuncs");
357            this.evalInst = CoordELEvaluator.createELEvaluatorForGroup(this.conf, "coord-job-submit-instances");
358            this.evalSla = CoordELEvaluator.createELEvaluatorForGroup(this.conf, "coord-sla-submit");
359        }
360    
361        /**
362         * Parses the job XML and resolves the entries that only depend on the job configuration, populating
363         * {@code coordJob} along the way.
364         * <p>
365         * Handled in order: the frequency (which also yields the "freq_timeunit" and "end_of_duration" attributes),
366         * start, end and timezone attributes; the timeout, concurrency and execution controls; the dataset include
367         * entries and datasets; the input/output events; the workflow app-path and its configuration properties; and
368         * finally the SLA section. The evaluators must have been created with {@link #initEvaluators()} beforehand.
369         * <p>
370         * A control that is absent or resolves to an empty string takes its default: the value configured under
371         * {@link #CONF_DEFAULT_TIMEOUT_NORMAL} for timeout, -1 for concurrency and FIFO for execution.
372         *
373         * @param conf job configuration
374         * @param appXml original job XML
375         * @param coordJob coordinator job bean to be populated
376         * @return the resolved job XML element
377         * @throws CoordinatorJobException if an EL expression cannot be evaluated
378         * @throws Exception on any other parsing or validation failure
379         * @throws NullPointerException if the action or workflow element is missing
380         */
381        protected Element resolveInitial(Configuration conf, String appXml, CoordinatorJobBean coordJob)
382                throws CoordinatorJobException, Exception {
383            Element eAppXml = XmlUtils.parseXml(appXml);
384            Namespace ns = eAppXml.getNamespace();
385    
386            // frequency, plus the time unit / end-of-duration variables left on evalFreq by its evaluation
387            String val = resolveAttribute("frequency", eAppXml, evalFreq);
388            int ival = ParamChecker.checkInteger(val, "frequency");
389            ParamChecker.checkGTZero(ival, "frequency");
390            coordJob.setFrequency(ival);
391            TimeUnit timeUnit = (TimeUnit) evalFreq.getVariable("timeunit");
392            if (timeUnit == null) {
393                timeUnit = TimeUnit.MINUTE;
394            }
395            addAnAttribute("freq_timeunit", eAppXml, timeUnit.toString());
396            coordJob.setTimeUnit(CoordinatorJob.Timeunit.valueOf(timeUnit.toString()));
397            TimeUnit endOfDuration = (TimeUnit) evalFreq.getVariable("endOfDuration");
398            if (endOfDuration == null) {
399                endOfDuration = TimeUnit.NONE;
400            }
401            addAnAttribute("end_of_duration", eAppXml, endOfDuration.toString());
402    
403            // start time, end time, time zone
404            val = resolveAttribute("start", eAppXml, evalNofuncs);
405            ParamChecker.checkUTC(val, "start");
406            coordJob.setStartTime(DateUtils.parseDateUTC(val));
407            val = resolveAttribute("end", eAppXml, evalNofuncs);
408            ParamChecker.checkUTC(val, "end");
409            coordJob.setEndTime(DateUtils.parseDateUTC(val));
410            val = resolveAttribute("timezone", eAppXml, evalNofuncs);
411            ParamChecker.checkTimeZone(val, "timezone");
412            coordJob.setTimeZone(val);
413    
414            // controls: compare by content (isEmpty), not by reference, so an empty tag also gets the default
415            Element controls = eAppXml.getChild("controls", ns);
416            val = resolveTagContents("timeout", controls, evalNofuncs);
417            if (val.isEmpty()) {
418                val = Services.get().getConf().get(CONF_DEFAULT_TIMEOUT_NORMAL);
419            }
420            coordJob.setTimeout(ParamChecker.checkInteger(val, "timeout"));
421            val = resolveTagContents("concurrency", controls, evalNofuncs);
422            if (val.isEmpty()) {
423                val = "-1";
424            }
425            coordJob.setConcurrency(ParamChecker.checkInteger(val, "concurrency"));
426            val = resolveTagContents("execution", controls, evalNofuncs);
427            if (val.isEmpty()) {
428                val = Execution.FIFO.toString();
429            }
430            // Check membership first so an unknown value is rejected by ParamChecker rather than by Enum.valueOf.
431            String[] acceptedVals = {Execution.LIFO.toString(), Execution.FIFO.toString(), Execution.LAST_ONLY.toString()};
432            ParamChecker.isMember(val, acceptedVals, "execution");
433            coordJob.setExecution(Execution.valueOf(val));
434    
435            // datasets and input/output events
436            resolveTagContents("include", eAppXml.getChild("datasets", ns), evalNofuncs);
437            resolveDataSets(eAppXml);
438            HashMap<String, String> dataNameList = new HashMap<String, String>();
439            resolveIOEvents(eAppXml, dataNameList);
440    
441            // NOTE: a missing action or workflow element results in a NullPointerException in the lines below.
442            Element workflow = eAppXml.getChild("action", ns).getChild("workflow", ns);
443            resolveTagContents("app-path", workflow, evalNofuncs);
444            Element configElem = workflow.getChild("configuration", ns);
445            evalData = CoordELEvaluator.createELEvaluatorForDataEcho(conf, "coord-job-submit-data", dataNameList);
446            if (configElem != null) {
447                for (Element propElem : (List<Element>) configElem.getChildren("property", configElem.getNamespace())) {
448                    resolveTagContents("name", propElem, evalData);
449                    // Values are only checked for resolvability: evaluate a clone so the XML keeps the original text.
450                    Element tmpProp = (Element) propElem.clone();
451                    resolveTagContents("value", tmpProp, evalData);
452                }
453            }
454            resolveSLA(eAppXml, coordJob);
455            return eAppXml;
456        }
457    
458        /**
459         * EL-evaluates the SLA "info" element of the action, if there is one, and validates the result against the
460         * SLA_ORIGINAL schema. The evaluated text is only validated; it is not written back into the job XML.
461         * @throws CommandException E1004 if evaluation or validation fails
462         */
463        private void resolveSLA(Element eAppXml, CoordinatorJobBean coordJob) throws CommandException {
464            Namespace slaNs = Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI);
465            Element eSla = eAppXml.getChild("action", eAppXml.getNamespace()).getChild("info", slaNs);
466            if (eSla == null) {
467                return;
468            }
469            String slaXml = XmlUtils.prettyPrint(eSla).toString();
470            try {
471                XmlUtils.validateData(evalSla.evaluate(slaXml, String.class), SchemaName.SLA_ORIGINAL);
472            }
473            catch (Exception e) {
474                throw new CommandException(ErrorCode.E1004, "Validation ERROR :" + e.getMessage(), e);
475            }
476        }
477    
478        /**
479         * Validates the input-events/data-in and output-events/data-out entries. The instance expressions are
480         * evaluated on a clone of the job element, so the element passed in is never modified; only the event
481         * names are recorded in {@code dataNameList}.
482         * <p>
483         * A name is recorded before it is checked for duplication.
484         *
485         * @param eJobOrg job element; left untouched
486         * @param dataNameList receives each event name mapped to "data-in" or "data-out"
487         * @throws CoordinatorJobException if an instance expression cannot be evaluated
488         * @throws RuntimeException if two data-in entries, or two data-out entries, share a name
489         */
490        private void resolveIOEvents(Element eJobOrg, HashMap<String, String> dataNameList) throws CoordinatorJobException {
491            // Work on a clone so that nothing is updated in the original element.
492            Element eJob = (Element) eJobOrg.clone();
493    
494            // input-events/data-in
495            Element inputList = eJob.getChild("input-events", eJob.getNamespace());
496            if (inputList != null) {
497                Set<String> inputNames = new TreeSet<String>();
498                for (Element dataIn : (List<Element>) inputList.getChildren("data-in", eJob.getNamespace())) {
499                    String dataInName = dataIn.getAttributeValue("name");
500                    dataNameList.put(dataInName, "data-in");
501                    // add() returns false when the name is already in the set
502                    if (!inputNames.add(dataInName)) {
503                        throw new RuntimeException("Duplicate dataIn name " + dataInName);
504                    }
505                    resolveTagContents("instance", dataIn, evalInst);
506                    resolveTagContents("start-instance", dataIn, evalInst);
507                    resolveTagContents("end-instance", dataIn, evalInst);
508                }
509            }
510    
511            // output-events/data-out
512            Element outputList = eJob.getChild("output-events", eJob.getNamespace());
513            if (outputList != null) {
514                Set<String> outputNames = new TreeSet<String>();
515                for (Element dataOut : (List<Element>) outputList.getChildren("data-out", eJob.getNamespace())) {
516                    String dataOutName = dataOut.getAttributeValue("name");
517                    dataNameList.put(dataOutName, "data-out");
518                    if (!outputNames.add(dataOutName)) {
519                        throw new RuntimeException("Duplicate dataOut name " + dataOutName);
520                    }
521                    resolveTagContents("instance", dataOut, evalInst);
522                }
523            }
524        }
525    
526        /**
527         * Sets the named attribute on the element by delegating to {@code Element.setAttribute}.
528         *
529         * @param attrName attribute name
530         * @param elem element that receives the attribute
531         * @param value value of the attribute
532         */
533        private void addAnAttribute(String attrName, Element elem, String value) {
534            elem.setAttribute(attrName, value);
535        }
536    
537        /**
538         * Resolve Data set using job configuration.
539         *
540         * @param eAppXml : Job Element XML
541         * @throws Exception
542         */
543        private void resolveDataSets(Element eAppXml) throws Exception {
544            Element datasetList = eAppXml.getChild("datasets", eAppXml.getNamespace());
545            if (datasetList != null) {
546    
547                List<Element> dsElems = datasetList.getChildren("dataset", eAppXml.getNamespace());
548                resolveDataSets(dsElems);
549                resolveTagContents("app-path", eAppXml.getChild("action", eAppXml.getNamespace()).getChild("workflow",
550                                                                                                           eAppXml.getNamespace()), evalNofuncs);
551            }
552        }
553    
554        /**
555         * Resolves the frequency, initial-instance and timezone attributes and the uri-template and done-flag tags of
556         * each dataset, and records the frequency's time unit and end-of-duration as attributes on the dataset.
557         *
558         * @param dsElems dataset elements; modified in place
559         * @throws CoordinatorJobException if an EL expression cannot be evaluated
560         */
561        private void resolveDataSets(List<Element> dsElems) throws CoordinatorJobException {
562            for (Element dsElem : dsElems) {
563                // Start every dataset from the default time unit and end-of-duration.
564                evalFreq.setVariable("timeunit", TimeUnit.MINUTE);
565                evalFreq.setVariable("endOfDuration", TimeUnit.NONE);
566    
567                String freq = resolveAttribute("frequency", dsElem, evalFreq);
568                ParamChecker.checkGTZero(ParamChecker.checkInteger(freq, "frequency"), "frequency");
569    
570                Object unit = evalFreq.getVariable("timeunit");
571                addAnAttribute("freq_timeunit", dsElem,
572                               unit == null ? TimeUnit.MINUTE.toString() : ((TimeUnit) unit).toString());
573                Object eod = evalFreq.getVariable("endOfDuration");
574                addAnAttribute("end_of_duration", dsElem,
575                               eod == null ? TimeUnit.NONE.toString() : ((TimeUnit) eod).toString());
576    
577                String initialInstance = resolveAttribute("initial-instance", dsElem, evalNofuncs);
578                ParamChecker.checkUTC(initialInstance, "initial-instance");
579                String timezone = resolveAttribute("timezone", dsElem, evalNofuncs);
580                ParamChecker.checkTimeZone(timezone, "timezone");
581                resolveTagContents("uri-template", dsElem, evalNofuncs);
582                resolveTagContents("done-flag", dsElem, evalNofuncs);
583            }
584        }
585    
586        /**
587         * Evaluates the text of every child of {@code elem} named {@code tagName} (for example
588         * {@code <timeout> 10 </timeout>}) and replaces the child's content with the result. The text is trimmed
589         * before it is evaluated.
590         *
591         * @param tagName name of the child tags to resolve
592         * @param elem parent element; may be null, in which case nothing is resolved
593         * @param eval evaluator handed to {@code CoordELFunctions.evalAndWrap}
594         * @return the concatenation of all resolved values; the "" literal itself when no tag was resolved
595         * @throws CoordinatorJobException E1004 wrapping any exception raised during evaluation
596         */
597        private String resolveTagContents(String tagName, Element elem, ELEvaluator eval) throws CoordinatorJobException {
598            String resolved = "";
599            if (elem == null) {
600                return resolved;
601            }
602            List<Element> matches = (List<Element>) elem.getChildren(tagName, elem.getNamespace());
603            for (Element match : matches) {
604                if (match == null) {
605                    continue;
606                }
607                String replacement;
608                try {
609                    replacement = CoordELFunctions.evalAndWrap(eval, match.getText().trim());
610                }
611                catch (Exception e) {
612                    throw new CoordinatorJobException(ErrorCode.E1004, e.getMessage(), e);
613                }
614                match.removeContent();
615                match.addContent(replacement);
616                resolved += replacement;
617            }
618            return resolved;
619        }
620    
621        /**
622         * Resolve an attribute value.
623         *
624         * @param attrName : Attribute name.
625         * @param elem : XML Element where attribute is defiend
626         * @param eval : ELEvaluator used to resolve
627         * @return Resolved attribute value
628         * @throws CoordinatorJobException
629         */
630        private String resolveAttribute(String attrName, Element elem, ELEvaluator eval) throws CoordinatorJobException {
631            Attribute attr = elem.getAttribute(attrName);
632            String val = null;
633            if (attr != null) {
634                try {
635                    val = CoordELFunctions.evalAndWrap(eval, attr.getValue().trim());
636    
637                }
638                catch (Exception e) {
639                    // e.printStackTrace();
640                    throw new CoordinatorJobException(ErrorCode.E1004, e.getMessage(), e);
641                }
642                attr.setValue(val);
643            }
644            return val;
645        }
646    
647        /**
648         * Include referred Datasets into XML.
649         *
650         * @param resolvedXml : Job XML element.
651         * @param conf : Job configuration
652         * @throws CoordinatorJobException
653         */
654        protected void includeDataSets(Element resolvedXml, Configuration conf) throws CoordinatorJobException
655            /* throws Exception */ {
656            Element datasets = resolvedXml.getChild("datasets", resolvedXml.getNamespace());
657            Element allDataSets = new Element("all_datasets", resolvedXml.getNamespace());
658            List<String> dsList = new ArrayList<String>();
659            if (datasets != null) {
660                for (Element includeElem : (List<Element>) datasets.getChildren("include", datasets.getNamespace())) {
661                    String incDSFile = includeElem.getTextTrim();
662                    // log.warn(" incDSFile " + incDSFile);
663                    includeOneDSFile(incDSFile, dsList, allDataSets, datasets.getNamespace());
664                }
665                for (Element e : (List<Element>) datasets.getChildren("dataset", datasets.getNamespace())) {
666                    String dsName = (String) e.getAttributeValue("name");
667                    if (dsList.contains(dsName)) {// Override with this DS
668                        // Remove old DS
669                        removeDataSet(allDataSets, dsName);
670                        // throw new RuntimeException("Duplicate Dataset " +
671                        // dsName);
672                    }
673                    else {
674                        dsList.add(dsName);
675                    }
676                    allDataSets.addContent((Element) e.clone());
677                }
678            }
679            insertDataSet(resolvedXml, allDataSets);
680            resolvedXml.removeChild("datasets", resolvedXml.getNamespace());
681        }
682    
683        /**
684         * Include One Dataset file.
685         *
686         * @param incDSFile : Include data set filename.
687         * @param dsList :List of dataset names to verify the duplicate.
688         * @param allDataSets : Element that includes all dataset definitions.
689         * @param dsNameSpace : Data set name space
690         * @throws CoordinatorJobException
691         * @throws Exception
692         */
693        private void includeOneDSFile(String incDSFile, List<String> dsList, Element allDataSets, Namespace dsNameSpace)
694                throws CoordinatorJobException {
695            Element tmpDataSets = null;
696            try {
697                String dsXml = readDefinition(incDSFile);
698                log.debug("DSFILE :" + incDSFile + "\n" + dsXml);
699                tmpDataSets = XmlUtils.parseXml(dsXml);
700            }
701            /*
702             * catch (IOException iex) {XLog.getLog(getClass()).warn(
703             * "Error reading included dataset file [{0}].  Message [{1}]",
704             * incDSFile, iex.getMessage()); throw new
705             * CommandException(ErrorCode.E0803, iex.getMessage()); }
706             */
707            catch (JDOMException e) {
708                log.warn("Error parsing included dataset [{0}].  Message [{1}]", incDSFile, e.getMessage());
709                throw new CoordinatorJobException(ErrorCode.E0700, e.getMessage());
710            }
711            resolveDataSets((List<Element>) tmpDataSets.getChildren("dataset"));
712            for (Element e : (List<Element>) tmpDataSets.getChildren("dataset")) {
713                String dsName = (String) e.getAttributeValue("name");
714                if (dsList.contains(dsName)) {
715                    throw new RuntimeException("Duplicate Dataset " + dsName);
716                }
717                dsList.add(dsName);
718                Element tmp = (Element) e.clone();
719                // TODO: Don't like to over-write the external/include DS's
720                // namespace
721                tmp.setNamespace(dsNameSpace);// TODO:
722                tmp.getChild("uri-template").setNamespace(dsNameSpace);
723                if (e.getChild("done-flag") != null) {
724                    tmp.getChild("done-flag").setNamespace(dsNameSpace);
725                }
726                allDataSets.addContent(tmp);
727            }
728            // nested include
729            for (Element includeElem : (List<Element>) tmpDataSets.getChildren("include", tmpDataSets.getNamespace())) {
730                String incFile = includeElem.getTextTrim();
731                // log.warn("incDSFile "+ incDSFile);
732                includeOneDSFile(incFile, dsList, allDataSets, dsNameSpace);
733            }
734        }
735    
736        /**
737         * Remove a dataset from a list of dataset.
738         *
739         * @param eDatasets : List of dataset
740         * @param name : Dataset name to be removed.
741         */
742        private static void removeDataSet(Element eDatasets, String name) {
743            for (Element eDataset : (List<Element>) eDatasets.getChildren("dataset", eDatasets.getNamespace())) {
744                if (eDataset.getAttributeValue("name").equals(name)) {
745                    eDataset.detach();
746                }
747            }
748            throw new RuntimeException("undefined dataset: " + name);
749        }
750    
751        /**
752         * Read workflow definition.
753         *
754         * @param appPath application path.
755         * @param user user name.
756         * @param group group name.
757         * @param autToken authentication token.
758         * @return workflow definition.
759         * @throws WorkflowException thrown if the definition could not be read.
760         */
761        protected String readDefinition(String appPath) throws CoordinatorJobException {
762            String user = ParamChecker.notEmpty(conf.get(OozieClient.USER_NAME), OozieClient.USER_NAME);
763            String group = ParamChecker.notEmpty(conf.get(OozieClient.GROUP_NAME), OozieClient.GROUP_NAME);
764            Configuration confHadoop = CoordUtils.getHadoopConf(conf);
765            try {
766                URI uri = new URI(appPath);
767                log.debug("user =" + user + " group =" + group);
768                FileSystem fs = Services.get().get(HadoopAccessorService.class).createFileSystem(user, group, uri, conf);
769                Path p = new Path(uri.getPath());
770    
771                // Reader reader = new InputStreamReader(fs.open(new Path(uri
772                // .getPath(), fileName)));
773                Reader reader = new InputStreamReader(fs.open(p));// TODO
774                StringWriter writer = new StringWriter();
775                IOUtils.copyCharStream(reader, writer);
776                return writer.toString();
777            }
778            catch (IOException ex) {
779                log.warn("IOException :" + XmlUtils.prettyPrint(confHadoop), ex);
780                throw new CoordinatorJobException(ErrorCode.E1001, ex.getMessage(), ex); // TODO:
781            }
782            catch (URISyntaxException ex) {
783                log.warn("URISyException :" + ex.getMessage());
784                throw new CoordinatorJobException(ErrorCode.E1002, appPath, ex.getMessage(), ex);// TODO:
785            }
786            catch (HadoopAccessorException ex) {
787                throw new CoordinatorJobException(ex);
788            }
789            catch (Exception ex) {
790                log.warn("Exception :", ex);
791                throw new CoordinatorJobException(ErrorCode.E1001, ex.getMessage(), ex);// TODO:
792            }
793        }
794    
795        /**
796         * Write a Coordinator Job into database
797         *
798         * @param eJob : XML element of job
799         * @param store : Coordinator Store to write.
800         * @param coordJob : Coordinator job bean
801         * @return Job if.
802         * @throws StoreException
803         */
804        private String storeToDB(Element eJob, CoordinatorStore store, CoordinatorJobBean coordJob) throws StoreException {
805            String jobId = Services.get().get(UUIDService.class).generateId(ApplicationType.COORDINATOR);
806            coordJob.setId(jobId);
807            coordJob.setAuthToken(this.authToken);
808            coordJob.setAppName(eJob.getAttributeValue("name"));
809            coordJob.setAppPath(conf.get(OozieClient.COORDINATOR_APP_PATH));
810            coordJob.setStatus(CoordinatorJob.Status.PREP);
811            coordJob.setCreatedTime(new Date()); // TODO: Do we need that?
812            coordJob.setUser(conf.get(OozieClient.USER_NAME));
813            coordJob.setGroup(conf.get(OozieClient.GROUP_NAME));
814            coordJob.setConf(XmlUtils.prettyPrint(conf).toString());
815            coordJob.setJobXml(XmlUtils.prettyPrint(eJob).toString());
816            coordJob.setLastActionNumber(0);
817            coordJob.setLastModifiedTime(new Date());
818    
819            if (!dryrun) {
820                store.insertCoordinatorJob(coordJob);
821            }
822            return jobId;
823        }
824    
825        /**
826         * For unit-testing only. Will ultimately go away
827         *
828         * @param args
829         * @throws Exception
830         * @throws JDOMException
831         */
832        public static void main(String[] args) throws Exception {
833            // TODO Auto-generated method stub
834            // Configuration conf = new XConfiguration(IOUtils.getResourceAsReader(
835            // "org/apache/oozie/coord/conf.xml", -1));
836    
837            Configuration conf = new XConfiguration();
838    
839            // base case
840            // conf.set(OozieClient.COORDINATOR_APP_PATH,
841            // "file:///Users/danielwo/oozie/workflows/coord/test1/");
842    
843            // no input datasets
844            // conf.set(OozieClient.COORDINATOR_APP_PATH,
845            // "file:///Users/danielwo/oozie/workflows/coord/coord_noinput/");
846            // conf.set(OozieClient.COORDINATOR_APP_PATH,
847            // "file:///Users/danielwo/oozie/workflows/coord/coord_use_apppath/");
848    
849            // only 1 instance
850            // conf.set(OozieClient.COORDINATOR_APP_PATH,
851            // "file:///Users/danielwo/oozie/workflows/coord/coord_oneinstance/");
852    
853            // no local props in xml
854            // conf.set(OozieClient.COORDINATOR_APP_PATH,
855            // "file:///Users/danielwo/oozie/workflows/coord/coord_noprops/");
856    
857            conf.set(OozieClient.COORDINATOR_APP_PATH,
858                     "file:///homes/test/workspace/sandbox_krishna/oozie-main/core/src/main/java/org/apache/oozie/coord/");
859            conf.set(OozieClient.USER_NAME, "test");
860            // conf.set(OozieClient.USER_NAME, "danielwo");
861            conf.set(OozieClient.GROUP_NAME, "other");
862            // System.out.println("appXml :"+ appXml + "\n conf :"+ conf);
863            new Services().init();
864            try {
865                CoordSubmitCommand sc = new CoordSubmitCommand(conf, "TESTING");
866                String jobId = sc.call();
867                System.out.println("Job Id " + jobId);
868                Thread.sleep(80000);
869            }
870            finally {
871                Services.get().destroy();
872            }
873        }
874    }