/**
 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. See accompanying LICENSE file.
 */
package org.apache.oozie.command.coord;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Validator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.oozie.CoordinatorJobBean;
import org.apache.oozie.ErrorCode;
import org.apache.oozie.client.CoordinatorJob;
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.client.CoordinatorJob.Execution;
import org.apache.oozie.command.CommandException;
import org.apache.oozie.coord.CoordELEvaluator;
import org.apache.oozie.coord.CoordELFunctions;
import org.apache.oozie.coord.CoordUtils;
import org.apache.oozie.coord.CoordinatorJobException;
import org.apache.oozie.coord.TimeUnit;
import org.apache.oozie.service.DagXLogInfoService;
import org.apache.oozie.service.HadoopAccessorException;
import org.apache.oozie.service.SchemaService;
import org.apache.oozie.service.Service;
import org.apache.oozie.service.Services;
import org.apache.oozie.service.UUIDService;
import org.apache.oozie.service.HadoopAccessorService;
import org.apache.oozie.service.WorkflowAppService;
import org.apache.oozie.service.SchemaService.SchemaName;
import org.apache.oozie.service.UUIDService.ApplicationType;
import org.apache.oozie.store.CoordinatorStore;
import org.apache.oozie.store.StoreException;
import org.apache.oozie.util.DateUtils;
import org.apache.oozie.util.ELEvaluator;
import org.apache.oozie.util.IOUtils;
import org.apache.oozie.util.ParamChecker;
import org.apache.oozie.util.PropertiesUtils;
import org.apache.oozie.util.XConfiguration;
import org.apache.oozie.util.XLog;
import org.apache.oozie.util.XmlUtils;
import org.apache.oozie.workflow.WorkflowException;
import org.jdom.Attribute;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.xml.sax.SAXException;

/**
 * This class provides the functionality to resolve a coordinator job XML and write the job information into a DB
 * table.
 * <p/>
 * Specifically it performs the following functions:
 * 1. Resolve all the variables or properties using the job configuration.
 * 2. Insert all dataset definitions as part of the <data-in> and <data-out> tags.
 * 3. Validate the XML at runtime.
 */
public class CoordSubmitCommand extends CoordinatorCommand<String> {

    private Configuration conf;
    private String authToken;
    private boolean dryrun;

    public static final String CONFIG_DEFAULT = "coord-config-default.xml";
    public static final String COORDINATOR_XML_FILE = "coordinator.xml";

    private static final Set<String> DISALLOWED_USER_PROPERTIES = new HashSet<String>();
    private static final Set<String> DISALLOWED_DEFAULT_PROPERTIES = new HashSet<String>();

    /**
     * Default timeout for normal jobs, in minutes, after which coordinator input check will timeout
     */
    public static final String CONF_DEFAULT_TIMEOUT_NORMAL = Service.CONF_PREFIX + "coord.normal.default.timeout";

    private XLog log = XLog.getLog(getClass());
    private ELEvaluator evalFreq = null;
    private ELEvaluator evalNofuncs = null;
    private ELEvaluator evalData = null;
    private ELEvaluator evalInst = null;
    private ELEvaluator evalSla = null;

    static {
        String[] badUserProps = {PropertiesUtils.YEAR, PropertiesUtils.MONTH, PropertiesUtils.DAY,
                PropertiesUtils.HOUR, PropertiesUtils.MINUTE, PropertiesUtils.DAYS, PropertiesUtils.HOURS,
                PropertiesUtils.MINUTES, PropertiesUtils.KB, PropertiesUtils.MB, PropertiesUtils.GB,
                PropertiesUtils.TB, PropertiesUtils.PB, PropertiesUtils.RECORDS, PropertiesUtils.MAP_IN,
                PropertiesUtils.MAP_OUT, PropertiesUtils.REDUCE_IN, PropertiesUtils.REDUCE_OUT, PropertiesUtils.GROUPS};
        PropertiesUtils.createPropertySet(badUserProps, DISALLOWED_USER_PROPERTIES);

        String[] badDefaultProps = {PropertiesUtils.HADOOP_USER, PropertiesUtils.HADOOP_UGI,
                WorkflowAppService.HADOOP_JT_KERBEROS_NAME, WorkflowAppService.HADOOP_NN_KERBEROS_NAME};
        PropertiesUtils.createPropertySet(badUserProps, DISALLOWED_DEFAULT_PROPERTIES);
        PropertiesUtils.createPropertySet(badDefaultProps, DISALLOWED_DEFAULT_PROPERTIES);
    }

    /**
     * Constructor to create the Coordinator Submit Command.
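     * <p/>
     * A minimal usage sketch, mirroring the test main() method at the bottom of this class; the application
     * path, user, group and token values below are placeholders, not defaults taken from this codebase:
     * <pre>
     * new Services().init();
     * Configuration conf = new XConfiguration();
     * conf.set(OozieClient.COORDINATOR_APP_PATH, "hdfs://localhost:9000/user/joe/coord-app/");
     * conf.set(OozieClient.USER_NAME, "joe");
     * conf.set(OozieClient.GROUP_NAME, "users");
     * String jobId = new CoordSubmitCommand(conf, "auth-token").call();
     * </pre>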
     *
     * @param conf : Configuration for Coordinator job
     * @param authToken : To be used for authentication
     */
    public CoordSubmitCommand(Configuration conf, String authToken) {
        super("coord_submit", "coord_submit", 1, XLog.STD);
        this.conf = ParamChecker.notNull(conf, "conf");
        this.authToken = ParamChecker.notEmpty(authToken, "authToken");
    }

    public CoordSubmitCommand(boolean dryrun, Configuration conf, String authToken) {
        super("coord_submit", "coord_submit", 1, XLog.STD, dryrun);
        this.conf = ParamChecker.notNull(conf, "conf");
        this.authToken = ParamChecker.notEmpty(authToken, "authToken");
        this.dryrun = dryrun;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.oozie.command.Command#call(org.apache.oozie.store.Store)
     */
    @Override
    protected String call(CoordinatorStore store) throws StoreException, CommandException {
        String jobId = null;
        log.info("STARTED Coordinator Submit");
        incrJobCounter(1);
        CoordinatorJobBean coordJob = new CoordinatorJobBean();
        try {
            XLog.Info.get().setParameter(DagXLogInfoService.TOKEN, conf.get(OozieClient.LOG_TOKEN));
            mergeDefaultConfig();

            String appXml = readAndValidateXml();
            coordJob.setOrigJobXml(appXml);
            log.debug("jobXml after initial validation " + XmlUtils.prettyPrint(appXml).toString());
            appXml = XmlUtils.removeComments(appXml);
            initEvaluators();
            Element eJob = basicResolveAndIncludeDS(appXml, conf, coordJob);
            log.debug("jobXml after all validation " + XmlUtils.prettyPrint(eJob).toString());

            jobId = storeToDB(eJob, store, coordJob);
            // log JOB info for coordinator jobs
            setLogInfo(coordJob);
            log = XLog.getLog(getClass());

            if (!dryrun) {
                // submit a command to materialize jobs for the next 1 hour (3600 secs)
                // so we don't wait 10 mins for the Service to run.
                queueCallable(new CoordJobMatLookupCommand(jobId, 3600), 100);
            }
            else {
                Date startTime = coordJob.getStartTime();
                long startTimeMilli = startTime.getTime();
                long endTimeMilli = startTimeMilli + (3600 * 1000);
                Date jobEndTime = coordJob.getEndTime();
                Date endTime = new Date(endTimeMilli);
                if (endTime.compareTo(jobEndTime) > 0) {
                    endTime = jobEndTime;
                }
                jobId = coordJob.getId();
                log.info("[" + jobId + "]: Update status to PREMATER");
                coordJob.setStatus(CoordinatorJob.Status.PREMATER);
                CoordActionMaterializeCommand coordActionMatCom = new CoordActionMaterializeCommand(jobId, startTime,
                        endTime);
                Configuration jobConf = null;
                try {
                    jobConf = new XConfiguration(new StringReader(coordJob.getConf()));
                }
                catch (IOException e1) {
                    log.warn("Configuration parse error. read from DB :" + coordJob.getConf(), e1);
                }
                String action = coordActionMatCom.materializeJobs(true, coordJob, jobConf, null);
                String output = coordJob.getJobXml() + System.getProperty("line.separator")
                        + "***actions for instance***" + action;
                return output;
            }
        }
        catch (CoordinatorJobException ex) {
            log.warn("ERROR: ", ex);
            throw new CommandException(ex);
        }
        catch (IllegalArgumentException iex) {
            log.warn("ERROR: ", iex);
            throw new CommandException(ErrorCode.E1003, iex);
        }
        catch (Exception ex) {// TODO
            log.warn("ERROR: ", ex);
            throw new CommandException(ErrorCode.E0803, ex);
        }
        log.info("ENDED Coordinator Submit jobId=" + jobId);
        return jobId;
    }

    /**
     * Read the application XML and validate against coordinator Schema
     *
     * @return validated coordinator XML
     * @throws CoordinatorJobException
     */
    private String readAndValidateXml() throws CoordinatorJobException {
        String appPath = ParamChecker.notEmpty(conf.get(OozieClient.COORDINATOR_APP_PATH),
                OozieClient.COORDINATOR_APP_PATH);// TODO: COORDINATOR_APP_PATH
        String coordXml = readDefinition(appPath, COORDINATOR_XML_FILE);
        validateXml(coordXml);
        return coordXml;
    }

    /**
     * Validate against Coordinator XSD file
     *
     * @param xmlContent : Input coordinator xml
     * @throws CoordinatorJobException
     */
    private void validateXml(String xmlContent) throws CoordinatorJobException {
        javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(SchemaName.COORDINATOR);
        Validator validator = schema.newValidator();
        // log.warn("XML " + xmlContent);
        try {
            validator.validate(new StreamSource(new StringReader(xmlContent)));
        }
        catch (SAXException ex) {
            log.warn("SAXException :", ex);
            throw new CoordinatorJobException(ErrorCode.E0701, ex.getMessage(), ex);
        }
        catch (IOException ex) {
            // ex.printStackTrace();
            log.warn("IOException :", ex);
            throw new CoordinatorJobException(ErrorCode.E0702, ex.getMessage(), ex);
        }
    }

    /**
     * Merge default configuration with user-defined configuration.
     *
     * @throws CommandException
     */
    protected void mergeDefaultConfig() throws CommandException {
        Path configDefault = new Path(conf.get(OozieClient.COORDINATOR_APP_PATH), CONFIG_DEFAULT);
        // Configuration fsConfig = new Configuration();
        // log.warn("CONFIG :" + configDefault.toUri());
        Configuration fsConfig = CoordUtils.getHadoopConf(conf);
        FileSystem fs;
        // TODO: which conf?
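        // Note: if a coord-config-default.xml file exists under the application path, its properties are checked
        // against DISALLOWED_DEFAULT_PROPERTIES and then injected into the submitted configuration via
        // XConfiguration.injectDefaults, the intent being that explicitly submitted values take precedence over
        // the defaults file. The submitted configuration itself is then checked against DISALLOWED_USER_PROPERTIES.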
        try {
            String user = ParamChecker.notEmpty(conf.get(OozieClient.USER_NAME), OozieClient.USER_NAME);
            String group = ParamChecker.notEmpty(conf.get(OozieClient.GROUP_NAME), OozieClient.GROUP_NAME);
            fs = Services.get().get(HadoopAccessorService.class).createFileSystem(user, group, configDefault.toUri(),
                    new Configuration());
            if (fs.exists(configDefault)) {
                Configuration defaultConf = new XConfiguration(fs.open(configDefault));
                PropertiesUtils.checkDisallowedProperties(defaultConf, DISALLOWED_DEFAULT_PROPERTIES);
                XConfiguration.injectDefaults(defaultConf, conf);
            }
            else {
                log.info("configDefault Doesn't exist " + configDefault);
            }
            PropertiesUtils.checkDisallowedProperties(conf, DISALLOWED_USER_PROPERTIES);
        }
        catch (IOException e) {
            throw new CommandException(ErrorCode.E0702, e.getMessage() + " : Problem reading default config "
                    + configDefault, e);
        }
        catch (HadoopAccessorException e) {
            throw new CommandException(e);
        }
        log.debug("Merged CONF :" + XmlUtils.prettyPrint(conf).toString());
    }

    /**
     * Resolve all the variables defined in the job configuration and include the dataset definitions
     * from the dataset file into the XML.
     *
     * @param appXml : Original job XML
     * @param conf : Configuration of the job
     * @param coordJob : Coordinator job bean to be populated.
     * @return : Resolved and modified job XML element.
     * @throws Exception
     */
    public Element basicResolveAndIncludeDS(String appXml, Configuration conf, CoordinatorJobBean coordJob)
            throws CoordinatorJobException, Exception {
        Element basicResolvedApp = resolveInitial(conf, appXml, coordJob);
        includeDataSets(basicResolvedApp, conf);
        return basicResolvedApp;
    }

    /**
     * Insert the matching dataset definitions into the data-in and data-out tags.
     *
     * @param eAppXml : coordinator application XML, updated in place
     * @param eDatasets : DataSet XML
     */
    private void insertDataSet(Element eAppXml, Element eDatasets) {
        // Adding DS definition in the coordinator XML
        Element inputList = eAppXml.getChild("input-events", eAppXml.getNamespace());
        if (inputList != null) {
            for (Element dataIn : (List<Element>) inputList.getChildren("data-in", eAppXml.getNamespace())) {
                Element eDataset = findDataSet(eDatasets, dataIn.getAttributeValue("dataset"));
                dataIn.getContent().add(0, eDataset);
            }
        }
        Element outputList = eAppXml.getChild("output-events", eAppXml.getNamespace());
        if (outputList != null) {
            for (Element dataOut : (List<Element>) outputList.getChildren("data-out", eAppXml.getNamespace())) {
                Element eDataset = findDataSet(eDatasets, dataOut.getAttributeValue("dataset"));
                dataOut.getContent().add(0, eDataset);
            }
        }
    }

    /**
     * Find a specific dataset from a list of datasets.
     *
     * @param eDatasets : List of datasets
     * @param name : queried dataset name
     * @return the matching dataset element; otherwise a RuntimeException is thrown
     */
    private static Element findDataSet(Element eDatasets, String name) {
        for (Element eDataset : (List<Element>) eDatasets.getChildren("dataset", eDatasets.getNamespace())) {
            if (eDataset.getAttributeValue("name").equals(name)) {
                eDataset = (Element) eDataset.clone();
                eDataset.detach();
                return eDataset;
            }
        }
        throw new RuntimeException("undefined dataset: " + name);
    }

    /**
     * Initialize all the required EL Evaluators.
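     * <p/>
     * Each evaluator is created for a distinct EL group and used for a different part of the submission:
     * evalFreq for the frequency and time-unit attributes, evalNofuncs for plain attributes and tag contents
     * (start, end, timezone, controls, datasets), evalInst for the data-in/data-out instance tags, and
     * evalSla for the SLA section.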
     */
    protected void initEvaluators() {
        evalFreq = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-freq");
        evalNofuncs = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-nofuncs");
        evalInst = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-instances");
        evalSla = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-sla-submit");
    }

    /**
     * Resolve basic entities using job Configuration.
     *
     * @param conf : Job configuration
     * @param appXml : Original job XML
     * @param coordJob : Coordinator job bean to be populated.
     * @return Resolved job XML element.
     * @throws Exception
     */
    protected Element resolveInitial(Configuration conf, String appXml, CoordinatorJobBean coordJob)
            throws CoordinatorJobException, Exception {
        Element eAppXml = XmlUtils.parseXml(appXml);
        // job's main attributes
        // frequency
        String val = resolveAttribute("frequency", eAppXml, evalFreq);
        int ival = ParamChecker.checkInteger(val, "frequency");
        ParamChecker.checkGTZero(ival, "frequency");
        coordJob.setFrequency(ival);
        TimeUnit tmp = (evalFreq.getVariable("timeunit") == null) ? TimeUnit.MINUTE : ((TimeUnit) evalFreq
                .getVariable("timeunit"));
        addAnAttribute("freq_timeunit", eAppXml, tmp.toString()); // TODO: Store TimeUnit
        coordJob.setTimeUnit(CoordinatorJob.Timeunit.valueOf(tmp.toString()));
        // End Of Duration
        tmp = evalFreq.getVariable("endOfDuration") == null ? TimeUnit.NONE : ((TimeUnit) evalFreq
                .getVariable("endOfDuration"));
        addAnAttribute("end_of_duration", eAppXml, tmp.toString());
        // coordJob.setEndOfDuration(tmp) // TODO: Add new attribute in Job bean

        // start time
        val = resolveAttribute("start", eAppXml, evalNofuncs);
        ParamChecker.checkUTC(val, "start");
        coordJob.setStartTime(DateUtils.parseDateUTC(val));
        // end time
        val = resolveAttribute("end", eAppXml, evalNofuncs);
        ParamChecker.checkUTC(val, "end");
        coordJob.setEndTime(DateUtils.parseDateUTC(val));
        // Time zone
        val = resolveAttribute("timezone", eAppXml, evalNofuncs);
        ParamChecker.checkTimeZone(val, "timezone");
        coordJob.setTimeZone(val);

        // controls
        val = resolveTagContents("timeout", eAppXml.getChild("controls", eAppXml.getNamespace()), evalNofuncs);
        if (val.equals("")) {
            val = Services.get().getConf().get(CONF_DEFAULT_TIMEOUT_NORMAL);
        }

        ival = ParamChecker.checkInteger(val, "timeout");
        // ParamChecker.checkGEZero(ival, "timeout");
        coordJob.setTimeout(ival);
        val = resolveTagContents("concurrency", eAppXml.getChild("controls", eAppXml.getNamespace()), evalNofuncs);
        if (val.equals("")) {
            val = "-1";
        }
        ival = ParamChecker.checkInteger(val, "concurrency");
        // ParamChecker.checkGEZero(ival, "concurrency");
        coordJob.setConcurrency(ival);
        val = resolveTagContents("execution", eAppXml.getChild("controls", eAppXml.getNamespace()), evalNofuncs);
        if (val.equals("")) {
            val = Execution.FIFO.toString();
        }
        coordJob.setExecution(Execution.valueOf(val));
        String[] acceptedVals = {Execution.LIFO.toString(), Execution.FIFO.toString(), Execution.LAST_ONLY.toString()};
        ParamChecker.isMember(val, acceptedVals, "execution");

        // datasets
        resolveTagContents("include", eAppXml.getChild("datasets", eAppXml.getNamespace()), evalNofuncs);
        // for each data set
        resolveDataSets(eAppXml);
        HashMap<String, String> dataNameList = new HashMap<String, String>();
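        // dataNameList maps each data-in/data-out event name to "data-in" or "data-out" while the events are
        // resolved below; it is then passed to the "coord-job-submit-data" evaluator so the workflow configuration
        // property values that reference those event names can be checked without modifying the XML.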
        resolveIOEvents(eAppXml, dataNameList);

        resolveTagContents("app-path", eAppXml.getChild("action", eAppXml.getNamespace()).getChild("workflow",
                eAppXml.getNamespace()), evalNofuncs);
        // TODO: If the action or workflow tag is missing, a NullPointerException will occur
        Element configElem = eAppXml.getChild("action", eAppXml.getNamespace()).getChild("workflow",
                eAppXml.getNamespace()).getChild("configuration", eAppXml.getNamespace());
        evalData = CoordELEvaluator.createELEvaluatorForDataEcho(conf, "coord-job-submit-data", dataNameList);
        if (configElem != null) {
            for (Element propElem : (List<Element>) configElem.getChildren("property", configElem.getNamespace())) {
                resolveTagContents("name", propElem, evalData);
                // log.warn("Value :");
                // Want to check the data-integrity but don't want to modify the XML
                // for properties only
                Element tmpProp = (Element) propElem.clone();
                resolveTagContents("value", tmpProp, evalData);
                // val = resolveTagContents("value", propElem, evalData);
                // log.warn("Value OK :" + val);
            }
        }
        resolveSLA(eAppXml, coordJob);
        return eAppXml;
    }

    private void resolveSLA(Element eAppXml, CoordinatorJobBean coordJob) throws CommandException {
        // String prefix = XmlUtils.getNamespacePrefix(eAppXml,
        // SchemaService.SLA_NAME_SPACE_URI);
        Element eSla = eAppXml.getChild("action", eAppXml.getNamespace()).getChild("info",
                Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI));

        if (eSla != null) {
            String slaXml = XmlUtils.prettyPrint(eSla).toString();
            try {
                // EL evaluation
                slaXml = evalSla.evaluate(slaXml, String.class);
                // Validate against the SLA semantic XSD
                XmlUtils.validateData(slaXml, SchemaName.SLA_ORIGINAL);
            }
            catch (Exception e) {
                throw new CommandException(ErrorCode.E1004, "Validation ERROR :" + e.getMessage(), e);
            }
        }
    }

    /**
     * Resolve input-events/data-in and output-events/data-out tags.
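     * <p/>
     * Illustrative shape of the input (the element and attribute names are the ones read by this method; the
     * dataset and instance values are placeholders):
     * <pre>
     * <input-events>
     *   <data-in name="input" dataset="logs">
     *     <instance>${coord:current(0)}</instance>
     *   </data-in>
     * </input-events>
     * </pre>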
     *
     * @param eJobOrg : original job element; it is cloned, so the original is not modified
     * @param dataNameList : map populated with each data-in/data-out event name
     * @throws CoordinatorJobException
     */
    private void resolveIOEvents(Element eJobOrg, HashMap<String, String> dataNameList) throws CoordinatorJobException {
        // Resolving input-events/data-in
        // Clone the job and don't update anything in the original
        Element eJob = (Element) eJobOrg.clone();
        Element inputList = eJob.getChild("input-events", eJob.getNamespace());
        if (inputList != null) {
            TreeSet<String> eventNameSet = new TreeSet<String>();
            for (Element dataIn : (List<Element>) inputList.getChildren("data-in", eJob.getNamespace())) {
                String dataInName = dataIn.getAttributeValue("name");
                dataNameList.put(dataInName, "data-in");
                // check whether there is any duplicate data-in name
                if (eventNameSet.contains(dataInName)) {
                    throw new RuntimeException("Duplicate data-in name " + dataInName);
                }
                else {
                    eventNameSet.add(dataInName);
                }
                resolveTagContents("instance", dataIn, evalInst);
                resolveTagContents("start-instance", dataIn, evalInst);
                resolveTagContents("end-instance", dataIn, evalInst);
            }
        }
        // Resolving output-events/data-out
        Element outputList = eJob.getChild("output-events", eJob.getNamespace());
        if (outputList != null) {
            TreeSet<String> eventNameSet = new TreeSet<String>();
            for (Element dataOut : (List<Element>) outputList.getChildren("data-out", eJob.getNamespace())) {
                String dataOutName = dataOut.getAttributeValue("name");
                dataNameList.put(dataOutName, "data-out");
                // check whether there is any duplicate data-out name
                if (eventNameSet.contains(dataOutName)) {
                    throw new RuntimeException("Duplicate data-out name " + dataOutName);
                }
                else {
                    eventNameSet.add(dataOutName);
                }
                resolveTagContents("instance", dataOut, evalInst);
            }
        }

    }

    /**
     * Add an attribute into an XML element.
     *
     * @param attrName : attribute name
     * @param elem : Element to add the attribute to
     * @param value : value of the attribute
     */
    private void addAnAttribute(String attrName, Element elem, String value) {
        elem.setAttribute(attrName, value);
    }

    /**
     * Resolve Data sets using job configuration.
     *
     * @param eAppXml : Job Element XML
     * @throws Exception
     */
    private void resolveDataSets(Element eAppXml) throws Exception {
        Element datasetList = eAppXml.getChild("datasets", eAppXml.getNamespace());
        if (datasetList != null) {

            List<Element> dsElems = datasetList.getChildren("dataset", eAppXml.getNamespace());
            resolveDataSets(dsElems);
            resolveTagContents("app-path", eAppXml.getChild("action", eAppXml.getNamespace()).getChild("workflow",
                    eAppXml.getNamespace()), evalNofuncs);
        }
    }

    /**
     * Resolve Data sets using job configuration.
     *
     * @param dsElems : Data set XML elements.
     * @throws CoordinatorJobException
     */
    private void resolveDataSets(List<Element> dsElems) throws CoordinatorJobException /* throws Exception */ {
        for (Element dsElem : dsElems) {
            // Setting up default TimeUnit and EndOfDuration
            evalFreq.setVariable("timeunit", TimeUnit.MINUTE);
            evalFreq.setVariable("endOfDuration", TimeUnit.NONE);

            String val = resolveAttribute("frequency", dsElem, evalFreq);
            int ival = ParamChecker.checkInteger(val, "frequency");
            ParamChecker.checkGTZero(ival, "frequency");
            addAnAttribute("freq_timeunit", dsElem, evalFreq.getVariable("timeunit") == null ? TimeUnit.MINUTE
                    .toString() : ((TimeUnit) evalFreq.getVariable("timeunit")).toString());
            addAnAttribute("end_of_duration", dsElem, evalFreq.getVariable("endOfDuration") == null ? TimeUnit.NONE
                    .toString() : ((TimeUnit) evalFreq.getVariable("endOfDuration")).toString());
            val = resolveAttribute("initial-instance", dsElem, evalNofuncs);
            ParamChecker.checkUTC(val, "initial-instance");
            val = resolveAttribute("timezone", dsElem, evalNofuncs);
            ParamChecker.checkTimeZone(val, "timezone");
            resolveTagContents("uri-template", dsElem, evalNofuncs);
            resolveTagContents("done-flag", dsElem, evalNofuncs);
        }
    }

    /**
     * Resolve the content of a tag.
     *
     * @param tagName : Tag name in the job XML, i.e. <timeout> 10 </timeout>
     * @param elem : Element where the tag exists.
     * @param eval : EL evaluator used to resolve the content.
     * @return Resolved tag content.
     * @throws CoordinatorJobException
     */
    private String resolveTagContents(String tagName, Element elem, ELEvaluator eval) throws CoordinatorJobException {
        String ret = "";
        if (elem != null) {
            for (Element tagElem : (List<Element>) elem.getChildren(tagName, elem.getNamespace())) {
                if (tagElem != null) {
                    String updated;
                    try {
                        updated = CoordELFunctions.evalAndWrap(eval, tagElem.getText().trim());
                    }
                    catch (Exception e) {
                        // e.printStackTrace();
                        throw new CoordinatorJobException(ErrorCode.E1004, e.getMessage(), e);
                    }
                    tagElem.removeContent();
                    tagElem.addContent(updated);
                    ret += updated;
                }
                /*
                 * else { //TODO: unlike event }
                 */
            }
        }
        return ret;
    }

    /**
     * Resolve an attribute value.
     *
     * @param attrName : Attribute name.
     * @param elem : XML Element where the attribute is defined.
     * @param eval : ELEvaluator used to resolve the value.
     * @return Resolved attribute value.
     * @throws CoordinatorJobException
     */
    private String resolveAttribute(String attrName, Element elem, ELEvaluator eval) throws CoordinatorJobException {
        Attribute attr = elem.getAttribute(attrName);
        String val = null;
        if (attr != null) {
            try {
                val = CoordELFunctions.evalAndWrap(eval, attr.getValue().trim());
            }
            catch (Exception e) {
                // e.printStackTrace();
                throw new CoordinatorJobException(ErrorCode.E1004, e.getMessage(), e);
            }
            attr.setValue(val);
        }
        return val;
    }

    /**
     * Include referred Datasets into the XML.
     *
     * @param resolvedXml : Job XML element.
     * @param conf : Job configuration
     * @throws CoordinatorJobException
     */
    protected void includeDataSets(Element resolvedXml, Configuration conf) throws CoordinatorJobException
    /* throws Exception */ {
        Element datasets = resolvedXml.getChild("datasets", resolvedXml.getNamespace());
        Element allDataSets = new Element("all_datasets", resolvedXml.getNamespace());
        List<String> dsList = new ArrayList<String>();
        if (datasets != null) {
            for (Element includeElem : (List<Element>) datasets.getChildren("include", datasets.getNamespace())) {
                String incDSFile = includeElem.getTextTrim();
                // log.warn(" incDSFile " + incDSFile);
                includeOneDSFile(incDSFile, dsList, allDataSets, datasets.getNamespace());
            }
            for (Element e : (List<Element>) datasets.getChildren("dataset", datasets.getNamespace())) {
                String dsName = (String) e.getAttributeValue("name");
                if (dsList.contains(dsName)) {// Override with this DS
                    // Remove old DS
                    removeDataSet(allDataSets, dsName);
                    // throw new RuntimeException("Duplicate Dataset " + dsName);
                }
                else {
                    dsList.add(dsName);
                }
                allDataSets.addContent((Element) e.clone());
            }
        }
        insertDataSet(resolvedXml, allDataSets);
        resolvedXml.removeChild("datasets", resolvedXml.getNamespace());
    }

    /**
     * Include one dataset file.
     *
     * @param incDSFile : Included dataset filename.
     * @param dsList : list of dataset names used to detect duplicates.
     * @param allDataSets : Element that includes all dataset definitions.
     * @param dsNameSpace : Dataset namespace.
     * @throws CoordinatorJobException
     */
    private void includeOneDSFile(String incDSFile, List<String> dsList, Element allDataSets, Namespace dsNameSpace)
            throws CoordinatorJobException {
        Element tmpDataSets = null;
        try {
            String dsXml = readDefinition(incDSFile, "");
            log.debug("DSFILE :" + incDSFile + "\n" + dsXml);
            tmpDataSets = XmlUtils.parseXml(dsXml);
        }
        /*
         * catch (IOException iex) {XLog.getLog(getClass()).warn(
         * "Error reading included dataset file [{0}]. Message [{1}]",
         * incDSFile, iex.getMessage()); throw new
         * CommandException(ErrorCode.E0803, iex.getMessage()); }
         */
        catch (JDOMException e) {
            log.warn("Error parsing included dataset [{0}]. Message [{1}]", incDSFile, e.getMessage());
            throw new CoordinatorJobException(ErrorCode.E0700, e.getMessage());
        }
        resolveDataSets((List<Element>) tmpDataSets.getChildren("dataset"));
        for (Element e : (List<Element>) tmpDataSets.getChildren("dataset")) {
            String dsName = (String) e.getAttributeValue("name");
            if (dsList.contains(dsName)) {
                throw new RuntimeException("Duplicate Dataset " + dsName);
            }
            dsList.add(dsName);
            Element tmp = (Element) e.clone();
            // TODO: Don't like to over-write the external/include DS's namespace
            tmp.setNamespace(dsNameSpace);// TODO:
            tmp.getChild("uri-template").setNamespace(dsNameSpace);
            if (e.getChild("done-flag") != null) {
                tmp.getChild("done-flag").setNamespace(dsNameSpace);
            }
            allDataSets.addContent(tmp);
        }
        // nested include
        for (Element includeElem : (List<Element>) tmpDataSets.getChildren("include", tmpDataSets.getNamespace())) {
            String incFile = includeElem.getTextTrim();
            // log.warn("incDSFile "+ incDSFile);
            includeOneDSFile(incFile, dsList, allDataSets, dsNameSpace);
        }
    }

    /**
     * Remove a dataset from a list of datasets.
     *
     * @param eDatasets : List of datasets
     * @param name : Dataset name to be removed.
     */
    private static void removeDataSet(Element eDatasets, String name) {
        for (Element eDataset : (List<Element>) eDatasets.getChildren("dataset", eDatasets.getNamespace())) {
            if (eDataset.getAttributeValue("name").equals(name)) {
                eDataset.detach();
                // return once the dataset has been removed; otherwise the method would always throw below
                return;
            }
        }
        throw new RuntimeException("undefined dataset: " + name);
    }

    /**
     * Read a coordinator application definition file.
     *
     * @param appPath application path.
     * @param fileName file name to read under the application path; if empty, the application path itself is read.
     * @return the file content.
     * @throws CoordinatorJobException thrown if the definition could not be read.
     */
    protected String readDefinition(String appPath, String fileName) throws CoordinatorJobException {// TODO:
        String user = ParamChecker.notEmpty(conf.get(OozieClient.USER_NAME), OozieClient.USER_NAME);
        String group = ParamChecker.notEmpty(conf.get(OozieClient.GROUP_NAME), OozieClient.GROUP_NAME);
        Configuration confHadoop = CoordUtils.getHadoopConf(conf);
        try {
            URI uri = new URI(appPath);
            log.debug("user =" + user + " group =" + group);
            FileSystem fs = Services.get().get(HadoopAccessorService.class).createFileSystem(user, group, uri,
                    new Configuration());
            Path p;
            if (fileName == null || fileName.length() == 0) {
                p = new Path(uri.getPath());
            }
            else {
                p = new Path(uri.getPath(), fileName);
            }
            // Reader reader = new InputStreamReader(fs.open(new Path(uri
            // .getPath(), fileName)));
            Reader reader = new InputStreamReader(fs.open(p));// TODO
            StringWriter writer = new StringWriter();
            IOUtils.copyCharStream(reader, writer);
            return writer.toString();
        }
        catch (IOException ex) {
            log.warn("IOException :" + XmlUtils.prettyPrint(confHadoop), ex);
            throw new CoordinatorJobException(ErrorCode.E1001, ex.getMessage(), ex); // TODO:
        }
        catch (URISyntaxException ex) {
            log.warn("URISyntaxException :" + ex.getMessage());
            throw new CoordinatorJobException(ErrorCode.E1002, appPath, ex.getMessage(), ex);// TODO:
        }
        catch (HadoopAccessorException ex) {
            throw new CoordinatorJobException(ex);
        }
        catch (Exception ex) {
            log.warn("Exception :", ex);
            throw new CoordinatorJobException(ErrorCode.E1001, ex.getMessage(), ex);// TODO:
        }
    }

    /**
     * Write a Coordinator Job into the database.
     *
     * @param eJob : XML element of the job
     * @param store : Coordinator Store to write to.
     * @param coordJob : Coordinator job bean
     * @return job id.
     * @throws StoreException
     */
    private String storeToDB(Element eJob, CoordinatorStore store, CoordinatorJobBean coordJob) throws StoreException {
        String jobId = Services.get().get(UUIDService.class).generateId(ApplicationType.COORDINATOR);
        coordJob.setId(jobId);
        coordJob.setAuthToken(this.authToken);
        coordJob.setAppName(eJob.getAttributeValue("name"));
        coordJob.setAppPath(conf.get(OozieClient.COORDINATOR_APP_PATH));
        coordJob.setStatus(CoordinatorJob.Status.PREP);
        coordJob.setCreatedTime(new Date()); // TODO: Do we need that?
        coordJob.setUser(conf.get(OozieClient.USER_NAME));
        coordJob.setGroup(conf.get(OozieClient.GROUP_NAME));
        coordJob.setConf(XmlUtils.prettyPrint(conf).toString());
        coordJob.setJobXml(XmlUtils.prettyPrint(eJob).toString());
        coordJob.setLastActionNumber(0);
        coordJob.setLastModifiedTime(new Date());

        if (!dryrun) {
            store.insertCoordinatorJob(coordJob);
        }
        return jobId;
    }

    /**
     * For unit-testing only. Will ultimately go away.
     *
     * @param args
     * @throws Exception
     * @throws JDOMException
     */
    public static void main(String[] args) throws Exception {
        // Configuration conf = new XConfiguration(IOUtils.getResourceAsReader(
        // "org/apache/oozie/coord/conf.xml", -1));

        Configuration conf = new XConfiguration();

        // base case
        // conf.set(OozieClient.COORDINATOR_APP_PATH,
        // "file:///Users/danielwo/oozie/workflows/coord/test1/");

        // no input datasets
        // conf.set(OozieClient.COORDINATOR_APP_PATH,
        // "file:///Users/danielwo/oozie/workflows/coord/coord_noinput/");
        // conf.set(OozieClient.COORDINATOR_APP_PATH,
        // "file:///Users/danielwo/oozie/workflows/coord/coord_use_apppath/");

        // only 1 instance
        // conf.set(OozieClient.COORDINATOR_APP_PATH,
        // "file:///Users/danielwo/oozie/workflows/coord/coord_oneinstance/");

        // no local props in xml
        // conf.set(OozieClient.COORDINATOR_APP_PATH,
        // "file:///Users/danielwo/oozie/workflows/coord/coord_noprops/");

        conf.set(OozieClient.COORDINATOR_APP_PATH,
                "file:///homes/test/workspace/sandbox_krishna/oozie-main/core/src/main/java/org/apache/oozie/coord/");
        conf.set(OozieClient.USER_NAME, "test");
        // conf.set(OozieClient.USER_NAME, "danielwo");
        conf.set(OozieClient.GROUP_NAME, "other");
        // System.out.println("appXml :"+ appXml + "\n conf :"+ conf);
        new Services().init();
        try {
            CoordSubmitCommand sc = new CoordSubmitCommand(conf, "TESTING");
            String jobId = sc.call();
            System.out.println("Job Id " + jobId);
            Thread.sleep(80000);
        }
        finally {
            Services.get().destroy();
        }
    }
}