/**
 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. See accompanying LICENSE file.
 */
package org.apache.oozie.command.coord;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Validator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.oozie.CoordinatorJobBean;
import org.apache.oozie.ErrorCode;
import org.apache.oozie.client.CoordinatorJob;
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.client.CoordinatorJob.Execution;
import org.apache.oozie.command.CommandException;
import org.apache.oozie.coord.CoordELEvaluator;
import org.apache.oozie.coord.CoordELFunctions;
import org.apache.oozie.coord.CoordUtils;
import org.apache.oozie.coord.CoordinatorJobException;
import org.apache.oozie.coord.TimeUnit;
import org.apache.oozie.service.DagXLogInfoService;
import org.apache.oozie.service.HadoopAccessorException;
import org.apache.oozie.service.SchemaService;
import org.apache.oozie.service.Service;
import org.apache.oozie.service.Services;
import org.apache.oozie.service.UUIDService;
import org.apache.oozie.service.HadoopAccessorService;
import org.apache.oozie.service.WorkflowAppService;
import org.apache.oozie.service.SchemaService.SchemaName;
import org.apache.oozie.service.UUIDService.ApplicationType;
import org.apache.oozie.store.CoordinatorStore;
import org.apache.oozie.store.StoreException;
import org.apache.oozie.util.DateUtils;
import org.apache.oozie.util.ELEvaluator;
import org.apache.oozie.util.IOUtils;
import org.apache.oozie.util.ParamChecker;
import org.apache.oozie.util.PropertiesUtils;
import org.apache.oozie.util.XConfiguration;
import org.apache.oozie.util.XLog;
import org.apache.oozie.util.XmlUtils;
import org.apache.oozie.workflow.WorkflowException;
import org.jdom.Attribute;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.xml.sax.SAXException;

/**
 * This class provides the functionality to resolve a coordinator job XML and write the job information into a DB
 * table.
 * <p/>
 * Specifically it performs the following functions:
 * <ol>
 * <li>Resolve all the variables or properties using the job configuration.</li>
 * <li>Insert all dataset definitions as part of the &lt;data-in&gt; and &lt;data-out&gt; tags.</li>
 * <li>Validate the XML at runtime.</li>
 * </ol>
 */
public class CoordSubmitCommand extends CoordinatorCommand<String> {

    private Configuration conf;
    private String authToken;
    private boolean dryrun;

    public static final String CONFIG_DEFAULT = "coord-config-default.xml";
    public static final String COORDINATOR_XML_FILE = "coordinator.xml";

    private static final Set<String> DISALLOWED_USER_PROPERTIES = new HashSet<String>();
    private static final Set<String> DISALLOWED_DEFAULT_PROPERTIES = new HashSet<String>();

    /**
     * Default timeout for normal jobs, in minutes, after which the coordinator input check will time out.
     */
    public static final String CONF_DEFAULT_TIMEOUT_NORMAL = Service.CONF_PREFIX + "coord.normal.default.timeout";

    private XLog log = XLog.getLog(getClass());
    private ELEvaluator evalFreq = null;
    private ELEvaluator evalNofuncs = null;
    private ELEvaluator evalData = null;
    private ELEvaluator evalInst = null;
    private ELEvaluator evalSla = null;

    static {
        String[] badUserProps = {PropertiesUtils.YEAR, PropertiesUtils.MONTH, PropertiesUtils.DAY,
                PropertiesUtils.HOUR, PropertiesUtils.MINUTE, PropertiesUtils.DAYS, PropertiesUtils.HOURS,
                PropertiesUtils.MINUTES, PropertiesUtils.KB, PropertiesUtils.MB, PropertiesUtils.GB,
                PropertiesUtils.TB, PropertiesUtils.PB, PropertiesUtils.RECORDS, PropertiesUtils.MAP_IN,
                PropertiesUtils.MAP_OUT, PropertiesUtils.REDUCE_IN, PropertiesUtils.REDUCE_OUT, PropertiesUtils.GROUPS};
        PropertiesUtils.createPropertySet(badUserProps, DISALLOWED_USER_PROPERTIES);

        String[] badDefaultProps = {PropertiesUtils.HADOOP_USER, PropertiesUtils.HADOOP_UGI,
                WorkflowAppService.HADOOP_JT_KERBEROS_NAME, WorkflowAppService.HADOOP_NN_KERBEROS_NAME};
        // the EL constants may not be overridden by the default configuration either
        PropertiesUtils.createPropertySet(badUserProps, DISALLOWED_DEFAULT_PROPERTIES);
        PropertiesUtils.createPropertySet(badDefaultProps, DISALLOWED_DEFAULT_PROPERTIES);
    }
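    // Illustrative usage sketch only (property values below are made up, not taken from this
    // codebase); main() at the bottom of this file drives the command in the same way:
    //
    //   Configuration conf = new XConfiguration();
    //   conf.set(OozieClient.COORDINATOR_APP_PATH, "hdfs://namenode:9000/user/joe/coord-app/coordinator.xml");
    //   conf.set(OozieClient.USER_NAME, "joe");
    //   conf.set(OozieClient.GROUP_NAME, "users");
    //   String jobId = new CoordSubmitCommand(conf, "AUTH_TOKEN").call();
    //
    // call() merges coord-config-default.xml into the job configuration, validates coordinator.xml
    // against the coordinator schema, resolves submit-time EL, stores the job in PREP state and
    // queues a CoordJobMatLookupCommand covering roughly the first hour of actions.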
    /**
     * Constructor to create the Coordinator Submit Command.
     *
     * @param conf : Configuration for the Coordinator job
     * @param authToken : To be used for authentication
     */
    public CoordSubmitCommand(Configuration conf, String authToken) {
        super("coord_submit", "coord_submit", 1, XLog.STD);
        this.conf = ParamChecker.notNull(conf, "conf");
        this.authToken = ParamChecker.notEmpty(authToken, "authToken");
    }

    /**
     * Constructor to create the Coordinator Submit Command, optionally as a dryrun.
     *
     * @param dryrun : if true, resolve and materialize the job without storing it
     * @param conf : Configuration for the Coordinator job
     * @param authToken : To be used for authentication
     */
    public CoordSubmitCommand(boolean dryrun, Configuration conf, String authToken) {
        super("coord_submit", "coord_submit", 1, XLog.STD, dryrun);
        this.conf = ParamChecker.notNull(conf, "conf");
        this.authToken = ParamChecker.notEmpty(authToken, "authToken");
        this.dryrun = dryrun;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.apache.oozie.command.Command#call(org.apache.oozie.store.Store)
     */
    @Override
    protected String call(CoordinatorStore store) throws StoreException, CommandException {
        String jobId = null;
        log.info("STARTED Coordinator Submit");
        incrJobCounter(1);
        CoordinatorJobBean coordJob = new CoordinatorJobBean();
        try {
            XLog.Info.get().setParameter(DagXLogInfoService.TOKEN, conf.get(OozieClient.LOG_TOKEN));
            mergeDefaultConfig();

            String appXml = readAndValidateXml();
            coordJob.setOrigJobXml(appXml);
            log.debug("jobXml after initial validation " + XmlUtils.prettyPrint(appXml).toString());
            appXml = XmlUtils.removeComments(appXml);
            initEvaluators();
            Element eJob = basicResolveAndIncludeDS(appXml, conf, coordJob);
            log.debug("jobXml after all validation " + XmlUtils.prettyPrint(eJob).toString());

            jobId = storeToDB(eJob, store, coordJob);
            // log JOB info for coordinator jobs
            setLogInfo(coordJob);
            log = XLog.getLog(getClass());

            if (!dryrun) {
                // submit a command to materialize jobs for the next 1 hour (3600 secs)
                // so we don't wait 10 mins for the Service to run.
                queueCallable(new CoordJobMatLookupCommand(jobId, 3600), 100);
            }
            else {
                Date startTime = coordJob.getStartTime();
                long startTimeMilli = startTime.getTime();
                long endTimeMilli = startTimeMilli + (3600 * 1000);
                Date jobEndTime = coordJob.getEndTime();
                Date endTime = new Date(endTimeMilli);
                if (endTime.compareTo(jobEndTime) > 0) {
                    endTime = jobEndTime;
                }
                jobId = coordJob.getId();
                log.info("[" + jobId + "]: Update status to PREMATER");
                coordJob.setStatus(CoordinatorJob.Status.PREMATER);
                CoordActionMaterializeCommand coordActionMatCom = new CoordActionMaterializeCommand(jobId, startTime,
                        endTime);
                Configuration jobConf = null;
                try {
                    jobConf = new XConfiguration(new StringReader(coordJob.getConf()));
                }
                catch (IOException e1) {
                    log.warn("Configuration parse error reading from DB: " + coordJob.getConf(), e1);
                }
                String action = coordActionMatCom.materializeJobs(true, coordJob, jobConf, null);
                String output = coordJob.getJobXml() + System.getProperty("line.separator")
                        + "***actions for instance***" + action;
                return output;
            }
        }
        catch (CoordinatorJobException ex) {
            log.warn("ERROR: ", ex);
            throw new CommandException(ex);
        }
        catch (IllegalArgumentException iex) {
            log.warn("ERROR: ", iex);
            throw new CommandException(ErrorCode.E1003, iex);
        }
        catch (Exception ex) { // TODO
            log.warn("ERROR: ", ex);
            throw new CommandException(ErrorCode.E0803, ex);
        }
        log.info("ENDED Coordinator Submit jobId=" + jobId);
        return jobId;
    }

    /**
     * Read the application XML and validate it against the coordinator schema.
     *
     * @return validated coordinator XML
     * @throws CoordinatorJobException
     */
    private String readAndValidateXml() throws CoordinatorJobException {
        String appPath = ParamChecker.notEmpty(conf.get(OozieClient.COORDINATOR_APP_PATH),
                OozieClient.COORDINATOR_APP_PATH); // TODO: COORDINATOR_APP_PATH
        String coordXml = readDefinition(appPath);
        validateXml(coordXml);
        return coordXml;
    }

    /**
     * Validate against the coordinator XSD file.
     *
     * @param xmlContent : Input coordinator XML
     * @throws CoordinatorJobException
     */
    private void validateXml(String xmlContent) throws CoordinatorJobException {
        javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(SchemaName.COORDINATOR);
        Validator validator = schema.newValidator();
        // log.warn("XML " + xmlContent);
        try {
            validator.validate(new StreamSource(new StringReader(xmlContent)));
        }
        catch (SAXException ex) {
            log.warn("SAXException :", ex);
            throw new CoordinatorJobException(ErrorCode.E0701, ex.getMessage(), ex);
        }
        catch (IOException ex) {
            // ex.printStackTrace();
            log.warn("IOException :", ex);
            throw new CoordinatorJobException(ErrorCode.E0702, ex.getMessage(), ex);
        }
    }
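    // For reference, a minimal coordinator.xml of the kind validated above might look like this
    // (illustrative sketch only; the name, times and path are made up):
    //
    //   <coordinator-app name="my-coord" frequency="60" start="2010-01-01T00:00Z"
    //                    end="2010-01-02T00:00Z" timezone="UTC"
    //                    xmlns="uri:oozie:coordinator:0.1">
    //       <action>
    //           <workflow>
    //               <app-path>hdfs://namenode:9000/user/joe/my-wf</app-path>
    //           </workflow>
    //       </action>
    //   </coordinator-app>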
    /**
     * Merge default configuration with user-defined configuration.
     *
     * @throws CommandException
     */
    protected void mergeDefaultConfig() throws CommandException {
        Path coordAppDir = new Path(conf.get(OozieClient.COORDINATOR_APP_PATH)).getParent();
        Path configDefault = new Path(coordAppDir, CONFIG_DEFAULT);
        // Configuration fsConfig = new Configuration();
        // log.warn("CONFIG :" + configDefault.toUri());
        Configuration fsConfig = CoordUtils.getHadoopConf(conf);
        FileSystem fs;
        // TODO: which conf?
        try {
            String user = ParamChecker.notEmpty(conf.get(OozieClient.USER_NAME), OozieClient.USER_NAME);
            String group = ParamChecker.notEmpty(conf.get(OozieClient.GROUP_NAME), OozieClient.GROUP_NAME);
            fs = Services.get().get(HadoopAccessorService.class).createFileSystem(user, group, configDefault.toUri(),
                    conf);
            if (fs.exists(configDefault)) {
                Configuration defaultConf = new XConfiguration(fs.open(configDefault));
                PropertiesUtils.checkDisallowedProperties(defaultConf, DISALLOWED_DEFAULT_PROPERTIES);
                XConfiguration.injectDefaults(defaultConf, conf);
            }
            else {
                log.info("configDefault Doesn't exist " + configDefault);
            }
            PropertiesUtils.checkDisallowedProperties(conf, DISALLOWED_USER_PROPERTIES);
        }
        catch (IOException e) {
            throw new CommandException(ErrorCode.E0702, e.getMessage() + " : Problem reading default config "
                    + configDefault, e);
        }
        catch (HadoopAccessorException e) {
            throw new CommandException(e);
        }
        log.debug("Merged CONF :" + XmlUtils.prettyPrint(conf).toString());
    }
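    // Note on precedence (descriptive note based on the injectDefaults call above): values from
    // coord-config-default.xml are injected only for properties the user configuration does not
    // already define, so user-supplied properties always win over the application defaults.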
    /**
     * Resolve all the variables defined in the configuration and include the dataset definitions
     * from any dataset files into the job XML.
     *
     * @param appXml : Original job XML
     * @param conf : Configuration of the job
     * @param coordJob : Coordinator job bean to be populated.
     * @return : Resolved and modified job XML element.
     * @throws Exception
     */
    public Element basicResolveAndIncludeDS(String appXml, Configuration conf, CoordinatorJobBean coordJob)
            throws CoordinatorJobException, Exception {
        Element basicResolvedApp = resolveInitial(conf, appXml, coordJob);
        includeDataSets(basicResolvedApp, conf);
        return basicResolvedApp;
    }

    /**
     * Insert dataset definitions into the data-in and data-out tags of the application XML (the element is modified
     * in place).
     *
     * @param eAppXml : coordinator application XML
     * @param eDatasets : DataSet XML
     */
    private void insertDataSet(Element eAppXml, Element eDatasets) {
        // Adding DS definition in the coordinator XML
        Element inputList = eAppXml.getChild("input-events", eAppXml.getNamespace());
        if (inputList != null) {
            for (Element dataIn : (List<Element>) inputList.getChildren("data-in", eAppXml.getNamespace())) {
                Element eDataset = findDataSet(eDatasets, dataIn.getAttributeValue("dataset"));
                dataIn.getContent().add(0, eDataset);
            }
        }
        Element outputList = eAppXml.getChild("output-events", eAppXml.getNamespace());
        if (outputList != null) {
            for (Element dataOut : (List<Element>) outputList.getChildren("data-out", eAppXml.getNamespace())) {
                Element eDataset = findDataSet(eDatasets, dataOut.getAttributeValue("dataset"));
                dataOut.getContent().add(0, eDataset);
            }
        }
    }

    /**
     * Find a specific dataset in a list of datasets.
     *
     * @param eDatasets : List of datasets
     * @param name : queried dataset name
     * @return the matching dataset element; otherwise throw a RuntimeException
     */
    private static Element findDataSet(Element eDatasets, String name) {
        for (Element eDataset : (List<Element>) eDatasets.getChildren("dataset", eDatasets.getNamespace())) {
            if (eDataset.getAttributeValue("name").equals(name)) {
                eDataset = (Element) eDataset.clone();
                eDataset.detach();
                return eDataset;
            }
        }
        throw new RuntimeException("undefined dataset: " + name);
    }

    /**
     * Initialize all the required EL Evaluators.
     */
    protected void initEvaluators() {
        evalFreq = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-freq");
        evalNofuncs = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-nofuncs");
        evalInst = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-job-submit-instances");
        evalSla = CoordELEvaluator.createELEvaluatorForGroup(conf, "coord-sla-submit");
    }

    /**
     * Resolve basic entities using the job Configuration.
     *
     * @param conf : Job configuration
     * @param appXml : Original job XML
     * @param coordJob : Coordinator job bean to be populated.
     * @return Resolved job XML element.
     * @throws Exception
     */
    protected Element resolveInitial(Configuration conf, String appXml, CoordinatorJobBean coordJob)
            throws CoordinatorJobException, Exception {
        Element eAppXml = XmlUtils.parseXml(appXml);
        // job's main attributes
        // frequency
        String val = resolveAttribute("frequency", eAppXml, evalFreq);
        int ival = ParamChecker.checkInteger(val, "frequency");
        ParamChecker.checkGTZero(ival, "frequency");
        coordJob.setFrequency(ival);
        TimeUnit tmp = (evalFreq.getVariable("timeunit") == null) ? TimeUnit.MINUTE : ((TimeUnit) evalFreq
                .getVariable("timeunit"));
        addAnAttribute("freq_timeunit", eAppXml, tmp.toString()); // TODO: Store TimeUnit
        coordJob.setTimeUnit(CoordinatorJob.Timeunit.valueOf(tmp.toString()));
        // end of duration
        tmp = evalFreq.getVariable("endOfDuration") == null ? TimeUnit.NONE : ((TimeUnit) evalFreq
                .getVariable("endOfDuration"));
        addAnAttribute("end_of_duration", eAppXml, tmp.toString());
        // coordJob.setEndOfDuration(tmp) // TODO: Add new attribute in Job bean

        // start time
        val = resolveAttribute("start", eAppXml, evalNofuncs);
        ParamChecker.checkUTC(val, "start");
        coordJob.setStartTime(DateUtils.parseDateUTC(val));
        // end time
        val = resolveAttribute("end", eAppXml, evalNofuncs);
        ParamChecker.checkUTC(val, "end");
        coordJob.setEndTime(DateUtils.parseDateUTC(val));
        // time zone
        val = resolveAttribute("timezone", eAppXml, evalNofuncs);
        ParamChecker.checkTimeZone(val, "timezone");
        coordJob.setTimeZone(val);

        // controls
        val = resolveTagContents("timeout", eAppXml.getChild("controls", eAppXml.getNamespace()), evalNofuncs);
        if (val.equals("")) {
            val = Services.get().getConf().get(CONF_DEFAULT_TIMEOUT_NORMAL);
        }
        ival = ParamChecker.checkInteger(val, "timeout");
        // ParamChecker.checkGEZero(ival, "timeout");
        coordJob.setTimeout(ival);
        val = resolveTagContents("concurrency", eAppXml.getChild("controls", eAppXml.getNamespace()), evalNofuncs);
        if (val.equals("")) {
            val = "-1";
        }
        ival = ParamChecker.checkInteger(val, "concurrency");
        // ParamChecker.checkGEZero(ival, "concurrency");
        coordJob.setConcurrency(ival);
        val = resolveTagContents("execution", eAppXml.getChild("controls", eAppXml.getNamespace()), evalNofuncs);
        if (val.equals("")) {
            val = Execution.FIFO.toString();
        }
        String[] acceptedVals = {Execution.LIFO.toString(), Execution.FIFO.toString(), Execution.LAST_ONLY.toString()};
        ParamChecker.isMember(val, acceptedVals, "execution");
        coordJob.setExecution(Execution.valueOf(val));

        // datasets
        resolveTagContents("include", eAppXml.getChild("datasets", eAppXml.getNamespace()), evalNofuncs);
        // for each dataset
        resolveDataSets(eAppXml);
        HashMap<String, String> dataNameList = new HashMap<String, String>();
        resolveIOEvents(eAppXml, dataNameList);

        resolveTagContents("app-path", eAppXml.getChild("action", eAppXml.getNamespace()).getChild("workflow",
                eAppXml.getNamespace()), evalNofuncs);
        // TODO: if the action or workflow tag is missing, a NullPointerException will occur
        Element configElem = eAppXml.getChild("action", eAppXml.getNamespace()).getChild("workflow",
                eAppXml.getNamespace()).getChild("configuration", eAppXml.getNamespace());
        evalData = CoordELEvaluator.createELEvaluatorForDataEcho(conf, "coord-job-submit-data", dataNameList);
        if (configElem != null) {
            for (Element propElem : (List<Element>) configElem.getChildren("property", configElem.getNamespace())) {
                resolveTagContents("name", propElem, evalData);
                // log.warn("Value :");
                // Want to check the data-integrity but don't want to modify the XML,
                // for properties only: resolve against a clone
                Element tmpProp = (Element) propElem.clone();
                resolveTagContents("value", tmpProp, evalData);
                // val = resolveTagContents("value", propElem, evalData);
                // log.warn("Value OK :" + val);
            }
        }
        resolveSLA(eAppXml, coordJob);
        return eAppXml;
    }
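    // Worked example for the frequency handling above (illustrative; relies on the standard coord
    // EL frequency functions): an application attribute frequency="${coord:days(1)}" resolves to
    // "1" and the evaluator exposes the "timeunit" variable as DAY, so the job bean ends up with
    // frequency=1 and freq_timeunit/timeUnit=DAY. A plain numeric frequency such as frequency="60"
    // keeps the default time unit of MINUTE.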
    private void resolveSLA(Element eAppXml, CoordinatorJobBean coordJob) throws CommandException {
        // String prefix = XmlUtils.getNamespacePrefix(eAppXml, SchemaService.SLA_NAME_SPACE_URI);
        Element eSla = eAppXml.getChild("action", eAppXml.getNamespace()).getChild("info",
                Namespace.getNamespace(SchemaService.SLA_NAME_SPACE_URI));

        if (eSla != null) {
            String slaXml = XmlUtils.prettyPrint(eSla).toString();
            try {
                // EL evaluation
                slaXml = evalSla.evaluate(slaXml, String.class);
                // Validate against the SLA schema
                XmlUtils.validateData(slaXml, SchemaName.SLA_ORIGINAL);
            }
            catch (Exception e) {
                throw new CommandException(ErrorCode.E1004, "Validation ERROR :" + e.getMessage(), e);
            }
        }
    }

    /**
     * Resolve input-events/data-in and output-events/data-out tags.
     *
     * @param eJobOrg : Job element (not modified; a clone is resolved)
     * @param dataNameList : map populated with the event names and whether each is data-in or data-out
     * @throws CoordinatorJobException
     */
    private void resolveIOEvents(Element eJobOrg, HashMap<String, String> dataNameList) throws CoordinatorJobException {
        // Resolving input-events/data-in
        // Clone the job and don't update anything in the original
        Element eJob = (Element) eJobOrg.clone();
        Element inputList = eJob.getChild("input-events", eJob.getNamespace());
        if (inputList != null) {
            TreeSet<String> eventNameSet = new TreeSet<String>();
            for (Element dataIn : (List<Element>) inputList.getChildren("data-in", eJob.getNamespace())) {
                String dataInName = dataIn.getAttributeValue("name");
                dataNameList.put(dataInName, "data-in");
                // check whether there is any duplicate data-in name
                if (eventNameSet.contains(dataInName)) {
                    throw new RuntimeException("Duplicate dataIn name " + dataInName);
                }
                else {
                    eventNameSet.add(dataInName);
                }
                resolveTagContents("instance", dataIn, evalInst);
                resolveTagContents("start-instance", dataIn, evalInst);
                resolveTagContents("end-instance", dataIn, evalInst);
            }
        }
        // Resolving output-events/data-out
        Element outputList = eJob.getChild("output-events", eJob.getNamespace());
        if (outputList != null) {
            TreeSet<String> eventNameSet = new TreeSet<String>();
            for (Element dataOut : (List<Element>) outputList.getChildren("data-out", eJob.getNamespace())) {
                String dataOutName = dataOut.getAttributeValue("name");
                dataNameList.put(dataOutName, "data-out");
                // check whether there is any duplicate data-out name
                if (eventNameSet.contains(dataOutName)) {
                    throw new RuntimeException("Duplicate dataOut name " + dataOutName);
                }
                else {
                    eventNameSet.add(dataOutName);
                }
                resolveTagContents("instance", dataOut, evalInst);
            }
        }
    }
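    // For reference, the input-events block handled above typically looks like this
    // (illustrative; the names and the instance expression are made up):
    //
    //   <input-events>
    //       <data-in name="input" dataset="logs">
    //           <instance>${coord:current(0)}</instance>
    //       </data-in>
    //   </input-events>
    //
    // At submit time the instance EL is only syntax-checked and wrapped by the
    // "coord-job-submit-instances" evaluator; concrete instances are computed later, when
    // coordinator actions are materialized.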
    /**
     * Add an attribute to an XML element.
     *
     * @param attrName : attribute name
     * @param elem : Element to add the attribute to
     * @param value : Value of the attribute
     */
    private void addAnAttribute(String attrName, Element elem, String value) {
        elem.setAttribute(attrName, value);
    }

    /**
     * Resolve the datasets of a job element using the job configuration.
     *
     * @param eAppXml : Job Element XML
     * @throws Exception
     */
    private void resolveDataSets(Element eAppXml) throws Exception {
        Element datasetList = eAppXml.getChild("datasets", eAppXml.getNamespace());
        if (datasetList != null) {
            List<Element> dsElems = datasetList.getChildren("dataset", eAppXml.getNamespace());
            resolveDataSets(dsElems);
            resolveTagContents("app-path", eAppXml.getChild("action", eAppXml.getNamespace()).getChild("workflow",
                    eAppXml.getNamespace()), evalNofuncs);
        }
    }

    /**
     * Resolve a list of dataset elements using the job configuration.
     *
     * @param dsElems : Dataset XML elements.
     * @throws CoordinatorJobException
     */
    private void resolveDataSets(List<Element> dsElems) throws CoordinatorJobException {
        for (Element dsElem : dsElems) {
            // Setting up default TimeUnit and EndOfDuration
            evalFreq.setVariable("timeunit", TimeUnit.MINUTE);
            evalFreq.setVariable("endOfDuration", TimeUnit.NONE);

            String val = resolveAttribute("frequency", dsElem, evalFreq);
            int ival = ParamChecker.checkInteger(val, "frequency");
            ParamChecker.checkGTZero(ival, "frequency");
            addAnAttribute("freq_timeunit", dsElem, evalFreq.getVariable("timeunit") == null ? TimeUnit.MINUTE
                    .toString() : ((TimeUnit) evalFreq.getVariable("timeunit")).toString());
            addAnAttribute("end_of_duration", dsElem, evalFreq.getVariable("endOfDuration") == null ? TimeUnit.NONE
                    .toString() : ((TimeUnit) evalFreq.getVariable("endOfDuration")).toString());
            val = resolveAttribute("initial-instance", dsElem, evalNofuncs);
            ParamChecker.checkUTC(val, "initial-instance");
            val = resolveAttribute("timezone", dsElem, evalNofuncs);
            ParamChecker.checkTimeZone(val, "timezone");
            resolveTagContents("uri-template", dsElem, evalNofuncs);
            resolveTagContents("done-flag", dsElem, evalNofuncs);
        }
    }
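    // For reference, a dataset element resolved above typically looks like this
    // (illustrative; the name, times and URI template are made up):
    //
    //   <dataset name="logs" frequency="${coord:hours(1)}" initial-instance="2010-01-01T00:00Z"
    //            timezone="UTC">
    //       <uri-template>hdfs://namenode:9000/data/logs/${YEAR}/${MONTH}/${DAY}/${HOUR}</uri-template>
    //       <done-flag>_SUCCESS</done-flag>
    //   </dataset>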
    /**
     * Resolve the content of a tag, e.g. &lt;timeout&gt;10&lt;/timeout&gt;.
     *
     * @param tagName : Tag name in the job XML
     * @param elem : Element where the tag exists.
     * @param eval : ELEvaluator used to resolve the content
     * @return Resolved tag content.
     * @throws CoordinatorJobException
     */
    private String resolveTagContents(String tagName, Element elem, ELEvaluator eval) throws CoordinatorJobException {
        String ret = "";
        if (elem != null) {
            for (Element tagElem : (List<Element>) elem.getChildren(tagName, elem.getNamespace())) {
                if (tagElem != null) {
                    String updated;
                    try {
                        updated = CoordELFunctions.evalAndWrap(eval, tagElem.getText().trim());
                    }
                    catch (Exception e) {
                        // e.printStackTrace();
                        throw new CoordinatorJobException(ErrorCode.E1004, e.getMessage(), e);
                    }
                    tagElem.removeContent();
                    tagElem.addContent(updated);
                    ret += updated;
                }
                /*
                 * else { //TODO: unlike event }
                 */
            }
        }
        return ret;
    }

    /**
     * Resolve an attribute value.
     *
     * @param attrName : Attribute name.
     * @param elem : XML Element where the attribute is defined
     * @param eval : ELEvaluator used to resolve
     * @return Resolved attribute value, or null if the attribute is not present
     * @throws CoordinatorJobException
     */
    private String resolveAttribute(String attrName, Element elem, ELEvaluator eval) throws CoordinatorJobException {
        Attribute attr = elem.getAttribute(attrName);
        String val = null;
        if (attr != null) {
            try {
                val = CoordELFunctions.evalAndWrap(eval, attr.getValue().trim());
            }
            catch (Exception e) {
                // e.printStackTrace();
                throw new CoordinatorJobException(ErrorCode.E1004, e.getMessage(), e);
            }
            attr.setValue(val);
        }
        return val;
    }
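    // Worked example for the resolution helpers above (illustrative; the property name is made
    // up): with myTimeout=20 set in the job configuration, a control such as
    // <timeout>${myTimeout}</timeout> resolves to <timeout>20</timeout>, since the submit-time
    // evaluators expose job configuration properties as EL variables.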
    /**
     * Include the referenced datasets into the job XML.
     *
     * @param resolvedXml : Job XML element.
     * @param conf : Job configuration
     * @throws CoordinatorJobException
     */
    protected void includeDataSets(Element resolvedXml, Configuration conf) throws CoordinatorJobException {
        Element datasets = resolvedXml.getChild("datasets", resolvedXml.getNamespace());
        Element allDataSets = new Element("all_datasets", resolvedXml.getNamespace());
        List<String> dsList = new ArrayList<String>();
        if (datasets != null) {
            for (Element includeElem : (List<Element>) datasets.getChildren("include", datasets.getNamespace())) {
                String incDSFile = includeElem.getTextTrim();
                // log.warn(" incDSFile " + incDSFile);
                includeOneDSFile(incDSFile, dsList, allDataSets, datasets.getNamespace());
            }
            for (Element e : (List<Element>) datasets.getChildren("dataset", datasets.getNamespace())) {
                String dsName = (String) e.getAttributeValue("name");
                if (dsList.contains(dsName)) { // Override with this DS
                    // Remove old DS
                    removeDataSet(allDataSets, dsName);
                    // throw new RuntimeException("Duplicate Dataset " + dsName);
                }
                else {
                    dsList.add(dsName);
                }
                allDataSets.addContent((Element) e.clone());
            }
        }
        insertDataSet(resolvedXml, allDataSets);
        resolvedXml.removeChild("datasets", resolvedXml.getNamespace());
    }

    /**
     * Include one dataset file.
     *
     * @param incDSFile : Included dataset filename.
     * @param dsList : List of dataset names used to detect duplicates.
     * @param allDataSets : Element that includes all dataset definitions.
     * @param dsNameSpace : Dataset namespace
     * @throws CoordinatorJobException
     */
    private void includeOneDSFile(String incDSFile, List<String> dsList, Element allDataSets, Namespace dsNameSpace)
            throws CoordinatorJobException {
        Element tmpDataSets = null;
        try {
            String dsXml = readDefinition(incDSFile);
            log.debug("DSFILE :" + incDSFile + "\n" + dsXml);
            tmpDataSets = XmlUtils.parseXml(dsXml);
        }
        /*
         * catch (IOException iex) { XLog.getLog(getClass()).warn(
         * "Error reading included dataset file [{0}]. Message [{1}]", incDSFile, iex.getMessage());
         * throw new CommandException(ErrorCode.E0803, iex.getMessage()); }
         */
        catch (JDOMException e) {
            log.warn("Error parsing included dataset [{0}]. Message [{1}]", incDSFile, e.getMessage());
            throw new CoordinatorJobException(ErrorCode.E0700, e.getMessage());
        }
        resolveDataSets((List<Element>) tmpDataSets.getChildren("dataset"));
        for (Element e : (List<Element>) tmpDataSets.getChildren("dataset")) {
            String dsName = (String) e.getAttributeValue("name");
            if (dsList.contains(dsName)) {
                throw new RuntimeException("Duplicate Dataset " + dsName);
            }
            dsList.add(dsName);
            Element tmp = (Element) e.clone();
            // TODO: Don't like to over-write the external/include DS's namespace
            tmp.setNamespace(dsNameSpace); // TODO:
            tmp.getChild("uri-template").setNamespace(dsNameSpace);
            if (e.getChild("done-flag") != null) {
                tmp.getChild("done-flag").setNamespace(dsNameSpace);
            }
            allDataSets.addContent(tmp);
        }
        // nested include
        for (Element includeElem : (List<Element>) tmpDataSets.getChildren("include", tmpDataSets.getNamespace())) {
            String incFile = includeElem.getTextTrim();
            // log.warn("incDSFile " + incDSFile);
            includeOneDSFile(incFile, dsList, allDataSets, dsNameSpace);
        }
    }
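    // Dataset override semantics (descriptive note for the two methods above): a dataset defined
    // inline in coordinator.xml replaces an included dataset of the same name (the included copy
    // is removed from all_datasets), while a duplicate name across included files is rejected
    // outright.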
    /**
     * Remove a dataset from a list of datasets.
     *
     * @param eDatasets : List of datasets
     * @param name : Dataset name to be removed.
     */
    private static void removeDataSet(Element eDatasets, String name) {
        for (Element eDataset : (List<Element>) eDatasets.getChildren("dataset", eDatasets.getNamespace())) {
            if (eDataset.getAttributeValue("name").equals(name)) {
                eDataset.detach();
                return;
            }
        }
        throw new RuntimeException("undefined dataset: " + name);
    }

    /**
     * Read a coordinator definition (or an included dataset file) from the given application path.
     *
     * @param appPath application path.
     * @return the definition as a string.
     * @throws CoordinatorJobException thrown if the definition could not be read.
     */
    protected String readDefinition(String appPath) throws CoordinatorJobException {
        String user = ParamChecker.notEmpty(conf.get(OozieClient.USER_NAME), OozieClient.USER_NAME);
        String group = ParamChecker.notEmpty(conf.get(OozieClient.GROUP_NAME), OozieClient.GROUP_NAME);
        Configuration confHadoop = CoordUtils.getHadoopConf(conf);
        try {
            URI uri = new URI(appPath);
            log.debug("user =" + user + " group =" + group);
            FileSystem fs = Services.get().get(HadoopAccessorService.class).createFileSystem(user, group, uri, conf);
            Path p = new Path(uri.getPath());

            // Reader reader = new InputStreamReader(fs.open(new Path(uri.getPath(), fileName)));
            Reader reader = new InputStreamReader(fs.open(p)); // TODO
            StringWriter writer = new StringWriter();
            IOUtils.copyCharStream(reader, writer);
            return writer.toString();
        }
        catch (IOException ex) {
            log.warn("IOException :" + XmlUtils.prettyPrint(confHadoop), ex);
            throw new CoordinatorJobException(ErrorCode.E1001, ex.getMessage(), ex); // TODO:
        }
        catch (URISyntaxException ex) {
            log.warn("URISyntaxException :" + ex.getMessage());
            throw new CoordinatorJobException(ErrorCode.E1002, appPath, ex.getMessage(), ex); // TODO:
        }
        catch (HadoopAccessorException ex) {
            throw new CoordinatorJobException(ex);
        }
        catch (Exception ex) {
            log.warn("Exception :", ex);
            throw new CoordinatorJobException(ErrorCode.E1001, ex.getMessage(), ex); // TODO:
        }
    }

    /**
     * Write a coordinator job into the database.
     *
     * @param eJob : XML element of the job
     * @param store : Coordinator Store to write to.
     * @param coordJob : Coordinator job bean
     * @return Job id.
     * @throws StoreException
     */
    private String storeToDB(Element eJob, CoordinatorStore store, CoordinatorJobBean coordJob) throws StoreException {
        String jobId = Services.get().get(UUIDService.class).generateId(ApplicationType.COORDINATOR);
        coordJob.setId(jobId);
        coordJob.setAuthToken(this.authToken);
        coordJob.setAppName(eJob.getAttributeValue("name"));
        coordJob.setAppPath(conf.get(OozieClient.COORDINATOR_APP_PATH));
        coordJob.setStatus(CoordinatorJob.Status.PREP);
        coordJob.setCreatedTime(new Date()); // TODO: Do we need that?
        coordJob.setUser(conf.get(OozieClient.USER_NAME));
        coordJob.setGroup(conf.get(OozieClient.GROUP_NAME));
        coordJob.setConf(XmlUtils.prettyPrint(conf).toString());
        coordJob.setJobXml(XmlUtils.prettyPrint(eJob).toString());
        coordJob.setLastActionNumber(0);
        coordJob.setLastModifiedTime(new Date());

        if (!dryrun) {
            store.insertCoordinatorJob(coordJob);
        }
        return jobId;
    }
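    // Note on dryrun (descriptive summary of the behaviour in call() and storeToDB above): when
    // dryrun is set, the job is fully resolved but never persisted; call() instead returns the
    // resolved job XML followed by the actions that would be materialized for roughly the first
    // hour of the job.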
    /**
     * For unit-testing only. Will ultimately go away.
     *
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        // Configuration conf = new XConfiguration(IOUtils.getResourceAsReader(
        // "org/apache/oozie/coord/conf.xml", -1));

        Configuration conf = new XConfiguration();

        // base case
        // conf.set(OozieClient.COORDINATOR_APP_PATH, "file:///Users/danielwo/oozie/workflows/coord/test1/");

        // no input datasets
        // conf.set(OozieClient.COORDINATOR_APP_PATH, "file:///Users/danielwo/oozie/workflows/coord/coord_noinput/");
        // conf.set(OozieClient.COORDINATOR_APP_PATH, "file:///Users/danielwo/oozie/workflows/coord/coord_use_apppath/");

        // only 1 instance
        // conf.set(OozieClient.COORDINATOR_APP_PATH, "file:///Users/danielwo/oozie/workflows/coord/coord_oneinstance/");

        // no local props in xml
        // conf.set(OozieClient.COORDINATOR_APP_PATH, "file:///Users/danielwo/oozie/workflows/coord/coord_noprops/");

        conf.set(OozieClient.COORDINATOR_APP_PATH,
                "file:///homes/test/workspace/sandbox_krishna/oozie-main/core/src/main/java/org/apache/oozie/coord/");
        conf.set(OozieClient.USER_NAME, "test");
        // conf.set(OozieClient.USER_NAME, "danielwo");
        conf.set(OozieClient.GROUP_NAME, "other");
        // System.out.println("appXml :" + appXml + "\n conf :" + conf);
        new Services().init();
        try {
            CoordSubmitCommand sc = new CoordSubmitCommand(conf, "TESTING");
            String jobId = sc.call();
            System.out.println("Job Id " + jobId);
            Thread.sleep(80000);
        }
        finally {
            Services.get().destroy();
        }
    }
}