001 /** 002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved. 003 * Licensed under the Apache License, Version 2.0 (the "License"); 004 * you may not use this file except in compliance with the License. 005 * You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software 010 * distributed under the License is distributed on an "AS IS" BASIS, 011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 * See the License for the specific language governing permissions and 013 * limitations under the License. See accompanying LICENSE file. 014 */ 015 package org.apache.oozie.util; 016 017 import java.io.ByteArrayInputStream; 018 import java.io.IOException; 019 import java.io.InputStream; 020 import java.io.StringReader; 021 import java.io.StringWriter; 022 import java.util.Enumeration; 023 import java.util.Iterator; 024 import java.util.List; 025 import java.util.Map; 026 import java.util.Properties; 027 028 import javax.xml.XMLConstants; 029 import javax.xml.parsers.DocumentBuilderFactory; 030 import javax.xml.transform.Result; 031 import javax.xml.transform.Source; 032 import javax.xml.transform.Transformer; 033 import javax.xml.transform.TransformerFactory; 034 import javax.xml.transform.dom.DOMSource; 035 import javax.xml.transform.stream.StreamResult; 036 import javax.xml.transform.stream.StreamSource; 037 import javax.xml.validation.Schema; 038 import javax.xml.validation.SchemaFactory; 039 import javax.xml.validation.Validator; 040 041 import org.apache.hadoop.conf.Configuration; 042 import org.apache.oozie.service.SchemaService; 043 import org.apache.oozie.service.Services; 044 import org.apache.oozie.service.SchemaService.SchemaName; 045 import org.jdom.Comment; 046 import org.jdom.Document; 047 import org.jdom.Element; 048 import org.jdom.JDOMException; 049 import org.jdom.input.SAXBuilder; 050 import org.jdom.output.Format; 051 import org.jdom.output.XMLOutputter; 052 import org.xml.sax.EntityResolver; 053 import org.xml.sax.InputSource; 054 import org.xml.sax.SAXException; 055 056 /** 057 * XML utility methods. 058 */ 059 public class XmlUtils { 060 public static final String SLA_NAME_SPACE_URI = "uri:oozie:sla:0.1"; 061 062 private static class NoExternalEntityEntityResolver implements EntityResolver { 063 064 public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { 065 return new InputSource(new ByteArrayInputStream(new byte[0])); 066 } 067 068 } 069 070 private static SAXBuilder createSAXBuilder() { 071 SAXBuilder saxBuilder = new SAXBuilder(); 072 073 //THIS IS NOT WORKING 074 //saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false); 075 076 //INSTEAD WE ARE JUST SETTING AN EntityResolver that does not resolve entities 077 saxBuilder.setEntityResolver(new NoExternalEntityEntityResolver()); 078 return saxBuilder; 079 } 080 081 /** 082 * Remove comments from any Xml String. 083 * 084 * @param xmlStr XML string to remove comments. 085 * @return String after removing comments. 086 * @throws JDOMException thrown if an error happend while XML parsing. 087 */ 088 public static String removeComments(String xmlStr) throws JDOMException { 089 if (xmlStr == null) { 090 return null; 091 } 092 try { 093 SAXBuilder saxBuilder = createSAXBuilder(); 094 Document document = saxBuilder.build(new StringReader(xmlStr)); 095 removeComments(document); 096 return prettyPrint(document.getRootElement()).toString(); 097 } 098 catch (IOException ex) { 099 throw new RuntimeException("It should not happen, " + ex.getMessage(), ex); 100 } 101 } 102 103 private static void removeComments(List l) { 104 for (Iterator i = l.iterator(); i.hasNext();) { 105 Object node = i.next(); 106 if (node instanceof Comment) { 107 i.remove(); 108 } 109 else { 110 if (node instanceof Element) { 111 removeComments(((Element) node).getContent()); 112 } 113 } 114 } 115 } 116 117 private static void removeComments(Document doc) { 118 removeComments(doc.getContent()); 119 } 120 121 /** 122 * Parse a string assuming it is a valid XML document and return an JDOM Element for it. 123 * 124 * @param xmlStr XML string to parse. 125 * @return JDOM element for the parsed XML string. 126 * @throws JDOMException thrown if an error happend while XML parsing. 127 */ 128 public static Element parseXml(String xmlStr) throws JDOMException { 129 ParamChecker.notNull(xmlStr, "xmlStr"); 130 try { 131 SAXBuilder saxBuilder = createSAXBuilder(); 132 Document document = saxBuilder.build(new StringReader(xmlStr)); 133 return document.getRootElement(); 134 } 135 catch (IOException ex) { 136 throw new RuntimeException("It should not happen, " + ex.getMessage(), ex); 137 } 138 } 139 140 /** 141 * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it. 142 * 143 * @param is inputstream to parse. 144 * @return JDOM element for the parsed XML string. 145 * @throws JDOMException thrown if an error happend while XML parsing. 146 * @throws IOException thrown if an IO error occurred. 147 */ 148 public static Element parseXml(InputStream is) throws JDOMException, IOException { 149 ParamChecker.notNull(is, "is"); 150 SAXBuilder saxBuilder = createSAXBuilder(); 151 Document document = saxBuilder.build(is); 152 return document.getRootElement(); 153 } 154 155 /** 156 * //TODO move this to action registry method Return the value of an attribute from the root element of an XML 157 * document. 158 * 159 * @param filePath path of the XML document. 160 * @param attributeName attribute to retrieve value for. 161 * @return value of the specified attribute. 162 */ 163 public static String getRootAttribute(String filePath, String attributeName) { 164 ParamChecker.notNull(filePath, "filePath"); 165 ParamChecker.notNull(attributeName, "attributeName"); 166 SAXBuilder saxBuilder = createSAXBuilder(); 167 try { 168 Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath)); 169 return doc.getRootElement().getAttributeValue(attributeName); 170 } 171 catch (JDOMException e) { 172 throw new RuntimeException(); 173 } 174 catch (IOException e) { 175 throw new RuntimeException(); 176 } 177 } 178 179 /** 180 * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the 181 * {@link #toString} method is invoked. 182 */ 183 public static class PrettyPrint { 184 private String str; 185 private Element element; 186 187 private PrettyPrint(String str) { 188 this.str = str; 189 } 190 191 private PrettyPrint(Element element) { 192 this.element = ParamChecker.notNull(element, "element"); 193 } 194 195 /** 196 * Return the pretty print representation of an XML document. 197 * 198 * @return the pretty print representation of an XML document. 199 */ 200 @Override 201 public String toString() { 202 if (str != null) { 203 return str; 204 } 205 else { 206 XMLOutputter outputter = new XMLOutputter(); 207 StringWriter stringWriter = new StringWriter(); 208 outputter.setFormat(Format.getPrettyFormat()); 209 try { 210 outputter.output(element, stringWriter); 211 } 212 catch (Exception ex) { 213 throw new RuntimeException(ex); 214 } 215 return stringWriter.toString(); 216 } 217 } 218 } 219 220 /** 221 * Return a pretty print string for a JDOM Element. 222 * 223 * @param element JDOM element. 224 * @return pretty print of the given JDOM Element. 225 */ 226 public static PrettyPrint prettyPrint(Element element) { 227 return new PrettyPrint(element); 228 229 } 230 231 /** 232 * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original 233 * string. 234 * 235 * @param xmlStr XML string. 236 * @return prettyprint of the given XML string or the original string if the given string is not valid XML. 237 */ 238 public static PrettyPrint prettyPrint(String xmlStr) { 239 try { 240 return new PrettyPrint(parseXml(xmlStr)); 241 } 242 catch (Exception e) { 243 return new PrettyPrint(xmlStr); 244 } 245 } 246 247 /** 248 * Return a pretty print string for a Configuration object. 249 * 250 * @param conf Configuration object. 251 * @return prettyprint of the given Configuration object. 252 */ 253 public static PrettyPrint prettyPrint(Configuration conf) { 254 Element root = new Element("configuration"); 255 for (Map.Entry<String, String> entry : conf) { 256 Element property = new Element("property"); 257 Element name = new Element("name"); 258 name.setText(entry.getKey()); 259 Element value = new Element("value"); 260 value.setText(entry.getValue()); 261 property.addContent(name); 262 property.addContent(value); 263 root.addContent(property); 264 } 265 return new PrettyPrint(root); 266 } 267 268 /** 269 * Schema validation for a given xml. <p/> 270 * 271 * @param schema for validation 272 * @param xml to be validated 273 */ 274 public static void validateXml(Schema schema, String xml) throws SAXException, IOException { 275 276 Validator validator = schema.newValidator(); 277 validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes()))); 278 } 279 280 /** 281 * Create schema object for the given xsd 282 * 283 * @param is inputstream to schema. 284 * @return the schema object. 285 */ 286 public static Schema createSchema(InputStream is) { 287 SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); 288 StreamSource src = new StreamSource(is); 289 try { 290 return factory.newSchema(src); 291 } 292 catch (SAXException e) { 293 throw new RuntimeException(e.getMessage(), e); 294 } 295 } 296 297 public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException { 298 if (xmlData == null || xmlData.length() == 0) { 299 return; 300 } 301 javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile); 302 validateXml(schema, xmlData); 303 } 304 305 /** 306 * Convert Properties to string 307 * 308 * @param props 309 * @return xml string 310 * @throws IOException 311 */ 312 public static String writePropToString(Properties props) throws IOException { 313 try { 314 org.w3c.dom.Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); 315 org.w3c.dom.Element conf = doc.createElement("configuration"); 316 doc.appendChild(conf); 317 conf.appendChild(doc.createTextNode("\n")); 318 for (Enumeration e = props.keys(); e.hasMoreElements();) { 319 String name = (String) e.nextElement(); 320 Object object = props.get(name); 321 String value; 322 if (object instanceof String) { 323 value = (String) object; 324 } 325 else { 326 continue; 327 } 328 org.w3c.dom.Element propNode = doc.createElement("property"); 329 conf.appendChild(propNode); 330 331 org.w3c.dom.Element nameNode = doc.createElement("name"); 332 nameNode.appendChild(doc.createTextNode(name.trim())); 333 propNode.appendChild(nameNode); 334 335 org.w3c.dom.Element valueNode = doc.createElement("value"); 336 valueNode.appendChild(doc.createTextNode(value.trim())); 337 propNode.appendChild(valueNode); 338 339 conf.appendChild(doc.createTextNode("\n")); 340 } 341 342 Source source = new DOMSource(doc); 343 StringWriter stringWriter = new StringWriter(); 344 Result result = new StreamResult(stringWriter); 345 TransformerFactory factory = TransformerFactory.newInstance(); 346 Transformer transformer = factory.newTransformer(); 347 transformer.transform(source, result); 348 349 return stringWriter.getBuffer().toString(); 350 } 351 catch (Exception e) { 352 throw new IOException(e); 353 } 354 } 355 356 }