001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.util;
016    
017    import java.io.ByteArrayInputStream;
018    import java.io.IOException;
019    import java.io.InputStream;
020    import java.io.StringReader;
021    import java.io.StringWriter;
022    import java.util.Enumeration;
023    import java.util.Iterator;
024    import java.util.List;
025    import java.util.Map;
026    import java.util.Properties;
027    
028    import javax.xml.XMLConstants;
029    import javax.xml.parsers.DocumentBuilderFactory;
030    import javax.xml.transform.Result;
031    import javax.xml.transform.Source;
032    import javax.xml.transform.Transformer;
033    import javax.xml.transform.TransformerFactory;
034    import javax.xml.transform.dom.DOMSource;
035    import javax.xml.transform.stream.StreamResult;
036    import javax.xml.transform.stream.StreamSource;
037    import javax.xml.validation.Schema;
038    import javax.xml.validation.SchemaFactory;
039    import javax.xml.validation.Validator;
040    
041    import org.apache.hadoop.conf.Configuration;
042    import org.apache.oozie.service.SchemaService;
043    import org.apache.oozie.service.Services;
044    import org.apache.oozie.service.SchemaService.SchemaName;
045    import org.jdom.Comment;
046    import org.jdom.Document;
047    import org.jdom.Element;
048    import org.jdom.JDOMException;
049    import org.jdom.input.SAXBuilder;
050    import org.jdom.output.Format;
051    import org.jdom.output.XMLOutputter;
052    import org.xml.sax.EntityResolver;
053    import org.xml.sax.InputSource;
054    import org.xml.sax.SAXException;
055    
056    /**
057     * XML utility methods.
058     */
059    public class XmlUtils {
060        public static final String SLA_NAME_SPACE_URI = "uri:oozie:sla:0.1";
061    
062        private static class NoExternalEntityEntityResolver implements EntityResolver {
063    
064            public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
065                return new InputSource(new ByteArrayInputStream(new byte[0]));
066            }
067    
068        }
069    
070        private static SAXBuilder createSAXBuilder() {
071            SAXBuilder saxBuilder = new SAXBuilder();
072    
073            //THIS IS NOT WORKING
074            //saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
075    
076            //INSTEAD WE ARE JUST SETTING AN EntityResolver that does not resolve entities
077            saxBuilder.setEntityResolver(new NoExternalEntityEntityResolver());
078            return saxBuilder;
079        }
080    
081        /**
082         * Remove comments from any Xml String.
083         *
084         * @param xmlStr XML string to remove comments.
085         * @return String after removing comments.
086         * @throws JDOMException thrown if an error happend while XML parsing.
087         */
088        public static String removeComments(String xmlStr) throws JDOMException {
089            if (xmlStr == null) {
090                return null;
091            }
092            try {
093                SAXBuilder saxBuilder = createSAXBuilder();
094                Document document = saxBuilder.build(new StringReader(xmlStr));
095                removeComments(document);
096                return prettyPrint(document.getRootElement()).toString();
097            }
098            catch (IOException ex) {
099                throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
100            }
101        }
102    
103        private static void removeComments(List l) {
104            for (Iterator i = l.iterator(); i.hasNext();) {
105                Object node = i.next();
106                if (node instanceof Comment) {
107                    i.remove();
108                }
109                else {
110                    if (node instanceof Element) {
111                        removeComments(((Element) node).getContent());
112                    }
113                }
114            }
115        }
116    
117        private static void removeComments(Document doc) {
118            removeComments(doc.getContent());
119        }
120    
121        /**
122         * Parse a string assuming it is a valid XML document and return an JDOM Element for it.
123         *
124         * @param xmlStr XML string to parse.
125         * @return JDOM element for the parsed XML string.
126         * @throws JDOMException thrown if an error happend while XML parsing.
127         */
128        public static Element parseXml(String xmlStr) throws JDOMException {
129            ParamChecker.notNull(xmlStr, "xmlStr");
130            try {
131                SAXBuilder saxBuilder = createSAXBuilder();
132                Document document = saxBuilder.build(new StringReader(xmlStr));
133                return document.getRootElement();
134            }
135            catch (IOException ex) {
136                throw new RuntimeException("It should not happen, " + ex.getMessage(), ex);
137            }
138        }
139    
140        /**
141         * Parse a inputstream assuming it is a valid XML document and return an JDOM Element for it.
142         *
143         * @param is inputstream to parse.
144         * @return JDOM element for the parsed XML string.
145         * @throws JDOMException thrown if an error happend while XML parsing.
146         * @throws IOException thrown if an IO error occurred.
147         */
148        public static Element parseXml(InputStream is) throws JDOMException, IOException {
149            ParamChecker.notNull(is, "is");
150            SAXBuilder saxBuilder = createSAXBuilder();
151            Document document = saxBuilder.build(is);
152            return document.getRootElement();
153        }
154    
155        /**
156         * //TODO move this to action registry method Return the value of an attribute from the root element of an XML
157         * document.
158         *
159         * @param filePath path of the XML document.
160         * @param attributeName attribute to retrieve value for.
161         * @return value of the specified attribute.
162         */
163        public static String getRootAttribute(String filePath, String attributeName) {
164            ParamChecker.notNull(filePath, "filePath");
165            ParamChecker.notNull(attributeName, "attributeName");
166            SAXBuilder saxBuilder = createSAXBuilder();
167            try {
168                Document doc = saxBuilder.build(Thread.currentThread().getContextClassLoader().getResourceAsStream(filePath));
169                return doc.getRootElement().getAttributeValue(attributeName);
170            }
171            catch (JDOMException e) {
172                throw new RuntimeException();
173            }
174            catch (IOException e) {
175                throw new RuntimeException();
176            }
177        }
178    
179        /**
180         * Pretty print string representation of an XML document that generates the pretty print on lazy mode when the
181         * {@link #toString} method is invoked.
182         */
183        public static class PrettyPrint {
184            private String str;
185            private Element element;
186    
187            private PrettyPrint(String str) {
188                this.str = str;
189            }
190    
191            private PrettyPrint(Element element) {
192                this.element = ParamChecker.notNull(element, "element");
193            }
194    
195            /**
196             * Return the pretty print representation of an XML document.
197             *
198             * @return the pretty print representation of an XML document.
199             */
200            @Override
201            public String toString() {
202                if (str != null) {
203                    return str;
204                }
205                else {
206                    XMLOutputter outputter = new XMLOutputter();
207                    StringWriter stringWriter = new StringWriter();
208                    outputter.setFormat(Format.getPrettyFormat());
209                    try {
210                        outputter.output(element, stringWriter);
211                    }
212                    catch (Exception ex) {
213                        throw new RuntimeException(ex);
214                    }
215                    return stringWriter.toString();
216                }
217            }
218        }
219    
220        /**
221         * Return a pretty print string for a JDOM Element.
222         *
223         * @param element JDOM element.
224         * @return pretty print of the given JDOM Element.
225         */
226        public static PrettyPrint prettyPrint(Element element) {
227            return new PrettyPrint(element);
228    
229        }
230    
231        /**
232         * Return a pretty print string for a XML string. If the given string is not valid XML it returns the original
233         * string.
234         *
235         * @param xmlStr XML string.
236         * @return prettyprint of the given XML string or the original string if the given string is not valid XML.
237         */
238        public static PrettyPrint prettyPrint(String xmlStr) {
239            try {
240                return new PrettyPrint(parseXml(xmlStr));
241            }
242            catch (Exception e) {
243                return new PrettyPrint(xmlStr);
244            }
245        }
246    
247        /**
248         * Return a pretty print string for a Configuration object.
249         *
250         * @param conf Configuration object.
251         * @return prettyprint of the given Configuration object.
252         */
253        public static PrettyPrint prettyPrint(Configuration conf) {
254            Element root = new Element("configuration");
255            for (Map.Entry<String, String> entry : conf) {
256                Element property = new Element("property");
257                Element name = new Element("name");
258                name.setText(entry.getKey());
259                Element value = new Element("value");
260                value.setText(entry.getValue());
261                property.addContent(name);
262                property.addContent(value);
263                root.addContent(property);
264            }
265            return new PrettyPrint(root);
266        }
267    
268        /**
269         * Schema validation for a given xml. <p/>
270         *
271         * @param schema for validation
272         * @param xml to be validated
273         */
274        public static void validateXml(Schema schema, String xml) throws SAXException, IOException {
275    
276            Validator validator = schema.newValidator();
277            validator.validate(new StreamSource(new ByteArrayInputStream(xml.getBytes())));
278        }
279    
280        /**
281         * Create schema object for the given xsd
282         *
283         * @param is inputstream to schema.
284         * @return the schema object.
285         */
286        public static Schema createSchema(InputStream is) {
287            SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
288            StreamSource src = new StreamSource(is);
289            try {
290                return factory.newSchema(src);
291            }
292            catch (SAXException e) {
293                throw new RuntimeException(e.getMessage(), e);
294            }
295        }
296    
297        public static void validateData(String xmlData, SchemaName xsdFile) throws SAXException, IOException {
298            if (xmlData == null || xmlData.length() == 0) {
299                return;
300            }
301            javax.xml.validation.Schema schema = Services.get().get(SchemaService.class).getSchema(xsdFile);
302            validateXml(schema, xmlData);
303        }
304    
305        /**
306         * Convert Properties to string
307         *
308         * @param props
309         * @return xml string
310         * @throws IOException
311         */
312        public static String writePropToString(Properties props) throws IOException {
313            try {
314                org.w3c.dom.Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
315                org.w3c.dom.Element conf = doc.createElement("configuration");
316                doc.appendChild(conf);
317                conf.appendChild(doc.createTextNode("\n"));
318                for (Enumeration e = props.keys(); e.hasMoreElements();) {
319                    String name = (String) e.nextElement();
320                    Object object = props.get(name);
321                    String value;
322                    if (object instanceof String) {
323                        value = (String) object;
324                    }
325                    else {
326                        continue;
327                    }
328                    org.w3c.dom.Element propNode = doc.createElement("property");
329                    conf.appendChild(propNode);
330    
331                    org.w3c.dom.Element nameNode = doc.createElement("name");
332                    nameNode.appendChild(doc.createTextNode(name.trim()));
333                    propNode.appendChild(nameNode);
334    
335                    org.w3c.dom.Element valueNode = doc.createElement("value");
336                    valueNode.appendChild(doc.createTextNode(value.trim()));
337                    propNode.appendChild(valueNode);
338    
339                    conf.appendChild(doc.createTextNode("\n"));
340                }
341    
342                Source source = new DOMSource(doc);
343                StringWriter stringWriter = new StringWriter();
344                Result result = new StreamResult(stringWriter);
345                TransformerFactory factory = TransformerFactory.newInstance();
346                Transformer transformer = factory.newTransformer();
347                transformer.transform(source, result);
348    
349                return stringWriter.getBuffer().toString();
350            }
351            catch (Exception e) {
352                throw new IOException(e);
353            }
354        }
355    
356    }