View Javadoc
1   /*
2    * Copyright (c) 2002-2026 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.xml;
16  
17  import static java.nio.charset.StandardCharsets.UTF_8;
18  
19  import java.io.IOException;
20  import java.nio.charset.Charset;
21  import java.util.HashMap;
22  
23  import javax.xml.parsers.ParserConfigurationException;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.htmlunit.SgmlPage;
28  import org.htmlunit.WebResponse;
29  import org.htmlunit.WebWindow;
30  import org.htmlunit.html.DomElement;
31  import org.htmlunit.html.DomProcessingInstruction;
32  import org.htmlunit.util.MimeType;
33  import org.htmlunit.util.XmlUtils;
34  import org.w3c.dom.Attr;
35  import org.w3c.dom.DOMConfiguration;
36  import org.w3c.dom.DOMImplementation;
37  import org.w3c.dom.Document;
38  import org.w3c.dom.DocumentType;
39  import org.w3c.dom.Element;
40  import org.w3c.dom.EntityReference;
41  import org.w3c.dom.Node;
42  import org.xml.sax.SAXException;
43  
44  /**
45   * A page that will be returned for response with content type "text/xml".
46   *
47   * @author Marc Guillemot
48   * @author David K. Taylor
49   * @author Ahmed Ashour
50   * @author Frank Danek
51   * @author Ronald Brill
52   */
53  public class XmlPage extends SgmlPage {
54  
55      private static final Log LOG = LogFactory.getLog(XmlPage.class);
56  
57      private Node node_;
58  
59      /**
60       * Creates an instance.
61       * A warning is logged if an exception is thrown while parsing the XML content
62       * (for instance when the content is not a valid XML and can't be parsed).
63       *
64       * @param webResponse the response from the server
65       * @param enclosingWindow the window that holds the page
66       * @throws IOException if the page could not be created
67       */
68      public XmlPage(final WebResponse webResponse, final WebWindow enclosingWindow) throws IOException {
69          this(webResponse, enclosingWindow, true);
70      }
71  
72      /**
73       * Creates an instance.
74       * A warning is logged if an exception is thrown while parsing the XML content
75       * (for instance when the content is not a valid XML and can't be parsed).
76       *
77       * @param node the node to initialize this page with
78       * @param enclosingWindow the window that holds the page
79       */
80      public XmlPage(final Node node, final WebWindow enclosingWindow) {
81          super(null, enclosingWindow);
82          node_ = node;
83          if (node_ != null) {
84              XmlUtils.appendChild(this, this, node_, true);
85          }
86      }
87  
88      /**
89       * Creates an instance.
90       * A warning is logged if an exception is thrown while parsing the XML content
91       * (for instance when the content is not a valid XML and can't be parsed).
92       *
93       * @param webResponse the response from the server
94       * @param enclosingWindow the window that holds the page
95       * @param ignoreSAXException Whether to ignore {@link SAXException} or throw it as {@link IOException}
96       * @throws IOException if the page could not be created
97       */
98      public XmlPage(final WebResponse webResponse, final WebWindow enclosingWindow, final boolean ignoreSAXException)
99          throws IOException {
100         this(webResponse, enclosingWindow, ignoreSAXException, true);
101     }
102 
103     /**
104      * Creates an instance.
105      * A warning is logged if an exception is thrown while parsing the XML content
106      * (for instance when the content is not a valid XML and can't be parsed).
107      *
108      * @param webResponse the response from the server
109      * @param enclosingWindow the window that holds the page
110      * @param ignoreSAXException Whether to ignore {@link SAXException} or throw it as {@link IOException}
111      * @param handleXHTMLAsHTML if true elements from the XHTML namespace are handled as HTML elements instead of
112      *     DOM elements
113      * @throws IOException if the page could not be created
114      */
115     public XmlPage(final WebResponse webResponse, final WebWindow enclosingWindow, final boolean ignoreSAXException,
116         final boolean handleXHTMLAsHTML) throws IOException {
117         super(webResponse, enclosingWindow);
118 
119         try {
120             try {
121                 final Document document = XmlUtils.buildDocument(webResponse);
122                 node_ = document.getFirstChild();
123             }
124             catch (final SAXException e) {
125                 if (LOG.isWarnEnabled()) {
126                     LOG.warn("Failed parsing XML document '" + webResponse.getWebRequest().getUrl() + "'", e);
127                 }
128                 if (!ignoreSAXException) {
129                     throw new IOException(
130                             "Failed parsing XML document '" + webResponse.getWebRequest().getUrl() + "'", e);
131                 }
132             }
133         }
134         catch (final ParserConfigurationException e) {
135             if (LOG.isWarnEnabled()) {
136                 if (null == webResponse) {
137                     LOG.warn("Failed parsing XML empty document: " + e.getMessage(), e);
138                 }
139                 else {
140                     LOG.warn("Failed parsing XML empty document '" + webResponse.getWebRequest().getUrl() + "'", e);
141                 }
142             }
143         }
144 
145         for (Node node = node_; node != null; node = node.getNextSibling()) {
146             XmlUtils.appendChild(this, this, node, handleXHTMLAsHTML);
147         }
148     }
149 
150     /**
151      * {@inheritDoc}
152      */
153     @Override
154     public void initialize() throws IOException {
155         // nothing to do here
156     }
157 
158     /**
159      * {@inheritDoc}
160      */
161     @Override
162     public boolean hasCaseSensitiveTagNames() {
163         return true;
164     }
165 
166     /**
167      * Returns the DOM representation of the XML content.
168      * @return {@code null} if the content couldn't be parsed
169      */
170     public Document getXmlDocument() {
171         if (node_ != null) {
172             return node_.getOwnerDocument();
173         }
174         return null;
175     }
176 
177     /**
178      * {@inheritDoc}
179      * Not yet implemented.
180      */
181     @Override
182     public Node adoptNode(final Node source) {
183         throw new UnsupportedOperationException("XmlPage.adoptNode is not yet implemented.");
184     }
185 
186     /**
187      * {@inheritDoc}
188      * Not yet implemented.
189      */
190     @Override
191     public Attr createAttributeNS(final String namespaceURI, final String qualifiedName) {
192         throw new UnsupportedOperationException("XmlPage.createAttributeNS is not yet implemented.");
193     }
194 
195     /**
196      * {@inheritDoc}
197      */
198     @Override
199     public DomElement createElement(final String tagName) {
200         return createElementNS(null, tagName);
201     }
202 
203     /**
204      * {@inheritDoc}
205      */
206     @Override
207     public DomElement createElementNS(final String namespaceURI, final String qualifiedName) {
208         return new DomElement(namespaceURI, qualifiedName, this, new HashMap<>());
209     }
210 
211     /**
212      * {@inheritDoc}
213      * Not yet implemented.
214      */
215     @Override
216     public EntityReference createEntityReference(final String name) {
217         throw new UnsupportedOperationException("XmlPage.createEntityReference is not yet implemented.");
218     }
219 
220     /**
221      * {@inheritDoc}
222      */
223     @Override
224     public DomProcessingInstruction createProcessingInstruction(final String target, final String data) {
225         return new DomProcessingInstruction(this, target, data);
226     }
227 
228     /**
229      * {@inheritDoc}
230      * Not yet implemented.
231      */
232     @Override
233     public String getDocumentURI() {
234         throw new UnsupportedOperationException("XmlPage.getDocumentURI is not yet implemented.");
235     }
236 
237     /**
238      * {@inheritDoc}
239      * Not yet implemented.
240      */
241     @Override
242     public DOMConfiguration getDomConfig() {
243         throw new UnsupportedOperationException("XmlPage.getDomConfig is not yet implemented.");
244     }
245 
246     /**
247      * {@inheritDoc}
248      * Not yet implemented.
249      */
250     @Override
251     public Element getElementById(final String elementId) {
252         throw new UnsupportedOperationException("XmlPage.getElementById is not yet implemented.");
253     }
254 
255     /**
256      * {@inheritDoc}
257      * Not yet implemented.
258      */
259     @Override
260     public DOMImplementation getImplementation() {
261         throw new UnsupportedOperationException("XmlPage.getImplementation is not yet implemented.");
262     }
263 
264     /**
265      * {@inheritDoc}
266      * Not yet implemented.
267      */
268     @Override
269     public String getInputEncoding() {
270         throw new UnsupportedOperationException("XmlPage.getInputEncoding is not yet implemented.");
271     }
272 
273     /**
274      * {@inheritDoc}
275      * Not yet implemented.
276      */
277     @Override
278     public boolean getStrictErrorChecking() {
279         throw new UnsupportedOperationException("XmlPage.getStrictErrorChecking is not yet implemented.");
280     }
281 
282     /**
283      * {@inheritDoc}
284      */
285     @Override
286     public String getXmlEncoding() {
287         return null;
288     }
289 
290     /**
291      * {@inheritDoc}
292      */
293     @Override
294     public boolean getXmlStandalone() {
295         return false;
296     }
297 
298     /**
299      * {@inheritDoc}
300      */
301     @Override
302     public String getXmlVersion() {
303         return "1.0";
304     }
305 
306     /**
307      * {@inheritDoc}
308      * Not yet implemented.
309      */
310     @Override
311     public Node importNode(final Node importedNode, final boolean deep) {
312         throw new UnsupportedOperationException("XmlPage.importNode is not yet implemented.");
313     }
314 
315     /**
316      * {@inheritDoc}
317      * Not yet implemented.
318      */
319     @Override
320     public Node renameNode(final Node n, final String namespaceURI, final String qualifiedName) {
321         throw new UnsupportedOperationException("XmlPage.renameNode is not yet implemented.");
322     }
323 
324     /**
325      * {@inheritDoc}
326      * Not yet implemented.
327      */
328     @Override
329     public void setDocumentURI(final String documentURI) {
330         throw new UnsupportedOperationException("XmlPage.setDocumentURI is not yet implemented.");
331     }
332 
333     /**
334      * {@inheritDoc}
335      * Not yet implemented.
336      */
337     @Override
338     public void setStrictErrorChecking(final boolean strictErrorChecking) {
339         throw new UnsupportedOperationException("XmlPage.setStrictErrorChecking is not yet implemented.");
340     }
341 
342     /**
343      * {@inheritDoc}
344      * Not yet implemented.
345      */
346     @Override
347     public void setXmlStandalone(final boolean xmlStandalone) {
348         throw new UnsupportedOperationException("XmlPage.setXmlStandalone is not yet implemented.");
349     }
350 
351     /**
352      * {@inheritDoc}
353      * Not yet implemented.
354      */
355     @Override
356     public void setXmlVersion(final String xmlVersion) {
357         throw new UnsupportedOperationException("XmlPage.setXmlVersion is not yet implemented.");
358     }
359 
360     /**
361      * {@inheritDoc}
362      */
363     @Override
364     public Charset getCharset() {
365         return UTF_8;
366     }
367 
368     /**
369      * {@inheritDoc}
370      */
371     @Override
372     public String getContentType() {
373         return MimeType.APPLICATION_XML;
374     }
375 
376     /**
377      * {@inheritDoc}
378      */
379     @Override
380     public void setDocumentType(final DocumentType type) {
381         super.setDocumentType(type);
382     }
383 
384     /**
385      * {@inheritDoc}
386      */
387     @Override
388     public void setNodeValue(final String value) {
389         // Default behavior is to do nothing, overridden in some subclasses
390     }
391 
392     /**
393      * {@inheritDoc}
394      */
395     @Override
396     public void setPrefix(final String prefix) {
397         // Empty.
398     }
399 }