View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.xml;
16  
17  import static java.nio.charset.StandardCharsets.UTF_8;
18  
19  import java.io.IOException;
20  import java.nio.charset.Charset;
21  import java.util.HashMap;
22  import java.util.List;
23  import java.util.Map;
24  
25  import javax.xml.parsers.ParserConfigurationException;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.htmlunit.SgmlPage;
30  import org.htmlunit.WebResponse;
31  import org.htmlunit.WebWindow;
32  import org.htmlunit.html.DomElement;
33  import org.htmlunit.html.DomProcessingInstruction;
34  import org.htmlunit.util.MimeType;
35  import org.htmlunit.util.XmlUtils;
36  import org.w3c.dom.Attr;
37  import org.w3c.dom.DOMConfiguration;
38  import org.w3c.dom.DOMImplementation;
39  import org.w3c.dom.Document;
40  import org.w3c.dom.DocumentType;
41  import org.w3c.dom.Element;
42  import org.w3c.dom.EntityReference;
43  import org.w3c.dom.Node;
44  import org.xml.sax.SAXException;
45  
46  /**
47   * A page that will be returned for response with content type "text/xml".
48   *
49   * @author Marc Guillemot
50   * @author David K. Taylor
51   * @author Ahmed Ashour
52   * @author Frank Danek
53   */
54  public class XmlPage extends SgmlPage {
55  
56      private static final Log LOG = LogFactory.getLog(XmlPage.class);
57  
58      private Node node_;
59  
60      /**
61       * Creates an instance.
62       * A warning is logged if an exception is thrown while parsing the XML content
63       * (for instance when the content is not a valid XML and can't be parsed).
64       *
65       * @param webResponse the response from the server
66       * @param enclosingWindow the window that holds the page
67       * @throws IOException if the page could not be created
68       */
69      public XmlPage(final WebResponse webResponse, final WebWindow enclosingWindow) throws IOException {
70          this(webResponse, enclosingWindow, true);
71      }
72  
73      /**
74       * Creates an instance.
75       * A warning is logged if an exception is thrown while parsing the XML content
76       * (for instance when the content is not a valid XML and can't be parsed).
77       *
78       * @param node the node to initialize this page with
79       * @param enclosingWindow the window that holds the page
80       */
81      public XmlPage(final Node node, final WebWindow enclosingWindow) {
82          super(null, enclosingWindow);
83          node_ = node;
84          if (node_ != null) {
85              XmlUtils.appendChild(this, this, node_, true);
86          }
87      }
88  
89      /**
90       * Creates an instance.
91       * A warning is logged if an exception is thrown while parsing the XML content
92       * (for instance when the content is not a valid XML and can't be parsed).
93       *
94       * @param webResponse the response from the server
95       * @param enclosingWindow the window that holds the page
96       * @param ignoreSAXException Whether to ignore {@link SAXException} or throw it as {@link IOException}
97       * @throws IOException if the page could not be created
98       */
99      public XmlPage(final WebResponse webResponse, final WebWindow enclosingWindow, final boolean ignoreSAXException)
100         throws IOException {
101         this(webResponse, enclosingWindow, ignoreSAXException, true);
102     }
103 
104     /**
105      * Creates an instance.
106      * A warning is logged if an exception is thrown while parsing the XML content
107      * (for instance when the content is not a valid XML and can't be parsed).
108      *
109      * @param webResponse the response from the server
110      * @param enclosingWindow the window that holds the page
111      * @param ignoreSAXException Whether to ignore {@link SAXException} or throw it as {@link IOException}
112      * @param handleXHTMLAsHTML if true elements from the XHTML namespace are handled as HTML elements instead of
113      *     DOM elements
114      * @throws IOException if the page could not be created
115      */
116     public XmlPage(final WebResponse webResponse, final WebWindow enclosingWindow, final boolean ignoreSAXException,
117         final boolean handleXHTMLAsHTML) throws IOException {
118         super(webResponse, enclosingWindow);
119 
120         try {
121             try {
122                 final Document document = XmlUtils.buildDocument(webResponse);
123                 node_ = document.getFirstChild();
124             }
125             catch (final SAXException e) {
126                 if (LOG.isWarnEnabled()) {
127                     LOG.warn("Failed parsing XML document '" + webResponse.getWebRequest().getUrl() + "'", e);
128                 }
129                 if (!ignoreSAXException) {
130                     throw new IOException(e.getMessage());
131                 }
132             }
133         }
134         catch (final ParserConfigurationException e) {
135             if (LOG.isWarnEnabled()) {
136                 if (null == webResponse) {
137                     LOG.warn("Failed parsing XML empty document: " + e.getMessage(), e);
138                 }
139                 else {
140                     LOG.warn("Failed parsing XML empty document '" + webResponse.getWebRequest().getUrl() + "'", e);
141                 }
142             }
143         }
144 
145         final Map<Integer, List<String>> attributesOrderMap;
146         if (node_ != null) {
147             attributesOrderMap = XmlUtils.getAttributesOrderMap(node_.getOwnerDocument());
148         }
149         else {
150             attributesOrderMap = null;
151         }
152         for (Node node = node_; node != null; node = node.getNextSibling()) {
153             XmlUtils.appendChild(this, this, node, handleXHTMLAsHTML, attributesOrderMap);
154         }
155     }
156 
157     /**
158      * {@inheritDoc}
159      */
160     @Override
161     public void initialize() throws IOException {
162         // nothing to do here
163     }
164 
165     /**
166      * {@inheritDoc}
167      */
168     @Override
169     public boolean hasCaseSensitiveTagNames() {
170         return true;
171     }
172 
173     /**
174      * Returns the DOM representation of the XML content.
175      * @return {@code null} if the content couldn't be parsed
176      */
177     public Document getXmlDocument() {
178         if (node_ != null) {
179             return node_.getOwnerDocument();
180         }
181         return null;
182     }
183 
184     /**
185      * {@inheritDoc}
186      * Not yet implemented.
187      */
188     @Override
189     public Node adoptNode(final Node source) {
190         throw new UnsupportedOperationException("XmlPage.adoptNode is not yet implemented.");
191     }
192 
193     /**
194      * {@inheritDoc}
195      * Not yet implemented.
196      */
197     @Override
198     public Attr createAttributeNS(final String namespaceURI, final String qualifiedName) {
199         throw new UnsupportedOperationException("XmlPage.createAttributeNS is not yet implemented.");
200     }
201 
202     /**
203      * {@inheritDoc}
204      */
205     @Override
206     public DomElement createElement(final String tagName) {
207         return createElementNS(null, tagName);
208     }
209 
210     /**
211      * {@inheritDoc}
212      */
213     @Override
214     public DomElement createElementNS(final String namespaceURI, final String qualifiedName) {
215         return new DomElement(namespaceURI, qualifiedName, this, new HashMap<>());
216     }
217 
218     /**
219      * {@inheritDoc}
220      * Not yet implemented.
221      */
222     @Override
223     public EntityReference createEntityReference(final String name) {
224         throw new UnsupportedOperationException("XmlPage.createEntityReference is not yet implemented.");
225     }
226 
227     /**
228      * {@inheritDoc}
229      */
230     @Override
231     public DomProcessingInstruction createProcessingInstruction(final String target, final String data) {
232         return new DomProcessingInstruction(this, target, data);
233     }
234 
235     /**
236      * {@inheritDoc}
237      * Not yet implemented.
238      */
239     @Override
240     public String getDocumentURI() {
241         throw new UnsupportedOperationException("XmlPage.getDocumentURI is not yet implemented.");
242     }
243 
244     /**
245      * {@inheritDoc}
246      * Not yet implemented.
247      */
248     @Override
249     public DOMConfiguration getDomConfig() {
250         throw new UnsupportedOperationException("XmlPage.getDomConfig is not yet implemented.");
251     }
252 
253     /**
254      * {@inheritDoc}
255      * Not yet implemented.
256      */
257     @Override
258     public Element getElementById(final String elementId) {
259         throw new UnsupportedOperationException("XmlPage.getElementById is not yet implemented.");
260     }
261 
262     /**
263      * {@inheritDoc}
264      * Not yet implemented.
265      */
266     @Override
267     public DOMImplementation getImplementation() {
268         throw new UnsupportedOperationException("XmlPage.getImplementation is not yet implemented.");
269     }
270 
271     /**
272      * {@inheritDoc}
273      * Not yet implemented.
274      */
275     @Override
276     public String getInputEncoding() {
277         throw new UnsupportedOperationException("XmlPage.getInputEncoding is not yet implemented.");
278     }
279 
280     /**
281      * {@inheritDoc}
282      * Not yet implemented.
283      */
284     @Override
285     public boolean getStrictErrorChecking() {
286         throw new UnsupportedOperationException("XmlPage.getStrictErrorChecking is not yet implemented.");
287     }
288 
289     /**
290      * {@inheritDoc}
291      */
292     @Override
293     public String getXmlEncoding() {
294         return null;
295     }
296 
297     /**
298      * {@inheritDoc}
299      */
300     @Override
301     public boolean getXmlStandalone() {
302         return false;
303     }
304 
305     /**
306      * {@inheritDoc}
307      */
308     @Override
309     public String getXmlVersion() {
310         return "1.0";
311     }
312 
313     /**
314      * {@inheritDoc}
315      * Not yet implemented.
316      */
317     @Override
318     public Node importNode(final Node importedNode, final boolean deep) {
319         throw new UnsupportedOperationException("XmlPage.importNode is not yet implemented.");
320     }
321 
322     /**
323      * {@inheritDoc}
324      * Not yet implemented.
325      */
326     @Override
327     public Node renameNode(final Node n, final String namespaceURI, final String qualifiedName) {
328         throw new UnsupportedOperationException("XmlPage.renameNode is not yet implemented.");
329     }
330 
331     /**
332      * {@inheritDoc}
333      * Not yet implemented.
334      */
335     @Override
336     public void setDocumentURI(final String documentURI) {
337         throw new UnsupportedOperationException("XmlPage.setDocumentURI is not yet implemented.");
338     }
339 
340     /**
341      * {@inheritDoc}
342      * Not yet implemented.
343      */
344     @Override
345     public void setStrictErrorChecking(final boolean strictErrorChecking) {
346         throw new UnsupportedOperationException("XmlPage.setStrictErrorChecking is not yet implemented.");
347     }
348 
349     /**
350      * {@inheritDoc}
351      * Not yet implemented.
352      */
353     @Override
354     public void setXmlStandalone(final boolean xmlStandalone) {
355         throw new UnsupportedOperationException("XmlPage.setXmlStandalone is not yet implemented.");
356     }
357 
358     /**
359      * {@inheritDoc}
360      * Not yet implemented.
361      */
362     @Override
363     public void setXmlVersion(final String xmlVersion) {
364         throw new UnsupportedOperationException("XmlPage.setXmlVersion is not yet implemented.");
365     }
366 
367     /**
368      * {@inheritDoc}
369      */
370     @Override
371     public Charset getCharset() {
372         return UTF_8;
373     }
374 
375     /**
376      * {@inheritDoc}
377      */
378     @Override
379     public String getContentType() {
380         return MimeType.APPLICATION_XML;
381     }
382 
383     /**
384      * {@inheritDoc}
385      */
386     @Override
387     public void setDocumentType(final DocumentType type) {
388         super.setDocumentType(type);
389     }
390 
391     /**
392      * {@inheritDoc}
393      */
394     @Override
395     public void setNodeValue(final String value) {
396         // Default behavior is to do nothing, overridden in some subclasses
397     }
398 
399     /**
400      * {@inheritDoc}
401      */
402     @Override
403     public void setPrefix(final String prefix) {
404         // Empty.
405     }
406 }