View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.xml;
16  
17  import static java.nio.charset.StandardCharsets.UTF_8;
18  
19  import java.io.IOException;
20  import java.nio.ByteBuffer;
21  import java.util.HashMap;
22  import java.util.Map;
23  
24  import javax.servlet.Servlet;
25  import javax.servlet.http.HttpServlet;
26  import javax.servlet.http.HttpServletRequest;
27  import javax.servlet.http.HttpServletResponse;
28  
29  import org.apache.commons.io.ByteOrderMark;
30  import org.htmlunit.MockWebConnection;
31  import org.htmlunit.Page;
32  import org.htmlunit.WebClient;
33  import org.htmlunit.WebServerTestCase;
34  import org.htmlunit.html.DomAttr;
35  import org.htmlunit.html.DomElement;
36  import org.htmlunit.html.DomText;
37  import org.htmlunit.http.HttpStatus;
38  import org.htmlunit.util.MimeType;
39  import org.htmlunit.util.StringUtils;
40  import org.junit.jupiter.api.Test;
41  import org.w3c.dom.Node;
42  
43  /**
44   * Tests for {@link XmlPage}.
45   *
46   * @author Marc Guillemot
47   * @author Ahmed Ashour
48   * @author Ronald Brill
49   */
50  public class XmlPageTest extends WebServerTestCase {
51  
52      /**
53       * @throws Exception if the test fails
54       */
55      @Test
56      public void asNormalizedText() throws Exception {
57          asNormalizedText("<msg>abc</msg>", "abc");
58      }
59  
60      /**
61       * @throws Exception if the test fails
62       */
63      @Test
64      public void asNormalizedTextOnlyText() throws Exception {
65          final WebClient client = getWebClient();
66          final MockWebConnection webConnection = new MockWebConnection();
67          webConnection.setDefaultResponse("<msg>abc</msg>", 200, "OK", MimeType.TEXT_XML);
68          client.setWebConnection(webConnection);
69          final XmlPage xmlPage = client.getPage(URL_FIRST);
70  
71          assertEquals("abc", ((DomText) xmlPage.getFirstByXPath("/msg/text()")).asNormalizedText());
72      }
73  
74      /**
75       * @throws Exception if the test fails
76       */
77      @Test
78      public void asTextComplex() throws Exception {
79          final String xml
80                  = "<msg>1"
81                  + "<h1>h1</h1>"
82                  + "<h2>h2"
83                  + "<h3>h3</h3>"
84                  + "<h3></h3>"
85                  + "txt"
86                  + "</h2>o"
87                  + "</msg>";
88          asNormalizedText(xml, "1h1h2h3txto");
89      }
90  
91      /**
92       * @throws Exception if the test fails
93       */
94      @Test
95      public void asTextPart() throws Exception {
96          final String xml
97                  = "<outer>outer"
98                  + "<msg><h1>h1</h1></msg>"
99                  + "xy</outer>";
100         asNormalizedText(xml, "h1");
101     }
102 
103     /**
104      * Test for issue #1817.
105      * @throws Exception if the test fails
106      */
107     @Test
108     public void asTextEmpty() throws Exception {
109         asNormalizedText("<msg></msg>", "");
110     }
111 
112     private void asNormalizedText(final String xml, final String expected) throws Exception {
113         final WebClient client = getWebClient();
114         final MockWebConnection webConnection = new MockWebConnection();
115         webConnection.setDefaultResponse(xml, 200, "OK", MimeType.TEXT_XML);
116         client.setWebConnection(webConnection);
117         final XmlPage xmlPage = client.getPage(URL_FIRST);
118 
119         final DomElement msg = (DomElement) xmlPage.getFirstByXPath("//msg");
120         assertNotNull("No element found by XPath '//msg'", msg);
121         assertEquals(expected, msg.asNormalizedText());
122     }
123 
124     /**
125      * Tests namespace.
126      * @throws Exception if the test fails
127      */
128     @Test
129     public void namespace() throws Exception {
130         final String content
131             = "<?xml version='1.0'?>\n"
132             + "<RDF xmlns='http://www.w3.org/1999/02/22-rdf-syntax-ns#' "
133             + "xmlns:em='http://www.mozilla.org/2004/em-rdf#'>"
134             + "<Description about='urn:mozilla:install-manifest'>"
135             + "<em:name>My Plugin</em:name>"
136             + "</Description>\n"
137             + "</RDF>";
138 
139         final XmlPage xmlPage = testDocument(content, MimeType.TEXT_XML);
140         final Node node = xmlPage.getXmlDocument().getFirstChild().getFirstChild().getFirstChild();
141         assertEquals("em:name", node.getNodeName());
142         assertEquals("name", node.getLocalName());
143         assertEquals("http://www.mozilla.org/2004/em-rdf#", node.getNamespaceURI());
144     }
145 
146     /**
147      * Tests a simple valid XML document.
148      * @throws Exception if the test fails
149      */
150     @Test
151     public void validDocument() throws Exception {
152         final String content
153             = "<?xml version=\"1.0\"?>\n"
154              + "<foo>\n"
155              + "  <foofoo name='first'>something</foofoo>\n"
156              + "  <foofoo name='second'>something else</foofoo>\n"
157              + "</foo>";
158 
159         final XmlPage xmlPage = testDocument(content, MimeType.TEXT_XML);
160         assertEquals("foo", xmlPage.getXmlDocument().getFirstChild().getNodeName());
161     }
162 
163     /**
164      * Test that UTF-8 is used as default encoding for xml responses
165      * (issue 3385410).
166      * @throws Exception if the test fails
167      */
168     @Test
169     public void defaultEncoding() throws Exception {
170         final String content
171             = "<?xml version=\"1.0\"?>\n"
172              + "<foo>\n"
173              + "\u0434\n"
174              + "</foo>";
175 
176         final byte[] bytes = StringUtils.toByteArray(content, UTF_8);
177 
178         final WebClient client = getWebClient();
179         final MockWebConnection webConnection = new MockWebConnection();
180         webConnection.setDefaultResponse(bytes, 200, "OK", MimeType.TEXT_XML);
181         client.setWebConnection(webConnection);
182 
183         final Page page = client.getPage(URL_FIRST);
184         assertTrue(XmlPage.class.isInstance(page));
185 
186         final XmlPage xmlPage = (XmlPage) page;
187         assertEquals(content, xmlPage.getWebResponse().getContentAsString());
188         assertNotNull(xmlPage.getXmlDocument());
189 
190         assertEquals("foo", xmlPage.getXmlDocument().getFirstChild().getNodeName());
191     }
192 
193     /**
194      * Utility method to test XML page of different MIME types.
195      * @param content the XML content
196      * @param mimeType the MIME type
197      * @return the page returned by the WebClient
198      * @throws Exception if a problem occurs
199      */
200     private XmlPage testDocument(final String content, final String mimeType) throws Exception {
201         final WebClient client = getWebClient();
202         final MockWebConnection webConnection = new MockWebConnection();
203         webConnection.setDefaultResponse(content, 200, "OK", mimeType);
204         client.setWebConnection(webConnection);
205         final Page page = client.getPage(URL_FIRST);
206         assertEquals(URL_FIRST, page.getUrl());
207         assertEquals("OK", page.getWebResponse().getStatusMessage());
208         assertEquals(HttpStatus.OK_200, page.getWebResponse().getStatusCode());
209         assertEquals(mimeType, page.getWebResponse().getContentType());
210         assertTrue(XmlPage.class.isInstance(page));
211         final XmlPage xmlPage = (XmlPage) page;
212         assertEquals(content, xmlPage.getWebResponse().getContentAsString());
213         assertNotNull(xmlPage.getXmlDocument());
214         return xmlPage;
215     }
216 
217     /**
218      * Tests a simple invalid (badly formed) XML document.
219      * @throws Exception if the test fails
220      */
221     @Test
222     public void invalidDocument() throws Exception {
223         final WebClient client = getWebClient();
224         final MockWebConnection webConnection = new MockWebConnection();
225 
226         final String content
227             = "<?xml version=\"1.0\"?>\n"
228             + "<foo>\n"
229             + "  <foofoo invalid\n"
230             + "  <foofoo name='first'>something</foofoo>\n"
231             + "  <foofoo name='second'>something else</foofoo>\n"
232             + "</foo>";
233 
234         webConnection.setDefaultResponse(content, 200, "OK", MimeType.TEXT_XML);
235         client.setWebConnection(webConnection);
236 
237         final Page page = client.getPage(URL_FIRST);
238         assertEquals(URL_FIRST, page.getUrl());
239         assertEquals("OK", page.getWebResponse().getStatusMessage());
240         assertEquals(HttpStatus.OK_200, page.getWebResponse().getStatusCode());
241         assertEquals(MimeType.TEXT_XML, page.getWebResponse().getContentType());
242 
243         assertTrue(Page.class.isInstance(page));
244         final XmlPage xmlPage = (XmlPage) page;
245         assertEquals(content, xmlPage.getWebResponse().getContentAsString());
246         assertNull(xmlPage.getXmlDocument());
247     }
248 
249     /**
250      * Tests a simple empty XML document.
251      * @throws Exception if the test fails
252      */
253     @Test
254     public void emptyDocument() throws Exception {
255         final WebClient client = getWebClient();
256         final MockWebConnection webConnection = new MockWebConnection();
257 
258         final String content = "";
259 
260         webConnection.setDefaultResponse(content, 200, "OK", MimeType.TEXT_XML);
261         client.setWebConnection(webConnection);
262 
263         final Page page = client.getPage(URL_FIRST);
264         assertEquals(URL_FIRST, page.getUrl());
265         assertEquals("OK", page.getWebResponse().getStatusMessage());
266         assertEquals(HttpStatus.OK_200, page.getWebResponse().getStatusCode());
267         assertEquals(MimeType.TEXT_XML, page.getWebResponse().getContentType());
268 
269         assertTrue(Page.class.isInstance(page));
270         final XmlPage xmlPage = (XmlPage) page;
271         assertEquals(content, xmlPage.getWebResponse().getContentAsString());
272         assertNull(xmlPage.getXmlDocument());
273     }
274 
275     /**
276      * Tests a simple empty XML document.
277      * @throws Exception if the test fails
278      */
279     @Test
280     public void blankDocument() throws Exception {
281         final WebClient client = getWebClient();
282         final MockWebConnection webConnection = new MockWebConnection();
283 
284         final String content = "\t  \n\r\n";
285 
286         webConnection.setDefaultResponse(content, 200, "OK", MimeType.TEXT_XML);
287         client.setWebConnection(webConnection);
288 
289         final Page page = client.getPage(URL_FIRST);
290         assertEquals(URL_FIRST, page.getUrl());
291         assertEquals("OK", page.getWebResponse().getStatusMessage());
292         assertEquals(HttpStatus.OK_200, page.getWebResponse().getStatusCode());
293         assertEquals("text/xml", page.getWebResponse().getContentType());
294 
295         assertTrue(Page.class.isInstance(page));
296         final XmlPage xmlPage = (XmlPage) page;
297         assertEquals(content, xmlPage.getWebResponse().getContentAsString());
298         assertNull(xmlPage.getXmlDocument());
299     }
300 
301     /**
302      * @throws Exception if the test fails
303      */
304     @Test
305     public void voiceXML() throws Exception {
306         final String content =
307             "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
308             + "<vxml xmlns=\"http://www.w3.org/2001/vxml\""
309             + "  xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""
310             + "  xsi:schemaLocation=\"http://www.w3.org/2001/vxml "
311             + "   http://www.w3.org/TR/voicexml20/vxml.xsd\""
312             + "   version=\"2.0\">\n"
313             + "  <form>\n"
314             + "    <block>Hello World!</block>\n"
315             + "  </form>\n"
316             + "</vxml>";
317 
318         final XmlPage xmlPage = testDocument(content, "application/voicexml+xml");
319         assertEquals("vxml", xmlPage.getXmlDocument().getFirstChild().getNodeName());
320     }
321 
322     /**
323      * @throws Exception if the test fails
324      */
325     @Test
326     public void xpath() throws Exception {
327         final String html
328             = "<?xml version=\"1.0\"?>\n"
329              + "<foo>\n"
330              + "  <foofoo name='first'>something</foofoo>\n"
331              + "  <foofoo name='second'>something else</foofoo>\n"
332              + "</foo>";
333         final XmlPage xmlPage = testDocument(html, MimeType.TEXT_XML);
334         assertEquals(1, xmlPage.getByXPath("//foofoo[@name='first']").size());
335     }
336 
337     /**
338      * Test for issue #1820.
339      * @throws Exception if the test fails
340      */
341     @Test
342     public void xpathAttribute() throws Exception {
343         final String html
344             = "<?xml version=\"1.0\"?>\n"
345              + "<foo>\n"
346              + "  <MARKGR INTREGN=\"1289218\" BILING=\"Y\" OOCD=\"CH\" INTREGD=\"20160111\">\n"
347              + "  </MARKGR>\n"
348              + "</foo>";
349         final XmlPage xmlPage = testDocument(html, MimeType.TEXT_XML);
350 
351         assertEquals(1, xmlPage.getByXPath("//MARKGR").size());
352         assertNotNull(xmlPage.getFirstByXPath("//MARKGR"));
353 
354         assertEquals(0, xmlPage.getByXPath("//markgr").size());
355         assertNull(xmlPage.getFirstByXPath("//markgr"));
356 
357         assertEquals(1, xmlPage.getByXPath("//MARKGR/@INTREGN").size());
358         assertTrue(xmlPage.getFirstByXPath("//MARKGR/@INTREGN") instanceof DomAttr);
359 
360         assertEquals(0, xmlPage.getByXPath("//MARKGR/@intregn").size());
361         assertNull(xmlPage.getFirstByXPath("//MARKGR/@intregn"));
362     }
363 
364     /**
365      * @throws Exception if the test fails
366      */
367     @Test
368     public void noResponse() throws Exception {
369         final Map<String, Class<? extends Servlet>> servlets = new HashMap<>();
370         servlets.put("/test", NoResponseServlet.class);
371         startWebServer("./", null, servlets);
372 
373         final WebClient client = getWebClient();
374         client.getPage(URL_FIRST + "test");
375     }
376 
377     /**
378      * Servlet for {@link #noResponse()}.
379      */
380     public static class NoResponseServlet extends HttpServlet {
381 
382         /**
383          * {@inheritDoc}
384          */
385         @Override
386         protected void doGet(final HttpServletRequest request, final HttpServletResponse response) throws IOException {
387             response.setContentType(MimeType.TEXT_XML);
388             response.setStatus(HttpServletResponse.SC_NO_CONTENT);
389         }
390     }
391 
392     /**
393      * @throws Exception if the test fails
394      */
395     @Test
396     public void bom() throws Exception {
397         final byte[] bom = ByteOrderMark.UTF_8.getBytes();
398         final byte[] xml = "<msg>abc</msg>".getBytes(UTF_8);
399         final byte[] bytes = ByteBuffer.allocate(bom.length + xml.length).put(bom).put(xml).array();
400 
401         final WebClient client = getWebClient();
402         final MockWebConnection webConnection = new MockWebConnection();
403         webConnection.setDefaultResponse(bytes, 200, "OK", MimeType.TEXT_XML);
404         client.setWebConnection(webConnection);
405         final XmlPage xmlPage = client.getPage(URL_FIRST);
406 
407         final DomElement msg = (DomElement) xmlPage.getFirstByXPath("//msg");
408         assertNotNull("No element found by XPath '//msg'", msg);
409         assertEquals("abc", msg.asNormalizedText());
410     }
411 
412 }