View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.xml;
16  
17  import static java.nio.charset.StandardCharsets.UTF_8;
18  
19  import java.io.IOException;
20  import java.nio.ByteBuffer;
21  import java.util.HashMap;
22  import java.util.Map;
23  
24  import javax.servlet.Servlet;
25  import javax.servlet.http.HttpServlet;
26  import javax.servlet.http.HttpServletRequest;
27  import javax.servlet.http.HttpServletResponse;
28  
29  import org.apache.commons.io.ByteOrderMark;
30  import org.htmlunit.MockWebConnection;
31  import org.htmlunit.Page;
32  import org.htmlunit.WebClient;
33  import org.htmlunit.WebServerTestCase;
34  import org.htmlunit.html.DomAttr;
35  import org.htmlunit.html.DomElement;
36  import org.htmlunit.html.DomText;
37  import org.htmlunit.http.HttpStatus;
38  import org.htmlunit.junit.BrowserRunner;
39  import org.htmlunit.util.MimeType;
40  import org.htmlunit.util.StringUtils;
41  import org.junit.Test;
42  import org.junit.runner.RunWith;
43  import org.w3c.dom.Node;
44  
45  /**
46   * Tests for {@link XmlPage}.
47   *
48   * @author Marc Guillemot
49   * @author Ahmed Ashour
50   * @author Ronald Brill
51   */
52  @RunWith(BrowserRunner.class)
53  public class XmlPageTest extends WebServerTestCase {
54  
55      /**
56       * @throws Exception if the test fails
57       */
58      @Test
59      public void asNormalizedText() throws Exception {
60          asNormalizedText("<msg>abc</msg>", "abc");
61      }
62  
63      /**
64       * @throws Exception if the test fails
65       */
66      @Test
67      public void asNormalizedTextOnlyText() throws Exception {
68          final WebClient client = getWebClient();
69          final MockWebConnection webConnection = new MockWebConnection();
70          webConnection.setDefaultResponse("<msg>abc</msg>", 200, "OK", MimeType.TEXT_XML);
71          client.setWebConnection(webConnection);
72          final XmlPage xmlPage = client.getPage(URL_FIRST);
73  
74          assertEquals("abc", ((DomText) xmlPage.getFirstByXPath("/msg/text()")).asNormalizedText());
75      }
76  
77      /**
78       * @throws Exception if the test fails
79       */
80      @Test
81      public void asTextComplex() throws Exception {
82          final String xml
83                  = "<msg>1"
84                  + "<h1>h1</h1>"
85                  + "<h2>h2"
86                  + "<h3>h3</h3>"
87                  + "<h3></h3>"
88                  + "txt"
89                  + "</h2>o"
90                  + "</msg>";
91          asNormalizedText(xml, "1h1h2h3txto");
92      }
93  
94      /**
95       * @throws Exception if the test fails
96       */
97      @Test
98      public void asTextPart() throws Exception {
99          final String xml
100                 = "<outer>outer"
101                 + "<msg><h1>h1</h1></msg>"
102                 + "xy</outer>";
103         asNormalizedText(xml, "h1");
104     }
105 
106     /**
107      * Test for issue #1817.
108      * @throws Exception if the test fails
109      */
110     @Test
111     public void asTextEmpty() throws Exception {
112         asNormalizedText("<msg></msg>", "");
113     }
114 
115     private void asNormalizedText(final String xml, final String expected) throws Exception {
116         final WebClient client = getWebClient();
117         final MockWebConnection webConnection = new MockWebConnection();
118         webConnection.setDefaultResponse(xml, 200, "OK", MimeType.TEXT_XML);
119         client.setWebConnection(webConnection);
120         final XmlPage xmlPage = client.getPage(URL_FIRST);
121 
122         final DomElement msg = (DomElement) xmlPage.getFirstByXPath("//msg");
123         assertNotNull("No element found by XPath '//msg'", msg);
124         assertEquals(expected, msg.asNormalizedText());
125     }
126 
127     /**
128      * Tests namespace.
129      * @throws Exception if the test fails
130      */
131     @Test
132     public void namespace() throws Exception {
133         final String content
134             = "<?xml version='1.0'?>\n"
135             + "<RDF xmlns='http://www.w3.org/1999/02/22-rdf-syntax-ns#' "
136             + "xmlns:em='http://www.mozilla.org/2004/em-rdf#'>"
137             + "<Description about='urn:mozilla:install-manifest'>"
138             + "<em:name>My Plugin</em:name>"
139             + "</Description>\n"
140             + "</RDF>";
141 
142         final XmlPage xmlPage = testDocument(content, MimeType.TEXT_XML);
143         final Node node = xmlPage.getXmlDocument().getFirstChild().getFirstChild().getFirstChild();
144         assertEquals("em:name", node.getNodeName());
145         assertEquals("name", node.getLocalName());
146         assertEquals("http://www.mozilla.org/2004/em-rdf#", node.getNamespaceURI());
147     }
148 
149     /**
150      * Tests a simple valid XML document.
151      * @throws Exception if the test fails
152      */
153     @Test
154     public void validDocument() throws Exception {
155         final String content
156             = "<?xml version=\"1.0\"?>\n"
157              + "<foo>\n"
158              + "  <foofoo name='first'>something</foofoo>\n"
159              + "  <foofoo name='second'>something else</foofoo>\n"
160              + "</foo>";
161 
162         final XmlPage xmlPage = testDocument(content, MimeType.TEXT_XML);
163         assertEquals("foo", xmlPage.getXmlDocument().getFirstChild().getNodeName());
164     }
165 
166     /**
167      * Test that UTF-8 is used as default encoding for xml responses
168      * (issue 3385410).
169      * @throws Exception if the test fails
170      */
171     @Test
172     public void defaultEncoding() throws Exception {
173         final String content
174             = "<?xml version=\"1.0\"?>\n"
175              + "<foo>\n"
176              + "\u0434\n"
177              + "</foo>";
178 
179         final byte[] bytes = StringUtils.toByteArray(content, UTF_8);
180 
181         final WebClient client = getWebClient();
182         final MockWebConnection webConnection = new MockWebConnection();
183         webConnection.setDefaultResponse(bytes, 200, "OK", MimeType.TEXT_XML);
184         client.setWebConnection(webConnection);
185 
186         final Page page = client.getPage(URL_FIRST);
187         assertTrue(XmlPage.class.isInstance(page));
188 
189         final XmlPage xmlPage = (XmlPage) page;
190         assertEquals(content, xmlPage.getWebResponse().getContentAsString());
191         assertNotNull(xmlPage.getXmlDocument());
192 
193         assertEquals("foo", xmlPage.getXmlDocument().getFirstChild().getNodeName());
194     }
195 
196     /**
197      * Utility method to test XML page of different MIME types.
198      * @param content the XML content
199      * @param mimeType the MIME type
200      * @return the page returned by the WebClient
201      * @throws Exception if a problem occurs
202      */
203     private XmlPage testDocument(final String content, final String mimeType) throws Exception {
204         final WebClient client = getWebClient();
205         final MockWebConnection webConnection = new MockWebConnection();
206         webConnection.setDefaultResponse(content, 200, "OK", mimeType);
207         client.setWebConnection(webConnection);
208         final Page page = client.getPage(URL_FIRST);
209         assertEquals(URL_FIRST, page.getUrl());
210         assertEquals("OK", page.getWebResponse().getStatusMessage());
211         assertEquals(HttpStatus.OK_200, page.getWebResponse().getStatusCode());
212         assertEquals(mimeType, page.getWebResponse().getContentType());
213         assertTrue(XmlPage.class.isInstance(page));
214         final XmlPage xmlPage = (XmlPage) page;
215         assertEquals(content, xmlPage.getWebResponse().getContentAsString());
216         assertNotNull(xmlPage.getXmlDocument());
217         return xmlPage;
218     }
219 
220     /**
221      * Tests a simple invalid (badly formed) XML document.
222      * @throws Exception if the test fails
223      */
224     @Test
225     public void invalidDocument() throws Exception {
226         final WebClient client = getWebClient();
227         final MockWebConnection webConnection = new MockWebConnection();
228 
229         final String content
230             = "<?xml version=\"1.0\"?>\n"
231             + "<foo>\n"
232             + "  <foofoo invalid\n"
233             + "  <foofoo name='first'>something</foofoo>\n"
234             + "  <foofoo name='second'>something else</foofoo>\n"
235             + "</foo>";
236 
237         webConnection.setDefaultResponse(content, 200, "OK", MimeType.TEXT_XML);
238         client.setWebConnection(webConnection);
239 
240         final Page page = client.getPage(URL_FIRST);
241         assertEquals(URL_FIRST, page.getUrl());
242         assertEquals("OK", page.getWebResponse().getStatusMessage());
243         assertEquals(HttpStatus.OK_200, page.getWebResponse().getStatusCode());
244         assertEquals(MimeType.TEXT_XML, page.getWebResponse().getContentType());
245 
246         assertTrue(Page.class.isInstance(page));
247         final XmlPage xmlPage = (XmlPage) page;
248         assertEquals(content, xmlPage.getWebResponse().getContentAsString());
249         assertNull(xmlPage.getXmlDocument());
250     }
251 
252     /**
253      * Tests a simple empty XML document.
254      * @throws Exception if the test fails
255      */
256     @Test
257     public void emptyDocument() throws Exception {
258         final WebClient client = getWebClient();
259         final MockWebConnection webConnection = new MockWebConnection();
260 
261         final String content = "";
262 
263         webConnection.setDefaultResponse(content, 200, "OK", MimeType.TEXT_XML);
264         client.setWebConnection(webConnection);
265 
266         final Page page = client.getPage(URL_FIRST);
267         assertEquals(URL_FIRST, page.getUrl());
268         assertEquals("OK", page.getWebResponse().getStatusMessage());
269         assertEquals(HttpStatus.OK_200, page.getWebResponse().getStatusCode());
270         assertEquals(MimeType.TEXT_XML, page.getWebResponse().getContentType());
271 
272         assertTrue(Page.class.isInstance(page));
273         final XmlPage xmlPage = (XmlPage) page;
274         assertEquals(content, xmlPage.getWebResponse().getContentAsString());
275         assertNull(xmlPage.getXmlDocument());
276     }
277 
278     /**
279      * Tests a simple empty XML document.
280      * @throws Exception if the test fails
281      */
282     @Test
283     public void blankDocument() throws Exception {
284         final WebClient client = getWebClient();
285         final MockWebConnection webConnection = new MockWebConnection();
286 
287         final String content = "\t  \n\r\n";
288 
289         webConnection.setDefaultResponse(content, 200, "OK", MimeType.TEXT_XML);
290         client.setWebConnection(webConnection);
291 
292         final Page page = client.getPage(URL_FIRST);
293         assertEquals(URL_FIRST, page.getUrl());
294         assertEquals("OK", page.getWebResponse().getStatusMessage());
295         assertEquals(HttpStatus.OK_200, page.getWebResponse().getStatusCode());
296         assertEquals("text/xml", page.getWebResponse().getContentType());
297 
298         assertTrue(Page.class.isInstance(page));
299         final XmlPage xmlPage = (XmlPage) page;
300         assertEquals(content, xmlPage.getWebResponse().getContentAsString());
301         assertNull(xmlPage.getXmlDocument());
302     }
303 
304     /**
305      * @throws Exception if the test fails
306      */
307     @Test
308     public void voiceXML() throws Exception {
309         final String content =
310             "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
311             + "<vxml xmlns=\"http://www.w3.org/2001/vxml\""
312             + "  xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""
313             + "  xsi:schemaLocation=\"http://www.w3.org/2001/vxml "
314             + "   http://www.w3.org/TR/voicexml20/vxml.xsd\""
315             + "   version=\"2.0\">\n"
316             + "  <form>\n"
317             + "    <block>Hello World!</block>\n"
318             + "  </form>\n"
319             + "</vxml>";
320 
321         final XmlPage xmlPage = testDocument(content, "application/voicexml+xml");
322         assertEquals("vxml", xmlPage.getXmlDocument().getFirstChild().getNodeName());
323     }
324 
325     /**
326      * @throws Exception if the test fails
327      */
328     @Test
329     public void xpath() throws Exception {
330         final String html
331             = "<?xml version=\"1.0\"?>\n"
332              + "<foo>\n"
333              + "  <foofoo name='first'>something</foofoo>\n"
334              + "  <foofoo name='second'>something else</foofoo>\n"
335              + "</foo>";
336         final XmlPage xmlPage = testDocument(html, MimeType.TEXT_XML);
337         assertEquals(1, xmlPage.getByXPath("//foofoo[@name='first']").size());
338     }
339 
340     /**
341      * Test for issue #1820.
342      * @throws Exception if the test fails
343      */
344     @Test
345     public void xpathAttribute() throws Exception {
346         final String html
347             = "<?xml version=\"1.0\"?>\n"
348              + "<foo>\n"
349              + "  <MARKGR INTREGN=\"1289218\" BILING=\"Y\" OOCD=\"CH\" INTREGD=\"20160111\">\n"
350              + "  </MARKGR>\n"
351              + "</foo>";
352         final XmlPage xmlPage = testDocument(html, MimeType.TEXT_XML);
353 
354         assertEquals(1, xmlPage.getByXPath("//MARKGR").size());
355         assertNotNull(xmlPage.getFirstByXPath("//MARKGR"));
356 
357         assertEquals(0, xmlPage.getByXPath("//markgr").size());
358         assertNull(xmlPage.getFirstByXPath("//markgr"));
359 
360         assertEquals(1, xmlPage.getByXPath("//MARKGR/@INTREGN").size());
361         assertTrue(xmlPage.getFirstByXPath("//MARKGR/@INTREGN") instanceof DomAttr);
362 
363         assertEquals(0, xmlPage.getByXPath("//MARKGR/@intregn").size());
364         assertNull(xmlPage.getFirstByXPath("//MARKGR/@intregn"));
365     }
366 
367     /**
368      * @throws Exception if the test fails
369      */
370     @Test
371     public void noResponse() throws Exception {
372         final Map<String, Class<? extends Servlet>> servlets = new HashMap<>();
373         servlets.put("/test", NoResponseServlet.class);
374         startWebServer("./", null, servlets);
375 
376         final WebClient client = getWebClient();
377         client.getPage(URL_FIRST + "test");
378     }
379 
380     /**
381      * Servlet for {@link #noResponse()}.
382      */
383     public static class NoResponseServlet extends HttpServlet {
384 
385         /**
386          * {@inheritDoc}
387          */
388         @Override
389         protected void doGet(final HttpServletRequest request, final HttpServletResponse response) throws IOException {
390             response.setContentType(MimeType.TEXT_XML);
391             response.setStatus(HttpServletResponse.SC_NO_CONTENT);
392         }
393     }
394 
395     /**
396      * @throws Exception if the test fails
397      */
398     @Test
399     public void bom() throws Exception {
400         final byte[] bom = ByteOrderMark.UTF_8.getBytes();
401         final byte[] xml = "<msg>abc</msg>".getBytes(UTF_8);
402         final byte[] bytes = ByteBuffer.allocate(bom.length + xml.length).put(bom).put(xml).array();
403 
404         final WebClient client = getWebClient();
405         final MockWebConnection webConnection = new MockWebConnection();
406         webConnection.setDefaultResponse(bytes, 200, "OK", MimeType.TEXT_XML);
407         client.setWebConnection(webConnection);
408         final XmlPage xmlPage = client.getPage(URL_FIRST);
409 
410         final DomElement msg = (DomElement) xmlPage.getFirstByXPath("//msg");
411         assertNotNull("No element found by XPath '//msg'", msg);
412         assertEquals("abc", msg.asNormalizedText());
413     }
414 
415 }