1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.xml;
16
17 import static java.nio.charset.StandardCharsets.UTF_8;
18
19 import java.io.IOException;
20 import java.nio.ByteBuffer;
21 import java.util.HashMap;
22 import java.util.Map;
23
24 import javax.servlet.Servlet;
25 import javax.servlet.http.HttpServlet;
26 import javax.servlet.http.HttpServletRequest;
27 import javax.servlet.http.HttpServletResponse;
28
29 import org.apache.commons.io.ByteOrderMark;
30 import org.htmlunit.MockWebConnection;
31 import org.htmlunit.Page;
32 import org.htmlunit.WebClient;
33 import org.htmlunit.WebServerTestCase;
34 import org.htmlunit.html.DomAttr;
35 import org.htmlunit.html.DomElement;
36 import org.htmlunit.html.DomText;
37 import org.htmlunit.http.HttpStatus;
38 import org.htmlunit.util.MimeType;
39 import org.htmlunit.util.StringUtils;
40 import org.junit.jupiter.api.Test;
41 import org.w3c.dom.Node;
42
43
44
45
46
47
48
49
50 public class XmlPageTest extends WebServerTestCase {
51
52
53
54
55 @Test
56 public void asNormalizedText() throws Exception {
57 asNormalizedText("<msg>abc</msg>", "abc");
58 }
59
60
61
62
63 @Test
64 public void asNormalizedTextOnlyText() throws Exception {
65 final WebClient client = getWebClient();
66 final MockWebConnection webConnection = new MockWebConnection();
67 webConnection.setDefaultResponse("<msg>abc</msg>", 200, "OK", MimeType.TEXT_XML);
68 client.setWebConnection(webConnection);
69 final XmlPage xmlPage = client.getPage(URL_FIRST);
70
71 assertEquals("abc", ((DomText) xmlPage.getFirstByXPath("/msg/text()")).asNormalizedText());
72 }
73
74
75
76
77 @Test
78 public void asTextComplex() throws Exception {
79 final String xml
80 = "<msg>1"
81 + "<h1>h1</h1>"
82 + "<h2>h2"
83 + "<h3>h3</h3>"
84 + "<h3></h3>"
85 + "txt"
86 + "</h2>o"
87 + "</msg>";
88 asNormalizedText(xml, "1h1h2h3txto");
89 }
90
91
92
93
94 @Test
95 public void asTextPart() throws Exception {
96 final String xml
97 = "<outer>outer"
98 + "<msg><h1>h1</h1></msg>"
99 + "xy</outer>";
100 asNormalizedText(xml, "h1");
101 }
102
103
104
105
106
107 @Test
108 public void asTextEmpty() throws Exception {
109 asNormalizedText("<msg></msg>", "");
110 }
111
112 private void asNormalizedText(final String xml, final String expected) throws Exception {
113 final WebClient client = getWebClient();
114 final MockWebConnection webConnection = new MockWebConnection();
115 webConnection.setDefaultResponse(xml, 200, "OK", MimeType.TEXT_XML);
116 client.setWebConnection(webConnection);
117 final XmlPage xmlPage = client.getPage(URL_FIRST);
118
119 final DomElement msg = (DomElement) xmlPage.getFirstByXPath("//msg");
120 assertNotNull("No element found by XPath '//msg'", msg);
121 assertEquals(expected, msg.asNormalizedText());
122 }
123
124
125
126
127
128 @Test
129 public void namespace() throws Exception {
130 final String content
131 = "<?xml version='1.0'?>\n"
132 + "<RDF xmlns='http://www.w3.org/1999/02/22-rdf-syntax-ns#' "
133 + "xmlns:em='http://www.mozilla.org/2004/em-rdf#'>"
134 + "<Description about='urn:mozilla:install-manifest'>"
135 + "<em:name>My Plugin</em:name>"
136 + "</Description>\n"
137 + "</RDF>";
138
139 final XmlPage xmlPage = testDocument(content, MimeType.TEXT_XML);
140 final Node node = xmlPage.getXmlDocument().getFirstChild().getFirstChild().getFirstChild();
141 assertEquals("em:name", node.getNodeName());
142 assertEquals("name", node.getLocalName());
143 assertEquals("http://www.mozilla.org/2004/em-rdf#", node.getNamespaceURI());
144 }
145
146
147
148
149
150 @Test
151 public void validDocument() throws Exception {
152 final String content
153 = "<?xml version=\"1.0\"?>\n"
154 + "<foo>\n"
155 + " <foofoo name='first'>something</foofoo>\n"
156 + " <foofoo name='second'>something else</foofoo>\n"
157 + "</foo>";
158
159 final XmlPage xmlPage = testDocument(content, MimeType.TEXT_XML);
160 assertEquals("foo", xmlPage.getXmlDocument().getFirstChild().getNodeName());
161 }
162
163
164
165
166
167
168 @Test
169 public void defaultEncoding() throws Exception {
170 final String content
171 = "<?xml version=\"1.0\"?>\n"
172 + "<foo>\n"
173 + "\u0434\n"
174 + "</foo>";
175
176 final byte[] bytes = StringUtils.toByteArray(content, UTF_8);
177
178 final WebClient client = getWebClient();
179 final MockWebConnection webConnection = new MockWebConnection();
180 webConnection.setDefaultResponse(bytes, 200, "OK", MimeType.TEXT_XML);
181 client.setWebConnection(webConnection);
182
183 final Page page = client.getPage(URL_FIRST);
184 assertTrue(XmlPage.class.isInstance(page));
185
186 final XmlPage xmlPage = (XmlPage) page;
187 assertEquals(content, xmlPage.getWebResponse().getContentAsString());
188 assertNotNull(xmlPage.getXmlDocument());
189
190 assertEquals("foo", xmlPage.getXmlDocument().getFirstChild().getNodeName());
191 }
192
193
194
195
196
197
198
199
200 private XmlPage testDocument(final String content, final String mimeType) throws Exception {
201 final WebClient client = getWebClient();
202 final MockWebConnection webConnection = new MockWebConnection();
203 webConnection.setDefaultResponse(content, 200, "OK", mimeType);
204 client.setWebConnection(webConnection);
205 final Page page = client.getPage(URL_FIRST);
206 assertEquals(URL_FIRST, page.getUrl());
207 assertEquals("OK", page.getWebResponse().getStatusMessage());
208 assertEquals(HttpStatus.OK_200, page.getWebResponse().getStatusCode());
209 assertEquals(mimeType, page.getWebResponse().getContentType());
210 assertTrue(XmlPage.class.isInstance(page));
211 final XmlPage xmlPage = (XmlPage) page;
212 assertEquals(content, xmlPage.getWebResponse().getContentAsString());
213 assertNotNull(xmlPage.getXmlDocument());
214 return xmlPage;
215 }
216
217
218
219
220
221 @Test
222 public void invalidDocument() throws Exception {
223 final WebClient client = getWebClient();
224 final MockWebConnection webConnection = new MockWebConnection();
225
226 final String content
227 = "<?xml version=\"1.0\"?>\n"
228 + "<foo>\n"
229 + " <foofoo invalid\n"
230 + " <foofoo name='first'>something</foofoo>\n"
231 + " <foofoo name='second'>something else</foofoo>\n"
232 + "</foo>";
233
234 webConnection.setDefaultResponse(content, 200, "OK", MimeType.TEXT_XML);
235 client.setWebConnection(webConnection);
236
237 final Page page = client.getPage(URL_FIRST);
238 assertEquals(URL_FIRST, page.getUrl());
239 assertEquals("OK", page.getWebResponse().getStatusMessage());
240 assertEquals(HttpStatus.OK_200, page.getWebResponse().getStatusCode());
241 assertEquals(MimeType.TEXT_XML, page.getWebResponse().getContentType());
242
243 assertTrue(Page.class.isInstance(page));
244 final XmlPage xmlPage = (XmlPage) page;
245 assertEquals(content, xmlPage.getWebResponse().getContentAsString());
246 assertNull(xmlPage.getXmlDocument());
247 }
248
249
250
251
252
253 @Test
254 public void emptyDocument() throws Exception {
255 final WebClient client = getWebClient();
256 final MockWebConnection webConnection = new MockWebConnection();
257
258 final String content = "";
259
260 webConnection.setDefaultResponse(content, 200, "OK", MimeType.TEXT_XML);
261 client.setWebConnection(webConnection);
262
263 final Page page = client.getPage(URL_FIRST);
264 assertEquals(URL_FIRST, page.getUrl());
265 assertEquals("OK", page.getWebResponse().getStatusMessage());
266 assertEquals(HttpStatus.OK_200, page.getWebResponse().getStatusCode());
267 assertEquals(MimeType.TEXT_XML, page.getWebResponse().getContentType());
268
269 assertTrue(Page.class.isInstance(page));
270 final XmlPage xmlPage = (XmlPage) page;
271 assertEquals(content, xmlPage.getWebResponse().getContentAsString());
272 assertNull(xmlPage.getXmlDocument());
273 }
274
275
276
277
278
279 @Test
280 public void blankDocument() throws Exception {
281 final WebClient client = getWebClient();
282 final MockWebConnection webConnection = new MockWebConnection();
283
284 final String content = "\t \n\r\n";
285
286 webConnection.setDefaultResponse(content, 200, "OK", MimeType.TEXT_XML);
287 client.setWebConnection(webConnection);
288
289 final Page page = client.getPage(URL_FIRST);
290 assertEquals(URL_FIRST, page.getUrl());
291 assertEquals("OK", page.getWebResponse().getStatusMessage());
292 assertEquals(HttpStatus.OK_200, page.getWebResponse().getStatusCode());
293 assertEquals("text/xml", page.getWebResponse().getContentType());
294
295 assertTrue(Page.class.isInstance(page));
296 final XmlPage xmlPage = (XmlPage) page;
297 assertEquals(content, xmlPage.getWebResponse().getContentAsString());
298 assertNull(xmlPage.getXmlDocument());
299 }
300
301
302
303
304 @Test
305 public void voiceXML() throws Exception {
306 final String content =
307 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
308 + "<vxml xmlns=\"http://www.w3.org/2001/vxml\""
309 + " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""
310 + " xsi:schemaLocation=\"http://www.w3.org/2001/vxml "
311 + " http://www.w3.org/TR/voicexml20/vxml.xsd\""
312 + " version=\"2.0\">\n"
313 + " <form>\n"
314 + " <block>Hello World!</block>\n"
315 + " </form>\n"
316 + "</vxml>";
317
318 final XmlPage xmlPage = testDocument(content, "application/voicexml+xml");
319 assertEquals("vxml", xmlPage.getXmlDocument().getFirstChild().getNodeName());
320 }
321
322
323
324
325 @Test
326 public void xpath() throws Exception {
327 final String html
328 = "<?xml version=\"1.0\"?>\n"
329 + "<foo>\n"
330 + " <foofoo name='first'>something</foofoo>\n"
331 + " <foofoo name='second'>something else</foofoo>\n"
332 + "</foo>";
333 final XmlPage xmlPage = testDocument(html, MimeType.TEXT_XML);
334 assertEquals(1, xmlPage.getByXPath("//foofoo[@name='first']").size());
335 }
336
337
338
339
340
341 @Test
342 public void xpathAttribute() throws Exception {
343 final String html
344 = "<?xml version=\"1.0\"?>\n"
345 + "<foo>\n"
346 + " <MARKGR INTREGN=\"1289218\" BILING=\"Y\" OOCD=\"CH\" INTREGD=\"20160111\">\n"
347 + " </MARKGR>\n"
348 + "</foo>";
349 final XmlPage xmlPage = testDocument(html, MimeType.TEXT_XML);
350
351 assertEquals(1, xmlPage.getByXPath("//MARKGR").size());
352 assertNotNull(xmlPage.getFirstByXPath("//MARKGR"));
353
354 assertEquals(0, xmlPage.getByXPath("//markgr").size());
355 assertNull(xmlPage.getFirstByXPath("//markgr"));
356
357 assertEquals(1, xmlPage.getByXPath("//MARKGR/@INTREGN").size());
358 assertTrue(xmlPage.getFirstByXPath("//MARKGR/@INTREGN") instanceof DomAttr);
359
360 assertEquals(0, xmlPage.getByXPath("//MARKGR/@intregn").size());
361 assertNull(xmlPage.getFirstByXPath("//MARKGR/@intregn"));
362 }
363
364
365
366
367 @Test
368 public void noResponse() throws Exception {
369 final Map<String, Class<? extends Servlet>> servlets = new HashMap<>();
370 servlets.put("/test", NoResponseServlet.class);
371 startWebServer("./", null, servlets);
372
373 final WebClient client = getWebClient();
374 client.getPage(URL_FIRST + "test");
375 }
376
377
378
379
380 public static class NoResponseServlet extends HttpServlet {
381
382
383
384
385 @Override
386 protected void doGet(final HttpServletRequest request, final HttpServletResponse response) throws IOException {
387 response.setContentType(MimeType.TEXT_XML);
388 response.setStatus(HttpServletResponse.SC_NO_CONTENT);
389 }
390 }
391
392
393
394
395 @Test
396 public void bom() throws Exception {
397 final byte[] bom = ByteOrderMark.UTF_8.getBytes();
398 final byte[] xml = "<msg>abc</msg>".getBytes(UTF_8);
399 final byte[] bytes = ByteBuffer.allocate(bom.length + xml.length).put(bom).put(xml).array();
400
401 final WebClient client = getWebClient();
402 final MockWebConnection webConnection = new MockWebConnection();
403 webConnection.setDefaultResponse(bytes, 200, "OK", MimeType.TEXT_XML);
404 client.setWebConnection(webConnection);
405 final XmlPage xmlPage = client.getPage(URL_FIRST);
406
407 final DomElement msg = (DomElement) xmlPage.getFirstByXPath("//msg");
408 assertNotNull("No element found by XPath '//msg'", msg);
409 assertEquals("abc", msg.asNormalizedText());
410 }
411
412 }