/*
 * Copyright (c) 2002-2025 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15  package org.htmlunit.javascript.host.dom;
16  
17  import org.htmlunit.WebDriverTestCase;
18  import org.htmlunit.junit.BrowserRunner;
19  import org.htmlunit.junit.annotation.Alerts;
20  import org.junit.Test;
21  import org.junit.runner.RunWith;
22  
23  /**
24   * Tests for {@link DOMParser}.
25   *
26   * @author Ahmed Ashour
27   * @author Marc Guillemot
28   * @author Frank Danek
29   * @author Ronald Brill
30   */
31  @RunWith(BrowserRunner.class)
32  public class DOMParserTest extends WebDriverTestCase {
33  
34      /**
35       * @throws Exception if the test fails
36       */
37      @Test
38      @Alerts("[object DOMParser]")
39      public void scriptableToString() throws Exception {
40          final String html = DOCTYPE_HTML
41              + "<html><head>\n"
42              + "<script>\n"
43              + LOG_TITLE_FUNCTION
44              + "  function test() {\n"
45              + "    try {\n"
46              + "      log(new DOMParser());\n"
47              + "    } catch(e) {logEx(e);}\n"
48              + "  }\n"
49              + "</script></head><body onload='test()'>\n"
50              + "</body></html>";
51  
52          loadPageVerifyTitle2(html);
53      }
54  
55      /**
56       * @throws Exception if the test fails
57       */
58      @Test
59      @Alerts({"[object HTMLDocument]", "", "§§URL§§"})
60      public void parseFromString_text_html() throws Exception {
61          final String content = DOCTYPE_HTML
62              + "<html><head>\n"
63              + "<script>\n"
64              + LOG_TITLE_FUNCTION
65              + "  function test() {\n"
66              + "    var text='<html></html>';\n"
67              + "    try {\n"
68              + "      var parser = new DOMParser();\n"
69              + "      var doc = parser.parseFromString(text, 'text/html');\n"
70              + "      log(doc);\n"
71              + "      log(doc.body.innerHTML);\n"
72              + "      log(doc.URL);\n"
73              + "    } catch(e) { logEx(e); }\n"
74              + "  }\n"
75              + "</script></head><body onload='test()'>\n"
76              + "</body></html>";
77  
78          expandExpectedAlertsVariables(URL_FIRST);
79          loadPageVerifyTitle2(content);
80      }
81  
82      /**
83       * @throws Exception if the test fails
84       */
85      @Test
86      @Alerts({"[object HTMLDocument]", "<div></div>", "§§URL§§"})
87      public void parseFromString_text_html_div() throws Exception {
88          final String content = DOCTYPE_HTML
89              + "<html><head>\n"
90              + "<script>\n"
91              + LOG_TITLE_FUNCTION
92              + "  function test() {\n"
93              + "    var text='<div></div>';\n"
94              + "    try {\n"
95              + "      var parser = new DOMParser();\n"
96              + "      var doc = parser.parseFromString(text, 'text/html');\n"
97              + "      log(doc);\n"
98              + "      log(doc.body.innerHTML);\n"
99              + "      log(doc.URL);\n"
100             + "    } catch(e) { logEx(e); }\n"
101             + "  }\n"
102             + "</script></head><body onload='test()'>\n"
103             + "</body></html>";
104 
105         expandExpectedAlertsVariables(URL_FIRST);
106         loadPageVerifyTitle2(content);
107     }
108 
109     /**
110      * @throws Exception if the test fails
111      */
112     @Test
113     @Alerts("[object XMLDocument]")
114     public void parseFromString_text_xml() throws Exception {
115         final String content = DOCTYPE_HTML
116             + "<html><head>\n"
117             + "<script>\n"
118             + LOG_TITLE_FUNCTION
119             + "  function test() {\n"
120             + "    var text='<note/>';\n"
121             + "    try {\n"
122             + "      var parser = new DOMParser();\n"
123             + "      var doc = parser.parseFromString(text, 'text/xml');\n"
124             + "      log(doc);\n"
125             + "    } catch(e) { logEx(e); }\n"
126             + "  }\n"
127             + "</script></head><body onload='test()'>\n"
128             + "</body></html>";
129 
130         loadPageVerifyTitle2(content);
131     }
132 
133     /**
134      * @throws Exception if the test fails
135      */
136     @Test
137     @Alerts("[object XMLDocument]")
138     public void parseFromString_application_xml() throws Exception {
139         final String content = DOCTYPE_HTML
140             + "<html><head>\n"
141             + "<script>\n"
142             + LOG_TITLE_FUNCTION
143             + "  function test() {\n"
144             + "    var text='<note/>';\n"
145             + "    try {\n"
146             + "      var parser = new DOMParser();\n"
147             + "      var doc = parser.parseFromString(text, 'application/xml');\n"
148             + "      log(doc);\n"
149             + "    } catch(e) { logEx(e); }\n"
150             + "  }\n"
151             + "</script></head><body onload='test()'>\n"
152             + "</body></html>";
153 
154         loadPageVerifyTitle2(content);
155     }
156 
157     /**
158      * @throws Exception if the test fails
159      */
160     @Test
161     @Alerts("[object XMLDocument]")
162     public void parseFromString_application_xhtmlXml() throws Exception {
163         final String content = DOCTYPE_HTML
164             + "<html><head>\n"
165             + "<script>\n"
166             + LOG_TITLE_FUNCTION
167             + "  function test() {\n"
168             + "    var text='<html/>';\n"
169             + "    try {\n"
170             + "      var parser = new DOMParser();\n"
171             + "      var doc = parser.parseFromString(text, 'application/xhtml+xml');\n"
172             + "      log(doc);\n"
173             + "    } catch(e) { logEx(e); }\n"
174             + "  }\n"
175             + "</script></head><body onload='test()'>\n"
176             + "</body></html>";
177 
178         loadPageVerifyTitle2(content);
179     }
180 
181     /**
182      * @throws Exception if the test fails
183      */
184     @Test
185     @Alerts("[object XMLDocument]")
186     public void parseFromString_application_svgXml() throws Exception {
187         final String content = DOCTYPE_HTML
188             + "<html><head>\n"
189             + "<script>\n"
190             + LOG_TITLE_FUNCTION
191             + "  function test() {\n"
192             + "    var text='<svg xmlns=\"http://www.w3.org/2000/svg\"/>';\n"
193             + "    try {\n"
194             + "      var parser = new DOMParser();\n"
195             + "      var doc = parser.parseFromString(text, 'image/svg+xml');\n"
196             + "      log(doc);\n"
197             + "    } catch(e) { logEx(e); }\n"
198             + "  }\n"
199             + "</script></head><body onload='test()'>\n"
200             + "</body></html>";
201 
202         loadPageVerifyTitle2(content);
203     }
204 
205     /**
206      * @throws Exception if the test fails
207      */
208     @Test
209     @Alerts("TypeError")
210     public void parseFromString_unknownType() throws Exception {
211         final String content = DOCTYPE_HTML
212             + "<html><head>\n"
213             + "<script>\n"
214             + LOG_TITLE_FUNCTION
215             + "  function test() {\n"
216             + "    var text='<test/>';\n"
217             + "    try {\n"
218             + "      var parser = new DOMParser();\n"
219             + "      var doc = parser.parseFromString(text, 'unknown/type');\n"
220             + "      log(doc);\n"
221             + "    } catch(e) { logEx(e); }\n"
222             + "  }\n"
223             + "</script></head><body onload='test()'>\n"
224             + "</body></html>";
225 
226         loadPageVerifyTitle2(content);
227     }
228 
229     /**
230      * @throws Exception if the test fails
231      */
232     @Test
233     @Alerts("9")
234     public void parseFromString() throws Exception {
235         final String content = DOCTYPE_HTML
236             + "<html><head>\n"
237             + "<script>\n"
238             + LOG_TITLE_FUNCTION
239             + "  function test() {\n"
240             + "    var text='<note> ';\n"
241             + "    text += '<to>Tove</to> ';\n"
242             + "    text += '<from>Jani</from> ';\n"
243             + "    text += '<heading>Reminder</heading> ';\n"
244             + "    text += '<body>Do not forget me this weekend!</body> ';\n"
245             + "    text += '</note>';\n"
246             + "    try {\n"
247             + "      var parser = new DOMParser();\n"
248             + "      var doc = parser.parseFromString(text, 'text/xml');\n"
249             + "      if (doc.getElementsByTagName('parsererror').length > 0) { log('parsererror'); return; }\n"
250 
251             + "      var x = doc.documentElement;\n"
252             + "      log(x.childNodes.length);\n"
253             + "    } catch(e) { logEx(e); }\n"
254             + "  }\n"
255             + "</script></head><body onload='test()'>\n"
256             + "</body></html>";
257 
258         loadPageVerifyTitle2(content);
259     }
260 
261     /**
262      * In 2.9-SNAPSHOT on 26.10.2010 this was causing an internal error in DOMParser.parseFromString.
263      * @throws Exception if the test fails
264      */
265     @Test
266     @Alerts("parsererror")
267     public void parseFromString_invalidXml() throws Exception {
268         final String content = DOCTYPE_HTML
269             + "<html><head>\n"
270             + "<script>\n"
271             + LOG_TITLE_FUNCTION
272             + "  function test() {\n"
273             + "    var text = '</notvalid> ';\n"
274             + "    try {\n"
275             + "      var parser = new DOMParser();\n"
276             + "      var doc = parser.parseFromString(text, 'text/xml');\n"
277             + "      if (doc.getElementsByTagName('parsererror').length > 0) {\n"
278             + "        log('parsererror');\n"
279             + "        return;\n"
280             + "      }\n"
281             + "    } catch(e) { logEx(e); }\n"
282             + "  }\n"
283             + "</script></head><body onload='test()'>\n"
284             + "</body></html>";
285 
286         loadPageVerifyTitle2(content);
287     }
288 
289     /**
290      * @throws Exception if the test fails
291      */
292     @Test
293     @Alerts("parsererror")
294     public void parseFromString_emptyString() throws Exception {
295         final String content = DOCTYPE_HTML
296             + "<html><head>\n"
297             + "<script>\n"
298             + LOG_TITLE_FUNCTION
299             + "  function test() {\n"
300             + "    var text='';\n"
301             + "    try {\n"
302             + "      var parser = new DOMParser();\n"
303             + "      var doc = parser.parseFromString(text, 'text/xml');\n"
304             + "      if (doc.getElementsByTagName('parsererror').length > 0) {\n"
305             + "        log('parsererror');\n"
306             + "        return;\n"
307             + "      }\n"
308             + "      log(doc.childNodes.length);\n"
309             + "    } catch(e) { logEx(e); }\n"
310             + "  }\n"
311             + "</script></head><body onload='test()'>\n"
312             + "</body></html>";
313 
314         loadPageVerifyTitle2(content);
315     }
316 
317     /**
318      * @throws Exception if the test fails
319      */
320     @Test
321     @Alerts("TypeError")
322     public void parseFromString_missingMimeType() throws Exception {
323         final String content = DOCTYPE_HTML
324             + "<html><head>\n"
325             + "<script>\n"
326             + LOG_TITLE_FUNCTION
327             + "  function test() {\n"
328             + "    var text='<root/>';\n"
329             + "    try {\n"
330             + "      var parser=new DOMParser();\n"
331             + "      parser.parseFromString(text);\n"
332             + "    } catch(e) { logEx(e); }\n"
333             + "  }\n"
334             + "</script></head>\n"
335             + "<body onload='test()'>\n"
336             + "</body></html>";
337 
338         loadPageVerifyTitle2(content);
339     }
340 
341     /**
342      * Regression test for bug 2899485.
343      * @throws Exception if an error occurs
344      */
345     @Test
346     @Alerts({"5", "[object CDATASection]", "[object Comment]", "[object Element]",
347                 "[object ProcessingInstruction]", "[object Text]"})
348     public void parseFromString_processingInstructionKept() throws Exception {
349         final String html = DOCTYPE_HTML
350             + "<html><head>\n"
351             + "<script>\n"
352             + LOG_TITLE_FUNCTION
353             + "  function test() {\n"
354             + "    var text = '<elementWithChildren>' + '<![CDATA[sampl<<< >>e data]]>' + '<!--a sample comment-->'\n"
355             + "      + '<elementWithChildren/>' + '<?target processing instruction data?>' + 'sample text node'\n"
356             + "      + '</elementWithChildren>';\n"
357             + "    try {\n"
358             + "      var parser = new DOMParser();\n"
359             + "      var doc = parser.parseFromString(text, 'text/xml');\n"
360             + "      if (doc.getElementsByTagName('parsererror').length > 0) {\n"
361             + "        log('parsererror');\n"
362             + "        return;\n"
363             + "      }\n"
364             + "      log(doc.documentElement.childNodes.length);\n"
365             + "      for(var i = 0; i < doc.documentElement.childNodes.length; i++) {\n"
366             + "        log(doc.documentElement.childNodes[i]);\n"
367             + "      }\n"
368             + "    } catch(e) { logEx(e); }\n"
369             + "  }\n"
370             + "</script></head><body onload='test()'></body></html>";
371 
372         loadPageVerifyTitle2(html);
373     }
374 
375     /**
376      * @throws Exception if an error occurs
377      */
378     @Test
379     @Alerts("[object HTMLDocument]")
380     public void parseFromString_doNotExecuteScripts() throws Exception {
381         final String html = DOCTYPE_HTML
382             + "<html>\n"
383             + "<head>\n"
384             + "  <script>\n"
385             + LOG_TITLE_FUNCTION
386             + "    function test() {\n"
387             + "      var html = '<script>document.title = \"parsed script executed\";</' + 'script>';\n"
388             + "      var parser = new DOMParser();\n"
389             + "      log(parser.parseFromString(html, 'text/html'));\n"
390             + "  }\n"
391             + "  </script>\n"
392             + "</head>\n"
393             + "<body onload='test()'>\n"
394             + "</body></html>";
395 
396         loadPageVerifyTitle2(html);
397     }
398 
399     /**
400      * @throws Exception if an error occurs
401      */
402     @Test
403     @Alerts("[object HTMLDocument]")
404     public void parseFromString_doNotExecuteSvgScripts() throws Exception {
405         final String html = DOCTYPE_HTML
406             + "<html>\n"
407             + "<head>\n"
408             + "  <script>\n"
409             + LOG_TITLE_FUNCTION
410             + "    function test() {\n"
411             + "      var html = '<svg viewBox=\"0 0 10 10\" xmlns=\"http://www.w3.org/2000/svg\">'\n"
412             + "                + '<script>document.title = \"parsed script executed\";</' + 'script>'\n"
413             + "                + '</svg>';\n"
414             + "      var parser = new DOMParser();\n"
415             + "      log(parser.parseFromString(html, 'text/html'));\n"
416             + "  }\n"
417             + "  </script>\n"
418             + "</head>\n"
419             + "<body onload='test()'>\n"
420             + "</body></html>";
421 
422         loadPageVerifyTitle2(html);
423     }
424 
425     /**
426      * Test exception throw by IE when calling <code>insertBefore</code>.
427      * @throws Exception if the test fails
428      */
429     @Test
430     @Alerts({"parsed", "inserted"})
431     public void dontExecScriptsFromDOMParser() throws Exception {
432         final String html = DOCTYPE_HTML
433               + "<html>\n"
434               + "<head></head>\n"
435               + "<body>\n"
436               + "<div id='tester'><div>"
437               + "<script>\n"
438               + LOG_TITLE_FUNCTION
439               + "  var script = \"<div><script>log('from script');</\" + \"script></div>\"\n"
440               + "  var parser = new DOMParser();\n"
441               + "  var parsedDoc = parser.parseFromString(script, 'text/html');\n"
442               + "  var parsedNode = parsedDoc.body.firstChild.firstChild;\n"
443               + "  log('parsed');\n"
444 
445               + "  document.getElementById('tester').insertBefore(parsedNode, null);\n"
446               + "  log('inserted');\n"
447               + "</script>\n"
448               + "</body></html>";
449 
450         loadPageVerifyTitle2(html);
451     }
452 }