1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.html.parser;
16
17 import java.net.URL;
18
19 import org.htmlunit.SimpleWebTestCase;
20 import org.htmlunit.StringWebResponse;
21 import org.htmlunit.WebClient;
22 import org.htmlunit.WebResponse;
23 import org.htmlunit.html.DomElement;
24 import org.htmlunit.html.HtmlDivision;
25 import org.htmlunit.html.HtmlElement;
26 import org.htmlunit.html.HtmlPage;
27 import org.htmlunit.html.HtmlPageTest;
28 import org.htmlunit.html.HtmlTableColumnGroup;
29 import org.htmlunit.html.XHtmlPage;
30 import org.junit.jupiter.api.Test;
31
32
33
34
35
36
37
38
39
40
41 public class HTMLParserTest extends SimpleWebTestCase {
42
43
44
45
46
47 @Test
48 public void simpleHTMLString() throws Exception {
49 final WebClient webClient = getWebClient();
50 final WebResponse webResponse = new StringWebResponse(
51 "<html><head><title>TITLE</title></head><body><div>TEST</div></body></html>", URL_FIRST);
52
53 final HtmlPage page = new HtmlPage(webResponse, webClient.getCurrentWindow());
54 webClient.getCurrentWindow().setEnclosedPage(page);
55
56 webClient.getPageCreator().getHtmlParser().parse(null, webResponse, page, false, false);
57
58 final String stringVal = page.<HtmlDivision>getFirstByXPath("//div").getFirstChild().getNodeValue();
59 assertEquals("TEST", stringVal);
60
61 final HtmlElement node = (HtmlElement) page.getFirstByXPath("//*[./text() = 'TEST']");
62 assertEquals(node.getTagName(), HtmlDivision.TAG_NAME);
63 }
64
65
66
67
68
69
70 @Test
71 public void bomUtf8() throws Exception {
72 final String resource = "bom-utf8.html";
73 final URL url = getClass().getClassLoader().getResource(resource);
74 assertNotNull(url);
75
76 final WebClient client = getWebClient();
77 final HtmlPage page = client.getPage(url);
78 assertEquals("Welcome to Suffolk Coastal District Council online", page.getTitleText());
79 }
80
81
82
83
84
85 @Test
86 public void emptyStack() throws Exception {
87 final String html =
88 "<html>\n"
89 + " <body onload='document.getElementById(\"s\").innerHTML = "
90 + " \"<h1><span><span></span></span><span><span></span></span></h1>\";'>\n"
91 + " <div>\n"
92 + " <div>\n"
93 + " <table>\n"
94 + " <tbody>\n"
95 + " <tr>\n"
96 + " <td>\n"
97 + " <table>\n"
98 + " <tbody>\n"
99 + " <tr>\n"
100 + " <td>\n"
101 + " <div>\n"
102 + " <div>\n"
103 + " <h1>\n"
104 + " <span id='s'>blah</span>\n"
105 + " </h1>\n"
106 + " </div>\n"
107 + " </div>\n"
108 + " </td>\n"
109 + " </tr>\n"
110 + " </tbody>\n"
111 + " </table>\n"
112 + " </td>\n"
113 + " </tr>\n"
114 + " </tbody>\n"
115 + " </table>\n"
116 + " </div>\n"
117 + " </div>\n"
118 + " </body>\n"
119 + "</html>";
120 final HtmlPage page = loadPage(html);
121 assertNotNull(page);
122 }
123
124
125
126
127 @Test
128 public void tableWithoutColgroup() throws Exception {
129 final String html = HtmlPageTest.STANDARDS_MODE_PREFIX_
130 + "<html><head>\n"
131 + "</head>\n"
132 + "<body>\n"
133 + " <table><col width='7'/><col width='1'/><tbody><tr><td>seven</td><td>One</td></tr></tbody></table>\n"
134 + "</body></html>";
135
136 final WebClient webClient = getWebClient();
137 final WebResponse webResponse = new StringWebResponse(html, URL_FIRST);
138
139 final XHtmlPage page = new XHtmlPage(webResponse, webClient.getCurrentWindow());
140 webClient.getCurrentWindow().setEnclosedPage(page);
141
142 webClient.getPageCreator().getHtmlParser().parse(null, webResponse, page, true, false);
143
144 final DomElement col = page.getElementsByTagName("col").get(0);
145 assertEquals(col.getParentNode().getNodeName(), HtmlTableColumnGroup.TAG_NAME);
146 }
147 }