1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.html.parser;
16
17 import java.net.URL;
18
19 import org.htmlunit.SimpleWebTestCase;
20 import org.htmlunit.StringWebResponse;
21 import org.htmlunit.WebClient;
22 import org.htmlunit.WebResponse;
23 import org.htmlunit.html.DomElement;
24 import org.htmlunit.html.HtmlDivision;
25 import org.htmlunit.html.HtmlElement;
26 import org.htmlunit.html.HtmlPage;
27 import org.htmlunit.html.HtmlPageTest;
28 import org.htmlunit.html.HtmlTableColumnGroup;
29 import org.htmlunit.html.XHtmlPage;
30 import org.htmlunit.junit.BrowserRunner;
31 import org.junit.Test;
32 import org.junit.runner.RunWith;
33
34
35
36
37
38
39
40
41
42
43 @RunWith(BrowserRunner.class)
44 public class HTMLParserTest extends SimpleWebTestCase {
45
46
47
48
49
50 @Test
51 public void simpleHTMLString() throws Exception {
52 final WebClient webClient = getWebClient();
53 final WebResponse webResponse = new StringWebResponse(
54 "<html><head><title>TITLE</title></head><body><div>TEST</div></body></html>", URL_FIRST);
55
56 final HtmlPage page = new HtmlPage(webResponse, webClient.getCurrentWindow());
57 webClient.getCurrentWindow().setEnclosedPage(page);
58
59 webClient.getPageCreator().getHtmlParser().parse(webResponse, page, false, false);
60
61 final String stringVal = page.<HtmlDivision>getFirstByXPath("//div").getFirstChild().getNodeValue();
62 assertEquals("TEST", stringVal);
63
64 final HtmlElement node = (HtmlElement) page.getFirstByXPath("//*[./text() = 'TEST']");
65 assertEquals(node.getTagName(), HtmlDivision.TAG_NAME);
66 }
67
68
69
70
71
72
73 @Test
74 public void bomUtf8() throws Exception {
75 final String resource = "bom-utf8.html";
76 final URL url = getClass().getClassLoader().getResource(resource);
77 assertNotNull(url);
78
79 final WebClient client = getWebClient();
80 final HtmlPage page = client.getPage(url);
81 assertEquals("Welcome to Suffolk Coastal District Council online", page.getTitleText());
82 }
83
84
85
86
87
88 @Test
89 public void emptyStack() throws Exception {
90 final String html =
91 "<html>\n"
92 + " <body onload='document.getElementById(\"s\").innerHTML = "
93 + " \"<h1><span><span></span></span><span><span></span></span></h1>\";'>\n"
94 + " <div>\n"
95 + " <div>\n"
96 + " <table>\n"
97 + " <tbody>\n"
98 + " <tr>\n"
99 + " <td>\n"
100 + " <table>\n"
101 + " <tbody>\n"
102 + " <tr>\n"
103 + " <td>\n"
104 + " <div>\n"
105 + " <div>\n"
106 + " <h1>\n"
107 + " <span id='s'>blah</span>\n"
108 + " </h1>\n"
109 + " </div>\n"
110 + " </div>\n"
111 + " </td>\n"
112 + " </tr>\n"
113 + " </tbody>\n"
114 + " </table>\n"
115 + " </td>\n"
116 + " </tr>\n"
117 + " </tbody>\n"
118 + " </table>\n"
119 + " </div>\n"
120 + " </div>\n"
121 + " </body>\n"
122 + "</html>";
123 final HtmlPage page = loadPage(html);
124 assertNotNull(page);
125 }
126
127
128
129
130 @Test
131 public void tableWithoutColgroup() throws Exception {
132 final String html = HtmlPageTest.STANDARDS_MODE_PREFIX_
133 + "<html><head>\n"
134 + "</head>\n"
135 + "<body>\n"
136 + " <table><col width='7'/><col width='1'/><tbody><tr><td>seven</td><td>One</td></tr></tbody></table>\n"
137 + "</body></html>";
138
139 final WebClient webClient = getWebClient();
140 final WebResponse webResponse = new StringWebResponse(html, URL_FIRST);
141
142 final XHtmlPage page = new XHtmlPage(webResponse, webClient.getCurrentWindow());
143 webClient.getCurrentWindow().setEnclosedPage(page);
144
145 webClient.getPageCreator().getHtmlParser().parse(webResponse, page, true, false);
146
147 final DomElement col = page.getElementsByTagName("col").get(0);
148 assertEquals(col.getParentNode().getNodeName(), HtmlTableColumnGroup.TAG_NAME);
149 }
150 }