1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.html;
16
17 import static java.nio.charset.StandardCharsets.UTF_8;
18
19 import java.util.ArrayList;
20 import java.util.List;
21
22 import org.htmlunit.MockWebConnection;
23 import org.htmlunit.SimpleWebTestCase;
24 import org.htmlunit.WebClient;
25 import org.htmlunit.junit.BrowserRunner;
26 import org.htmlunit.util.MimeType;
27 import org.htmlunit.util.StringUtils;
28 import org.junit.Test;
29 import org.junit.runner.RunWith;
30
31
32
33
34
35
36
37
38
39
40 @RunWith(BrowserRunner.class)
41 public class DomTextTest extends SimpleWebTestCase {
42
43
44
45
46
47 @Test
48 public void asText_nbsp() throws Exception {
49 testPlainText("a b c d e", "a b c d e");
50 testPlainText("a b c d e", "a b c d e");
51 testPlainText(" a ", " a ");
52 testPlainText(" a ", " a ");
53 testPlainText(" a ", " a ");
54 }
55
56
57
58
59
60
61
62 @Test
63 public void asText_fontFormat() throws Exception {
64 testAsText("a <b>b</b> c", "a b c");
65 testAsText("a <b>b</b>c", "a bc");
66 testAsText("a<b>b</b> c", "ab c");
67 testAsText("a<b>b</b>c", "abc");
68
69
70 testAsText("a <i>b</i> c", "a b c");
71 testAsText("a <i>b</i>c", "a bc");
72 testAsText("a<i>b</i> c", "ab c");
73 testAsText("a<i>b</i>c", "abc");
74
75 testAsText("a <tt>b</tt> c", "a b c");
76 testAsText("a <tt>b</tt>c", "a bc");
77 testAsText("a<tt>b</tt> c", "ab c");
78 testAsText("a<tt>b</tt>c", "abc");
79
80 testAsText("a <font>b</font> c", "a b c");
81 testAsText("a<font>b</font> c", "ab c");
82 testAsText("a <font>b</font>c", "a bc");
83 testAsText("a<font>b</font>c", "abc");
84
85 testAsText("a <span>b</span> c", "a b c");
86 testAsText("a<span>b</span> c", "ab c");
87 testAsText("a <span>b</span>c", "a bc");
88 testAsText("a<span>b</span>c", "abc");
89
90 testAsText("a<b><font><i>b</i></font></b>c", "abc");
91 testAsText("a<b><font> <i>b</i></font></b>c", "a bc");
92 }
93
94
95
96
97
98
99 @Test
100 public void asNormalizedTextRegression() throws Exception {
101 String expected = "a\nb\nc";
102 testAsText("a<ul><li>b</ul>c", expected);
103 testAsText("a<p>b<br>c", expected);
104 testAsText("a<table><tr><td>b</td></tr></table>c", expected);
105 testAsText("a<div>b</div>c", expected);
106
107 expected = "a\nb\nb\nc";
108 testAsText("a<table><tr><td> b </td></tr>\n<tr><td> b </td></tr></table>c", expected);
109 }
110
111
112
113
114
115 @Test
116 public void asText_table_elements() throws Exception {
117 final String html = "<table id='table'><tr id='row'><td id='cell'> b </td></tr>\n</table>\n";
118 final String content = DOCTYPE_HTML + "<html><body><span id='foo'>" + html + "</span></body></html>";
119
120 final HtmlPage page = loadPage(content);
121
122 assertEquals("b", page.getHtmlElementById("cell").asNormalizedText());
123 assertEquals("b", page.getHtmlElementById("row").asNormalizedText());
124 assertEquals("b", page.getHtmlElementById("table").asNormalizedText());
125 }
126
127 private void testPlainText(final String html, final String expectedText) throws Exception {
128 final String content = DOCTYPE_HTML + "<html><body><span id='foo'>" + html + "</span></body></html>";
129
130 final HtmlPage page = loadPage(content);
131 assertEquals(expectedText, page.asNormalizedText());
132
133 final HtmlElement elt = page.getHtmlElementById("foo");
134 assertEquals(expectedText, elt.asNormalizedText());
135
136 final DomNode node = elt.getFirstChild();
137 assertEquals(expectedText, node.asNormalizedText());
138 }
139
140 private void testAsText(final String html, final String expectedText) throws Exception {
141 final String content = DOCTYPE_HTML + "<html><body><span id='foo'>" + html + "</span></body></html>";
142
143 final HtmlPage page = loadPage(content);
144 final HtmlElement elt = page.getHtmlElementById("foo");
145 assertEquals(expectedText, elt.asNormalizedText());
146 }
147
148
149
150
151 @Test
152 public void asXml() throws Exception {
153 final String unicodeString = "\u064A\u0627 \u0644\u064A\u064A\u0644";
154 final String html = DOCTYPE_HTML
155 + "<html>\n"
156 + "<head><meta http-equiv='Content-Type' content='text/html; charset=UTF-8'></head>\n"
157 + "<body><span id='foo'>" + unicodeString + "</span></body></html>";
158
159 final int[] expectedValues = {1610, 1575, 32, 1604, 1610, 1610, 1604};
160
161 final WebClient client = getWebClient();
162 final MockWebConnection webConnection = new MockWebConnection();
163
164 webConnection.setDefaultResponse(StringUtils.toByteArray(html, UTF_8), 200, "OK", MimeType.TEXT_HTML);
165 client.setWebConnection(webConnection);
166
167 final HtmlPage page = client.getPage(URL_FIRST);
168 final String xml = page.getHtmlElementById("foo").getFirstChild().asXml().trim();
169 assertEquals(expectedValues.length, xml.length());
170 int index = 0;
171 for (final int expectedValue : expectedValues) {
172 assertEquals(expectedValue, xml.codePointAt(index++));
173 }
174 }
175
176
177
178
179 @Test
180 public void splitText() throws Exception {
181 final String html = DOCTYPE_HTML
182 + "<html><head></head><body>\n"
183 + "<br><div id='tag'></div><br></body></html>";
184 final HtmlPage page = loadPage(html);
185
186 final DomNode divNode = page.getElementById("tag");
187
188 final DomText node = new DomText(page, "test split");
189 divNode.insertBefore(node);
190
191 final DomNode previousSibling = node.getPreviousSibling();
192 final DomNode nextSibling = node.getNextSibling();
193 final DomNode parent = node.getParentNode();
194
195
196 final int position = readPositionAmongParentChildren(node);
197
198 final DomText newNode = node.splitText(5);
199
200 assertSame("new node previous sibling", node, newNode.getPreviousSibling());
201 assertSame("previous sibling", previousSibling, node.getPreviousSibling());
202 assertSame("new node next sibling", nextSibling, newNode.getNextSibling());
203 assertSame("next sibling", newNode, node.getNextSibling());
204 assertSame("parent", parent, newNode.getParentNode());
205 assertSame(node, previousSibling.getNextSibling());
206 assertSame(newNode, nextSibling.getPreviousSibling());
207 assertEquals(position + 1, readPositionAmongParentChildren(newNode));
208 }
209
210
211
212
213 @Test
214 public void splitLastDomText() throws Exception {
215 final String content = DOCTYPE_HTML
216 + "<html><head></head><body>\n"
217 + "<br><div id='tag'></div><br></body></html>";
218 final HtmlPage page = loadPage(content);
219
220 final DomNode divNode = page.getElementById("tag");
221
222 final DomText firstNode = new DomText(page, "test split");
223 divNode.appendChild(firstNode);
224
225 assertNull(firstNode.getPreviousSibling());
226
227 final DomText secondNode = firstNode.splitText(5);
228
229 final DomText thirdNode = new DomText(page, "test split");
230 divNode.appendChild(thirdNode);
231
232 assertSame(secondNode, firstNode.getNextSibling());
233 assertNull(firstNode.getPreviousSibling());
234 assertSame(firstNode, secondNode.getPreviousSibling());
235 assertSame(thirdNode, secondNode.getNextSibling());
236 assertSame(secondNode, thirdNode.getPreviousSibling());
237 assertNull(thirdNode.getNextSibling());
238 assertSame(divNode, secondNode.getParentNode());
239 assertSame(divNode, thirdNode.getParentNode());
240 assertEquals(0, readPositionAmongParentChildren(firstNode));
241 assertEquals(1, readPositionAmongParentChildren(secondNode));
242 assertEquals(2, readPositionAmongParentChildren(thirdNode));
243 }
244
245
246
247
248
249
250 private static int readPositionAmongParentChildren(final DomNode node) {
251 int i = 0;
252 for (final DomNode child : node.getParentNode().getChildren()) {
253 if (child == node) {
254 return i;
255 }
256 i++;
257 }
258 return -1;
259 }
260
261
262
263
264 @Test
265 public void splitText2() throws Exception {
266 final String html = DOCTYPE_HTML
267 + "<html><head><title>foo</title><script>\n"
268 + " function test() {\n"
269 + " var div = document.getElementById('myDiv');\n"
270 + " div.appendChild(document.createElement('a'));\n"
271 + " var text = document.createTextNode('123456');\n"
272 + " div.appendChild(text);\n"
273 + " div.appendChild(document.createElement('hr'));\n"
274 + " alert(div.childNodes.length);\n"
275 + " text.splitText(3);\n"
276 + " alert(div.childNodes.length);\n"
277 + " alert(div.childNodes.item(2).nodeValue);\n"
278 + " }\n"
279 + "</script></head><body onload='test()'>\n"
280 + " <div id='myDiv'></div>\n"
281 + "</body></html>";
282 final String[] expectedAlerts = {"3", "4", "456"};
283 final List<String> collectedAlerts = new ArrayList<>();
284 loadPage(html, collectedAlerts);
285 assertEquals(expectedAlerts, collectedAlerts);
286 }
287
288
289
290
291 @Test
292 public void setTextContent() throws Exception {
293 final String html = DOCTYPE_HTML + "<html><body><span id='s'>abc</span></body></html>";
294 final HtmlPage page = loadPage(html);
295 final DomText text = (DomText) page.getElementById("s").getFirstChild();
296 assertEquals("abc", text.getTextContent());
297 text.setTextContent("xyz");
298 assertEquals("xyz", text.getTextContent());
299 assertEquals("xyz", page.asNormalizedText());
300 }
301
302
303
304
305
306 @Test
307 public void getTextContentWhitespace() throws Exception {
308 final String html = DOCTYPE_HTML + "<html><body><div id='s'><b>Hello</b> <b>World</b>!</div></body></html>";
309 final HtmlPage page = loadPage(html);
310 final HtmlElement text = page.getHtmlElementById("s");
311 assertEquals("Hello World!", text.getTextContent());
312 }
313
314
315
316
317
318
319 @Test
320 public void getCanonicalXPath_withoutTextSiblings() throws Exception {
321 final String html = DOCTYPE_HTML + "<html><body><span id='s'>abc</span></body></html>";
322 final HtmlPage page = loadPage(html);
323 final DomText text = (DomText) page.getElementById("s").getFirstChild();
324 assertEquals("/html/body/span/text()", text.getCanonicalXPath());
325 assertEquals(text, page.getFirstByXPath(text.getCanonicalXPath()));
326 }
327
328
329
330
331
332
333 @Test
334 public void getCanonicalXPath_withTextSiblings() throws Exception {
335 final String html = DOCTYPE_HTML + "<html><body><span id='s'>abc<br/>def</span></body></html>";
336 final HtmlPage page = loadPage(html);
337
338 final DomText text1 = (DomText) page.getElementById("s").getFirstChild();
339 assertEquals("abc", text1.getData());
340 assertEquals("/html/body/span/text()[1]", text1.getCanonicalXPath());
341 assertEquals(text1, page.getFirstByXPath(text1.getCanonicalXPath()));
342
343 final DomText text2 = (DomText) page.getElementById("s").getChildNodes().get(2);
344 assertEquals("def", text2.getData());
345 assertEquals("/html/body/span/text()[2]", text2.getCanonicalXPath());
346 assertEquals(text2, page.getFirstByXPath(text2.getCanonicalXPath()));
347 }
348
349 }