1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.html;
16
17 import static java.nio.charset.StandardCharsets.UTF_8;
18
19 import java.util.ArrayList;
20 import java.util.List;
21
22 import org.htmlunit.MockWebConnection;
23 import org.htmlunit.SimpleWebTestCase;
24 import org.htmlunit.WebClient;
25 import org.htmlunit.util.MimeType;
26 import org.htmlunit.util.StringUtils;
27 import org.junit.jupiter.api.Test;
28
29
30
31
32
33
34
35
36
37
38
39 public class DomTextTest extends SimpleWebTestCase {
40
41
42
43
44
45 @Test
46 public void asText_nbsp() throws Exception {
47 testPlainText("a b c d e", "a b c d e");
48 testPlainText("a b c d e", "a b c d e");
49 testPlainText(" a ", " a ");
50 testPlainText(" a ", " a ");
51 testPlainText(" a ", " a ");
52 }
53
54
55
56
57
58
59
60 @Test
61 public void asText_fontFormat() throws Exception {
62 testAsText("a <b>b</b> c", "a b c");
63 testAsText("a <b>b</b>c", "a bc");
64 testAsText("a<b>b</b> c", "ab c");
65 testAsText("a<b>b</b>c", "abc");
66
67
68 testAsText("a <i>b</i> c", "a b c");
69 testAsText("a <i>b</i>c", "a bc");
70 testAsText("a<i>b</i> c", "ab c");
71 testAsText("a<i>b</i>c", "abc");
72
73 testAsText("a <tt>b</tt> c", "a b c");
74 testAsText("a <tt>b</tt>c", "a bc");
75 testAsText("a<tt>b</tt> c", "ab c");
76 testAsText("a<tt>b</tt>c", "abc");
77
78 testAsText("a <font>b</font> c", "a b c");
79 testAsText("a<font>b</font> c", "ab c");
80 testAsText("a <font>b</font>c", "a bc");
81 testAsText("a<font>b</font>c", "abc");
82
83 testAsText("a <span>b</span> c", "a b c");
84 testAsText("a<span>b</span> c", "ab c");
85 testAsText("a <span>b</span>c", "a bc");
86 testAsText("a<span>b</span>c", "abc");
87
88 testAsText("a<b><font><i>b</i></font></b>c", "abc");
89 testAsText("a<b><font> <i>b</i></font></b>c", "a bc");
90 }
91
92
93
94
95
96
97 @Test
98 public void asNormalizedTextRegression() throws Exception {
99 String expected = "a\nb\nc";
100 testAsText("a<ul><li>b</ul>c", expected);
101 testAsText("a<p>b<br>c", expected);
102 testAsText("a<table><tr><td>b</td></tr></table>c", expected);
103 testAsText("a<div>b</div>c", expected);
104
105 expected = "a\nb\nb\nc";
106 testAsText("a<table><tr><td> b </td></tr>\n<tr><td> b </td></tr></table>c", expected);
107 }
108
109
110
111
112
113 @Test
114 public void asText_table_elements() throws Exception {
115 final String html = "<table id='table'><tr id='row'><td id='cell'> b </td></tr>\n</table>\n";
116 final String content = DOCTYPE_HTML + "<html><body><span id='foo'>" + html + "</span></body></html>";
117
118 final HtmlPage page = loadPage(content);
119
120 assertEquals("b", page.getHtmlElementById("cell").asNormalizedText());
121 assertEquals("b", page.getHtmlElementById("row").asNormalizedText());
122 assertEquals("b", page.getHtmlElementById("table").asNormalizedText());
123 }
124
125 private void testPlainText(final String html, final String expectedText) throws Exception {
126 final String content = DOCTYPE_HTML + "<html><body><span id='foo'>" + html + "</span></body></html>";
127
128 final HtmlPage page = loadPage(content);
129 assertEquals(expectedText, page.asNormalizedText());
130
131 final HtmlElement elt = page.getHtmlElementById("foo");
132 assertEquals(expectedText, elt.asNormalizedText());
133
134 final DomNode node = elt.getFirstChild();
135 assertEquals(expectedText, node.asNormalizedText());
136 }
137
138 private void testAsText(final String html, final String expectedText) throws Exception {
139 final String content = DOCTYPE_HTML + "<html><body><span id='foo'>" + html + "</span></body></html>";
140
141 final HtmlPage page = loadPage(content);
142 final HtmlElement elt = page.getHtmlElementById("foo");
143 assertEquals(expectedText, elt.asNormalizedText());
144 }
145
146
147
148
149 @Test
150 public void asXml() throws Exception {
151 final String unicodeString = "\u064A\u0627 \u0644\u064A\u064A\u0644";
152 final String html = DOCTYPE_HTML
153 + "<html>\n"
154 + "<head><meta http-equiv='Content-Type' content='text/html; charset=UTF-8'></head>\n"
155 + "<body><span id='foo'>" + unicodeString + "</span></body></html>";
156
157 final int[] expectedValues = {1610, 1575, 32, 1604, 1610, 1610, 1604};
158
159 final WebClient client = getWebClient();
160 final MockWebConnection webConnection = new MockWebConnection();
161
162 webConnection.setDefaultResponse(StringUtils.toByteArray(html, UTF_8), 200, "OK", MimeType.TEXT_HTML);
163 client.setWebConnection(webConnection);
164
165 final HtmlPage page = client.getPage(URL_FIRST);
166 final String xml = page.getHtmlElementById("foo").getFirstChild().asXml();
167 assertEquals(expectedValues.length, xml.length());
168 int index = 0;
169 for (final int expectedValue : expectedValues) {
170 assertEquals(expectedValue, xml.codePointAt(index++));
171 }
172 }
173
174
175
176
177 @Test
178 public void splitText() throws Exception {
179 final String html = DOCTYPE_HTML
180 + "<html><head></head><body>\n"
181 + "<br><div id='tag'></div><br></body></html>";
182 final HtmlPage page = loadPage(html);
183
184 final DomNode divNode = page.getElementById("tag");
185
186 final DomText node = new DomText(page, "test split");
187 divNode.insertBefore(node);
188
189 final DomNode previousSibling = node.getPreviousSibling();
190 final DomNode nextSibling = node.getNextSibling();
191 final DomNode parent = node.getParentNode();
192
193
194 final int position = readPositionAmongParentChildren(node);
195
196 final DomText newNode = node.splitText(5);
197
198 assertSame("new node previous sibling", node, newNode.getPreviousSibling());
199 assertSame("previous sibling", previousSibling, node.getPreviousSibling());
200 assertSame("new node next sibling", nextSibling, newNode.getNextSibling());
201 assertSame("next sibling", newNode, node.getNextSibling());
202 assertSame("parent", parent, newNode.getParentNode());
203 assertSame(node, previousSibling.getNextSibling());
204 assertSame(newNode, nextSibling.getPreviousSibling());
205 assertEquals(position + 1, readPositionAmongParentChildren(newNode));
206 }
207
208
209
210
211 @Test
212 public void splitLastDomText() throws Exception {
213 final String content = DOCTYPE_HTML
214 + "<html><head></head><body>\n"
215 + "<br><div id='tag'></div><br></body></html>";
216 final HtmlPage page = loadPage(content);
217
218 final DomNode divNode = page.getElementById("tag");
219
220 final DomText firstNode = new DomText(page, "test split");
221 divNode.appendChild(firstNode);
222
223 assertNull(firstNode.getPreviousSibling());
224
225 final DomText secondNode = firstNode.splitText(5);
226
227 final DomText thirdNode = new DomText(page, "test split");
228 divNode.appendChild(thirdNode);
229
230 assertSame(secondNode, firstNode.getNextSibling());
231 assertNull(firstNode.getPreviousSibling());
232 assertSame(firstNode, secondNode.getPreviousSibling());
233 assertSame(thirdNode, secondNode.getNextSibling());
234 assertSame(secondNode, thirdNode.getPreviousSibling());
235 assertNull(thirdNode.getNextSibling());
236 assertSame(divNode, secondNode.getParentNode());
237 assertSame(divNode, thirdNode.getParentNode());
238 assertEquals(0, readPositionAmongParentChildren(firstNode));
239 assertEquals(1, readPositionAmongParentChildren(secondNode));
240 assertEquals(2, readPositionAmongParentChildren(thirdNode));
241 }
242
243
244
245
246
247
248 private static int readPositionAmongParentChildren(final DomNode node) {
249 int i = 0;
250 for (final DomNode child : node.getParentNode().getChildren()) {
251 if (child == node) {
252 return i;
253 }
254 i++;
255 }
256 return -1;
257 }
258
259
260
261
262 @Test
263 public void splitText2() throws Exception {
264 final String html = DOCTYPE_HTML
265 + "<html><head><title>foo</title><script>\n"
266 + " function test() {\n"
267 + " var div = document.getElementById('myDiv');\n"
268 + " div.appendChild(document.createElement('a'));\n"
269 + " var text = document.createTextNode('123456');\n"
270 + " div.appendChild(text);\n"
271 + " div.appendChild(document.createElement('hr'));\n"
272 + " alert(div.childNodes.length);\n"
273 + " text.splitText(3);\n"
274 + " alert(div.childNodes.length);\n"
275 + " alert(div.childNodes.item(2).nodeValue);\n"
276 + " }\n"
277 + "</script></head><body onload='test()'>\n"
278 + " <div id='myDiv'></div>\n"
279 + "</body></html>";
280 final String[] expectedAlerts = {"3", "4", "456"};
281 final List<String> collectedAlerts = new ArrayList<>();
282 loadPage(html, collectedAlerts);
283 assertEquals(expectedAlerts, collectedAlerts);
284 }
285
286
287
288
289 @Test
290 public void setTextContent() throws Exception {
291 final String html = DOCTYPE_HTML + "<html><body><span id='s'>abc</span></body></html>";
292 final HtmlPage page = loadPage(html);
293 final DomText text = (DomText) page.getElementById("s").getFirstChild();
294 assertEquals("abc", text.getTextContent());
295 text.setTextContent("xyz");
296 assertEquals("xyz", text.getTextContent());
297 assertEquals("xyz", page.asNormalizedText());
298 }
299
300
301
302
303
304 @Test
305 public void getTextContentWhitespace() throws Exception {
306 final String html = DOCTYPE_HTML + "<html><body><div id='s'><b>Hello</b> <b>World</b>!</div></body></html>";
307 final HtmlPage page = loadPage(html);
308 final HtmlElement text = page.getHtmlElementById("s");
309 assertEquals("Hello World!", text.getTextContent());
310 }
311
312
313
314
315
316
317 @Test
318 public void getCanonicalXPath_withoutTextSiblings() throws Exception {
319 final String html = DOCTYPE_HTML + "<html><body><span id='s'>abc</span></body></html>";
320 final HtmlPage page = loadPage(html);
321 final DomText text = (DomText) page.getElementById("s").getFirstChild();
322 assertEquals("/html/body/span/text()", text.getCanonicalXPath());
323 assertEquals(text, page.getFirstByXPath(text.getCanonicalXPath()));
324 }
325
326
327
328
329
330
331 @Test
332 public void getCanonicalXPath_withTextSiblings() throws Exception {
333 final String html = DOCTYPE_HTML + "<html><body><span id='s'>abc<br/>def</span></body></html>";
334 final HtmlPage page = loadPage(html);
335
336 final DomText text1 = (DomText) page.getElementById("s").getFirstChild();
337 assertEquals("abc", text1.getData());
338 assertEquals("/html/body/span/text()[1]", text1.getCanonicalXPath());
339 assertEquals(text1, page.getFirstByXPath(text1.getCanonicalXPath()));
340
341 final DomText text2 = (DomText) page.getElementById("s").getChildNodes().get(2);
342 assertEquals("def", text2.getData());
343 assertEquals("/html/body/span/text()[2]", text2.getCanonicalXPath());
344 assertEquals(text2, page.getFirstByXPath(text2.getCanonicalXPath()));
345 }
346
347 }