1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.html.serializer;
16
17 import static org.junit.Assert.assertEquals;
18 import static org.junit.Assert.assertTrue;
19
20 import java.io.IOException;
21 import java.util.Arrays;
22
23 import org.apache.commons.lang3.StringUtils;
24 import org.htmlunit.WebClient;
25 import org.htmlunit.html.HtmlPage;
26 import org.htmlunit.html.serializer.HtmlSerializerNormalizedText.HtmlSerializerTextBuilder;
27 import org.htmlunit.html.serializer.HtmlSerializerNormalizedText.HtmlSerializerTextBuilder.Mode;
28 import org.junit.Test;
29
30
31
32
33
34
35 public class HtmlSerializerNormalizedTextTest {
36
37
38
39
40 @Test
41 public void normalize() {
42 HtmlSerializerTextBuilder serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
43 serializer.append("", Mode.NORMALIZE);
44 assertEquals("", serializer.getText());
45
46 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
47 serializer.append(" \t\r\n ", Mode.NORMALIZE);
48 assertEquals("", serializer.getText());
49
50 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
51 serializer.appendBlockSeparator();
52 assertEquals("", serializer.getText());
53
54 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
55 serializer.appendBlockSeparator();
56 serializer.append(" ", Mode.NORMALIZE);
57 assertEquals("", serializer.getText());
58
59 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
60 serializer.append(" ", Mode.NORMALIZE);
61 serializer.appendBlockSeparator();
62 assertEquals("", serializer.getText());
63
64 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
65 serializer.append(" ", Mode.NORMALIZE);
66 serializer.appendBlockSeparator();
67 serializer.append(" ", Mode.NORMALIZE);
68 assertEquals("", serializer.getText());
69
70 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
71 serializer.append(" a ", Mode.NORMALIZE);
72 serializer.appendBlockSeparator();
73 assertEquals("a", serializer.getText());
74
75 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
76 serializer.append(" a ", Mode.NORMALIZE);
77 serializer.appendBlockSeparator();
78 serializer.append(" x ", Mode.NORMALIZE);
79 assertEquals("a\nx", serializer.getText());
80
81 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
82 serializer.append("a", Mode.NORMALIZE);
83 serializer.appendBlockSeparator();
84 serializer.append("x", Mode.NORMALIZE);
85 assertEquals("a\nx", serializer.getText());
86
87 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
88 serializer.append("a", Mode.NORMALIZE);
89 serializer.appendBlockSeparator();
90 serializer.appendBlockSeparator();
91 serializer.append("x", Mode.NORMALIZE);
92 assertEquals("a\nx", serializer.getText());
93
94 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
95 serializer.append("a", Mode.NORMALIZE);
96 serializer.appendBlockSeparator();
97 serializer.append(" ", Mode.NORMALIZE);
98 serializer.appendBlockSeparator();
99 serializer.append("x", Mode.NORMALIZE);
100 assertEquals("a\nx", serializer.getText());
101
102 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
103 serializer.appendNewLine();
104 assertEquals("\n", serializer.getText());
105
106 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
107 serializer.appendNewLine();
108 serializer.append(" ", Mode.NORMALIZE);
109 assertEquals("\n", serializer.getText());
110
111 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
112 serializer.append(" ", Mode.NORMALIZE);
113 serializer.appendNewLine();
114 assertEquals("\n", serializer.getText());
115
116 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
117 serializer.append(" ", Mode.NORMALIZE);
118 serializer.appendNewLine();
119 serializer.append(" ", Mode.NORMALIZE);
120 assertEquals("\n", serializer.getText());
121
122 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
123 serializer.appendNewLine();
124 serializer.appendBlockSeparator();
125 serializer.append("x", Mode.NORMALIZE);
126 assertEquals("x", serializer.getText());
127
128 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
129 serializer.append("a", Mode.NORMALIZE);
130 serializer.appendNewLine();
131 serializer.appendBlockSeparator();
132 serializer.append("x", Mode.NORMALIZE);
133 assertEquals("a\nx", serializer.getText());
134
135 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
136 serializer.append("a", Mode.NORMALIZE);
137 serializer.appendBlockSeparator();
138 serializer.appendBlockSeparator();
139 serializer.appendBlockSeparator();
140 serializer.append("x", Mode.NORMALIZE);
141 assertEquals("a\nx", serializer.getText());
142
143 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
144 serializer.append("a", Mode.NORMALIZE);
145 serializer.appendTab();
146 serializer.append(" ", Mode.NORMALIZE);
147 serializer.appendTab();
148 serializer.append("x", Mode.NORMALIZE);
149 assertEquals("a\t \tx", serializer.getText());
150
151 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
152 serializer.appendBlockSeparator();
153 serializer.append("\n", Mode.NORMALIZE);
154 serializer.appendBlockSeparator();
155 serializer.append("x", Mode.NORMALIZE);
156 serializer.appendBlockSeparator();
157 serializer.append("y", Mode.NORMALIZE);
158 serializer.appendNewLine();
159 serializer.appendBlockSeparator();
160 serializer.appendBlockSeparator();
161 assertEquals("x\ny", serializer.getText());
162
163 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
164 serializer.append("abc", Mode.NORMALIZE);
165 assertEquals("abc", serializer.getText());
166
167 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
168 serializer.append("a b \t\t\t c \r \r o \n\n\n", Mode.NORMALIZE);
169 assertEquals("a b c o", serializer.getText());
170 }
171
172
173
174
175 @Test
176 public void normalizeNbsp() {
177 HtmlSerializerTextBuilder serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
178 serializer.append("abc" + (char) 160 + "x", Mode.NORMALIZE);
179 assertEquals("abc x", serializer.getText());
180
181 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
182 serializer.append((char) 160 + "x" + (char) 160, Mode.NORMALIZE);
183 assertEquals(" x ", serializer.getText());
184
185 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
186 serializer.appendBlockSeparator();
187 serializer.append((char) 160 + "x" + (char) 160, Mode.NORMALIZE);
188 serializer.appendBlockSeparator();
189 assertEquals(" x ", serializer.getText());
190 }
191
192
193
194
195 @Test
196 public void normalize2() {
197 HtmlSerializerTextBuilder serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
198 serializer.append("a", Mode.NORMALIZE);
199 serializer.appendBlockSeparator();
200 serializer.appendBlockSeparator();
201 assertEquals("a", serializer.getText());
202
203 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
204 serializer.append("a", Mode.NORMALIZE);
205 serializer.appendBlockSeparator();
206 serializer.append(" ", Mode.NORMALIZE);
207 serializer.appendBlockSeparator();
208 assertEquals("a", serializer.getText());
209 }
210
211
212
213
214 @Test
215 public void pre() {
216 final HtmlSerializerTextBuilder serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
217 serializer.append(" hello \t abc ", Mode.PRESERVE_BLANK_TAB_NEWLINE);
218 assertEquals(" hello \t abc ", serializer.getText());
219 }
220
221
222
223
224 @Test
225 public void textArea() {
226 final HtmlSerializerTextBuilder serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
227 serializer.append(" hello \t abc ", Mode.PRESERVE_BLANK_NEWLINE);
228 assertEquals(" hello abc", serializer.getText());
229 }
230
231
232
233
234 @Test
235 public void performanceWhitespace() {
236 final int length = 100_000;
237 final char[] charArray = new char[length];
238 Arrays.fill(charArray, ' ');
239 charArray[0] = 'a';
240 charArray[length - 1] = 'a';
241 final String text = new String(charArray);
242
243 final long time = System.currentTimeMillis();
244 final HtmlSerializerTextBuilder serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
245 serializer.append(text, Mode.NORMALIZE);
246 serializer.getText();
247
248 final long runTime = System.currentTimeMillis() - time;
249 assertTrue("cleanUp() took too much time", runTime < 200);
250 }
251
252
253
254
255 @Test
256 public void performanceManyReplaces() {
257 final String expected = StringUtils.repeat("x\n", 100_000).trim();
258
259 final long time = System.currentTimeMillis();
260
261 final HtmlSerializerTextBuilder serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
262
263 for (int i = 0; i < 100_000; i++) {
264 serializer.append(" x ", Mode.NORMALIZE);
265 serializer.appendBlockSeparator();
266 }
267
268 assertEquals(expected, serializer.getText());
269
270 final long runTime = System.currentTimeMillis() - time;
271 assertTrue("cleanUp() took too much time", runTime < 200);
272 }
273
274
275
276
277 @Test
278 public void specialSpaces() {
279 HtmlSerializerTextBuilder serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
280 serializer.append("\u3000", Mode.NORMALIZE);
281 assertEquals("\u3000", serializer.getText());
282
283 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
284 serializer.append("\uFEFF", Mode.NORMALIZE);
285 assertEquals("\uFEFF", serializer.getText());
286
287 serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
288 serializer.append("\u200B", Mode.NORMALIZE);
289 assertEquals("\u200B", serializer.getText());
290 }
291
292
293
294
295 @Test
296 public void variousNewLines() {
297 final HtmlSerializerTextBuilder serializer = new HtmlSerializerNormalizedText.HtmlSerializerTextBuilder();
298 serializer.appendNewLine();
299 serializer.append("\n", Mode.NORMALIZE);
300 serializer.appendBlockSeparator();
301 assertEquals("", serializer.getText());
302 }
303
304
305
306
307 @Test
308 public void cssEnableDisable1() throws IOException {
309 final String html =
310 "<div>\r\n"
311 + "<p>p\r\n"
312 + "<br>br\r\n"
313 + "</p>\r\n"
314 + "</div>";
315 final String expected = "p \nbr";
316
317 try (WebClient webClient = new WebClient()) {
318 final HtmlPage page = webClient.loadHtmlCodeIntoCurrentWindow(html);
319
320 assertEquals(expected, page.asNormalizedText());
321
322 webClient.getOptions().setCssEnabled(false);
323 assertEquals(expected, page.asNormalizedText());
324 }
325 }
326
327
328
329
330 @Test
331 public void cssEnableDisable2() throws IOException {
332 final String html =
333 "<div>\r\n"
334 + "<p>p\r\n"
335 + "<br>br\r\n"
336 + "</p>\r\n"
337 + "<p>p</p>\r\n"
338 + "</div>";
339 final String expected = "p \nbr\np";
340
341 try (WebClient webClient = new WebClient()) {
342 final HtmlPage page = webClient.loadHtmlCodeIntoCurrentWindow(html);
343
344 assertEquals(expected, page.asNormalizedText());
345
346 webClient.getOptions().setCssEnabled(false);
347 assertEquals(expected, page.asNormalizedText());
348 }
349 }
350
351
352
353
354 @Test
355 public void cssEnableDisable3() throws IOException {
356 final String html =
357 "<div>\r\n"
358 + "<p>p\r\n"
359 + "<br>br\r\n"
360 + "</p>\r\n"
361 + "<p>p</p>\r\n"
362 + "<p>p\r\n"
363 + "<br>br\r\n"
364 + "<br>br\r\n"
365 + "<br>br\r\n"
366 + "</p>\r\n"
367 + "<p>p</p>\r\n"
368 + "</div>";
369
370 final String expected =
371 "p \n"
372 + "br\n"
373 + "p\n"
374 + "p \n"
375 + "br \n"
376 + "br \n"
377 + "br\n"
378 + "p";
379
380 try (WebClient webClient = new WebClient()) {
381 final HtmlPage page = webClient.loadHtmlCodeIntoCurrentWindow(html);
382
383 assertEquals(expected, page.asNormalizedText());
384
385 webClient.getOptions().setCssEnabled(false);
386 assertEquals(expected, page.asNormalizedText());
387 }
388 }
389 }