View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html.serializer;
16  
17  import static org.junit.Assert.assertEquals;
18  import static org.junit.Assert.assertTrue;
19  
20  import java.util.Arrays;
21  
22  import org.apache.commons.lang3.StringUtils;
23  import org.htmlunit.html.serializer.HtmlSerializerVisibleText.HtmlSerializerTextBuilder;
24  import org.htmlunit.html.serializer.HtmlSerializerVisibleText.HtmlSerializerTextBuilder.Mode;
25  import org.junit.Test;
26  
27  /**
28   * Tests for {@link HtmlSerializerVisibleText}.
29   *
30   * @author Ronald Brill
31   */
32  public class HtmlSerializerVisibleTextTest {
33  
34      /**
35       * Test {@link HtmlSerializerTextBuilder}.
36       */
37      @Test
38      public void normalize() {
39          HtmlSerializerTextBuilder serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
40          serializer.append("", Mode.WHITE_SPACE_NORMAL);
41          assertEquals("", serializer.getText());
42  
43          serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
44          serializer.append(" \t\r\n ", Mode.WHITE_SPACE_NORMAL);
45          assertEquals("", serializer.getText());
46  
47          serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
48          serializer.appendBlockSeparator();
49          assertEquals("", serializer.getText());
50  
51          serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
52          serializer.appendBlockSeparator();
53          serializer.append(" ", Mode.WHITE_SPACE_NORMAL);
54          assertEquals("", serializer.getText());
55  
56          serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
57          serializer.append(" ", Mode.WHITE_SPACE_NORMAL);
58          serializer.appendBlockSeparator();
59          assertEquals("", serializer.getText());
60  
61          serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
62          serializer.append(" ", Mode.WHITE_SPACE_NORMAL);
63          serializer.appendBlockSeparator();
64          serializer.append(" ", Mode.WHITE_SPACE_NORMAL);
65          assertEquals("", serializer.getText());
66  
67          serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
68          serializer.append(" a  ", Mode.WHITE_SPACE_NORMAL);
69          serializer.appendBlockSeparator();
70          assertEquals("a", serializer.getText());
71  
72          serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
73          serializer.append(" a  ", Mode.WHITE_SPACE_NORMAL);
74          serializer.appendBlockSeparator();
75          serializer.append("  x ", Mode.WHITE_SPACE_NORMAL);
76          assertEquals("a\nx", serializer.getText());
77  
78          serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
79          serializer.append("a", Mode.WHITE_SPACE_NORMAL);
80          serializer.appendBlockSeparator();
81          serializer.append("x", Mode.WHITE_SPACE_NORMAL);
82          assertEquals("a\nx", serializer.getText());
83  
84          serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
85          serializer.append("a", Mode.WHITE_SPACE_NORMAL);
86          serializer.appendBlockSeparator();
87          serializer.appendBlockSeparator();
88          serializer.append("x", Mode.WHITE_SPACE_NORMAL);
89          assertEquals("a\nx", serializer.getText());
90  
91          serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
92          serializer.append("a", Mode.WHITE_SPACE_NORMAL);
93          serializer.appendBlockSeparator();
94          serializer.append("  ", Mode.WHITE_SPACE_NORMAL);
95          serializer.appendBlockSeparator();
96          serializer.append("x", Mode.WHITE_SPACE_NORMAL);
97          assertEquals("a\nx", serializer.getText());
98  
99          serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
100         serializer.appendBreak(Mode.WHITE_SPACE_NORMAL);
101         assertEquals("\n", serializer.getText());
102 
103         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
104         serializer.appendBreak(Mode.WHITE_SPACE_NORMAL);
105         serializer.append(" ", Mode.WHITE_SPACE_NORMAL);
106         assertEquals("\n", serializer.getText());
107 
108         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
109         serializer.append(" ", Mode.WHITE_SPACE_NORMAL);
110         serializer.appendBreak(Mode.WHITE_SPACE_NORMAL);
111         assertEquals("\n", serializer.getText());
112 
113         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
114         serializer.append(" ", Mode.WHITE_SPACE_NORMAL);
115         serializer.appendBreak(Mode.WHITE_SPACE_NORMAL);
116         serializer.append(" ", Mode.WHITE_SPACE_NORMAL);
117         assertEquals("\n", serializer.getText());
118 
119         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
120         serializer.appendBreak(Mode.WHITE_SPACE_NORMAL);
121         serializer.appendBlockSeparator();
122         serializer.append("x", Mode.WHITE_SPACE_NORMAL);
123         assertEquals("x", serializer.getText());
124 
125         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
126         serializer.append("a", Mode.WHITE_SPACE_NORMAL);
127         serializer.appendBreak(Mode.WHITE_SPACE_NORMAL);
128         serializer.appendBlockSeparator();
129         serializer.append("x", Mode.WHITE_SPACE_NORMAL);
130         assertEquals("a\nx", serializer.getText());
131 
132         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
133         serializer.append("a", Mode.WHITE_SPACE_NORMAL);
134         serializer.appendBlockSeparator();
135         serializer.appendBlockSeparator();
136         serializer.appendBlockSeparator();
137         serializer.append("x", Mode.WHITE_SPACE_NORMAL);
138         assertEquals("a\nx", serializer.getText());
139 
140         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
141         serializer.appendBlockSeparator();
142         serializer.append("\n", Mode.WHITE_SPACE_NORMAL);
143         serializer.appendBlockSeparator();
144         serializer.append("x", Mode.WHITE_SPACE_NORMAL);
145         serializer.appendBlockSeparator();
146         serializer.append("y", Mode.WHITE_SPACE_NORMAL);
147         serializer.appendBreak(Mode.WHITE_SPACE_NORMAL);
148         serializer.appendBlockSeparator();
149         serializer.appendBlockSeparator();
150         assertEquals("x\ny", serializer.getText());
151 
152         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
153         serializer.append("abc", Mode.WHITE_SPACE_NORMAL);
154         assertEquals("abc", serializer.getText());
155 
156         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
157         serializer.append("a     b \t\t\t c \r \r o \n\n\n", Mode.WHITE_SPACE_NORMAL);
158         assertEquals("a b c o", serializer.getText());
159     }
160 
161     /**
162      * Test {@link HtmlSerializerTextBuilder}.
163      */
164     @Test
165     public void normalizeNbsp() {
166         HtmlSerializerTextBuilder serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
167         serializer.append("abc" + (char) 160 + "x", Mode.WHITE_SPACE_NORMAL);
168         assertEquals("abc x", serializer.getText());
169 
170         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
171         serializer.append((char) 160 + "x" + (char) 160, Mode.WHITE_SPACE_NORMAL);
172         assertEquals(" x ", serializer.getText());
173 
174         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
175         serializer.appendBlockSeparator();
176         serializer.append((char) 160 + "x" + (char) 160, Mode.WHITE_SPACE_NORMAL);
177         serializer.appendBlockSeparator();
178         assertEquals(" x ", serializer.getText());
179     }
180 
181     /**
182      * Test {@link HtmlSerializerTextBuilder}.
183      */
184     @Test
185     public void normalize2() {
186         HtmlSerializerTextBuilder serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
187         serializer.append("a", Mode.WHITE_SPACE_NORMAL);
188         serializer.appendBlockSeparator();
189         serializer.appendBlockSeparator();
190         assertEquals("a", serializer.getText());
191 
192         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
193         serializer.append("a", Mode.WHITE_SPACE_NORMAL);
194         serializer.appendBlockSeparator();
195         serializer.append("  ", Mode.WHITE_SPACE_NORMAL);
196         serializer.appendBlockSeparator();
197         assertEquals("a", serializer.getText());
198     }
199 
200     /**
201      * Test {@link HtmlSerializerTextBuilder}.
202      */
203     @Test
204     public void pre() {
205         final HtmlSerializerTextBuilder serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
206         serializer.append("  hello \t abc ", Mode.WHITE_SPACE_PRE);
207         assertEquals("  hello   abc ", serializer.getText());
208     }
209 
210     /**
211      * Test {@link HtmlSerializerTextBuilder}.
212      */
213     @Test
214     public void textArea() {
215         final HtmlSerializerTextBuilder serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
216         serializer.append("  hello \t abc ", Mode.WHITE_SPACE_PRE);
217         assertEquals("  hello   abc ", serializer.getText());
218     }
219 
220     /**
221      * Test {@link HtmlSerializerTextBuilder}.
222      */
223     @Test
224     public void performanceWhitespace() {
225         final int length = 100_000;
226         final char[] charArray = new char[length];
227         Arrays.fill(charArray, ' ');
228         charArray[0] = 'a';
229         charArray[length - 1] = 'a';
230         final String text = new String(charArray);
231 
232         final long time = System.currentTimeMillis();
233         final HtmlSerializerTextBuilder serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
234         serializer.append(text, Mode.WHITE_SPACE_NORMAL);
235         serializer.getText();
236 
237         final long runTime = System.currentTimeMillis() - time;
238         assertTrue("cleanUp() took too much time", runTime < 200);
239     }
240 
241     /**
242      * Test {@link HtmlSerializerTextBuilder}.
243      */
244     @Test
245     public void performanceManyReplaces() {
246         final String expected = StringUtils.repeat("x\n", 100_000).trim();
247 
248         final long time = System.currentTimeMillis();
249 
250         final HtmlSerializerTextBuilder serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
251 
252         for (int i = 0; i < 100_000; i++) {
253             serializer.append(" x ", Mode.WHITE_SPACE_NORMAL);
254             serializer.appendBlockSeparator();
255         }
256 
257         assertEquals(expected, serializer.getText());
258 
259         final long runTime = System.currentTimeMillis() - time;
260         assertTrue("cleanUp() took too much time", runTime < 200);
261     }
262 
263     /**
264      * Test {@link HtmlSerializerTextBuilder} special spaces.
265      */
266     @Test
267     public void specialSpaces() {
268         HtmlSerializerTextBuilder serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
269         serializer.append("\u3000", Mode.WHITE_SPACE_NORMAL);
270         assertEquals("\u3000", serializer.getText());
271 
272         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
273         serializer.append("\uFEFF", Mode.WHITE_SPACE_NORMAL);
274         assertEquals("\uFEFF", serializer.getText());
275 
276         serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
277         serializer.append("\u200B", Mode.WHITE_SPACE_NORMAL);
278         assertEquals("\u200B", serializer.getText());
279     }
280 
281     /**
282      * Test {@link HtmlSerializerTextBuilder} special spaces.
283      */
284     @Test
285     public void normalizePre() {
286         final HtmlSerializerTextBuilder serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
287         serializer.append("  A B  C\t \t  D \r\nEF\nG \n H  ", Mode.WHITE_SPACE_PRE);
288         assertEquals("  A B  C     D \nEF\nG \n H  ", serializer.getText());
289     }
290 
291     /**
292      * Test {@link HtmlSerializerTextBuilder} special spaces.
293      */
294     @Test
295     public void normalizePreLine() {
296         final HtmlSerializerTextBuilder serializer = new HtmlSerializerVisibleText.HtmlSerializerTextBuilder();
297         serializer.append("  A B  C\t \t  D \r\nEF\nG \n H  ", Mode.WHITE_SPACE_PRE_LINE);
298         assertEquals("A B C D \nEF\nG \n H", serializer.getText());
299     }
300 }