View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html.serializer;
16  
17  import static org.htmlunit.css.CssStyleSheet.BLOCK;
18  import static org.htmlunit.html.DomElement.ATTRIBUTE_NOT_DEFINED;
19  
20  import java.util.Iterator;
21  import java.util.List;
22  
23  import org.apache.commons.lang3.StringUtils;
24  import org.htmlunit.Page;
25  import org.htmlunit.SgmlPage;
26  import org.htmlunit.WebWindow;
27  import org.htmlunit.html.DomComment;
28  import org.htmlunit.html.DomElement;
29  import org.htmlunit.html.DomNode;
30  import org.htmlunit.html.DomText;
31  import org.htmlunit.html.HtmlBody;
32  import org.htmlunit.html.HtmlBreak;
33  import org.htmlunit.html.HtmlCheckBoxInput;
34  import org.htmlunit.html.HtmlDetails;
35  import org.htmlunit.html.HtmlElement;
36  import org.htmlunit.html.HtmlElement.DisplayStyle;
37  import org.htmlunit.html.HtmlHiddenInput;
38  import org.htmlunit.html.HtmlInlineFrame;
39  import org.htmlunit.html.HtmlInput;
40  import org.htmlunit.html.HtmlListItem;
41  import org.htmlunit.html.HtmlNoFrames;
42  import org.htmlunit.html.HtmlNoScript;
43  import org.htmlunit.html.HtmlNumberInput;
44  import org.htmlunit.html.HtmlOption;
45  import org.htmlunit.html.HtmlOrderedList;
46  import org.htmlunit.html.HtmlPreformattedText;
47  import org.htmlunit.html.HtmlRadioButtonInput;
48  import org.htmlunit.html.HtmlResetInput;
49  import org.htmlunit.html.HtmlScript;
50  import org.htmlunit.html.HtmlSelect;
51  import org.htmlunit.html.HtmlStyle;
52  import org.htmlunit.html.HtmlSubmitInput;
53  import org.htmlunit.html.HtmlSummary;
54  import org.htmlunit.html.HtmlTable;
55  import org.htmlunit.html.HtmlTableCell;
56  import org.htmlunit.html.HtmlTableFooter;
57  import org.htmlunit.html.HtmlTableHeader;
58  import org.htmlunit.html.HtmlTableRow;
59  import org.htmlunit.html.HtmlTextArea;
60  import org.htmlunit.html.HtmlTitle;
61  import org.htmlunit.html.HtmlUnorderedList;
62  import org.htmlunit.html.TableRowGroup;
63  import org.htmlunit.html.serializer.HtmlSerializerNormalizedText.HtmlSerializerTextBuilder.Mode;
64  
65  /**
66   * Utility to handle conversion from HTML code to string.
67   * This implements HtmlUnit's way of normalization.
68   * <p>Note: The conversion done by Selenium's WebElement#getText()
69   * is different.</p>
70   *
71   * @author Marc Guillemot
72   * @author Ahmed Ashour
73   * @author Ronald Brill
74   * @author Rob Kodey
75   */
76  public class HtmlSerializerNormalizedText {
77  
78      private boolean ignoreMaskedElements_ = true;
79  
80      /**
81       * Converts an HTML node to text.
82       * @param node a node
83       * @return the text representation according to the setting of this serializer
84       */
85      public String asText(final DomNode node) {
86          final HtmlSerializerTextBuilder builder = new HtmlSerializerTextBuilder();
87          appendNode(builder, node);
88          return builder.getText();
89      }
90  
91      /**
92       * Iterate over all Children and call appendNode() for every.
93       *
94       * @param builder the StringBuilder to add to
95       * @param node the node to process
96       */
97      protected void appendChildren(final HtmlSerializerTextBuilder builder, final DomNode node) {
98          for (final DomNode child : node.getChildren()) {
99              appendNode(builder, child);
100         }
101     }
102 
103     /**
104      * The core distribution method call the different appendXXX
105      * methods depending on the type of the given node.
106      *
107      * @param builder the StringBuilder to add to
108      * @param node the node to process
109      */
110     protected void appendNode(final HtmlSerializerTextBuilder builder, final DomNode node) {
111         if (node instanceof DomText) {
112             appendText(builder, (DomText) node);
113         }
114         else if (node instanceof DomComment) {
115             // nothing to do
116         }
117         else if (node instanceof HtmlBreak) {
118             appendBreak(builder, (HtmlBreak) node);
119         }
120         else if (node instanceof HtmlHiddenInput) {
121             // nothing to do
122         }
123         else if (node instanceof HtmlScript) {
124             // nothing to do
125         }
126         else if (node instanceof HtmlStyle) {
127             // nothing to do
128         }
129         else if (node instanceof HtmlNoFrames) {
130             // nothing to do
131         }
132         else if (node instanceof HtmlTextArea) {
133             appendTextArea(builder, (HtmlTextArea) node);
134         }
135         else if (node instanceof HtmlTitle) {
136             appendTitle(builder, (HtmlTitle) node);
137         }
138         else if (node instanceof HtmlTableRow) {
139             appendTableRow(builder, (HtmlTableRow) node);
140         }
141         else if (node instanceof HtmlSelect) {
142             appendSelect(builder, (HtmlSelect) node);
143         }
144         else if (node instanceof HtmlSubmitInput) {
145             appendSubmitInput(builder, (HtmlSubmitInput) node);
146         }
147         else if (node instanceof HtmlResetInput) {
148             appendResetInput(builder, (HtmlResetInput) node);
149         }
150         else if (node instanceof HtmlCheckBoxInput) {
151             appendCheckBoxInput(builder, (HtmlCheckBoxInput) node);
152         }
153         else if (node instanceof HtmlRadioButtonInput) {
154             appendRadioButtonInput(builder, (HtmlRadioButtonInput) node);
155         }
156         else if (node instanceof HtmlNumberInput) {
157             appendNumberInput(builder, (HtmlNumberInput) node);
158         }
159         else if (node instanceof HtmlInput) {
160             appendInput(builder, (HtmlInput) node);
161         }
162         else if (node instanceof HtmlTable) {
163             appendTable(builder, (HtmlTable) node);
164         }
165         else if (node instanceof HtmlOrderedList) {
166             appendOrderedList(builder, (HtmlOrderedList) node);
167         }
168         else if (node instanceof HtmlUnorderedList) {
169             appendUnorderedList(builder, (HtmlUnorderedList) node);
170         }
171         else if (node instanceof HtmlPreformattedText) {
172             appendPreformattedText(builder, (HtmlPreformattedText) node);
173         }
174         else if (node instanceof HtmlInlineFrame) {
175             appendInlineFrame(builder, (HtmlInlineFrame) node);
176         }
177         else if (node instanceof HtmlDetails) {
178             appendDetails(builder, (HtmlDetails) node);
179         }
180         else if (node instanceof HtmlNoScript && node.getPage().getWebClient().isJavaScriptEnabled()) {
181             // nothing to do
182         }
183         else {
184             appendDomNode(builder, node);
185         }
186     }
187 
188     /**
189      * Process {@link HtmlHiddenInput}.
190      *
191      * @param builder the StringBuilder to add to
192      * @param domNode the target to process
193      */
194     protected void appendDomNode(final HtmlSerializerTextBuilder builder, final DomNode domNode) {
195         boolean block = false;
196         if (!(domNode instanceof HtmlBody)) {
197             final SgmlPage page = domNode.getPage();
198             final WebWindow window = page.getEnclosingWindow();
199             if (window.getWebClient().getOptions().isCssEnabled()) {
200                 if (domNode instanceof DomElement) {
201                     final String display = window.getComputedStyle((DomElement) domNode, null).getDisplay();
202                     block = BLOCK.equals(display);
203                 }
204             }
205             else if (domNode instanceof HtmlElement) {
206                 block = DisplayStyle.BLOCK == ((HtmlElement) domNode).getDefaultStyleDisplay();
207             }
208         }
209 
210         if (block) {
211             builder.appendBlockSeparator();
212         }
213         appendChildren(builder, domNode);
214         if (block) {
215             builder.appendBlockSeparator();
216         }
217     }
218 
219     /**
220      * Process {@link HtmlSubmitInput}.
221      *
222      * @param builder the StringBuilder to add to
223      * @param htmlSubmitInput the target to process
224      */
225     protected void appendSubmitInput(final HtmlSerializerTextBuilder builder, final HtmlSubmitInput htmlSubmitInput) {
226         String text = htmlSubmitInput.getValueAttribute();
227         if (ATTRIBUTE_NOT_DEFINED == text) {
228             text = HtmlSubmitInput.DEFAULT_VALUE;
229         }
230 
231         builder.append(text, Mode.NORMALIZE);
232     }
233 
234     /**
235      * Process {@link HtmlInput}.
236      *
237      * @param builder the StringBuilder to add to
238      * @param htmlInput the target to process
239      */
240     protected void appendInput(final HtmlSerializerTextBuilder builder, final HtmlInput htmlInput) {
241         builder.append(" ", Mode.NORMALIZE);
242         builder.append(htmlInput.getRawValue(), Mode.NORMALIZE);
243         builder.append(" ", Mode.NORMALIZE);
244     }
245 
246     /**
247      * Process {@link HtmlNumberInput}.
248      *
249      * @param builder the StringBuilder to add to
250      * @param htmlNumberInput the target to process
251      */
252     protected void appendNumberInput(final HtmlSerializerTextBuilder builder, final HtmlNumberInput htmlNumberInput) {
253         builder.append(" ", Mode.NORMALIZE);
254 
255         String val = htmlNumberInput.getRawValue();
256         final int lastPos = val.length() - 1;
257         if (lastPos >= 0 && val.charAt(lastPos) == '.') {
258             val = val.substring(0, lastPos);
259         }
260         builder.append(val, Mode.NORMALIZE);
261 
262         builder.append(" ", Mode.NORMALIZE);
263     }
264 
265     /**
266      * Process {@link HtmlResetInput}.
267      *
268      * @param builder the StringBuilder to add to
269      * @param htmlResetInput the target to process
270      */
271     protected void appendResetInput(final HtmlSerializerTextBuilder builder, final HtmlResetInput htmlResetInput) {
272         String text = htmlResetInput.getValueAttribute();
273         if (ATTRIBUTE_NOT_DEFINED == text) {
274             text = HtmlResetInput.DEFAULT_VALUE;
275         }
276 
277         builder.append(text, Mode.NORMALIZE);
278     }
279 
280     /**
281      * Process {@link HtmlUnorderedList}.
282      * @param builder the StringBuilder to add to
283      * @param htmlUnorderedList the target to process
284      */
285     protected void appendUnorderedList(final HtmlSerializerTextBuilder builder,
286                                           final HtmlUnorderedList htmlUnorderedList) {
287         builder.appendBlockSeparator();
288         boolean first = true;
289         for (final DomNode item : htmlUnorderedList.getChildren()) {
290             if (!first) {
291                 builder.appendBlockSeparator();
292             }
293             first = false;
294             appendNode(builder, item);
295         }
296         builder.appendBlockSeparator();
297     }
298 
299     /**
300      * Process {@link HtmlDetails}.
301      * @param builder the StringBuilder to add to
302      * @param htmlDetails the target to process
303      */
304     protected void appendDetails(final HtmlSerializerTextBuilder builder,
305                     final HtmlDetails htmlDetails) {
306         if (htmlDetails.isOpen()) {
307             appendChildren(builder, htmlDetails);
308             return;
309         }
310 
311         for (final DomNode child : htmlDetails.getChildren()) {
312             if (child instanceof HtmlSummary) {
313                 appendNode(builder, child);
314             }
315         }
316     }
317 
318     /**
319      * Process {@link HtmlTitle}.
320      * @param builder the StringBuilder to add to
321      * @param htmlTitle the target to process
322      */
323     protected void appendTitle(final HtmlSerializerTextBuilder builder, final HtmlTitle htmlTitle) {
324         // optimized version
325         // for the title there is no need to check the visibility
326         // of the containing dom text;
327         // this optimization defers the load of the style sheets
328         final DomNode child = htmlTitle.getFirstChild();
329         if (child instanceof DomText) {
330             builder.append(((DomText) child).getData(), Mode.NORMALIZE);
331             builder.appendBlockSeparator();
332         }
333     }
334 
335     /**
336      * Process {@link HtmlTableRow}.
337      *
338      * @param builder the StringBuilder to add to
339      * @param htmlTableRow the target to process
340      */
341     protected void appendTableRow(final HtmlSerializerTextBuilder builder, final HtmlTableRow htmlTableRow) {
342         boolean first = true;
343         for (final HtmlTableCell cell : htmlTableRow.getCells()) {
344             if (!first) {
345                 builder.appendTab();
346             }
347             else {
348                 first = false;
349             }
350             appendChildren(builder, cell); // trim?
351         }
352     }
353 
354     /**
355      * Process {@link HtmlTextArea}.
356      *
357      * @param builder the StringBuilder to add to
358      * @param htmlTextArea the target to process
359      */
360     protected void appendTextArea(final HtmlSerializerTextBuilder builder, final HtmlTextArea htmlTextArea) {
361         if (isVisible(htmlTextArea)) {
362             builder.append(htmlTextArea.getText(), Mode.PRESERVE_BLANK_NEWLINE);
363         }
364     }
365 
366     /**
367      * Process {@link HtmlTable}.
368      *
369      * @param builder the StringBuilder to add to
370      * @param htmlTable the target to process
371      */
372     protected void appendTable(final HtmlSerializerTextBuilder builder, final HtmlTable htmlTable) {
373         builder.appendBlockSeparator();
374         final String caption = htmlTable.getCaptionText();
375         if (caption != null) {
376             builder.append(caption, Mode.NORMALIZE);
377             builder.appendBlockSeparator();
378         }
379 
380         boolean first = true;
381 
382         // first thead has to be displayed first and first tfoot has to be displayed last
383         final HtmlTableHeader tableHeader = htmlTable.getHeader();
384         if (tableHeader != null) {
385             first = appendTableRows(builder, tableHeader.getRows(), true, null, null);
386         }
387         final HtmlTableFooter tableFooter = htmlTable.getFooter();
388 
389         final List<HtmlTableRow> tableRows = htmlTable.getRows();
390         first = appendTableRows(builder, tableRows, first, tableHeader, tableFooter);
391 
392         if (tableFooter != null) {
393             first = appendTableRows(builder, tableFooter.getRows(), first, null, null);
394         }
395         else if (tableRows.isEmpty()) {
396             final DomNode firstChild = htmlTable.getFirstChild();
397             if (firstChild != null) {
398                 appendNode(builder, firstChild);
399             }
400         }
401 
402         builder.appendBlockSeparator();
403     }
404 
405     /**
406      * Process {@link HtmlTableRow}.
407      *
408      * @param builder the StringBuilder to add to
409      * @param rows the rows
410      * @param first if true this is the first one
411      * @param skipParent1 skip row if the parent is this
412      * @param skipParent2 skip row if the parent is this
413      * @return true if this was the first one
414      */
415     protected boolean appendTableRows(final HtmlSerializerTextBuilder builder,
416             final List<HtmlTableRow> rows, boolean first, final TableRowGroup skipParent1,
417             final TableRowGroup skipParent2) {
418         for (final HtmlTableRow row : rows) {
419             if (row.getParentNode() == skipParent1 || row.getParentNode() == skipParent2) {
420                 continue;
421             }
422             if (!first) {
423                 builder.appendBlockSeparator();
424             }
425             first = false;
426             appendTableRow(builder, row);
427         }
428         return first;
429     }
430 
431     /**
432      * Process {@link HtmlSelect}.
433      *
434      * @param builder the StringBuilder to add to
435      * @param htmlSelect the target to process
436      */
437     protected void appendSelect(final HtmlSerializerTextBuilder builder, final HtmlSelect htmlSelect) {
438         final List<HtmlOption> options = htmlSelect.getSelectedOptions();
439 
440         for (final Iterator<HtmlOption> i = options.iterator(); i.hasNext();) {
441             final HtmlOption currentOption = i.next();
442             appendChildren(builder, currentOption);
443             if (i.hasNext()) {
444                 builder.appendBlockSeparator();
445             }
446         }
447     }
448 
449     /**
450      * Process {@link HtmlOrderedList} taking care to numerate it.
451      *
452      * @param builder the StringBuilder to add to
453      * @param htmlOrderedList the OL element
454      */
455     protected void appendOrderedList(final HtmlSerializerTextBuilder builder, final HtmlOrderedList htmlOrderedList) {
456         builder.appendBlockSeparator();
457         boolean first = true;
458         int i = 1;
459         for (final DomNode item : htmlOrderedList.getChildren()) {
460             if (!first) {
461                 builder.appendBlockSeparator();
462             }
463             first = false;
464             if (item instanceof HtmlListItem) {
465                 builder.append(Integer.toString(i++), Mode.NORMALIZE);
466                 builder.append(". ", Mode.NORMALIZE);
467                 appendChildren(builder, item);
468             }
469             else {
470                 appendNode(builder, item);
471             }
472         }
473         builder.appendBlockSeparator();
474     }
475 
476     /**
477      * Process {@link HtmlPreformattedText}.
478      *
479      * @param builder the StringBuilder to add to
480      * @param htmlPreformattedText the target to process
481      */
482     protected void appendPreformattedText(final HtmlSerializerTextBuilder builder,
483             final HtmlPreformattedText htmlPreformattedText) {
484         if (isVisible(htmlPreformattedText)) {
485             builder.appendBlockSeparator();
486             builder.append(htmlPreformattedText.getTextContent(), Mode.PRESERVE_BLANK_TAB_NEWLINE);
487             builder.appendBlockSeparator();
488         }
489     }
490 
491     /**
492      * Process {@link HtmlInlineFrame}.
493      *
494      * @param builder the StringBuilder to add to
495      * @param htmlInlineFrame the target to process
496      */
497     protected void appendInlineFrame(final HtmlSerializerTextBuilder builder,
498             final HtmlInlineFrame htmlInlineFrame) {
499         if (isVisible(htmlInlineFrame)) {
500             builder.appendBlockSeparator();
501             final Page page = htmlInlineFrame.getEnclosedPage();
502             if (page instanceof SgmlPage) {
503                 builder.append(((SgmlPage) page).asNormalizedText(), Mode.NORMALIZE);
504             }
505             builder.appendBlockSeparator();
506         }
507     }
508 
509     /**
510      * Process {@link DomText}.
511      *
512      * @param builder the StringBuilder to add to
513      * @param domText the target to process
514      */
515     protected void appendText(final HtmlSerializerTextBuilder builder, final DomText domText) {
516         final DomNode parent = domText.getParentNode();
517         if (parent == null || parent instanceof HtmlTitle || isVisible(parent)) {
518             builder.append(domText.getData(), Mode.NORMALIZE);
519         }
520     }
521 
522     /**
523      * Process {@link HtmlBreak}.
524      *
525      * @param builder the StringBuilder to add to
526      * @param htmlBreak the target to process
527      */
528     protected void appendBreak(final HtmlSerializerTextBuilder builder, final HtmlBreak htmlBreak) {
529         builder.appendNewLine();
530     }
531 
532     /**
533      * Process {@link HtmlCheckBoxInput}.
534      *
535      * @param builder the StringBuilder to add to
536      * @param htmlCheckBoxInput the target to process
537      */
538     protected void appendCheckBoxInput(final HtmlSerializerTextBuilder builder,
539                                             final HtmlCheckBoxInput htmlCheckBoxInput) {
540         if (htmlCheckBoxInput.isChecked()) {
541             builder.append("checked", Mode.NORMALIZE);
542         }
543         else {
544             builder.append("unchecked", Mode.NORMALIZE);
545         }
546     }
547 
548     /**
549      * Process {@link HtmlRadioButtonInput}.
550      *
551      * @param builder the StringBuilder to add to
552      * @param htmlRadioButtonInput the target to process
553      */
554     protected void appendRadioButtonInput(final HtmlSerializerTextBuilder builder,
555             final HtmlRadioButtonInput htmlRadioButtonInput) {
556         if (htmlRadioButtonInput.isChecked()) {
557             builder.append("checked", Mode.NORMALIZE);
558         }
559         else {
560             builder.append("unchecked", Mode.NORMALIZE);
561         }
562     }
563 
564     private boolean isVisible(final DomNode node) {
565         return !ignoreMaskedElements_ || node.isDisplayed();
566     }
567 
568     /**
569      * Indicates if element that are not displayed due to style settings
570      * (visibility or display) should be visible in generated text.
571      * @param ignore indicates if masked elements should be ignored or not
572      */
573     public void setIgnoreMaskedElements(final boolean ignore) {
574         ignoreMaskedElements_ = ignore;
575     }
576 
577     /**
578      * Helper to compose the text for the serializer based on several modes.
579      */
580     protected static class HtmlSerializerTextBuilder {
581 
582         /** Mode. */
583         protected enum Mode {
584             /** Collapse whitespace. */
585             NORMALIZE,
586 
587             /** Preserve tab, blank, newline. */
588             PRESERVE_BLANK_TAB_NEWLINE,
589 
590             /** Preserve blank, newline. */
591             PRESERVE_BLANK_NEWLINE
592         }
593 
594         private enum State {
595             DEFAULT,
596             EMPTY,
597             TRIM,
598             BLANK_AT_END,
599             BLANK_AT_END_AFTER_NEWLINE,
600             NEWLINE_AT_END,
601             BLOCK_SEPARATOR_AT_END
602         }
603 
604         private static final String LINE_SEPARATOR = "\n";
605         private static final int LINE_SEPARATOR_LENGTH = LINE_SEPARATOR.length();
606 
607         private State state_;
608         private final StringBuilder builder_;
609         private int trimRightPos_;
610 
611         /**
612          * Ctor.
613          */
614         public HtmlSerializerTextBuilder() {
615             builder_ = new StringBuilder();
616             state_ = State.EMPTY;
617             trimRightPos_ = builder_.length();
618         }
619 
620         /**
621          * Append the provided content.
622          *
623          * @param content the content to add
624          * @param mode the {@link Mode}
625          */
626         public void append(final String content, final Mode mode) {
627             if (content == null) {
628                 return;
629             }
630             final int length = content.length();
631             if (length == 0) {
632                 return;
633             }
634 
635             String text = content;
636             if (mode == Mode.PRESERVE_BLANK_NEWLINE) {
637                 text = StringUtils.stripEnd(text, null);
638             }
639 
640             boolean crFound = false;
641             for (final char c : text.toCharArray()) {
642                 if (mode == Mode.NORMALIZE) {
643                     if (isSpace(c)) {
644                         switch (state_) {
645                             case EMPTY:
646                             case TRIM:
647                             case BLANK_AT_END:
648                             case BLANK_AT_END_AFTER_NEWLINE:
649                             case BLOCK_SEPARATOR_AT_END:
650                                 break;
651                             case NEWLINE_AT_END:
652                                 builder_.append(' ');
653                                 state_ = State.BLANK_AT_END_AFTER_NEWLINE;
654                                 break;
655                             default:
656                                 builder_.append(' ');
657                                 state_ = State.BLANK_AT_END;
658                                 break;
659                         }
660                     }
661                     else if (c == (char) 160) {
662                         builder_.append(' ');
663                         state_ = State.DEFAULT;
664                         trimRightPos_ = builder_.length();
665                     }
666                     else {
667                         builder_.append(c);
668                         state_ = State.DEFAULT;
669                         trimRightPos_ = builder_.length();
670                     }
671                     continue;
672                 }
673 
674                 // preserve mode
675                 if (c == '\n') {
676                     appendNewLine();
677                     crFound = false;
678                 }
679                 else {
680                     if (crFound) {
681                         appendNewLine();
682                     }
683                     crFound = c == '\r';
684 
685                     if (c == '\t') {
686                         if (mode == Mode.PRESERVE_BLANK_TAB_NEWLINE) {
687                             appendTab();
688                         }
689                         else if (state_ != State.BLOCK_SEPARATOR_AT_END) {
690                             builder_.append(' ');
691                         }
692                     }
693                     else if (c == (char) 160) {
694                         appendBlank();
695                     }
696                     else if (c == ' ') {
697                         appendBlank();
698                     }
699                     else {
700                         builder_.append(c);
701                     }
702                     trimRightPos_ = builder_.length();
703                 }
704             }
705 
706             if (crFound) {
707                 appendNewLine();
708             }
709 
710             if (mode != Mode.NORMALIZE) {
711                 // reset state to empty to restart whitespace normalization afterwards
712                 state_ = State.TRIM;
713             }
714         }
715 
716         /**
717          * Append a block separator.
718          */
719         public void appendBlockSeparator() {
720             switch (state_) {
721                 case EMPTY:
722                     break;
723                 case BLANK_AT_END:
724                     builder_.setLength(trimRightPos_);
725                     if (builder_.length() == 0) {
726                         state_ = State.EMPTY;
727                     }
728                     else {
729                         builder_.append(LINE_SEPARATOR);
730                         state_ = State.BLOCK_SEPARATOR_AT_END;
731                     }
732                     break;
733                 case BLANK_AT_END_AFTER_NEWLINE:
734                     builder_.setLength(trimRightPos_ - LINE_SEPARATOR_LENGTH);
735                     trimRightPos_ = trimRightPos_ - LINE_SEPARATOR_LENGTH;
736                     if (builder_.length() == 0) {
737                         state_ = State.EMPTY;
738                     }
739                     else {
740                         builder_.append(LINE_SEPARATOR);
741                         state_ = State.BLOCK_SEPARATOR_AT_END;
742                     }
743                     break;
744                 case BLOCK_SEPARATOR_AT_END:
745                     break;
746                 case NEWLINE_AT_END:
747                     builder_.setLength(builder_.length() - LINE_SEPARATOR_LENGTH);
748                     trimRightPos_ = trimRightPos_ - LINE_SEPARATOR_LENGTH;
749                     if (builder_.length() == 0) {
750                         state_ = State.EMPTY;
751                     }
752                     else {
753                         builder_.append(LINE_SEPARATOR);
754                         state_ = State.BLOCK_SEPARATOR_AT_END;
755                     }
756                     break;
757                 default:
758                     builder_.append(LINE_SEPARATOR);
759                     state_ = State.BLOCK_SEPARATOR_AT_END;
760                     break;
761             }
762         }
763 
764         /**
765          * Append a line separator.
766          */
767         public void appendNewLine() {
768             builder_.append(LINE_SEPARATOR);
769             state_ = State.NEWLINE_AT_END;
770             trimRightPos_ = builder_.length();
771         }
772 
773         /**
774          * Append a tab.
775          */
776         public void appendTab() {
777             builder_.append('\t');
778             trimRightPos_ = builder_.length();
779         }
780 
781         /**
782          * Append a blank.
783          */
784         private void appendBlank() {
785             builder_.append(' ');
786             trimRightPos_ = builder_.length();
787         }
788 
789         /**
790          * @return the constructed text.
791          */
792         public String getText() {
793             return builder_.substring(0, trimRightPos_);
794         }
795 
796         private static boolean isSpace(final char ch) {
797             return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\f' || ch == '\r';
798         }
799     }
800 }