1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.html.serializer;
16
17 import static org.htmlunit.css.CssStyleSheet.BLOCK;
18
19 import java.util.List;
20
21 import org.apache.commons.lang3.StringUtils;
22 import org.htmlunit.Page;
23 import org.htmlunit.SgmlPage;
24 import org.htmlunit.WebWindow;
25 import org.htmlunit.css.ComputedCssStyleDeclaration;
26 import org.htmlunit.css.StyleAttributes.Definition;
27 import org.htmlunit.html.DomComment;
28 import org.htmlunit.html.DomElement;
29 import org.htmlunit.html.DomNode;
30 import org.htmlunit.html.DomText;
31 import org.htmlunit.html.HtmlBody;
32 import org.htmlunit.html.HtmlBreak;
33 import org.htmlunit.html.HtmlCheckBoxInput;
34 import org.htmlunit.html.HtmlDetails;
35 import org.htmlunit.html.HtmlHiddenInput;
36 import org.htmlunit.html.HtmlInlineFrame;
37 import org.htmlunit.html.HtmlInput;
38 import org.htmlunit.html.HtmlMenu;
39 import org.htmlunit.html.HtmlNoFrames;
40 import org.htmlunit.html.HtmlNoScript;
41 import org.htmlunit.html.HtmlOption;
42 import org.htmlunit.html.HtmlOrderedList;
43 import org.htmlunit.html.HtmlPreformattedText;
44 import org.htmlunit.html.HtmlRadioButtonInput;
45 import org.htmlunit.html.HtmlResetInput;
46 import org.htmlunit.html.HtmlScript;
47 import org.htmlunit.html.HtmlSelect;
48 import org.htmlunit.html.HtmlStyle;
49 import org.htmlunit.html.HtmlSubmitInput;
50 import org.htmlunit.html.HtmlSummary;
51 import org.htmlunit.html.HtmlTable;
52 import org.htmlunit.html.HtmlTableCell;
53 import org.htmlunit.html.HtmlTableFooter;
54 import org.htmlunit.html.HtmlTableHeader;
55 import org.htmlunit.html.HtmlTableRow;
56 import org.htmlunit.html.HtmlTextArea;
57 import org.htmlunit.html.HtmlTitle;
58 import org.htmlunit.html.HtmlUnorderedList;
59 import org.htmlunit.html.TableRowGroup;
60 import org.htmlunit.html.serializer.HtmlSerializerVisibleText.HtmlSerializerTextBuilder.Mode;
61
62
63
64
65
66
67
68
69
70 public class HtmlSerializerVisibleText {
71
72
73
74
75
76
77 public String asText(final DomNode node) {
78 if (node instanceof HtmlBreak) {
79 return "";
80 }
81 final HtmlSerializerTextBuilder builder = new HtmlSerializerTextBuilder();
82 appendNode(builder, node, whiteSpaceStyle(node, Mode.WHITE_SPACE_NORMAL));
83 return builder.getText();
84 }
85
86
87
88
89
90
91
92
93 protected void appendChildren(final HtmlSerializerTextBuilder builder, final DomNode node, final Mode mode) {
94 for (final DomNode child : node.getChildren()) {
95 appendNode(builder, child, updateWhiteSpaceStyle(node, mode));
96 }
97 }
98
99
100
101
102
103
104
105
106
107 protected void appendNode(final HtmlSerializerTextBuilder builder, final DomNode node, final Mode mode) {
108 if (node instanceof DomText) {
109 appendText(builder, (DomText) node, mode);
110 }
111 else if (node instanceof DomComment) {
112 appendComment(builder, (DomComment) node, mode);
113 }
114 else if (node instanceof HtmlBreak) {
115 appendBreak(builder, (HtmlBreak) node, mode);
116 }
117 else if (node instanceof HtmlHiddenInput) {
118 appendHiddenInput(builder, (HtmlHiddenInput) node, mode);
119 }
120 else if (node instanceof HtmlScript) {
121 appendScript(builder, (HtmlScript) node, mode);
122 }
123 else if (node instanceof HtmlStyle) {
124 appendStyle(builder, (HtmlStyle) node, mode);
125 }
126 else if (node instanceof HtmlNoFrames) {
127 appendNoFrames(builder, (HtmlNoFrames) node, mode);
128 }
129 else if (node instanceof HtmlTextArea) {
130 appendTextArea(builder, (HtmlTextArea) node, mode);
131 }
132 else if (node instanceof HtmlTitle) {
133 appendTitle(builder, (HtmlTitle) node, mode);
134 }
135 else if (node instanceof HtmlTableRow) {
136 appendTableRow(builder, (HtmlTableRow) node, mode);
137 }
138 else if (node instanceof HtmlSelect) {
139 appendSelect(builder, (HtmlSelect) node, mode);
140 }
141 else if (node instanceof HtmlOption) {
142 appendOption(builder, (HtmlOption) node, mode);
143 }
144 else if (node instanceof HtmlSubmitInput) {
145 appendSubmitInput(builder, (HtmlSubmitInput) node, mode);
146 }
147 else if (node instanceof HtmlResetInput) {
148 appendResetInput(builder, (HtmlResetInput) node, mode);
149 }
150 else if (node instanceof HtmlCheckBoxInput) {
151 appendCheckBoxInput(builder, (HtmlCheckBoxInput) node, mode);
152 }
153 else if (node instanceof HtmlRadioButtonInput) {
154 appendRadioButtonInput(builder, (HtmlRadioButtonInput) node, mode);
155 }
156 else if (node instanceof HtmlInput) {
157
158 }
159 else if (node instanceof HtmlTable) {
160 appendTable(builder, (HtmlTable) node, mode);
161 }
162 else if (node instanceof HtmlOrderedList) {
163 appendOrderedList(builder, (HtmlOrderedList) node, mode);
164 }
165 else if (node instanceof HtmlUnorderedList) {
166 appendUnorderedList(builder, (HtmlUnorderedList) node, mode);
167 }
168 else if (node instanceof HtmlPreformattedText) {
169 appendPreformattedText(builder, (HtmlPreformattedText) node, mode);
170 }
171 else if (node instanceof HtmlInlineFrame) {
172 appendInlineFrame(builder, (HtmlInlineFrame) node, mode);
173 }
174 else if (node instanceof HtmlMenu) {
175 appendMenu(builder, (HtmlMenu) node, mode);
176 }
177 else if (node instanceof HtmlDetails) {
178 appendDetails(builder, (HtmlDetails) node, mode);
179 }
180 else if (node instanceof HtmlNoScript && node.getPage().getWebClient().isJavaScriptEnabled()) {
181 appendNoScript(builder, (HtmlNoScript) node, mode);
182 }
183 else {
184 appendDomNode(builder, node, mode);
185 }
186 }
187
188
189
190
191
192
193
194
195 protected void appendDomNode(final HtmlSerializerTextBuilder builder,
196 final DomNode domNode, final Mode mode) {
197 final boolean block;
198 if (domNode instanceof HtmlBody) {
199 block = false;
200 }
201 else if (domNode instanceof DomElement) {
202 final WebWindow window = domNode.getPage().getEnclosingWindow();
203 final String display = window.getComputedStyle((DomElement) domNode, null).getDisplay();
204 block = BLOCK.equals(display);
205 }
206 else {
207 block = false;
208 }
209
210 if (block) {
211 builder.appendBlockSeparator();
212 }
213 appendChildren(builder, domNode, mode);
214 if (block) {
215 builder.appendBlockSeparator();
216 }
217 }
218
219
220
221
222
223
224
225
226 protected void appendHiddenInput(final HtmlSerializerTextBuilder builder,
227 final HtmlHiddenInput htmlHiddenInput, final Mode mode) {
228
229 }
230
231
232
233
234
235
236
237
238 protected void appendScript(final HtmlSerializerTextBuilder builder,
239 final HtmlScript htmlScript, final Mode mode) {
240
241 }
242
243
244
245
246
247
248
249
250 protected void appendStyle(final HtmlSerializerTextBuilder builder,
251 final HtmlStyle htmlStyle, final Mode mode) {
252
253 }
254
255
256
257
258
259
260
261
262 protected void appendNoScript(final HtmlSerializerTextBuilder builder,
263 final HtmlNoScript htmlNoScript, final Mode mode) {
264
265 }
266
267
268
269
270
271
272
273
274 protected void appendNoFrames(final HtmlSerializerTextBuilder builder,
275 final HtmlNoFrames htmlNoFrames, final Mode mode) {
276
277 }
278
279
280
281
282
283
284
285
286 protected void appendSubmitInput(final HtmlSerializerTextBuilder builder,
287 final HtmlSubmitInput htmlSubmitInput, final Mode mode) {
288
289 }
290
291
292
293
294
295
296
297
298 protected void appendInput(final HtmlSerializerTextBuilder builder,
299 final HtmlInput htmlInput, final Mode mode) {
300 builder.append(htmlInput.getValueAttribute(), mode);
301 }
302
303
304
305
306
307
308
309
310 protected void appendResetInput(final HtmlSerializerTextBuilder builder,
311 final HtmlResetInput htmlResetInput, final Mode mode) {
312
313 }
314
315
316
317
318
319
320
321 protected void appendMenu(final HtmlSerializerTextBuilder builder,
322 final HtmlMenu htmlMenu, final Mode mode) {
323 builder.appendBlockSeparator();
324 boolean first = true;
325 for (final DomNode item : htmlMenu.getChildren()) {
326 if (!first) {
327 builder.appendBlockSeparator();
328 }
329 first = false;
330 appendNode(builder, item, mode);
331 }
332 builder.appendBlockSeparator();
333 }
334
335
336
337
338
339
340
341 protected void appendDetails(final HtmlSerializerTextBuilder builder,
342 final HtmlDetails htmlDetails, final Mode mode) {
343 if (htmlDetails.isOpen()) {
344 appendChildren(builder, htmlDetails, mode);
345 return;
346 }
347
348 for (final DomNode child : htmlDetails.getChildren()) {
349 if (child instanceof HtmlSummary) {
350 appendNode(builder, child, mode);
351 }
352 }
353 }
354
355
356
357
358
359
360
361 protected void appendTitle(final HtmlSerializerTextBuilder builder,
362 final HtmlTitle htmlTitle, final Mode mode) {
363
364 }
365
366
367
368
369
370
371
372
373 protected void appendTableRow(final HtmlSerializerTextBuilder builder,
374 final HtmlTableRow htmlTableRow, final Mode mode) {
375 boolean first = true;
376 for (final HtmlTableCell cell : htmlTableRow.getCells()) {
377 if (!first) {
378 builder.appendBlank();
379 }
380 else {
381 first = false;
382 }
383 appendChildren(builder, cell, mode);
384 }
385 }
386
387
388
389
390
391
392 protected boolean isDisplayed(final DomNode domNode) {
393 return domNode.isDisplayed();
394 }
395
396
397
398
399
400
401
402
403 protected void appendTextArea(final HtmlSerializerTextBuilder builder,
404 final HtmlTextArea htmlTextArea, final Mode mode) {
405 if (isDisplayed(htmlTextArea)) {
406 builder.append(htmlTextArea.getDefaultValue(), whiteSpaceStyle(htmlTextArea, Mode.PRE));
407 builder.trimRight(Mode.PRE);
408 }
409 }
410
411
412
413
414
415
416
417
418 protected void appendTable(final HtmlSerializerTextBuilder builder,
419 final HtmlTable htmlTable, final Mode mode) {
420 builder.appendBlockSeparator();
421 final String caption = htmlTable.getCaptionText();
422 if (caption != null) {
423 builder.append(caption, mode);
424 builder.appendBlockSeparator();
425 }
426
427 boolean first = true;
428
429
430 final HtmlTableHeader tableHeader = htmlTable.getHeader();
431 if (tableHeader != null) {
432 first = appendTableRows(builder, mode, tableHeader.getRows(), true, null, null);
433 }
434 final HtmlTableFooter tableFooter = htmlTable.getFooter();
435
436 final List<HtmlTableRow> tableRows = htmlTable.getRows();
437 first = appendTableRows(builder, mode, tableRows, first, tableHeader, tableFooter);
438
439 if (tableFooter != null) {
440 first = appendTableRows(builder, mode, tableFooter.getRows(), first, null, null);
441 }
442 else if (tableRows.isEmpty()) {
443 final DomNode firstChild = htmlTable.getFirstChild();
444 if (firstChild != null) {
445 appendNode(builder, firstChild, mode);
446 }
447 }
448
449 builder.appendBlockSeparator();
450 }
451
452
453
454
455
456
457
458
459
460
461
462
463 protected boolean appendTableRows(final HtmlSerializerTextBuilder builder, final Mode mode,
464 final List<HtmlTableRow> rows, boolean first, final TableRowGroup skipParent1,
465 final TableRowGroup skipParent2) {
466 for (final HtmlTableRow row : rows) {
467 if (row.getParentNode() == skipParent1 || row.getParentNode() == skipParent2) {
468 continue;
469 }
470 if (!first) {
471 builder.appendBlockSeparator();
472 }
473 first = false;
474 appendTableRow(builder, row, mode);
475 }
476 return first;
477 }
478
479
480
481
482
483
484
485
486 protected void appendSelect(final HtmlSerializerTextBuilder builder,
487 final HtmlSelect htmlSelect, final Mode mode) {
488 builder.appendBlockSeparator();
489 boolean leadingNlPending = false;
490 final Mode selectMode = whiteSpaceStyle(htmlSelect, mode);
491 for (final DomNode item : htmlSelect.getChildren()) {
492 if (leadingNlPending) {
493 builder.appendBlockSeparator();
494 leadingNlPending = false;
495 }
496
497 builder.resetContentAdded();
498 appendNode(builder, item, whiteSpaceStyle(item, selectMode));
499 if (!leadingNlPending && builder.contentAdded_) {
500 leadingNlPending = true;
501 }
502 }
503 builder.appendBlockSeparator();
504 }
505
506
507
508
509
510
511
512
513 protected void appendOption(final HtmlSerializerTextBuilder builder,
514 final HtmlOption htmlOption, final Mode mode) {
515 builder.ignoreHtmlBreaks();
516 appendChildren(builder, htmlOption, mode);
517 builder.processHtmlBreaks();
518 }
519
520
521
522
523
524
525
526
527 protected void appendOrderedList(final HtmlSerializerTextBuilder builder,
528 final HtmlOrderedList htmlOrderedList, final Mode mode) {
529 builder.appendBlockSeparator();
530 boolean leadingNlPending = false;
531 final Mode olMode = whiteSpaceStyle(htmlOrderedList, mode);
532 for (final DomNode item : htmlOrderedList.getChildren()) {
533 if (leadingNlPending) {
534 builder.appendBlockSeparator();
535 leadingNlPending = false;
536 }
537
538 builder.resetContentAdded();
539 appendNode(builder, item, whiteSpaceStyle(item, olMode));
540 if (!leadingNlPending && builder.contentAdded_) {
541 leadingNlPending = true;
542 }
543 }
544 builder.appendBlockSeparator();
545 }
546
547
548
549
550
551
552
553 protected void appendUnorderedList(final HtmlSerializerTextBuilder builder,
554 final HtmlUnorderedList htmlUnorderedList, final Mode mode) {
555 builder.appendBlockSeparator();
556 boolean leadingNlPending = false;
557 final Mode ulMode = whiteSpaceStyle(htmlUnorderedList, mode);
558 for (final DomNode item : htmlUnorderedList.getChildren()) {
559 if (leadingNlPending) {
560 builder.appendBlockSeparator();
561 leadingNlPending = false;
562 }
563
564 builder.resetContentAdded();
565 appendNode(builder, item, whiteSpaceStyle(item, ulMode));
566 if (!leadingNlPending && builder.contentAdded_) {
567 leadingNlPending = true;
568 }
569 }
570 builder.appendBlockSeparator();
571 }
572
573
574
575
576
577
578
579
580 protected void appendPreformattedText(final HtmlSerializerTextBuilder builder,
581 final HtmlPreformattedText htmlPreformattedText, final Mode mode) {
582 if (isDisplayed(htmlPreformattedText)) {
583 builder.appendBlockSeparator();
584 appendChildren(builder, htmlPreformattedText, whiteSpaceStyle(htmlPreformattedText, Mode.PRE));
585 builder.appendBlockSeparator();
586 }
587 }
588
589
590
591
592
593
594
595
596 protected void appendInlineFrame(final HtmlSerializerTextBuilder builder,
597 final HtmlInlineFrame htmlInlineFrame, final Mode mode) {
598 if (isDisplayed(htmlInlineFrame)) {
599 builder.appendBlockSeparator();
600 final Page page = htmlInlineFrame.getEnclosedPage();
601 if (page instanceof SgmlPage) {
602 builder.append(((SgmlPage) page).asNormalizedText(), mode);
603 }
604 builder.appendBlockSeparator();
605 }
606 }
607
608
609
610
611
612
613
614
615 protected void appendText(final HtmlSerializerTextBuilder builder, final DomText domText, final Mode mode) {
616 final DomNode parent = domText.getParentNode();
617 if (parent instanceof HtmlTitle
618 || parent instanceof HtmlScript) {
619 builder.append(domText.getData(), Mode.WHITE_SPACE_PRE_LINE);
620 }
621
622 if (parent == null
623 || parent instanceof HtmlTitle
624 || parent instanceof HtmlScript
625 || isDisplayed(parent)) {
626 builder.append(domText.getData(), mode);
627 }
628 }
629
630
631
632
633
634
635
636
637 protected void appendComment(final HtmlSerializerTextBuilder builder,
638 final DomComment domComment, final Mode mode) {
639
640 }
641
642
643
644
645
646
647
648
649 protected void appendBreak(final HtmlSerializerTextBuilder builder,
650 final HtmlBreak htmlBreak, final Mode mode) {
651 builder.appendBreak(mode);
652 }
653
654
655
656
657
658
659
660
661 protected void appendCheckBoxInput(final HtmlSerializerTextBuilder builder,
662 final HtmlCheckBoxInput htmlCheckBoxInput, final Mode mode) {
663
664 }
665
666
667
668
669
670
671
672
673 protected void appendRadioButtonInput(final HtmlSerializerTextBuilder builder,
674 final HtmlRadioButtonInput htmlRadioButtonInput, final Mode mode) {
675
676 }
677
678 protected Mode whiteSpaceStyle(final DomNode domNode, final Mode defaultMode) {
679 final Page page = domNode.getPage();
680 if (page != null) {
681 final WebWindow window = page.getEnclosingWindow();
682 if (window.getWebClient().getOptions().isCssEnabled()) {
683 DomNode node = domNode;
684 while (node != null) {
685 if (node instanceof DomElement) {
686 final ComputedCssStyleDeclaration style = window.getComputedStyle((DomElement) node, null);
687 final String value = style.getStyleAttribute(Definition.WHITE_SPACE, false);
688 if (StringUtils.isNoneEmpty(value)) {
689 if ("normal".equalsIgnoreCase(value)) {
690 return Mode.WHITE_SPACE_NORMAL;
691 }
692 if ("nowrap".equalsIgnoreCase(value)) {
693 return Mode.WHITE_SPACE_NORMAL;
694 }
695 if ("pre".equalsIgnoreCase(value)) {
696 return Mode.WHITE_SPACE_PRE;
697 }
698 if ("pre-wrap".equalsIgnoreCase(value)) {
699 return Mode.WHITE_SPACE_PRE;
700 }
701 if ("pre-line".equalsIgnoreCase(value)) {
702 return Mode.WHITE_SPACE_PRE_LINE;
703 }
704 }
705 }
706 node = node.getParentNode();
707 }
708 }
709 }
710 return defaultMode;
711 }
712
713 protected Mode updateWhiteSpaceStyle(final DomNode domNode, final Mode defaultMode) {
714 final Page page = domNode.getPage();
715 if (page != null) {
716 final WebWindow window = page.getEnclosingWindow();
717 if (window.getWebClient().getOptions().isCssEnabled()) {
718 if (domNode instanceof DomElement) {
719 final ComputedCssStyleDeclaration style = window.getComputedStyle((DomElement) domNode, null);
720 final String value = style.getStyleAttribute(Definition.WHITE_SPACE, false);
721 if (StringUtils.isNoneEmpty(value)) {
722 if ("normal".equalsIgnoreCase(value)) {
723 return Mode.WHITE_SPACE_NORMAL;
724 }
725 if ("nowrap".equalsIgnoreCase(value)) {
726 return Mode.WHITE_SPACE_NORMAL;
727 }
728 if ("pre".equalsIgnoreCase(value)) {
729 return Mode.WHITE_SPACE_PRE;
730 }
731 if ("pre-wrap".equalsIgnoreCase(value)) {
732 return Mode.WHITE_SPACE_PRE;
733 }
734 if ("pre-line".equalsIgnoreCase(value)) {
735 return Mode.WHITE_SPACE_PRE_LINE;
736 }
737 }
738 }
739 }
740 }
741 return defaultMode;
742 }
743
744
745
746
747 protected static class HtmlSerializerTextBuilder {
748
749 protected enum Mode {
750
751
752
753 PRE,
754
755
756
757
758
759
760 WHITE_SPACE_NORMAL,
761
762
763
764
765
766 WHITE_SPACE_PRE,
767
768
769
770
771
772
773 WHITE_SPACE_PRE_LINE
774 }
775
776 private enum State {
777 DEFAULT,
778 EMPTY,
779 BLANK_AT_END,
780 BLANK_AT_END_AFTER_NEWLINE,
781 NEWLINE_AT_END,
782 BREAK_AT_END,
783 BLOCK_SEPARATOR_AT_END
784 }
785
/** What the output currently ends with; starts as {@code EMPTY}. */
private State state_ = State.EMPTY;

/** Collects the output. */
private final StringBuilder builder_ = new StringBuilder();

/** Length of the output prefix that {@code getText()} returns and {@code trimRight} cuts back to. */
private int trimRightPos_;

/** Set when {@code append} writes a regular character; cleared by {@code resetContentAdded()}. */
private boolean contentAdded_;

/** While {@code true}, {@code appendBreak} does nothing. */
private boolean ignoreHtmlBreaks_;

/**
 * Ctor; creates a builder with empty output.
 */
public HtmlSerializerTextBuilder() {
    // the initial values are assigned at the field declarations:
    // empty buffer, State.EMPTY, trim position 0
}
800
801
802
803
804
805
806
807
808 public void append(final String content, final Mode mode) {
809 if (content == null) {
810 return;
811 }
812 int length = content.length();
813 if (length == 0) {
814 return;
815 }
816
817 length--;
818 int i = -1;
819 for (char c : content.toCharArray()) {
820 i++;
821
822
823 if (c == '\r') {
824 if (length != i) {
825 continue;
826 }
827 c = '\n';
828 }
829
830 if (c == '\n') {
831 if (mode == Mode.WHITE_SPACE_PRE) {
832 switch (state_) {
833 case EMPTY:
834 case BLOCK_SEPARATOR_AT_END:
835 break;
836 default:
837 builder_.append('\n');
838 state_ = State.NEWLINE_AT_END;
839 trimRightPos_ = builder_.length();
840 break;
841 }
842 continue;
843 }
844
845 if (mode == Mode.PRE) {
846 builder_.append('\n');
847 state_ = State.NEWLINE_AT_END;
848 trimRightPos_ = builder_.length();
849
850 continue;
851 }
852
853 if (mode == Mode.WHITE_SPACE_PRE_LINE) {
854 switch (state_) {
855 case EMPTY:
856 case BLOCK_SEPARATOR_AT_END:
857 break;
858 default:
859 builder_.append('\n');
860 state_ = State.NEWLINE_AT_END;
861 trimRightPos_ = builder_.length();
862 break;
863 }
864 continue;
865 }
866
867 switch (state_) {
868 case EMPTY:
869 case BLANK_AT_END:
870 case BLANK_AT_END_AFTER_NEWLINE:
871 case BLOCK_SEPARATOR_AT_END:
872 case NEWLINE_AT_END:
873 case BREAK_AT_END:
874 break;
875 default:
876 builder_.append(' ');
877 state_ = State.BLANK_AT_END;
878 break;
879 }
880 continue;
881 }
882
883 if (c == ' ' || c == '\t' || c == '\f') {
884 if (mode == Mode.WHITE_SPACE_PRE || mode == Mode.PRE) {
885 appendBlank();
886 continue;
887 }
888
889 if (mode == Mode.WHITE_SPACE_PRE_LINE) {
890 switch (state_) {
891 case EMPTY:
892 case BLANK_AT_END:
893 case BLANK_AT_END_AFTER_NEWLINE:
894 case BREAK_AT_END:
895 break;
896 default:
897 builder_.append(' ');
898 state_ = State.BLANK_AT_END;
899 break;
900 }
901 continue;
902 }
903
904 switch (state_) {
905 case EMPTY:
906 case BLANK_AT_END:
907 case BLANK_AT_END_AFTER_NEWLINE:
908 case BLOCK_SEPARATOR_AT_END:
909 case NEWLINE_AT_END:
910 case BREAK_AT_END:
911 break;
912 default:
913 builder_.append(' ');
914 state_ = State.BLANK_AT_END;
915 break;
916 }
917 continue;
918 }
919
920 if (c == (char) 160) {
921 appendBlank();
922 if (mode == Mode.WHITE_SPACE_NORMAL || mode == Mode.WHITE_SPACE_PRE_LINE) {
923 state_ = State.DEFAULT;
924 }
925 continue;
926 }
927 builder_.append(c);
928 state_ = State.DEFAULT;
929 trimRightPos_ = builder_.length();
930 contentAdded_ = true;
931 }
932 }
933
934
935
936
937 public void appendBlockSeparator() {
938 switch (state_) {
939 case EMPTY:
940 break;
941 case BLANK_AT_END:
942 builder_.setLength(trimRightPos_);
943 if (builder_.length() == 0) {
944 state_ = State.EMPTY;
945 }
946 else {
947 builder_.append('\n');
948 state_ = State.BLOCK_SEPARATOR_AT_END;
949 }
950 break;
951 case BLANK_AT_END_AFTER_NEWLINE:
952 builder_.setLength(trimRightPos_ - 1);
953 if (builder_.length() == 0) {
954 state_ = State.EMPTY;
955 }
956 else {
957 builder_.append('\n');
958 state_ = State.BLOCK_SEPARATOR_AT_END;
959 }
960 break;
961 case BLOCK_SEPARATOR_AT_END:
962 break;
963 case NEWLINE_AT_END:
964 case BREAK_AT_END:
965 builder_.setLength(builder_.length() - 1);
966 trimRightPos_ = trimRightPos_ - 1;
967 if (builder_.length() == 0) {
968 state_ = State.EMPTY;
969 }
970 else {
971 builder_.append('\n');
972 state_ = State.BLOCK_SEPARATOR_AT_END;
973 }
974 break;
975 default:
976 builder_.append('\n');
977 state_ = State.BLOCK_SEPARATOR_AT_END;
978 break;
979 }
980 }
981
982
983
984
985
986
987 public void appendBreak(final Mode mode) {
988 if (ignoreHtmlBreaks_) {
989 return;
990 }
991
992 builder_.setLength(trimRightPos_);
993
994 builder_.append('\n');
995 state_ = State.BREAK_AT_END;
996 trimRightPos_ = builder_.length();
997 }
998
999
1000
1001
1002 public void appendBlank() {
1003 builder_.append(' ');
1004 state_ = State.BLANK_AT_END;
1005 trimRightPos_ = builder_.length();
1006 }
1007
1008
1009
1010
1011
1012
1013 public void trimRight(final Mode mode) {
1014 if (mode == Mode.PRE) {
1015 switch (state_) {
1016 case BLOCK_SEPARATOR_AT_END:
1017 case NEWLINE_AT_END:
1018 case BREAK_AT_END:
1019 if (trimRightPos_ == builder_.length()) {
1020 trimRightPos_--;
1021 }
1022 break;
1023 default:
1024 break;
1025 }
1026 }
1027
1028 builder_.setLength(trimRightPos_);
1029 state_ = State.DEFAULT;
1030 if (builder_.length() == 0) {
1031 state_ = State.EMPTY;
1032 }
1033 }
1034
1035
1036
1037
1038 public boolean wasContentAdded() {
1039 return contentAdded_;
1040 }
1041
1042
1043
1044
1045 public void resetContentAdded() {
1046 contentAdded_ = false;
1047 }
1048
1049
1050
1051
1052 public void ignoreHtmlBreaks() {
1053 ignoreHtmlBreaks_ = true;
1054 }
1055
1056
1057
1058
1059 public void processHtmlBreaks() {
1060 ignoreHtmlBreaks_ = false;
1061 }
1062
1063
1064
1065
1066 public String getText() {
1067 return builder_.substring(0, trimRightPos_);
1068 }
1069 }
1070 }