1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.javascript.host.xml;
16
17 import java.util.Arrays;
18 import java.util.HashSet;
19 import java.util.Set;
20
21 import org.htmlunit.SgmlPage;
22 import org.htmlunit.html.*;
23 import org.htmlunit.javascript.HtmlUnitScriptable;
24 import org.htmlunit.javascript.configuration.JsxClass;
25 import org.htmlunit.javascript.configuration.JsxConstructor;
26 import org.htmlunit.javascript.configuration.JsxFunction;
27 import org.htmlunit.javascript.host.Element;
28 import org.htmlunit.javascript.host.dom.Document;
29 import org.htmlunit.javascript.host.dom.DocumentFragment;
30 import org.htmlunit.javascript.host.dom.Node;
31 import org.htmlunit.util.StringUtils;
32 import org.w3c.dom.NamedNodeMap;
33
34
35
36
37
38
39
40
41
42
43 @JsxClass
44 public class XMLSerializer extends HtmlUnitScriptable {
45
46
47
48 private static final Set<String> NON_EMPTY_TAGS = new HashSet<>(Arrays.asList(
49 HtmlAbbreviated.TAG_NAME, HtmlAcronym.TAG_NAME,
50 HtmlAnchor.TAG_NAME, HtmlAddress.TAG_NAME, HtmlAudio.TAG_NAME,
51 HtmlBidirectionalOverride.TAG_NAME, HtmlBig.TAG_NAME,
52 HtmlBlockQuote.TAG_NAME, HtmlBody.TAG_NAME, HtmlBold.TAG_NAME,
53 HtmlButton.TAG_NAME, HtmlCanvas.TAG_NAME, HtmlCaption.TAG_NAME,
54 HtmlCenter.TAG_NAME, HtmlCitation.TAG_NAME, HtmlCode.TAG_NAME,
55 HtmlDefinition.TAG_NAME, HtmlDefinitionDescription.TAG_NAME,
56 HtmlDeletedText.TAG_NAME, HtmlDirectory.TAG_NAME,
57 HtmlDivision.TAG_NAME,
58 HtmlDefinitionList.TAG_NAME,
59 HtmlDefinitionTerm.TAG_NAME, HtmlEmbed.TAG_NAME,
60 HtmlEmphasis.TAG_NAME, HtmlFieldSet.TAG_NAME,
61 HtmlFont.TAG_NAME, HtmlForm.TAG_NAME,
62 HtmlFrame.TAG_NAME, HtmlFrameSet.TAG_NAME, HtmlHeading1.TAG_NAME,
63 HtmlHeading2.TAG_NAME, HtmlHeading3.TAG_NAME,
64 HtmlHeading4.TAG_NAME, HtmlHeading5.TAG_NAME,
65 HtmlHeading6.TAG_NAME, HtmlHead.TAG_NAME,
66 HtmlHtml.TAG_NAME, HtmlInlineFrame.TAG_NAME,
67 HtmlInsertedText.TAG_NAME,
68 HtmlItalic.TAG_NAME, HtmlKeyboard.TAG_NAME, HtmlLabel.TAG_NAME,
69 HtmlLegend.TAG_NAME, HtmlListing.TAG_NAME, HtmlListItem.TAG_NAME,
70 HtmlMap.TAG_NAME, HtmlMarquee.TAG_NAME,
71 HtmlMenu.TAG_NAME,
72 HtmlNoBreak.TAG_NAME, HtmlNoEmbed.TAG_NAME, HtmlNoFrames.TAG_NAME,
73 HtmlNoScript.TAG_NAME, HtmlObject.TAG_NAME, HtmlOrderedList.TAG_NAME,
74 HtmlOptionGroup.TAG_NAME, HtmlOption.TAG_NAME, HtmlParagraph.TAG_NAME,
75 HtmlPlainText.TAG_NAME, HtmlPreformattedText.TAG_NAME,
76 HtmlInlineQuotation.TAG_NAME, HtmlS.TAG_NAME, HtmlSample.TAG_NAME,
77 HtmlScript.TAG_NAME, HtmlSelect.TAG_NAME, HtmlSmall.TAG_NAME,
78 HtmlSource.TAG_NAME, HtmlSpan.TAG_NAME,
79 HtmlStrike.TAG_NAME, HtmlStrong.TAG_NAME, HtmlStyle.TAG_NAME,
80 HtmlSubscript.TAG_NAME, HtmlSuperscript.TAG_NAME, HtmlTitle.TAG_NAME,
81 HtmlTable.TAG_NAME, HtmlTableColumn.TAG_NAME, HtmlTableColumnGroup.TAG_NAME,
82 HtmlTableBody.TAG_NAME, HtmlTableDataCell.TAG_NAME, HtmlTableHeaderCell.TAG_NAME,
83 HtmlTableRow.TAG_NAME, HtmlTextArea.TAG_NAME, HtmlTableFooter.TAG_NAME,
84 HtmlTableHeader.TAG_NAME, HtmlTeletype.TAG_NAME, HtmlUnderlined.TAG_NAME,
85 HtmlUnorderedList.TAG_NAME, HtmlVariable.TAG_NAME, HtmlVideo.TAG_NAME,
86 HtmlWordBreak.TAG_NAME, HtmlExample.TAG_NAME
87 ));
88
89
90
91
92 @JsxConstructor
93 public void jsConstructor() {
94
95 }
96
97
98
99
100
101
102 @JsxFunction
103 public String serializeToString(Node root) {
104 if (root == null) {
105 return "";
106 }
107
108 if (root instanceof DocumentFragment) {
109 Node node = root.getFirstChild();
110 if (node == null) {
111 return "";
112 }
113
114 final StringBuilder builder = new StringBuilder();
115 while (node != null) {
116 builder.append(serializeToString(node));
117 node = node.getNextSibling();
118 }
119 return builder.toString().trim();
120 }
121
122 final boolean rootIsDocument = root instanceof Document;
123 if (rootIsDocument) {
124 root = ((Document) root).getDocumentElement();
125 }
126
127 if (root instanceof Element) {
128 final StringBuilder builder = new StringBuilder();
129 final DomNode node = root.getDomNodeOrDie();
130 final SgmlPage page = node.getPage();
131 final boolean isHtmlPage = page != null && page.isHtmlPage();
132
133 String forcedNamespace = null;
134 if (!rootIsDocument && isHtmlPage) {
135 forcedNamespace = "http://www.w3.org/1999/xhtml";
136 }
137 toXml(1, node, builder, forcedNamespace);
138
139 return builder.toString();
140 }
141
142 return root.getDomNodeOrDie().asXml();
143 }
144
145 private void toXml(final int indent,
146 final DomNode node, final StringBuilder builder, final String foredNamespace) {
147 final String nodeName = node.getNodeName();
148 builder.append('<').append(nodeName);
149
150 String optionalPrefix = "";
151 final String namespaceURI = node.getNamespaceURI();
152 final String prefix = node.getPrefix();
153 if (namespaceURI != null && prefix != null) {
154 boolean sameNamespace = false;
155 for (DomNode parentNode = node.getParentNode(); parentNode instanceof DomElement;
156 parentNode = parentNode.getParentNode()) {
157 if (namespaceURI.equals(parentNode.getNamespaceURI())) {
158 sameNamespace = true;
159 break;
160 }
161 }
162 if (node.getParentNode() == null || !sameNamespace) {
163 ((DomElement) node).setAttribute("xmlns:" + prefix, namespaceURI);
164 }
165 }
166 else if (foredNamespace != null) {
167 builder.append(" xmlns=\"").append(foredNamespace).append('"');
168 optionalPrefix = " ";
169 }
170
171 final NamedNodeMap attributesMap = node.getAttributes();
172 final int lenght = attributesMap.getLength();
173 for (int i = 0; i < lenght; i++) {
174 final DomAttr attrib = (DomAttr) attributesMap.item(i);
175 builder.append(' ').append(attrib.getQualifiedName())
176 .append("=\"").append(attrib.getValue()).append('"');
177 }
178 boolean startTagClosed = false;
179 for (final DomNode child : node.getChildren()) {
180 if (!startTagClosed) {
181 builder.append(optionalPrefix).append('>');
182 startTagClosed = true;
183 }
184 switch (child.getNodeType()) {
185 case Node.ELEMENT_NODE:
186 toXml(indent + 1, child, builder, null);
187 break;
188
189 case Node.TEXT_NODE:
190 String value = child.getNodeValue();
191 value = StringUtils.escapeXmlChars(value);
192 builder.append(value);
193 break;
194
195 case Node.CDATA_SECTION_NODE:
196 case Node.COMMENT_NODE:
197 builder.append(child.asXml());
198 break;
199
200 default:
201 break;
202 }
203 }
204 if (!startTagClosed) {
205 final String tagName = StringUtils.toRootLowerCase(nodeName);
206 if (NON_EMPTY_TAGS.contains(tagName)) {
207 builder.append("></").append(nodeName).append('>');
208 }
209 else {
210 builder.append(optionalPrefix).append("/>");
211 }
212 }
213 else {
214 builder.append("</").append(nodeName).append('>');
215 }
216 }
217
218 }