1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.javascript.host.xml;
16
17 import java.util.Arrays;
18 import java.util.HashSet;
19 import java.util.Set;
20
21 import org.htmlunit.SgmlPage;
22 import org.htmlunit.html.*;
23 import org.htmlunit.javascript.HtmlUnitScriptable;
24 import org.htmlunit.javascript.configuration.JsxClass;
25 import org.htmlunit.javascript.configuration.JsxConstructor;
26 import org.htmlunit.javascript.configuration.JsxFunction;
27 import org.htmlunit.javascript.host.Element;
28 import org.htmlunit.javascript.host.dom.Document;
29 import org.htmlunit.javascript.host.dom.DocumentFragment;
30 import org.htmlunit.javascript.host.dom.Node;
31 import org.htmlunit.util.StringUtils;
32 import org.w3c.dom.NamedNodeMap;
33
34
35
36
37
38
39
40
41
42 @JsxClass
43 public class XMLSerializer extends HtmlUnitScriptable {
44
45
46
47 private static final Set<String> NON_EMPTY_TAGS = new HashSet<>(Arrays.asList(
48 HtmlAbbreviated.TAG_NAME, HtmlAcronym.TAG_NAME,
49 HtmlAnchor.TAG_NAME, HtmlAddress.TAG_NAME, HtmlAudio.TAG_NAME,
50 HtmlBidirectionalOverride.TAG_NAME, HtmlBig.TAG_NAME,
51 HtmlBlockQuote.TAG_NAME, HtmlBody.TAG_NAME, HtmlBold.TAG_NAME,
52 HtmlButton.TAG_NAME, HtmlCanvas.TAG_NAME, HtmlCaption.TAG_NAME,
53 HtmlCenter.TAG_NAME, HtmlCitation.TAG_NAME, HtmlCode.TAG_NAME,
54 HtmlDefinition.TAG_NAME, HtmlDefinitionDescription.TAG_NAME,
55 HtmlDeletedText.TAG_NAME, HtmlDirectory.TAG_NAME,
56 HtmlDivision.TAG_NAME,
57 HtmlDefinitionList.TAG_NAME,
58 HtmlDefinitionTerm.TAG_NAME, HtmlEmbed.TAG_NAME,
59 HtmlEmphasis.TAG_NAME, HtmlFieldSet.TAG_NAME,
60 HtmlFont.TAG_NAME, HtmlForm.TAG_NAME,
61 HtmlFrame.TAG_NAME, HtmlFrameSet.TAG_NAME, HtmlHeading1.TAG_NAME,
62 HtmlHeading2.TAG_NAME, HtmlHeading3.TAG_NAME,
63 HtmlHeading4.TAG_NAME, HtmlHeading5.TAG_NAME,
64 HtmlHeading6.TAG_NAME, HtmlHead.TAG_NAME,
65 HtmlHtml.TAG_NAME, HtmlInlineFrame.TAG_NAME,
66 HtmlInsertedText.TAG_NAME,
67 HtmlItalic.TAG_NAME, HtmlKeyboard.TAG_NAME, HtmlLabel.TAG_NAME,
68 HtmlLegend.TAG_NAME, HtmlListing.TAG_NAME, HtmlListItem.TAG_NAME,
69 HtmlMap.TAG_NAME, HtmlMarquee.TAG_NAME,
70 HtmlMenu.TAG_NAME,
71 HtmlNoBreak.TAG_NAME, HtmlNoEmbed.TAG_NAME, HtmlNoFrames.TAG_NAME,
72 HtmlNoScript.TAG_NAME, HtmlObject.TAG_NAME, HtmlOrderedList.TAG_NAME,
73 HtmlOptionGroup.TAG_NAME, HtmlOption.TAG_NAME, HtmlParagraph.TAG_NAME,
74 HtmlPlainText.TAG_NAME, HtmlPreformattedText.TAG_NAME,
75 HtmlInlineQuotation.TAG_NAME, HtmlS.TAG_NAME, HtmlSample.TAG_NAME,
76 HtmlScript.TAG_NAME, HtmlSelect.TAG_NAME, HtmlSmall.TAG_NAME,
77 HtmlSource.TAG_NAME, HtmlSpan.TAG_NAME,
78 HtmlStrike.TAG_NAME, HtmlStrong.TAG_NAME, HtmlStyle.TAG_NAME,
79 HtmlSubscript.TAG_NAME, HtmlSuperscript.TAG_NAME, HtmlTitle.TAG_NAME,
80 HtmlTable.TAG_NAME, HtmlTableColumn.TAG_NAME, HtmlTableColumnGroup.TAG_NAME,
81 HtmlTableBody.TAG_NAME, HtmlTableDataCell.TAG_NAME, HtmlTableHeaderCell.TAG_NAME,
82 HtmlTableRow.TAG_NAME, HtmlTextArea.TAG_NAME, HtmlTableFooter.TAG_NAME,
83 HtmlTableHeader.TAG_NAME, HtmlTeletype.TAG_NAME, HtmlUnderlined.TAG_NAME,
84 HtmlUnorderedList.TAG_NAME, HtmlVariable.TAG_NAME, HtmlVideo.TAG_NAME,
85 HtmlWordBreak.TAG_NAME, HtmlExample.TAG_NAME
86 ));
87
88
89
90
91 @JsxConstructor
92 public void jsConstructor() {
93
94 }
95
96
97
98
99
100
101 @JsxFunction
102 public String serializeToString(Node root) {
103 if (root == null) {
104 return "";
105 }
106
107 if (root instanceof DocumentFragment) {
108 Node node = root.getFirstChild();
109 if (node == null) {
110 return "";
111 }
112
113 final StringBuilder builder = new StringBuilder();
114 while (node != null) {
115 builder.append(serializeToString(node));
116 node = node.getNextSibling();
117 }
118 return builder.toString().trim();
119 }
120
121 if (root instanceof Document) {
122 root = ((Document) root).getDocumentElement();
123 }
124
125 if (root instanceof Element) {
126 final StringBuilder builder = new StringBuilder();
127 final DomNode node = root.getDomNodeOrDie();
128 final SgmlPage page = node.getPage();
129 final boolean isHtmlPage = page != null && page.isHtmlPage();
130
131 String forcedNamespace = null;
132 if (isHtmlPage) {
133 forcedNamespace = "http://www.w3.org/1999/xhtml";
134 }
135 toXml(1, node, builder, forcedNamespace);
136
137 return builder.toString();
138 }
139
140 return root.getDomNodeOrDie().asXml();
141 }
142
143 private void toXml(final int indent,
144 final DomNode node, final StringBuilder builder, final String foredNamespace) {
145 final String nodeName = node.getNodeName();
146 builder.append('<').append(nodeName);
147
148 String optionalPrefix = "";
149 final String namespaceURI = node.getNamespaceURI();
150 final String prefix = node.getPrefix();
151 if (namespaceURI != null && prefix != null) {
152 boolean sameNamespace = false;
153 for (DomNode parentNode = node.getParentNode(); parentNode instanceof DomElement;
154 parentNode = parentNode.getParentNode()) {
155 if (namespaceURI.equals(parentNode.getNamespaceURI())) {
156 sameNamespace = true;
157 break;
158 }
159 }
160 if (node.getParentNode() == null || !sameNamespace) {
161 ((DomElement) node).setAttribute("xmlns:" + prefix, namespaceURI);
162 }
163 }
164 else if (foredNamespace != null) {
165 builder.append(" xmlns=\"").append(foredNamespace).append('"');
166 optionalPrefix = " ";
167 }
168
169 final NamedNodeMap attributesMap = node.getAttributes();
170 for (int i = 0; i < attributesMap.getLength(); i++) {
171 final DomAttr attrib = (DomAttr) attributesMap.item(i);
172 builder.append(' ').append(attrib.getQualifiedName())
173 .append("=\"").append(attrib.getValue()).append('"');
174 }
175 boolean startTagClosed = false;
176 for (final DomNode child : node.getChildren()) {
177 if (!startTagClosed) {
178 builder.append(optionalPrefix).append('>');
179 startTagClosed = true;
180 }
181 switch (child.getNodeType()) {
182 case Node.ELEMENT_NODE:
183 toXml(indent + 1, child, builder, null);
184 break;
185
186 case Node.TEXT_NODE:
187 String value = child.getNodeValue();
188 value = StringUtils.escapeXmlChars(value);
189 builder.append(value);
190 break;
191
192 case Node.CDATA_SECTION_NODE:
193 case Node.COMMENT_NODE:
194 builder.append(child.asXml());
195 break;
196
197 default:
198 break;
199 }
200 }
201 if (!startTagClosed) {
202 final String tagName = StringUtils.toRootLowerCase(nodeName);
203 if (NON_EMPTY_TAGS.contains(tagName)) {
204 builder.append("></").append(nodeName).append('>');
205 }
206 else {
207 builder.append(optionalPrefix).append("/>");
208 }
209 }
210 else {
211 builder.append("</").append(nodeName).append('>');
212 }
213 }
214
215 }