1 /*
2 * Copyright (c) 2002-2025 Gargoyle Software Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 * https://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 package org.htmlunit;
16
17 import java.net.URL;
18 import java.nio.charset.Charset;
19 import java.util.ArrayList;
20 import java.util.Comparator;
21 import java.util.List;
22
23 import org.htmlunit.html.AbstractDomNodeList;
24 import org.htmlunit.html.DomAttr;
25 import org.htmlunit.html.DomCDataSection;
26 import org.htmlunit.html.DomComment;
27 import org.htmlunit.html.DomDocumentFragment;
28 import org.htmlunit.html.DomElement;
29 import org.htmlunit.html.DomNode;
30 import org.htmlunit.html.DomNodeIterator;
31 import org.htmlunit.html.DomNodeList;
32 import org.htmlunit.html.DomText;
33 import org.htmlunit.util.StringUtils;
34 import org.htmlunit.util.UrlUtils;
35 import org.w3c.dom.CDATASection;
36 import org.w3c.dom.Comment;
37 import org.w3c.dom.DOMException;
38 import org.w3c.dom.Document;
39 import org.w3c.dom.DocumentType;
40 import org.w3c.dom.Element;
41 import org.w3c.dom.Node;
42 import org.w3c.dom.Text;
43 import org.w3c.dom.traversal.NodeFilter;
44
45 /**
 * Base class for pages of Standard Generalized Markup Language (SGML) based formats, e.g. HTML and XML.
47 *
48 * @author Ahmed Ashour
49 * @author Ronald Brill
50 */
51 public abstract class SgmlPage extends DomNode implements Page, Document {
52
/** The document type of this page; assigned through {@link #setDocumentType(DocumentType)}. */
private DocumentType documentType_;
/** The web response handed to the constructor; {@link #getUrl()} tolerates it being {@code null}. */
private final WebResponse webResponse_;
/** The window that currently contains this page; replaceable via {@link #setEnclosingWindow(WebWindow)}. */
private WebWindow enclosingWindow_;
/** The web client obtained from the window that was passed to the constructor. */
private final WebClient webClient_;
/** The printing state as last set by {@link #setPrinting(boolean)}. */
private boolean printing_;
/** Switched to {@code true} by {@link #domChangeListenerAdded()}; this class never switches it back. */
private boolean domChangeListenerInUse_;
/** Switched to {@code true} by {@link #characterDataChangeListenerAdded()}; this class never switches it back. */
private boolean characterDataChangeListenerInUse_;
60
/**
 * Builds a new page for the given response inside the given window.
 * The {@link WebClient} of the page is taken from the window, therefore
 * the window must not be {@code null}.
 *
 * @param webResponse the web response that was used to create this page
 * @param webWindow the window that this page is being loaded into
 */
public SgmlPage(final WebResponse webResponse, final WebWindow webWindow) {
    super(null);
    webClient_ = webWindow.getWebClient();
    enclosingWindow_ = webWindow;
    webResponse_ = webResponse;
}
73
74 /**
75 * {@inheritDoc}
76 */
77 @Override
78 public void cleanUp() {
79 if (getWebClient().getCache().getCachedResponse(webResponse_.getWebRequest()) == null) {
80 webResponse_.cleanUp();
81 }
82 }
83
84 /**
85 * {@inheritDoc}
86 */
87 @Override
88 public WebResponse getWebResponse() {
89 return webResponse_;
90 }
91
92 /**
93 * Gets the name for the current node.
94 * @return the node name
95 */
96 @Override
97 public String getNodeName() {
98 return "#document";
99 }
100
101 /**
102 * Gets the type of the current node.
103 * @return the node type
104 */
105 @Override
106 public short getNodeType() {
107 return DOCUMENT_NODE;
108 }
109
110 /**
111 * Returns the window that this page is sitting inside.
112 *
113 * @return the enclosing frame or null if this page isn't inside a frame
114 */
115 @Override
116 public WebWindow getEnclosingWindow() {
117 return enclosingWindow_;
118 }
119
120 /**
121 * Sets the window that contains this page.
122 *
123 * @param window the new frame or null if this page is being removed from a frame
124 */
125 public void setEnclosingWindow(final WebWindow window) {
126 enclosingWindow_ = window;
127 }
128
129 /**
130 * Returns the WebClient that originally loaded this page.
131 *
132 * @return the WebClient that originally loaded this page
133 */
134 public WebClient getWebClient() {
135 return webClient_;
136 }
137
138 /**
139 * Creates an empty {@link DomDocumentFragment} object.
140 * @return a newly created {@link DomDocumentFragment}
141 */
142 @Override
143 public DomDocumentFragment createDocumentFragment() {
144 return new DomDocumentFragment(this);
145 }
146
147 /**
148 * Returns the document type.
149 * @return the document type
150 */
151 @Override
152 public final DocumentType getDoctype() {
153 return documentType_;
154 }
155
156 /**
157 * Sets the document type.
158 * @param type the document type
159 */
160 protected void setDocumentType(final DocumentType type) {
161 documentType_ = type;
162 }
163
164 /**
165 * {@inheritDoc}
166 */
167 @Override
168 public SgmlPage getPage() {
169 return this;
170 }
171
/**
 * Returns the encoding of this page.
 * Subclasses have to provide the implementation.
 * @return the encoding as {@link Charset}
 */
public abstract Charset getCharset();
177
178 /**
179 * Returns the document element.
180 * @return the document element
181 */
182 @Override
183 public DomElement getDocumentElement() {
184 DomNode childNode = getFirstChild();
185 while (childNode != null && !(childNode instanceof DomElement)) {
186 childNode = childNode.getNextSibling();
187 }
188 return (DomElement) childNode;
189 }
190
191 /**
192 * Creates a clone of this instance.
193 * @return a clone of this instance
194 */
195 @Override
196 protected SgmlPage clone() {
197 try {
198 return (SgmlPage) super.clone();
199 }
200 catch (final CloneNotSupportedException e) {
201 throw new IllegalStateException("Clone not supported", e);
202 }
203 }
204
205 /**
206 * {@inheritDoc}
207 */
208 @Override
209 public String asXml() {
210 final DomElement documentElement = getDocumentElement();
211 if (documentElement == null) {
212 return "";
213 }
214 return documentElement.asXml();
215 }
216
/**
 * Returns {@code true} if this page has case-sensitive tag names, {@code false} otherwise. In general,
 * XML has case-sensitive tag names, and HTML doesn't. This is especially important during XPath matching.
 * Within this class the result also selects between case-sensitive and case-insensitive comparison in
 * {@link #getElementsByTagName(String)} and {@link #getElementsByTagNameNS(String, String)}.
 * @return {@code true} if this page has case-sensitive tag names, {@code false} otherwise
 */
public abstract boolean hasCaseSensitiveTagNames();
223
224 /**
225 * {@inheritDoc}
226 * The current implementation just {@link DomNode#normalize()}s the document element.
227 */
228 @Override
229 public void normalizeDocument() {
230 getDocumentElement().normalize();
231 }
232
233 /**
234 * {@inheritDoc}
235 */
236 @Override
237 public String getCanonicalXPath() {
238 return "/";
239 }
240
241 /**
242 * {@inheritDoc}
243 */
244 @Override
245 public DomAttr createAttribute(final String name) {
246 return new DomAttr(getPage(), null, name, "", false);
247 }
248
249 /**
250 * Returns the URL of this page.
251 * @return the URL of this page
252 */
253 @Override
254 public URL getUrl() {
255 final WebResponse wr = getWebResponse();
256 if (null == wr) {
257 return UrlUtils.URL_ABOUT_BLANK;
258 }
259 return getWebResponse().getWebRequest().getUrl();
260 }
261
262 @Override
263 public boolean isHtmlPage() {
264 return false;
265 }
266
267 /**
268 * {@inheritDoc}
269 */
270 @Override
271 public DomNodeList<DomElement> getElementsByTagName(final String tagName) {
272 return new AbstractDomNodeList<DomElement>(this) {
273 @Override
274 protected List<DomElement> provideElements() {
275 final List<DomElement> res = new ArrayList<>();
276 final boolean caseSensitive = hasCaseSensitiveTagNames();
277 for (final DomElement elem : getDomElementDescendants()) {
278 final String localName = elem.getLocalName();
279 if (StringUtils.equalsChar('*', tagName) || localName.equals(tagName)
280 || (!caseSensitive && localName.equalsIgnoreCase(tagName))) {
281 res.add(elem);
282 }
283 }
284 return res;
285 }
286 };
287 }
288
289 /**
290 * {@inheritDoc}
291 */
292 @Override
293 public DomNodeList<DomElement> getElementsByTagNameNS(final String namespaceURI, final String localName) {
294 return new AbstractDomNodeList<DomElement>(this) {
295 @Override
296 protected List<DomElement> provideElements() {
297 final List<DomElement> res = new ArrayList<>();
298 final Comparator<String> comparator;
299
300 if (hasCaseSensitiveTagNames()) {
301 comparator = Comparator.nullsFirst(String::compareTo);
302 }
303 else {
304 comparator = Comparator.nullsFirst(String::compareToIgnoreCase);
305 }
306
307 for (final DomElement elem : getDomElementDescendants()) {
308 final String locName = elem.getLocalName();
309
310 if ((StringUtils.equalsChar('*', namespaceURI)
311 || comparator.compare(namespaceURI, elem.getNamespaceURI()) == 0)
312 && (StringUtils.equalsChar('*', locName)
313 || comparator.compare(locName, elem.getLocalName()) == 0)) {
314 res.add(elem);
315 }
316 }
317 return res;
318 }
319 };
320 }
321
322 /**
323 * {@inheritDoc}
324 */
325 @Override
326 public CDATASection createCDATASection(final String data) {
327 return new DomCDataSection(this, data);
328 }
329
330 /**
331 * {@inheritDoc}
332 */
333 @Override
334 public Text createTextNode(final String data) {
335 return new DomText(this, data);
336 }
337
338 /**
339 * {@inheritDoc}
340 */
341 @Override
342 public Comment createComment(final String data) {
343 return new DomComment(this, data);
344 }
345
346 /**
347 * Create a new <code>NodeIterator</code> over the subtree rooted at the
348 * specified node.
349 * @param root The node which will be iterated together with its
350 * children. The <code>NodeIterator</code> is initially positioned
351 * just before this node. The <code>whatToShow</code> flags and the
352 * filter, if any, are not considered when setting this position. The
353 * root must not be <code>null</code>.
354 * @param whatToShow This flag specifies which node types may appear in
355 * the logical view of the tree presented by the
356 * <code>NodeIterator</code>. See the description of
357 * <code>NodeFilter</code> for the set of possible <code>SHOW_</code>
358 * values.These flags can be combined using <code>OR</code>.
359 * @param filter The <code>NodeFilter</code> to be used with this
360 * <code>NodeIterator</code>, or <code>null</code> to indicate no
361 * filter.
362 * @param entityReferenceExpansion The value of this flag determines
363 * whether entity reference nodes are expanded.
364 * @return The newly created <code>NodeIterator</code>.
365 * @exception DOMException
366 * NOT_SUPPORTED_ERR: Raised if the specified <code>root</code> is <code>null</code>.
367 */
368 public DomNodeIterator createNodeIterator(final Node root, final int whatToShow, final NodeFilter filter,
369 final boolean entityReferenceExpansion) throws DOMException {
370 return new DomNodeIterator((DomNode) root, whatToShow, filter, entityReferenceExpansion);
371 }
372
/**
 * Returns the content type of this page.
 * Subclasses have to provide the implementation.
 * @return the content type of this page
 */
public abstract String getContentType();
378
379 /**
380 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
381 *
382 * Clears the computed styles.
383 */
384 public void clearComputedStyles() {
385 // nothing to do here, overwritten in HtmlPage
386 }
387
388 /**
389 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
390 *
391 * Clears the computed styles for a specific {@link Element}.
392 * @param element the element to clear its cache
393 */
394 public void clearComputedStyles(final DomElement element) {
395 // nothing to do here, overwritten in HtmlPage
396 }
397
398 /**
399 * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
400 *
401 * Clears the computed styles for a specific {@link Element}
402 * and all parent elements.
403 * @param element the element to clear its cache
404 */
405 public void clearComputedStylesUpToRoot(final DomElement element) {
406 // nothing to do here, overwritten in HtmlPage
407 }
408
409 /**
410 * @return whether or not this is currently printing
411 */
412 public boolean isPrinting() {
413 return printing_;
414 }
415
416 /**
417 * @param printing the printing state to set
418 */
419 public void setPrinting(final boolean printing) {
420 printing_ = printing;
421 clearComputedStyles();
422 }
423
424 /**
425 * Informs about the use of a domChangeListener.
426 */
427 public void domChangeListenerAdded() {
428 domChangeListenerInUse_ = true;
429 }
430
431 /**
432 * @return true if at least one domChangeListener was registered.
433 */
434 public boolean isDomChangeListenerInUse() {
435 return domChangeListenerInUse_;
436 }
437
438 /**
439 * Informs about the use of a characterDataChangeListener.
440 */
441 public void characterDataChangeListenerAdded() {
442 characterDataChangeListenerInUse_ = true;
443 }
444
445 /**
446 * @return true if at least one characterDataChangeListener was registered.
447 */
448 public boolean isCharacterDataChangeListenerInUse() {
449 return characterDataChangeListenerInUse_;
450 }
451 }