View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit;
16  
17  import java.net.URL;
18  import java.nio.charset.Charset;
19  import java.util.ArrayList;
20  import java.util.Comparator;
21  import java.util.List;
22  
23  import org.htmlunit.html.AbstractDomNodeList;
24  import org.htmlunit.html.DomAttr;
25  import org.htmlunit.html.DomCDataSection;
26  import org.htmlunit.html.DomComment;
27  import org.htmlunit.html.DomDocumentFragment;
28  import org.htmlunit.html.DomElement;
29  import org.htmlunit.html.DomNode;
30  import org.htmlunit.html.DomNodeIterator;
31  import org.htmlunit.html.DomNodeList;
32  import org.htmlunit.html.DomText;
33  import org.htmlunit.util.StringUtils;
34  import org.htmlunit.util.UrlUtils;
35  import org.w3c.dom.CDATASection;
36  import org.w3c.dom.Comment;
37  import org.w3c.dom.DOMException;
38  import org.w3c.dom.Document;
39  import org.w3c.dom.DocumentType;
40  import org.w3c.dom.Element;
41  import org.w3c.dom.Node;
42  import org.w3c.dom.Text;
43  import org.w3c.dom.traversal.NodeFilter;
44  
45  /**
46   * A basic class of Standard Generalized Markup Language (SGML), e.g. HTML and XML.
47   *
48   * @author Ahmed Ashour
49   * @author Ronald Brill
50   */
51  public abstract class SgmlPage extends DomNode implements Page, Document {
52  
53      private DocumentType documentType_;
54      private final WebResponse webResponse_;
55      private WebWindow enclosingWindow_;
56      private final WebClient webClient_;
57      private boolean printing_;
58      private boolean domChangeListenerInUse_;
59      private boolean characterDataChangeListenerInUse_;
60  
61      /**
62       * Creates an instance of SgmlPage.
63       *
64       * @param webResponse the web response that was used to create this page
65       * @param webWindow the window that this page is being loaded into
66       */
67      public SgmlPage(final WebResponse webResponse, final WebWindow webWindow) {
68          super(null);
69          webResponse_ = webResponse;
70          enclosingWindow_ = webWindow;
71          webClient_ = webWindow.getWebClient();
72      }
73  
74      /**
75       * {@inheritDoc}
76       */
77      @Override
78      public void cleanUp() {
79          if (getWebClient().getCache().getCachedResponse(webResponse_.getWebRequest()) == null) {
80              webResponse_.cleanUp();
81          }
82      }
83  
84      /**
85       * {@inheritDoc}
86       */
87      @Override
88      public WebResponse getWebResponse() {
89          return webResponse_;
90      }
91  
92      /**
93       * Gets the name for the current node.
94       * @return the node name
95       */
96      @Override
97      public String getNodeName() {
98          return "#document";
99      }
100 
101     /**
102      * Gets the type of the current node.
103      * @return the node type
104      */
105     @Override
106     public short getNodeType() {
107         return DOCUMENT_NODE;
108     }
109 
110     /**
111      * Returns the window that this page is sitting inside.
112      *
113      * @return the enclosing frame or null if this page isn't inside a frame
114      */
115     @Override
116     public WebWindow getEnclosingWindow() {
117         return enclosingWindow_;
118     }
119 
120     /**
121      * Sets the window that contains this page.
122      *
123      * @param window the new frame or null if this page is being removed from a frame
124      */
125     public void setEnclosingWindow(final WebWindow window) {
126         enclosingWindow_ = window;
127     }
128 
129     /**
130      * Returns the WebClient that originally loaded this page.
131      *
132      * @return the WebClient that originally loaded this page
133      */
134     public WebClient getWebClient() {
135         return webClient_;
136     }
137 
138     /**
139      * Creates an empty {@link DomDocumentFragment} object.
140      * @return a newly created {@link DomDocumentFragment}
141      */
142     @Override
143     public DomDocumentFragment createDocumentFragment() {
144         return new DomDocumentFragment(this);
145     }
146 
147     /**
148      * Returns the document type.
149      * @return the document type
150      */
151     @Override
152     public final DocumentType getDoctype() {
153         return documentType_;
154     }
155 
156     /**
157      * Sets the document type.
158      * @param type the document type
159      */
160     protected void setDocumentType(final DocumentType type) {
161         documentType_ = type;
162     }
163 
164     /**
165      * {@inheritDoc}
166      */
167     @Override
168     public SgmlPage getPage() {
169         return this;
170     }
171 
172     /**
173      * Returns the encoding.
174      * @return the encoding
175      */
176     public abstract Charset getCharset();
177 
178     /**
179      * Returns the document element.
180      * @return the document element
181      */
182     @Override
183     public DomElement getDocumentElement() {
184         DomNode childNode = getFirstChild();
185         while (childNode != null && !(childNode instanceof DomElement)) {
186             childNode = childNode.getNextSibling();
187         }
188         return (DomElement) childNode;
189     }
190 
191     /**
192      * Creates a clone of this instance.
193      * @return a clone of this instance
194      */
195     @Override
196     protected SgmlPage clone() {
197         try {
198             return (SgmlPage) super.clone();
199         }
200         catch (final CloneNotSupportedException e) {
201             throw new IllegalStateException("Clone not supported", e);
202         }
203     }
204 
205     /**
206      * {@inheritDoc}
207      */
208     @Override
209     public String asXml() {
210         final DomElement documentElement = getDocumentElement();
211         if (documentElement == null) {
212             return "";
213         }
214         return documentElement.asXml();
215     }
216 
217     /**
218      * Returns {@code true} if this page has case-sensitive tag names, {@code false} otherwise. In general,
219      * XML has case-sensitive tag names, and HTML doesn't. This is especially important during XPath matching.
220      * @return {@code true} if this page has case-sensitive tag names, {@code false} otherwise
221      */
222     public abstract boolean hasCaseSensitiveTagNames();
223 
224     /**
225      * {@inheritDoc}
226      * The current implementation just {@link DomNode#normalize()}s the document element.
227      */
228     @Override
229     public void normalizeDocument() {
230         getDocumentElement().normalize();
231     }
232 
233     /**
234      * {@inheritDoc}
235      */
236     @Override
237     public String getCanonicalXPath() {
238         return "/";
239     }
240 
241     /**
242      * {@inheritDoc}
243      */
244     @Override
245     public DomAttr createAttribute(final String name) {
246         return new DomAttr(getPage(), null, name, "", false);
247     }
248 
249     /**
250      * Returns the URL of this page.
251      * @return the URL of this page
252      */
253     @Override
254     public URL getUrl() {
255         final WebResponse wr = getWebResponse();
256         if (null == wr) {
257             return UrlUtils.URL_ABOUT_BLANK;
258         }
259         return getWebResponse().getWebRequest().getUrl();
260     }
261 
262     @Override
263     public boolean isHtmlPage() {
264         return false;
265     }
266 
267     /**
268      * {@inheritDoc}
269      */
270     @Override
271     public DomNodeList<DomElement> getElementsByTagName(final String tagName) {
272         return new AbstractDomNodeList<DomElement>(this) {
273             @Override
274             protected List<DomElement> provideElements() {
275                 final List<DomElement> res = new ArrayList<>();
276                 final boolean caseSensitive = hasCaseSensitiveTagNames();
277                 for (final DomElement elem : getDomElementDescendants()) {
278                     final String localName = elem.getLocalName();
279                     if (StringUtils.equalsChar('*', tagName) || localName.equals(tagName)
280                             || (!caseSensitive && localName.equalsIgnoreCase(tagName))) {
281                         res.add(elem);
282                     }
283                 }
284                 return res;
285             }
286         };
287     }
288 
289     /**
290      * {@inheritDoc}
291      */
292     @Override
293     public DomNodeList<DomElement> getElementsByTagNameNS(final String namespaceURI, final String localName) {
294         return new AbstractDomNodeList<DomElement>(this) {
295             @Override
296             protected List<DomElement> provideElements() {
297                 final List<DomElement> res = new ArrayList<>();
298                 final Comparator<String> comparator;
299 
300                 if (hasCaseSensitiveTagNames()) {
301                     comparator = Comparator.nullsFirst(String::compareTo);
302                 }
303                 else {
304                     comparator = Comparator.nullsFirst(String::compareToIgnoreCase);
305                 }
306 
307                 for (final DomElement elem : getDomElementDescendants()) {
308                     final String locName = elem.getLocalName();
309 
310                     if ((StringUtils.equalsChar('*', namespaceURI)
311                                     || comparator.compare(namespaceURI, elem.getNamespaceURI()) == 0)
312                             && (StringUtils.equalsChar('*', locName)
313                                     || comparator.compare(locName, elem.getLocalName()) == 0)) {
314                         res.add(elem);
315                     }
316                 }
317                 return res;
318             }
319         };
320     }
321 
322     /**
323      * {@inheritDoc}
324      */
325     @Override
326     public CDATASection createCDATASection(final String data) {
327         return new DomCDataSection(this, data);
328     }
329 
330     /**
331      * {@inheritDoc}
332      */
333     @Override
334     public Text createTextNode(final String data) {
335         return new DomText(this, data);
336     }
337 
338     /**
339      * {@inheritDoc}
340      */
341     @Override
342     public Comment createComment(final String data) {
343         return new DomComment(this, data);
344     }
345 
346     /**
347      * Create a new <code>NodeIterator</code> over the subtree rooted at the
348      * specified node.
349      * @param root The node which will be iterated together with its
350      *        children. The <code>NodeIterator</code> is initially positioned
351      *        just before this node. The <code>whatToShow</code> flags and the
352      *        filter, if any, are not considered when setting this position. The
353      *        root must not be <code>null</code>.
354      * @param whatToShow This flag specifies which node types may appear in
355      *        the logical view of the tree presented by the
356      *        <code>NodeIterator</code>. See the description of
357      *        <code>NodeFilter</code> for the set of possible <code>SHOW_</code>
358      *        values.These flags can be combined using <code>OR</code>.
359      * @param filter The <code>NodeFilter</code> to be used with this
360      *        <code>NodeIterator</code>, or <code>null</code> to indicate no
361      *        filter.
362      * @param entityReferenceExpansion The value of this flag determines
363      *        whether entity reference nodes are expanded.
364      * @return The newly created <code>NodeIterator</code>.
365      * @exception DOMException
366      *            NOT_SUPPORTED_ERR: Raised if the specified <code>root</code> is <code>null</code>.
367      */
368     public DomNodeIterator createNodeIterator(final Node root, final int whatToShow, final NodeFilter filter,
369             final boolean entityReferenceExpansion) throws DOMException {
370         return new DomNodeIterator((DomNode) root, whatToShow, filter, entityReferenceExpansion);
371     }
372 
373     /**
374      * Returns the content type of this page.
375      * @return the content type of this page
376      */
377     public abstract String getContentType();
378 
379     /**
380      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
381      *
382      * Clears the computed styles.
383      */
384     public void clearComputedStyles() {
385         // nothing to do here, overwritten in HtmlPage
386     }
387 
388     /**
389      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
390      *
391      * Clears the computed styles for a specific {@link Element}.
392      * @param element the element to clear its cache
393      */
394     public void clearComputedStyles(final DomElement element) {
395         // nothing to do here, overwritten in HtmlPage
396     }
397 
398     /**
399      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
400      *
401      * Clears the computed styles for a specific {@link Element}
402      * and all parent elements.
403      * @param element the element to clear its cache
404      */
405     public void clearComputedStylesUpToRoot(final DomElement element) {
406         // nothing to do here, overwritten in HtmlPage
407     }
408 
409     /**
410      * @return whether or not this is currently printing
411      */
412     public boolean isPrinting() {
413         return printing_;
414     }
415 
416     /**
417      * @param printing the printing state to set
418      */
419     public void setPrinting(final boolean printing) {
420         printing_ = printing;
421         clearComputedStyles();
422     }
423 
424     /**
425      * Informs about the use of a domChangeListener.
426      */
427     public void domChangeListenerAdded() {
428         domChangeListenerInUse_ = true;
429     }
430 
431     /**
432      * @return true if at least one domChangeListener was registered.
433      */
434     public boolean isDomChangeListenerInUse() {
435         return domChangeListenerInUse_;
436     }
437 
438     /**
439      * Informs about the use of a characterDataChangeListener.
440      */
441     public void characterDataChangeListenerAdded() {
442         characterDataChangeListenerInUse_ = true;
443     }
444 
445     /**
446      * @return true if at least one characterDataChangeListener was registered.
447      */
448     public boolean isCharacterDataChangeListenerInUse() {
449         return characterDataChangeListenerInUse_;
450     }
451 }