View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html;
16  
17  import static org.htmlunit.BrowserVersionFeatures.EVENT_FOCUS_ON_LOAD;
18  import static org.htmlunit.html.DomElement.ATTRIBUTE_NOT_DEFINED;
19  
20  import java.io.File;
21  import java.io.IOException;
22  import java.io.ObjectInputStream;
23  import java.io.ObjectOutputStream;
24  import java.io.Serializable;
25  import java.net.MalformedURLException;
26  import java.net.URL;
27  import java.nio.charset.Charset;
28  import java.nio.charset.StandardCharsets;
29  import java.util.ArrayList;
30  import java.util.Arrays;
31  import java.util.Collection;
32  import java.util.Collections;
33  import java.util.Comparator;
34  import java.util.HashMap;
35  import java.util.HashSet;
36  import java.util.Iterator;
37  import java.util.LinkedHashSet;
38  import java.util.List;
39  import java.util.Locale;
40  import java.util.Map;
41  import java.util.Set;
42  import java.util.WeakHashMap;
43  import java.util.concurrent.ConcurrentHashMap;
44  
45  import org.apache.commons.lang3.StringUtils;
46  import org.apache.commons.lang3.Strings;
47  import org.apache.commons.logging.Log;
48  import org.apache.commons.logging.LogFactory;
49  import org.htmlunit.Cache;
50  import org.htmlunit.ElementNotFoundException;
51  import org.htmlunit.FailingHttpStatusCodeException;
52  import org.htmlunit.History;
53  import org.htmlunit.HttpHeader;
54  import org.htmlunit.OnbeforeunloadHandler;
55  import org.htmlunit.Page;
56  import org.htmlunit.ScriptResult;
57  import org.htmlunit.SgmlPage;
58  import org.htmlunit.TopLevelWindow;
59  import org.htmlunit.WebAssert;
60  import org.htmlunit.WebClient;
61  import org.htmlunit.WebClientOptions;
62  import org.htmlunit.WebRequest;
63  import org.htmlunit.WebResponse;
64  import org.htmlunit.WebWindow;
65  import org.htmlunit.corejs.javascript.Function;
66  import org.htmlunit.corejs.javascript.Script;
67  import org.htmlunit.corejs.javascript.Scriptable;
68  import org.htmlunit.css.ComputedCssStyleDeclaration;
69  import org.htmlunit.css.CssStyleSheet;
70  import org.htmlunit.html.impl.SimpleRange;
71  import org.htmlunit.html.parser.HTMLParserDOMBuilder;
72  import org.htmlunit.http.HttpStatus;
73  import org.htmlunit.javascript.AbstractJavaScriptEngine;
74  import org.htmlunit.javascript.HtmlUnitScriptable;
75  import org.htmlunit.javascript.JavaScriptEngine;
76  import org.htmlunit.javascript.PostponedAction;
77  import org.htmlunit.javascript.host.Window;
78  import org.htmlunit.javascript.host.event.BeforeUnloadEvent;
79  import org.htmlunit.javascript.host.event.Event;
80  import org.htmlunit.javascript.host.event.EventTarget;
81  import org.htmlunit.javascript.host.html.HTMLDocument;
82  import org.htmlunit.protocol.javascript.JavaScriptURLConnection;
83  import org.htmlunit.util.MimeType;
84  import org.htmlunit.util.SerializableLock;
85  import org.htmlunit.util.UrlUtils;
86  import org.w3c.dom.Attr;
87  import org.w3c.dom.Comment;
88  import org.w3c.dom.DOMConfiguration;
89  import org.w3c.dom.DOMException;
90  import org.w3c.dom.DOMImplementation;
91  import org.w3c.dom.Document;
92  import org.w3c.dom.DocumentType;
93  import org.w3c.dom.Element;
94  import org.w3c.dom.EntityReference;
95  import org.w3c.dom.ProcessingInstruction;
96  
97  /**
98   * A representation of an HTML page returned from a server.
99   * <p>
100  * This class provides different methods to access the page's content like
101  * {@link #getForms()}, {@link #getAnchors()}, {@link #getElementById(String)}, ... as well as the
102  * very powerful inherited methods {@link #getByXPath(String)} and {@link #getFirstByXPath(String)}
103  * for fine grained user specific access to child nodes.
104  * </p>
105  * <p>
106  * Child elements allowing user interaction provide methods for this purpose like {@link HtmlAnchor#click()},
107  * {@link HtmlInput#type(String)}, {@link HtmlOption#setSelected(boolean)}, ...
108  * </p>
109  * <p>
110  * HtmlPage instances should not be instantiated directly. They will be returned by {@link WebClient#getPage(String)}
111  * when the content type of the server's response is <code>text/html</code> (or one of its variations).<br>
112  * <br>
113  * <b>Example:</b><br>
114  * <br>
115  * <code>
116  * final HtmlPage page = webClient.{@link WebClient#getPage(String) getPage}("http://mywebsite/some/page.html");
117  * </code>
118  * </p>
119  *
120  * @author Mike Bowler
121  * @author Alex Nikiforoff
122  * @author Noboru Sinohara
123  * @author David K. Taylor
124  * @author Andreas Hangler
125  * @author Christian Sell
126  * @author Chris Erskine
127  * @author Marc Guillemot
128  * @author Ahmed Ashour
129  * @author Daniel Gredler
130  * @author Dmitri Zoubkov
131  * @author Sudhan Moghe
132  * @author Ethan Glasser-Camp
133  * @author Tom Anderson
134  * @author Ronald Brill
135  * @author Frank Danek
136  * @author Joerg Werner
137  * @author Atsushi Nakagawa
138  * @author Rural Hunter
139  * @author Ronny Shapiro
140  * @author Lai Quang Duong
141  * @author Sven Strickroth
142  */
143 @SuppressWarnings("PMD.TooManyFields")
144 public class HtmlPage extends SgmlPage {
145 
146     private static final Log LOG = LogFactory.getLog(HtmlPage.class);
147 
148     private static final Comparator<DomElement> DOCUMENT_POSITION_COMPERATOR = new DocumentPositionComparator();
149 
150     private HTMLParserDOMBuilder domBuilder_;
151     private transient Charset originalCharset_;
152     private final Object lock_ = new SerializableLock(); // used for synchronization
153 
154     private Map<String, MappedElementIndexEntry> idMap_ = new ConcurrentHashMap<>();
155     private Map<String, MappedElementIndexEntry> nameMap_ = new ConcurrentHashMap<>();
156 
157     private List<BaseFrameElement> frameElements_ = new ArrayList<>();
158     private int parserCount_;
159     private int snippetParserCount_;
160     private int inlineSnippetParserCount_;
161     private Collection<HtmlAttributeChangeListener> attributeListeners_;
162     private List<PostponedAction> afterLoadActions_ = Collections.synchronizedList(new ArrayList<>());
163     private boolean cleaning_;
164     private HtmlBase base_;
165     private URL baseUrl_;
166     private List<AutoCloseable> autoCloseableList_;
167     private ElementFromPointHandler elementFromPointHandler_;
168     private DomElement elementWithFocus_;
169     private List<SimpleRange> selectionRanges_ = new ArrayList<>(3);
170 
171     private transient ComputedStylesCache computedStylesCache_;
172 
173     private static final HashSet<String> TABBABLE_TAGS =
174             new HashSet<>(Arrays.asList(HtmlAnchor.TAG_NAME, HtmlArea.TAG_NAME,
175                     HtmlButton.TAG_NAME, HtmlInput.TAG_NAME, HtmlObject.TAG_NAME,
176                     HtmlSelect.TAG_NAME, HtmlTextArea.TAG_NAME));
177     private static final HashSet<String> ACCEPTABLE_TAG_NAMES =
178             new HashSet<>(Arrays.asList(HtmlAnchor.TAG_NAME, HtmlArea.TAG_NAME,
179                     HtmlButton.TAG_NAME, HtmlInput.TAG_NAME, HtmlLabel.TAG_NAME,
180                     HtmlLegend.TAG_NAME, HtmlTextArea.TAG_NAME));
181 
182     /** Definition of special cases for the smart DomHtmlAttributeChangeListenerImpl */
183     private static final Set<String> ATTRIBUTES_AFFECTING_PARENT = new HashSet<>(Arrays.asList(
184             "style",
185             "class",
186             "height",
187             "width"));
188 
189     static class DocumentPositionComparator implements Comparator<DomElement>, Serializable {
190         @Override
191         public int compare(final DomElement elt1, final DomElement elt2) {
192             final short relation = elt1.compareDocumentPosition(elt2);
193             if (relation == 0) {
194                 return 0; // same node
195             }
196             if ((relation & DOCUMENT_POSITION_CONTAINS) != 0 || (relation & DOCUMENT_POSITION_PRECEDING) != 0) {
197                 return 1;
198             }
199 
200             return -1;
201         }
202     }
203 
/**
 * Constructs a page for the given response inside the given window.
 * Client code usually does not call this directly but obtains its pages from
 * {@link WebClient#getPage(String)}.
 *
 * @param webResponse the web response this page is built from
 * @param webWindow the window this page gets loaded into
 */
public HtmlPage(final WebResponse webResponse, final WebWindow webWindow) {
    super(webResponse, webWindow);
}
214 
215     /**
216      * {@inheritDoc}
217      */
218     @Override
219     public HtmlPage getPage() {
220         return this;
221     }
222 
223     /**
224      * {@inheritDoc}
225      */
226     @Override
227     public boolean hasCaseSensitiveTagNames() {
228         return false;
229     }
230 
231     /**
232      * Initialize this page.
233      * @throws IOException if an IO problem occurs
234      * @throws FailingHttpStatusCodeException if the server returns a failing status code AND the property
235      *         {@link org.htmlunit.WebClientOptions#setThrowExceptionOnFailingStatusCode(boolean)} is set
236      *         to true.
237      */
238     @Override
239     public void initialize() throws IOException, FailingHttpStatusCodeException {
240         final WebWindow enclosingWindow = getEnclosingWindow();
241         final boolean isAboutBlank = getUrl() == UrlUtils.URL_ABOUT_BLANK;
242         if (isAboutBlank) {
243             // a frame contains first a faked "about:blank" before its real content specified by src gets loaded
244             if (enclosingWindow instanceof FrameWindow
245                     && !((FrameWindow) enclosingWindow).getFrameElement().isContentLoaded()) {
246                 return;
247             }
248 
249             // save the URL that should be used to resolve relative URLs in this page
250             if (enclosingWindow instanceof TopLevelWindow) {
251                 final TopLevelWindow topWindow = (TopLevelWindow) enclosingWindow;
252                 final WebWindow openerWindow = topWindow.getOpener();
253                 if (openerWindow != null && openerWindow.getEnclosedPage() != null) {
254                     baseUrl_ = openerWindow.getEnclosedPage().getWebResponse().getWebRequest().getUrl();
255                 }
256             }
257         }
258 
259         if (!isAboutBlank) {
260             setReadyState(READY_STATE_INTERACTIVE);
261             getDocumentElement().setReadyState(READY_STATE_INTERACTIVE);
262             executeEventHandlersIfNeeded(Event.TYPE_READY_STATE_CHANGE);
263         }
264 
265         executeDeferredScriptsIfNeeded();
266 
267         executeEventHandlersIfNeeded(Event.TYPE_DOM_DOCUMENT_LOADED);
268 
269         loadFrames();
270 
271         // don't set the ready state if we really load the blank page into the window
272         // see Node.initInlineFrameIfNeeded()
273         if (!isAboutBlank) {
274             setReadyState(READY_STATE_COMPLETE);
275             getDocumentElement().setReadyState(READY_STATE_COMPLETE);
276             executeEventHandlersIfNeeded(Event.TYPE_READY_STATE_CHANGE);
277         }
278 
279         // frame initialization has a different order
280         boolean isFrameWindow = enclosingWindow instanceof FrameWindow;
281         boolean isFirstPageInFrameWindow = false;
282         if (isFrameWindow) {
283             isFrameWindow = ((FrameWindow) enclosingWindow).getFrameElement() instanceof HtmlFrame;
284 
285             final History hist = enclosingWindow.getHistory();
286             if (hist.getLength() > 0 && UrlUtils.URL_ABOUT_BLANK == hist.getUrl(0)) {
287                 isFirstPageInFrameWindow = hist.getLength() <= 2;
288             }
289             else {
290                 isFirstPageInFrameWindow = enclosingWindow.getHistory().getLength() < 2;
291             }
292         }
293 
294         if (isFrameWindow && !isFirstPageInFrameWindow) {
295             executeEventHandlersIfNeeded(Event.TYPE_LOAD);
296         }
297 
298         for (final BaseFrameElement frameElement : new ArrayList<>(frameElements_)) {
299             if (frameElement instanceof HtmlFrame) {
300                 final Page page = frameElement.getEnclosedWindow().getEnclosedPage();
301                 if (page != null && page.isHtmlPage()) {
302                     ((HtmlPage) page).executeEventHandlersIfNeeded(Event.TYPE_LOAD);
303                 }
304             }
305         }
306 
307         if (!isFrameWindow) {
308             executeEventHandlersIfNeeded(Event.TYPE_LOAD);
309 
310             if (!isAboutBlank && enclosingWindow.getWebClient().isJavaScriptEnabled()
311                     && hasFeature(EVENT_FOCUS_ON_LOAD)) {
312                 final HtmlElement body = getBody();
313                 if (body != null) {
314                     final Event event = new Event((Window) enclosingWindow.getScriptableObject(), Event.TYPE_FOCUS);
315                     body.fireEvent(event);
316                 }
317             }
318         }
319 
320         try {
321             while (!afterLoadActions_.isEmpty()) {
322                 final PostponedAction action = afterLoadActions_.remove(0);
323                 action.execute();
324             }
325         }
326         catch (final IOException e) {
327             throw e;
328         }
329         catch (final Exception e) {
330             throw new RuntimeException(e);
331         }
332         executeRefreshIfNeeded();
333     }
334 
335     /**
336      * Adds an action that should be executed once the page has been loaded.
337      * @param action the action
338      */
339     void addAfterLoadAction(final PostponedAction action) {
340         afterLoadActions_.add(action);
341     }
342 
343     /**
344      * Clean up this page.
345      */
346     @Override
347     public void cleanUp() {
348         //To avoid endless recursion caused by window.close() in onUnload
349         if (cleaning_) {
350             return;
351         }
352 
353         cleaning_ = true;
354         try {
355             super.cleanUp();
356             executeEventHandlersIfNeeded(Event.TYPE_UNLOAD);
357             deregisterFramesIfNeeded();
358         }
359         finally {
360             cleaning_ = false;
361 
362             if (autoCloseableList_ != null) {
363                 for (final AutoCloseable closeable : new ArrayList<>(autoCloseableList_)) {
364                     try {
365                         closeable.close();
366                     }
367                     catch (final Exception e) {
368                         LOG.error("Closing the autoclosable " + closeable + " failed", e);
369                     }
370                 }
371             }
372         }
373     }
374 
375     /**
376      * {@inheritDoc}
377      */
378     @Override
379     public HtmlElement getDocumentElement() {
380         return (HtmlElement) super.getDocumentElement();
381     }
382 
383     /**
384      * @return the <code>body</code> element, or {@code null} if it does not yet exist
385      */
386     public HtmlBody getBody() {
387         final DomElement doc = getDocumentElement();
388         if (doc != null) {
389             for (final DomNode node : doc.getChildren()) {
390                 if (node instanceof HtmlBody) {
391                     return (HtmlBody) node;
392                 }
393             }
394         }
395         return null;
396     }
397 
398     /**
399      * Returns the head element.
400      * @return the head element
401      */
402     public HtmlElement getHead() {
403         final DomElement doc = getDocumentElement();
404         if (doc != null) {
405             for (final DomNode node : doc.getChildren()) {
406                 if (node instanceof HtmlHead) {
407                     return (HtmlElement) node;
408                 }
409             }
410         }
411         return null;
412     }
413 
414     /**
415      * {@inheritDoc}
416      */
417     @Override
418     public Document getOwnerDocument() {
419         return null;
420     }
421 
422     /**
423      * {@inheritDoc}
424      * Not yet implemented.
425      */
426     @Override
427     public org.w3c.dom.Node importNode(final org.w3c.dom.Node importedNode, final boolean deep) {
428         throw new UnsupportedOperationException("HtmlPage.importNode is not yet implemented.");
429     }
430 
431     /**
432      * {@inheritDoc}
433      * Not yet implemented.
434      */
435     @Override
436     public String getInputEncoding() {
437         throw new UnsupportedOperationException("HtmlPage.getInputEncoding is not yet implemented.");
438     }
439 
440     /**
441      * {@inheritDoc}
442      */
443     @Override
444     public String getXmlEncoding() {
445         return null;
446     }
447 
448     /**
449      * {@inheritDoc}
450      */
451     @Override
452     public boolean getXmlStandalone() {
453         return false;
454     }
455 
456     /**
457      * {@inheritDoc}
458      * Not yet implemented.
459      */
460     @Override
461     public void setXmlStandalone(final boolean xmlStandalone) throws DOMException {
462         throw new UnsupportedOperationException("HtmlPage.setXmlStandalone is not yet implemented.");
463     }
464 
465     /**
466      * {@inheritDoc}
467      */
468     @Override
469     public String getXmlVersion() {
470         return null;
471     }
472 
473     /**
474      * {@inheritDoc}
475      * Not yet implemented.
476      */
477     @Override
478     public void setXmlVersion(final String xmlVersion) throws DOMException {
479         throw new UnsupportedOperationException("HtmlPage.setXmlVersion is not yet implemented.");
480     }
481 
482     /**
483      * {@inheritDoc}
484      * Not yet implemented.
485      */
486     @Override
487     public boolean getStrictErrorChecking() {
488         throw new UnsupportedOperationException("HtmlPage.getStrictErrorChecking is not yet implemented.");
489     }
490 
491     /**
492      * {@inheritDoc}
493      * Not yet implemented.
494      */
495     @Override
496     public void setStrictErrorChecking(final boolean strictErrorChecking) {
497         throw new UnsupportedOperationException("HtmlPage.setStrictErrorChecking is not yet implemented.");
498     }
499 
500     /**
501      * {@inheritDoc}
502      * Not yet implemented.
503      */
504     @Override
505     public String getDocumentURI() {
506         throw new UnsupportedOperationException("HtmlPage.getDocumentURI is not yet implemented.");
507     }
508 
509     /**
510      * {@inheritDoc}
511      * Not yet implemented.
512      */
513     @Override
514     public void setDocumentURI(final String documentURI) {
515         throw new UnsupportedOperationException("HtmlPage.setDocumentURI is not yet implemented.");
516     }
517 
518     /**
519      * {@inheritDoc}
520      * Not yet implemented.
521      */
522     @Override
523     public org.w3c.dom.Node adoptNode(final org.w3c.dom.Node source) throws DOMException {
524         throw new UnsupportedOperationException("HtmlPage.adoptNode is not yet implemented.");
525     }
526 
527     /**
528      * {@inheritDoc}
529      * Not yet implemented.
530      */
531     @Override
532     public DOMConfiguration getDomConfig() {
533         throw new UnsupportedOperationException("HtmlPage.getDomConfig is not yet implemented.");
534     }
535 
536     /**
537      * {@inheritDoc}
538      * Not yet implemented.
539      */
540     @Override
541     public org.w3c.dom.Node renameNode(final org.w3c.dom.Node newNode, final String namespaceURI,
542         final String qualifiedName) throws DOMException {
543         throw new UnsupportedOperationException("HtmlPage.renameNode is not yet implemented.");
544     }
545 
546     /**
547      * {@inheritDoc}
548      */
549     @Override
550     public Charset getCharset() {
551         if (originalCharset_ == null) {
552             originalCharset_ = getWebResponse().getContentCharset();
553         }
554         return originalCharset_;
555     }
556 
557     /**
558      * {@inheritDoc}
559      */
560     @Override
561     public String getContentType() {
562         return getWebResponse().getContentType();
563     }
564 
565     /**
566      * {@inheritDoc}
567      * Not yet implemented.
568      */
569     @Override
570     public DOMImplementation getImplementation() {
571         throw new UnsupportedOperationException("HtmlPage.getImplementation is not yet implemented.");
572     }
573 
574     /**
575      * {@inheritDoc}
576      * @param tagName the tag name, preferably in lowercase
577      */
578     @Override
579     public DomElement createElement(String tagName) {
580         if (tagName.indexOf(':') == -1) {
581             tagName = org.htmlunit.util.StringUtils.toRootLowerCase(tagName);
582         }
583         return getWebClient().getPageCreator().getHtmlParser().getFactory(tagName)
584                     .createElementNS(this, null, tagName, null);
585     }
586 
587     /**
588      * {@inheritDoc}
589      */
590     @Override
591     public DomElement createElementNS(final String namespaceURI, final String qualifiedName) {
592         return getWebClient().getPageCreator().getHtmlParser()
593                 .getElementFactory(this, namespaceURI, qualifiedName, false, true)
594                 .createElementNS(this, namespaceURI, qualifiedName, null);
595     }
596 
597     /**
598      * {@inheritDoc}
599      * Not yet implemented.
600      */
601     @Override
602     public Attr createAttributeNS(final String namespaceURI, final String qualifiedName) {
603         throw new UnsupportedOperationException("HtmlPage.createAttributeNS is not yet implemented.");
604     }
605 
606     /**
607      * {@inheritDoc}
608      * Not yet implemented.
609      */
610     @Override
611     public EntityReference createEntityReference(final String id) {
612         throw new UnsupportedOperationException("HtmlPage.createEntityReference is not yet implemented.");
613     }
614 
615     /**
616      * {@inheritDoc}
617      * Not yet implemented.
618      */
619     @Override
620     public ProcessingInstruction createProcessingInstruction(final String namespaceURI, final String qualifiedName) {
621         throw new UnsupportedOperationException("HtmlPage.createProcessingInstruction is not yet implemented.");
622     }
623 
624     /**
625      * {@inheritDoc}
626      */
627     @Override
628     public DomElement getElementById(final String elementId) {
629         if (elementId != null) {
630             final MappedElementIndexEntry elements = idMap_.get(elementId);
631             if (elements != null) {
632                 return elements.first();
633             }
634         }
635         return null;
636     }
637 
638     /**
639      * Returns the {@link HtmlAnchor} with the specified name.
640      *
641      * @param name the name to search by
642      * @return the {@link HtmlAnchor} with the specified name
643      * @throws ElementNotFoundException if the anchor could not be found
644      */
645     public HtmlAnchor getAnchorByName(final String name) throws ElementNotFoundException {
646         return getDocumentElement().getOneHtmlElementByAttribute("a", DomElement.NAME_ATTRIBUTE, name);
647     }
648 
649     /**
650      * Returns the {@link HtmlAnchor} with the specified href.
651      *
652      * @param href the string to search by
653      * @return the HtmlAnchor
654      * @throws ElementNotFoundException if the anchor could not be found
655      */
656     public HtmlAnchor getAnchorByHref(final String href) throws ElementNotFoundException {
657         return getDocumentElement().getOneHtmlElementByAttribute("a", "href", href);
658     }
659 
660     /**
661      * Returns a list of all anchors contained in this page.
662      * @return the list of {@link HtmlAnchor} in this page
663      */
664     public List<HtmlAnchor> getAnchors() {
665         return getDocumentElement().getElementsByTagNameImpl("a");
666     }
667 
668     /**
669      * Returns the first anchor with the specified text.
670      * @param text the text to search for
671      * @return the first anchor that was found
672      * @throws ElementNotFoundException if no anchors are found with the specified text
673      */
674     public HtmlAnchor getAnchorByText(final String text) throws ElementNotFoundException {
675         WebAssert.notNull("text", text);
676 
677         for (final HtmlAnchor anchor : getAnchors()) {
678             if (text.equals(anchor.asNormalizedText())) {
679                 return anchor;
680             }
681         }
682         throw new ElementNotFoundException("a", "<text>", text);
683     }
684 
685     /**
686      * Returns the first form that matches the specified name.
687      * @param name the name to search for
688      * @return the first form
689      * @exception ElementNotFoundException If no forms match the specified result.
690      */
691     public HtmlForm getFormByName(final String name) throws ElementNotFoundException {
692         final List<HtmlForm> forms = getDocumentElement()
693                 .getElementsByAttribute("form", DomElement.NAME_ATTRIBUTE, name);
694         if (forms.isEmpty()) {
695             throw new ElementNotFoundException("form", DomElement.NAME_ATTRIBUTE, name);
696         }
697         return forms.get(0);
698     }
699 
700     /**
701      * Returns a list of all the forms in this page.
702      * @return all the forms in this page
703      */
704     public List<HtmlForm> getForms() {
705         return getDocumentElement().getElementsByTagNameImpl("form");
706     }
707 
708     /**
709      * Given a relative URL (ie <code>/foo</code>), returns a fully-qualified URL based on
710      * the URL that was used to load this page.
711      *
712      * @param relativeUrl the relative URL
713      * @return the fully-qualified URL for the specified relative URL
714      * @exception MalformedURLException if an error occurred when creating a URL object
715      */
716     public URL getFullyQualifiedUrl(String relativeUrl) throws MalformedURLException {
717         // to handle http: and http:/ in FF (Bug #474)
718         boolean incorrectnessNotified = false;
719         while (relativeUrl.startsWith("http:") && !relativeUrl.startsWith("http://")) {
720             if (!incorrectnessNotified) {
721                 notifyIncorrectness("Incorrect URL \"" + relativeUrl + "\" has been corrected");
722                 incorrectnessNotified = true;
723             }
724             relativeUrl = "http:/" + relativeUrl.substring(5);
725         }
726 
727         return WebClient.expandUrl(getBaseURL(), relativeUrl);
728     }
729 
730     /**
731      * Given a target attribute value, resolve the target using a base target for the page.
732      *
733      * @param elementTarget the target specified as an attribute of the element
734      * @return the resolved target to use for the element
735      */
736     public String getResolvedTarget(final String elementTarget) {
737         final String resolvedTarget;
738         if (base_ == null) {
739             resolvedTarget = elementTarget;
740         }
741         else if (elementTarget != null && !elementTarget.isEmpty()) {
742             resolvedTarget = elementTarget;
743         }
744         else {
745             resolvedTarget = base_.getTargetAttribute();
746         }
747         return resolvedTarget;
748     }
749 
750     /**
751      * Returns a list of ids (strings) that correspond to the tabbable elements
752      * in this page. Return them in the same order specified in {@link #getTabbableElements}
753      *
754      * @return the list of id's
755      */
756     public List<String> getTabbableElementIds() {
757         final List<String> list = new ArrayList<>();
758 
759         for (final HtmlElement element : getTabbableElements()) {
760             list.add(element.getId());
761         }
762 
763         return Collections.unmodifiableList(list);
764     }
765 
766     /**
767      * Returns a list of all elements that are tabbable in the order that will
768      * be used for tabbing.<p>
769      *
770      * The rules for determining tab order are as follows:
771      * <ol>
772      *   <li>Those elements that support the tabindex attribute and assign a
773      *   positive value to it are navigated first. Navigation proceeds from the
774      *   element with the lowest tabindex value to the element with the highest
775      *   value. Values need not be sequential nor must they begin with any
776      *   particular value. Elements that have identical tabindex values should
777      *   be navigated in the order they appear in the character stream.
778      *   <li>Those elements that do not support the tabindex attribute or
779      *   support it and assign it a value of "0" are navigated next. These
780      *   elements are navigated in the order they appear in the character
781      *   stream.
782      *   <li>Elements that are disabled do not participate in the tabbing
783      *   order.
784      * </ol>
785      * Additionally, the value of tabindex must be within 0 and 32767. Any
786      * values outside this range will be ignored.<p>
787      *
788      * The following elements support the <code>tabindex</code> attribute:
789      * A, AREA, BUTTON, INPUT, OBJECT, SELECT, and TEXTAREA.
790      *
791      * @return all the tabbable elements in proper tab order
792      */
793     public List<HtmlElement> getTabbableElements() {
794         final List<HtmlElement> tabbableElements = new ArrayList<>();
795         for (final HtmlElement element : getHtmlElementDescendants()) {
796             final String tagName = element.getTagName();
797             if (TABBABLE_TAGS.contains(tagName)) {
798                 final boolean disabled = element.isDisabledElementAndDisabled();
799                 if (!disabled && !HtmlElement.TAB_INDEX_OUT_OF_BOUNDS.equals(element.getTabIndex())) {
800                     tabbableElements.add(element);
801                 }
802             }
803         }
804         tabbableElements.sort(createTabOrderComparator());
805         return Collections.unmodifiableList(tabbableElements);
806     }
807 
808     private static Comparator<HtmlElement> createTabOrderComparator() {
809         return (element1, element2) -> {
810             final Short i1 = element1.getTabIndex();
811             final Short i2 = element2.getTabIndex();
812 
813             final short index1;
814             if (i1 == null) {
815                 index1 = -1;
816             }
817             else {
818                 index1 = i1.shortValue();
819             }
820 
821             final short index2;
822             if (i2 == null) {
823                 index2 = -1;
824             }
825             else {
826                 index2 = i2.shortValue();
827             }
828 
829             final int result;
830             if (index1 > 0 && index2 > 0) {
831                 result = index1 - index2;
832             }
833             else if (index1 > 0) {
834                 result = -1;
835             }
836             else if (index2 > 0) {
837                 result = 1;
838             }
839             else if (index1 == index2) {
840                 result = 0;
841             }
842             else {
843                 result = index2 - index1;
844             }
845 
846             return result;
847         };
848     }
849 
850     /**
851      * Returns the HTML element that is assigned to the specified access key. An
852      * access key (aka mnemonic key) is used for keyboard navigation of the
853      * page.<p>
854      *
855      * Only the following HTML elements may have <code>accesskey</code>s defined: A, AREA,
856      * BUTTON, INPUT, LABEL, LEGEND, and TEXTAREA.
857      *
858      * @param accessKey the key to look for
859      * @return the HTML element that is assigned to the specified key or null
860      *      if no elements can be found that match the specified key.
861      */
862     public HtmlElement getHtmlElementByAccessKey(final char accessKey) {
863         final List<HtmlElement> elements = getHtmlElementsByAccessKey(accessKey);
864         if (elements.isEmpty()) {
865             return null;
866         }
867         return elements.get(0);
868     }
869 
870     /**
871      * Returns all the HTML elements that are assigned to the specified access key. An
872      * access key (aka mnemonic key) is used for keyboard navigation of the
873      * page.<p>
874      *
875      * The HTML specification seems to indicate that one accesskey cannot be used
876      * for multiple elements however Internet Explorer does seem to support this.
877      * It's worth noting that Firefox does not support multiple elements with one
878      * access key so you are making your HTML browser specific if you rely on this
879      * feature.<p>
880      *
881      * Only the following HTML elements may have <code>accesskey</code>s defined: A, AREA,
882      * BUTTON, INPUT, LABEL, LEGEND, and TEXTAREA.
883      *
884      * @param accessKey the key to look for
885      * @return the elements that are assigned to the specified accesskey
886      */
887     public List<HtmlElement> getHtmlElementsByAccessKey(final char accessKey) {
888         final List<HtmlElement> elements = new ArrayList<>();
889 
890         final String searchString = Character.toString(accessKey).toLowerCase(Locale.ROOT);
891         for (final HtmlElement element : getHtmlElementDescendants()) {
892             if (ACCEPTABLE_TAG_NAMES.contains(element.getTagName())) {
893                 final String accessKeyAttribute = element.getAttributeDirect("accesskey");
894                 if (searchString.equalsIgnoreCase(accessKeyAttribute)) {
895                     elements.add(element);
896                 }
897             }
898         }
899 
900         return elements;
901     }
902 
903     /**
904      * <p>Executes the specified JavaScript code within the page. The usage would be similar to what can
905      * be achieved to execute JavaScript in the current page by entering "javascript:...some JS code..."
906      * in the URL field of a native browser.</p>
907      * <p><b>Note:</b> the provided code won't be executed if JavaScript has been disabled on the WebClient
908      * (see {@link org.htmlunit.WebClient#isJavaScriptEnabled()}.</p>
909      * @param sourceCode the JavaScript code to execute
910      * @return a ScriptResult which will contain both the current page (which may be different than
911      *         the previous page) and a JavaScript result object
912      */
913     public ScriptResult executeJavaScript(final String sourceCode) {
914         return executeJavaScript(sourceCode, "injected script", 1);
915     }
916 
917     /**
918      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
919      * <p>
920      * Execute the specified JavaScript if a JavaScript engine was successfully
921      * instantiated. If this JavaScript causes the current page to be reloaded
922      * (through location="" or form.submit()) then return the new page. Otherwise
923      * return the current page.
924      * </p>
925      * <p><b>Please note:</b> Although this method is public, it is not intended for
926      * general execution of JavaScript. Users of HtmlUnit should interact with the pages
927      * as a user would by clicking on buttons or links and having the JavaScript event
928      * handlers execute as needed.
929      * </p>
930      *
931      * @param sourceCode the JavaScript code to execute
932      * @param sourceName the name for this chunk of code (will be displayed in error messages)
933      * @param startLine the line at which the script source starts
934      * @return a ScriptResult which will contain both the current page (which may be different than
935      *         the previous page and a JavaScript result object.
936      */
937     public ScriptResult executeJavaScript(String sourceCode, final String sourceName, final int startLine) {
938         if (!getWebClient().isJavaScriptEnabled()) {
939             return new ScriptResult(JavaScriptEngine.UNDEFINED);
940         }
941 
942         if (Strings.CI.startsWith(sourceCode, JavaScriptURLConnection.JAVASCRIPT_PREFIX)) {
943             sourceCode = sourceCode.substring(JavaScriptURLConnection.JAVASCRIPT_PREFIX.length()).trim();
944             if (sourceCode.startsWith("return ")) {
945                 sourceCode = sourceCode.substring("return ".length());
946             }
947         }
948 
949         final Object result = getWebClient().getJavaScriptEngine()
950                 .execute(this, getEnclosingWindow().getScriptableObject(), sourceCode, sourceName, startLine);
951         return new ScriptResult(result);
952     }
953 
    /** Various possible external JavaScript file loading results. */
    enum JavaScriptLoadResult {
        /** The load was aborted and nothing was done. */
        NOOP,
        /** The request for the external JavaScript file was answered with status 204 (No Content). */
        NO_CONTENT,
        /** The external JavaScript file was downloaded and compiled successfully. */
        SUCCESS,
        /** The external JavaScript file was not downloaded successfully. */
        DOWNLOAD_ERROR,
        /** The external JavaScript file was downloaded but was not compiled successfully. */
        COMPILATION_ERROR
    }
967 
968     /**
969      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
970      *
971      * @param srcAttribute the source attribute from the script tag
972      * @param scriptCharset the charset from the script tag
973      * @return the result of loading the specified external JavaScript file
974      * @throws FailingHttpStatusCodeException if the request's status code indicates a request
975      *         failure and the {@link WebClient} was configured to throw exceptions on failing
976      *         HTTP status codes
977      */
978     JavaScriptLoadResult loadExternalJavaScriptFile(final String srcAttribute, final Charset scriptCharset)
979         throws FailingHttpStatusCodeException {
980 
981         final WebClient client = getWebClient();
982         if (StringUtils.isBlank(srcAttribute) || !client.isJavaScriptEnabled()) {
983             return JavaScriptLoadResult.NOOP;
984         }
985 
986         final URL scriptURL;
987         try {
988             scriptURL = getFullyQualifiedUrl(srcAttribute);
989             final String protocol = scriptURL.getProtocol();
990             if ("javascript".equals(protocol)) {
991                 if (LOG.isInfoEnabled()) {
992                     LOG.info("Ignoring script src [" + srcAttribute + "]");
993                 }
994                 return JavaScriptLoadResult.NOOP;
995             }
996             if (!"http".equals(protocol) && !"https".equals(protocol)
997                     && !"data".equals(protocol) && !"file".equals(protocol)) {
998                 client.getJavaScriptErrorListener().malformedScriptURL(this, srcAttribute,
999                         new MalformedURLException("unknown protocol: '" + protocol + "'"));
1000                 return JavaScriptLoadResult.NOOP;
1001             }
1002         }
1003         catch (final MalformedURLException e) {
1004             client.getJavaScriptErrorListener().malformedScriptURL(this, srcAttribute, e);
1005             return JavaScriptLoadResult.NOOP;
1006         }
1007 
1008         final Object script;
1009         try {
1010             script = loadJavaScriptFromUrl(scriptURL, scriptCharset);
1011         }
1012         catch (final IOException e) {
1013             client.getJavaScriptErrorListener().loadScriptError(this, scriptURL, e);
1014             return JavaScriptLoadResult.DOWNLOAD_ERROR;
1015         }
1016         catch (final FailingHttpStatusCodeException e) {
1017             if (e.getStatusCode() == HttpStatus.NO_CONTENT_204) {
1018                 return JavaScriptLoadResult.NO_CONTENT;
1019             }
1020             client.getJavaScriptErrorListener().loadScriptError(this, scriptURL, e);
1021             throw e;
1022         }
1023 
1024         if (script == null) {
1025             return JavaScriptLoadResult.COMPILATION_ERROR;
1026         }
1027 
1028         @SuppressWarnings("unchecked")
1029         final AbstractJavaScriptEngine<Object> engine = (AbstractJavaScriptEngine<Object>) client.getJavaScriptEngine();
1030         engine.execute(this, getEnclosingWindow().getScriptableObject(), script);
1031         return JavaScriptLoadResult.SUCCESS;
1032     }
1033 
    /**
     * Loads JavaScript from the specified URL and compiles it. An already compiled script
     * found in the client's cache is returned directly. This method may return {@code null} if
     * there is a problem loading the code from the specified URL.
     *
     * @param url the URL of the script
     * @param scriptCharset the charset from the script tag; if {@code null}, UTF-8 is used
     *        as default charset for the response content
     * @return the compiled script, or {@code null} if the response has no content
     *         or we ran into a compile error
     * @throws IOException if there is a problem downloading the JavaScript file
     * @throws FailingHttpStatusCodeException if the request's status code indicates a request
     *         failure and the {@link WebClient} was configured to throw exceptions on failing
     *         HTTP status codes; also thrown if the response status is 204 (No Content)
     */
    private Object loadJavaScriptFromUrl(final URL url, final Charset scriptCharset) throws IOException,
        FailingHttpStatusCodeException {

        final WebRequest referringRequest = getWebResponse().getWebRequest();

        final WebClient client = getWebClient();
        final WebRequest request = new WebRequest(url);
        // copy all headers from the referring request
        request.setAdditionalHeaders(new HashMap<>(referringRequest.getAdditionalHeaders()));

        // at least overwrite these headers
        request.setAdditionalHeader(HttpHeader.ACCEPT, client.getBrowserVersion().getScriptAcceptHeader());
        request.setAdditionalHeader(HttpHeader.SEC_FETCH_SITE, "same-origin");
        request.setAdditionalHeader(HttpHeader.SEC_FETCH_MODE, "no-cors");
        request.setAdditionalHeader(HttpHeader.SEC_FETCH_DEST, "script");

        request.setRefererHeader(referringRequest.getUrl());
        request.setCharset(scriptCharset);

        // use info from script tag or fall back to utf-8
        // https://www.rfc-editor.org/rfc/rfc9239#section-4.2
        if (scriptCharset != null) {
            request.setDefaultResponseContentCharset(scriptCharset);
        }
        else {
            request.setDefaultResponseContentCharset(StandardCharsets.UTF_8);
        }

        // our cache is a bit strange;
        // loadWebResponse checks the cache for the web response
        // AND also fixes the request url for the following cache lookups
        final WebResponse response = client.loadWebResponse(request);

        // now we can look into the cache with the fixed request for
        // a cached script
        final Cache cache = client.getCache();
        final Object cachedScript = cache.getCachedObject(request);
        if (cachedScript instanceof Script) {
            return cachedScript;
        }

        client.printContentIfNecessary(response);
        client.throwFailingHttpStatusCodeExceptionIfNecessary(response);

        // a 204 always results in an exception; the caller maps this to NO_CONTENT
        final int statusCode = response.getStatusCode();
        if (statusCode == HttpStatus.NO_CONTENT_204) {
            throw new FailingHttpStatusCodeException(response);
        }

        if (!response.isSuccess()) {
            throw new IOException("Unable to download JavaScript from '" + url + "' (status " + statusCode + ").");
        }

        // an unexpected content type is only reported to the incorrectness listener;
        // the script is processed nevertheless
        final String contentType = response.getContentType();
        if (contentType != null) {
            if (MimeType.isObsoleteJavascriptMimeType(contentType)) {
                getWebClient().getIncorrectnessListener().notify(
                        "Obsolete content type encountered: '" + contentType + "' "
                                + "for remotely loaded JavaScript element at '" + url + "'.", this);
            }
            else if (!MimeType.isJavascriptMimeType(contentType)) {
                getWebClient().getIncorrectnessListener().notify(
                        "Expect content type of '" + MimeType.TEXT_JAVASCRIPT + "' "
                                + "for remotely loaded JavaScript element at '" + url + "', "
                                + "but got '" + contentType + "'.", this);
            }
        }

        final Charset scriptEncoding = response.getContentCharset();
        final String scriptCode = response.getContentAsString(scriptEncoding);
        if (null != scriptCode) {
            final AbstractJavaScriptEngine<?> javaScriptEngine = client.getJavaScriptEngine();
            final Scriptable scope = getEnclosingWindow().getScriptableObject();
            final Object script = javaScriptEngine.compile(this, scope, scriptCode, url.toExternalForm(), 1);
            if (script != null && cache.cacheIfPossible(request, response, script)) {
                // no cleanup if the response is stored inside the cache
                return script;
            }

            // not cached (or not compiled), the response is no longer needed
            response.cleanUp();
            return script;
        }

        // no content available, nothing to compile
        response.cleanUp();
        return null;
    }
1132 
1133     /**
1134      * Returns the title of this page or an empty string if the title wasn't specified.
1135      *
1136      * @return the title of this page or an empty string if the title wasn't specified
1137      */
1138     public String getTitleText() {
1139         final HtmlTitle titleElement = getTitleElement();
1140         if (titleElement != null) {
1141             return titleElement.asNormalizedText();
1142         }
1143         return "";
1144     }
1145 
1146     /**
1147      * Sets the text for the title of this page. If there is not a title element
1148      * on this page, then one has to be generated.
1149      * @param message the new text
1150      */
1151     public void setTitleText(final String message) {
1152         HtmlTitle titleElement = getTitleElement();
1153         if (titleElement == null) {
1154             LOG.debug("No title element, creating one");
1155             final HtmlHead head = (HtmlHead) getFirstChildElement(getDocumentElement(), HtmlHead.class);
1156             if (head == null) {
1157                 // perhaps should we create head too?
1158                 throw new IllegalStateException("Headelement was not defined for this page");
1159             }
1160             final Map<String, DomAttr> emptyMap = Collections.emptyMap();
1161             titleElement = new HtmlTitle(HtmlTitle.TAG_NAME, this, emptyMap);
1162             if (head.getFirstChild() != null) {
1163                 head.getFirstChild().insertBefore(titleElement);
1164             }
1165             else {
1166                 head.appendChild(titleElement);
1167             }
1168         }
1169 
1170         titleElement.setNodeValue(message);
1171     }
1172 
1173     /**
1174      * Gets the first child of startElement that is an instance of the given class.
1175      * @param startElement the parent element
1176      * @param clazz the class to search for
1177      * @return {@code null} if no child found
1178      */
1179     private static DomElement getFirstChildElement(final DomElement startElement, final Class<?> clazz) {
1180         if (startElement == null) {
1181             return null;
1182         }
1183         for (final DomElement element : startElement.getChildElements()) {
1184             if (clazz.isInstance(element)) {
1185                 return element;
1186             }
1187         }
1188 
1189         return null;
1190     }
1191 
1192     /**
1193      * Gets the first child of startElement or it's children that is an instance of the given class.
1194      * @param startElement the parent element
1195      * @param clazz the class to search for
1196      * @return {@code null} if no child found
1197      */
1198     private DomElement getFirstChildElementRecursive(final DomElement startElement, final Class<?> clazz) {
1199         if (startElement == null) {
1200             return null;
1201         }
1202         for (final DomElement element : startElement.getChildElements()) {
1203             if (clazz.isInstance(element)) {
1204                 return element;
1205             }
1206             final DomElement childFound = getFirstChildElementRecursive(element, clazz);
1207             if (childFound != null) {
1208                 return childFound;
1209             }
1210         }
1211 
1212         return null;
1213     }
1214 
1215     /**
1216      * Gets the title element for this page. Returns null if one is not found.
1217      *
1218      * @return the title element for this page or null if this is not one
1219      */
1220     private HtmlTitle getTitleElement() {
1221         return (HtmlTitle) getFirstChildElementRecursive(getDocumentElement(), HtmlTitle.class);
1222     }
1223 
    /**
     * Looks for and executes any appropriate event handlers. The event is fired for this page first;
     * if the page is enclosed in a frame window, the matching handler of the frame element is
     * executed afterwards.
     * @param eventType either {@link Event#TYPE_LOAD}, {@link Event#TYPE_UNLOAD}, or {@link Event#TYPE_BEFORE_UNLOAD};
     *        {@link Event#TYPE_DOM_DOCUMENT_LOADED} and {@link Event#TYPE_READY_STATE_CHANGE} are
     *        handled also but use a different event target
     * @return {@code true} if user accepted <code>onbeforeunload</code> (not relevant to other events);
     *        also {@code true} if JavaScript is disabled
     */
    private boolean executeEventHandlersIfNeeded(final String eventType) {
        // If JavaScript isn't enabled, there's nothing for us to do.
        if (!getWebClient().isJavaScriptEnabled()) {
            return true;
        }

        // Fire the specified event for this page (the target depends on the event type, see below).
        final WebWindow window = getEnclosingWindow();
        if (window.getScriptableObject() instanceof Window) {
            final Event event;
            if (Event.TYPE_BEFORE_UNLOAD.equals(eventType)) {
                event = new BeforeUnloadEvent(this, eventType);
            }
            else {
                event = new Event(this, eventType);
            }

            // This is the same as DomElement.fireEvent() and was copied
            // here so it could be used with HtmlPage.
            if (LOG.isDebugEnabled()) {
                LOG.debug("Firing " + event);
            }

            // select the script object the event is fired at
            final EventTarget jsNode;
            if (Event.TYPE_DOM_DOCUMENT_LOADED.equals(eventType)) {
                jsNode = getScriptableObject();
            }
            else if (Event.TYPE_READY_STATE_CHANGE.equals(eventType)) {
                jsNode = getDocumentElement().getScriptableObject();
            }
            else {
                // The load/beforeunload/unload events target Document but paths Window only (tested in Chrome/FF)
                jsNode = window.getScriptableObject();
            }

            ((JavaScriptEngine) getWebClient().getJavaScriptEngine()).callSecured(cx -> jsNode.fireEvent(event), this);

            // only relevant for beforeunload events; all other events are always accepted
            if (!isOnbeforeunloadAccepted(this, event)) {
                return false;
            }
        }

        // If this page was loaded in a frame, execute the version of the event specified on the frame tag.
        if (window instanceof FrameWindow) {
            final FrameWindow fw = (FrameWindow) window;
            final BaseFrameElement frame = fw.getFrameElement();

            // if part of a document fragment, then the load event is not triggered
            if (Event.TYPE_LOAD.equals(eventType) && frame.getParentNode() instanceof DomDocumentFragment) {
                return true;
            }

            if (frame.hasEventHandlers("on" + eventType)) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Executing on" + eventType + " handler for " + frame);
                }
                if (window.getScriptableObject() instanceof Window) {
                    final Event event;
                    if (Event.TYPE_BEFORE_UNLOAD.equals(eventType)) {
                        event = new BeforeUnloadEvent(frame, eventType);
                    }
                    else {
                        event = new Event(frame, eventType);
                    }
                    // This fires the "load" event for the <frame> element which, like all non-window
                    // load events, propagates up to Document but not Window.  The "load" event for
                    // <frameset> on the other hand, like that of <body>, is handled above where it is
                    // fired against Document and directed to Window.
                    frame.fireEvent(event);

                    // the decision is made for the page that contains the frame element
                    if (!isOnbeforeunloadAccepted((HtmlPage) frame.getPage(), event)) {
                        return false;
                    }
                }
            }
        }

        return true;
    }
1308 
1309     /**
1310      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
1311      *
1312      * @return true if the OnbeforeunloadHandler has accepted to change the page
1313      */
1314     public boolean isOnbeforeunloadAccepted() {
1315         return executeEventHandlersIfNeeded(Event.TYPE_BEFORE_UNLOAD);
1316     }
1317 
1318     private boolean isOnbeforeunloadAccepted(final HtmlPage page, final Event event) {
1319         if (event instanceof BeforeUnloadEvent) {
1320             final BeforeUnloadEvent beforeUnloadEvent = (BeforeUnloadEvent) event;
1321             if (beforeUnloadEvent.isBeforeUnloadMessageSet()) {
1322                 final OnbeforeunloadHandler handler = getWebClient().getOnbeforeunloadHandler();
1323                 if (handler == null) {
1324                     LOG.warn("document.onbeforeunload() returned a string in event.returnValue,"
1325                             + " but no onbeforeunload handler installed.");
1326                 }
1327                 else {
1328                     final String message = JavaScriptEngine.toString(beforeUnloadEvent.getReturnValue());
1329                     return handler.handleEvent(page, message);
1330                 }
1331             }
1332         }
1333         return true;
1334     }
1335 
1336     /**
1337      * If a refresh has been specified either through a meta tag or an HTTP
1338      * response header, then perform that refresh.
1339      * @throws IOException if an IO problem occurs
1340      */
1341     private void executeRefreshIfNeeded() throws IOException {
1342         // If this page is not in a frame then a refresh has already happened,
1343         // most likely through the JavaScript onload handler, so we don't do a
1344         // second refresh.
1345         final WebWindow window = getEnclosingWindow();
1346         if (window == null) {
1347             return;
1348         }
1349 
1350         final String refreshString = getRefreshStringOrNull();
1351         if (refreshString == null || refreshString.isEmpty()) {
1352             return;
1353         }
1354 
1355         final double time;
1356         final URL url;
1357 
1358         final int index = StringUtils.indexOfAnyBut(refreshString, "0123456789.");
1359 
1360         if (index == -1) {
1361             // Format: <meta http-equiv='refresh' content='10'>
1362             try {
1363                 time = Double.parseDouble(refreshString);
1364             }
1365             catch (final NumberFormatException e) {
1366                 if (LOG.isErrorEnabled()) {
1367                     LOG.error("Malformed refresh string (no ';' but not a number): " + refreshString, e);
1368                 }
1369                 return;
1370             }
1371             url = getUrl();
1372         }
1373         else {
1374             // Format: <meta http-equiv='refresh' content='10;url=http://www.blah.com'>
1375             try {
1376                 time = Double.parseDouble(refreshString.substring(0, index));
1377             }
1378             catch (final NumberFormatException e) {
1379                 if (LOG.isErrorEnabled()) {
1380                     LOG.error("Malformed refresh string (no valid number before ';') " + refreshString, e);
1381                 }
1382                 return;
1383             }
1384 
1385             String urlPart = refreshString.substring(index);
1386             final char separator = urlPart.charAt(0);
1387             if (";, \r\n\t".indexOf(separator) >= 0) {
1388                 urlPart = StringUtils.stripStart(urlPart, ";, \r\n\t");
1389                 if (urlPart.toLowerCase(Locale.ROOT).startsWith("url")) {
1390                     urlPart = urlPart.substring(3);
1391                     urlPart = urlPart.trim();
1392 
1393                     if (urlPart.toLowerCase().startsWith("=")) {
1394                         urlPart = urlPart.substring(1);
1395                         urlPart = urlPart.trim();
1396                     }
1397                 }
1398 
1399                 if (StringUtils.isBlank(urlPart)) {
1400                     //content='10; URL=' is treated as content='10'
1401                     url = getUrl();
1402                 }
1403                 else {
1404                     if (urlPart.charAt(0) == '"' || urlPart.charAt(0) == 0x27) {
1405                         urlPart = urlPart.substring(1);
1406                     }
1407                     if (urlPart.charAt(urlPart.length() - 1) == '"' || urlPart.charAt(urlPart.length() - 1) == 0x27) {
1408                         urlPart = urlPart.substring(0, urlPart.length() - 1);
1409                     }
1410                     try {
1411                         url = getFullyQualifiedUrl(urlPart);
1412                     }
1413                     catch (final MalformedURLException e) {
1414                         if (LOG.isErrorEnabled()) {
1415                             LOG.error("Malformed URL in refresh string: " + refreshString, e);
1416                         }
1417                         return;
1418                     }
1419                 }
1420             }
1421             else {
1422                 if (LOG.isErrorEnabled()) {
1423                     LOG.error("Malformed refresh string (separator after time missing): " + refreshString);
1424                 }
1425                 return;
1426             }
1427         }
1428 
1429         processRefresh(url, time);
1430     }
1431 
1432     // this is different from what is done in org.htmlunit.WebClient.loadWebResponseFromWebConnection(WebRequest, int)
1433     // because there we are directly replacing the response before loading the response into the window
1434     // here we are replacing the page in the window (maybe after some time)
1435     private void processRefresh(final URL url, final double time) throws IOException {
1436         final WebClient webClient = getWebClient();
1437 
1438         final int refreshLimit = webClient.getOptions().getPageRefreshLimit();
1439         if (refreshLimit == 0) {
1440             final WebResponse webResponse = getWebResponse();
1441             throw new FailingHttpStatusCodeException("Too many redirects for "
1442                     + webResponse.getWebRequest().getUrl(), webResponse);
1443         }
1444 
1445         if (refreshLimit >= 0) {
1446             final StackTraceElement[] elements = new Exception().getStackTrace();
1447             int count = 0;
1448             final int elementCountLimit = refreshLimit > 50 ? 400 : refreshLimit > 10 ? 80 : 5;
1449             final int elementCount = elements.length;
1450 
1451             if (elementCount > elementCountLimit) {
1452                 for (int i = 0; i < elementCount; i++) {
1453                     if ("processRefresh".equals(elements[i].getMethodName())
1454                             && "org.htmlunit.html.HtmlPage".equals(elements[i].getClassName())) {
1455                         count++;
1456                         if (count >= refreshLimit) {
1457                             final WebResponse webResponse = getWebResponse();
1458                             throw new FailingHttpStatusCodeException(
1459                                             "Too many redirects (>= " + count + ") for "
1460                                                 + webResponse.getWebRequest().getUrl(), webResponse);
1461                         }
1462                     }
1463                 }
1464             }
1465         }
1466 
1467         webClient.getRefreshHandler().handleRefresh(this, url, (int) time);
1468     }
1469 
1470     /**
1471      * Returns an auto-refresh string if specified. This will look in both the meta
1472      * tags and inside the HTTP response headers.
1473      * @return the auto-refresh string
1474      */
1475     private String getRefreshStringOrNull() {
1476         final List<HtmlMeta> metaTags = getMetaTags("refresh");
1477         if (!metaTags.isEmpty()) {
1478             return metaTags.get(0).getContentAttribute().trim();
1479         }
1480         return getWebResponse().getResponseHeaderValue("Refresh");
1481     }
1482 
1483     /**
1484      * Executes any deferred scripts, if necessary.
1485      */
1486     private void executeDeferredScriptsIfNeeded() {
1487         if (!getWebClient().isJavaScriptEnabled()) {
1488             return;
1489         }
1490         final DomElement doc = getDocumentElement();
1491         final List<HtmlScript> scripts = new ArrayList<>();
1492 
1493         // don't call getElementsByTagName() here because it creates a live collection
1494         for (final HtmlElement elem : doc.getHtmlElementDescendants()) {
1495             if ("script".equals(elem.getLocalName()) && (elem instanceof HtmlScript)) {
1496                 final HtmlScript script = (HtmlScript) elem;
1497                 if (script.isDeferred() && ATTRIBUTE_NOT_DEFINED != script.getSrcAttribute()) {
1498                     scripts.add(script);
1499                 }
1500             }
1501         }
1502         for (final HtmlScript script : scripts) {
1503             ScriptElementSupport.executeScriptIfNeeded(script, true, true);
1504         }
1505     }
1506 
1507     /**
1508      * Deregister frames that are no longer in use.
1509      */
1510     public void deregisterFramesIfNeeded() {
1511         final List<BaseFrameElement> frameElementsCopy = new ArrayList<>(frameElements_);
1512         for (final BaseFrameElement frameElement : frameElementsCopy) {
1513             final WebWindow window = frameElement.getEnclosedWindow();
1514             getWebClient().deregisterWebWindow(window);
1515             final Page page = window.getEnclosedPage();
1516             if (page != null && page.isHtmlPage()) {
1517                 // seems quite silly, but for instance if the src attribute of an iframe is not
1518                 // set, the error only occurs when leaving the page
1519                 ((HtmlPage) page).deregisterFramesIfNeeded();
1520             }
1521         }
1522     }
1523 
1524     /**
1525      * Returns a list containing all the frames (from frame and iframe tags) in this page
1526      * in document order.
1527      * @return a list of {@link FrameWindow}
1528      */
1529     public List<FrameWindow> getFrames() {
1530         final List<BaseFrameElement> frameElements = new ArrayList<>(frameElements_);
1531         Collections.sort(frameElements, DOCUMENT_POSITION_COMPERATOR);
1532 
1533         final List<FrameWindow> list = new ArrayList<>(frameElements.size());
1534         for (final BaseFrameElement frameElement : frameElements) {
1535             list.add(frameElement.getEnclosedWindow());
1536         }
1537         return list;
1538     }
1539 
1540     /**
1541      * Returns the first frame contained in this page with the specified name.
1542      * @param name the name to search for
1543      * @return the first frame found
1544      * @exception ElementNotFoundException If no frame exist in this page with the specified name.
1545      */
1546     public FrameWindow getFrameByName(final String name) throws ElementNotFoundException {
1547         for (final BaseFrameElement frameElement : frameElements_) {
1548             final FrameWindow fw = frameElement.getEnclosedWindow();
1549             if (fw.getName().equals(name)) {
1550                 return fw;
1551             }
1552         }
1553 
1554         throw new ElementNotFoundException("frame or iframe", DomElement.NAME_ATTRIBUTE, name);
1555     }
1556 
1557     /**
1558      * Simulate pressing an access key. This may change the focus, may click buttons and may invoke
1559      * JavaScript.
1560      *
1561      * @param accessKey the key that will be pressed
1562      * @return the element that has the focus after pressing this access key or null if no element
1563      *         has the focus.
1564      * @throws IOException if an IO error occurs during the processing of this access key (this
1565      *         would only happen if the access key triggered a button which in turn caused a page load)
1566      */
1567     public DomElement pressAccessKey(final char accessKey) throws IOException {
1568         final HtmlElement element = getHtmlElementByAccessKey(accessKey);
1569         if (element != null) {
1570             element.focus();
1571             if (element instanceof HtmlAnchor
1572                     || element instanceof HtmlArea
1573                     || element instanceof HtmlButton
1574                     || element instanceof HtmlInput
1575                     || element instanceof HtmlLabel
1576                     || element instanceof HtmlLegend
1577                     || element instanceof HtmlTextArea) {
1578                 final Page newPage = element.click();
1579 
1580                 if (newPage != this && getFocusedElement() == element) {
1581                     // The page was reloaded therefore no element on this page will have the focus.
1582                     getFocusedElement().blur();
1583                 }
1584             }
1585         }
1586 
1587         return getFocusedElement();
1588     }
1589 
1590     /**
1591      * Move the focus to the next element in the tab order. To determine the specified tab
1592      * order, refer to {@link HtmlPage#getTabbableElements()}
1593      *
1594      * @return the element that has focus after calling this method
1595      */
1596     public HtmlElement tabToNextElement() {
1597         final List<HtmlElement> elements = getTabbableElements();
1598         if (elements.isEmpty()) {
1599             setFocusedElement(null);
1600             return null;
1601         }
1602 
1603         final HtmlElement elementToGiveFocus;
1604         final DomElement elementWithFocus = getFocusedElement();
1605         if (elementWithFocus == null) {
1606             elementToGiveFocus = elements.get(0);
1607         }
1608         else {
1609             final int index = elements.indexOf(elementWithFocus);
1610             if (index == -1) {
1611                 // The element with focus isn't on this page
1612                 elementToGiveFocus = elements.get(0);
1613             }
1614             else {
1615                 if (index == elements.size() - 1) {
1616                     elementToGiveFocus = elements.get(0);
1617                 }
1618                 else {
1619                     elementToGiveFocus = elements.get(index + 1);
1620                 }
1621             }
1622         }
1623 
1624         setFocusedElement(elementToGiveFocus);
1625         return elementToGiveFocus;
1626     }
1627 
1628     /**
1629      * Move the focus to the previous element in the tab order. To determine the specified tab
1630      * order, refer to {@link HtmlPage#getTabbableElements()}
1631      *
1632      * @return the element that has focus after calling this method
1633      */
1634     public HtmlElement tabToPreviousElement() {
1635         final List<HtmlElement> elements = getTabbableElements();
1636         if (elements.isEmpty()) {
1637             setFocusedElement(null);
1638             return null;
1639         }
1640 
1641         final HtmlElement elementToGiveFocus;
1642         final DomElement elementWithFocus = getFocusedElement();
1643         if (elementWithFocus == null) {
1644             elementToGiveFocus = elements.get(elements.size() - 1);
1645         }
1646         else {
1647             final int index = elements.indexOf(elementWithFocus);
1648             if (index == -1) {
1649                 // The element with focus isn't on this page
1650                 elementToGiveFocus = elements.get(elements.size() - 1);
1651             }
1652             else {
1653                 if (index == 0) {
1654                     elementToGiveFocus = elements.get(elements.size() - 1);
1655                 }
1656                 else {
1657                     elementToGiveFocus = elements.get(index - 1);
1658                 }
1659             }
1660         }
1661 
1662         setFocusedElement(elementToGiveFocus);
1663         return elementToGiveFocus;
1664     }
1665 
1666     /**
1667      * Returns the HTML element with the specified ID. If more than one element
1668      * has this ID (not allowed by the HTML spec), then this method returns the
1669      * first one.
1670      *
1671      * @param elementId the ID value to search for
1672      * @param <E> the element type
1673      * @return the HTML element with the specified ID
1674      * @throws ElementNotFoundException if no element was found matching the specified ID
1675      */
1676     @SuppressWarnings("unchecked")
1677     public <E extends HtmlElement> E getHtmlElementById(final String elementId) throws ElementNotFoundException {
1678         final DomElement element = getElementById(elementId);
1679         if (element == null) {
1680             throw new ElementNotFoundException("*", DomElement.ID_ATTRIBUTE, elementId);
1681         }
1682         return (E) element;
1683     }
1684 
1685     /**
1686      * Returns the elements with the specified ID. If there are no elements
1687      * with the specified ID, this method returns an empty list. Please note that
1688      * the lists returned by this method are immutable.
1689      *
1690      * @param elementId the ID value to search for
1691      * @return the elements with the specified name attribute
1692      */
1693     public List<DomElement> getElementsById(final String elementId) {
1694         if (elementId != null) {
1695             final MappedElementIndexEntry elements = idMap_.get(elementId);
1696             if (elements != null) {
1697                 return new ArrayList<>(elements.elements());
1698             }
1699         }
1700         return Collections.emptyList();
1701     }
1702 
1703     /**
1704      * Returns the element with the specified name. If more than one element
1705      * has this name, then this method returns the first one.
1706      *
1707      * @param name the name value to search for
1708      * @param <E> the element type
1709      * @return the element with the specified name
1710      * @throws ElementNotFoundException if no element was found matching the specified name
1711      */
1712     @SuppressWarnings("unchecked")
1713     public <E extends DomElement> E getElementByName(final String name) throws ElementNotFoundException {
1714         if (name != null) {
1715             final MappedElementIndexEntry elements = nameMap_.get(name);
1716             if (elements != null) {
1717                 return (E) elements.first();
1718             }
1719         }
1720         throw new ElementNotFoundException("*", DomElement.NAME_ATTRIBUTE, name);
1721     }
1722 
1723     /**
1724      * Returns the elements with the specified name attribute. If there are no elements
1725      * with the specified name, this method returns an empty list. Please note that
1726      * the lists returned by this method are immutable.
1727      *
1728      * @param name the name value to search for
1729      * @return the elements with the specified name attribute
1730      */
1731     public List<DomElement> getElementsByName(final String name) {
1732         if (name != null) {
1733             final MappedElementIndexEntry elements = nameMap_.get(name);
1734             if (elements != null) {
1735                 return new ArrayList<>(elements.elements());
1736             }
1737         }
1738         return Collections.emptyList();
1739     }
1740 
1741     /**
1742      * Returns the elements with the specified string for their name or ID. If there are
1743      * no elements with the specified name or ID, this method returns an empty list.
1744      *
1745      * @param idAndOrName the value to search for
1746      * @return the elements with the specified string for their name or ID
1747      */
1748     public List<DomElement> getElementsByIdAndOrName(final String idAndOrName) {
1749         if (idAndOrName == null) {
1750             return Collections.emptyList();
1751         }
1752         final MappedElementIndexEntry list1 = idMap_.get(idAndOrName);
1753         final MappedElementIndexEntry list2 = nameMap_.get(idAndOrName);
1754         final List<DomElement> list = new ArrayList<>();
1755         if (list1 != null) {
1756             list.addAll(list1.elements());
1757         }
1758         if (list2 != null) {
1759             for (final DomElement elt : list2.elements()) {
1760                 if (!list.contains(elt)) {
1761                     list.add(elt);
1762                 }
1763             }
1764         }
1765         return list;
1766     }
1767 
1768     /**
1769      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
1770      *
1771      * @param node the node that has just been added to the document
1772      */
1773     void notifyNodeAdded(final DomNode node) {
1774         if (node instanceof DomElement) {
1775             addMappedElement((DomElement) node, true);
1776 
1777             if (node instanceof BaseFrameElement) {
1778                 frameElements_.add((BaseFrameElement) node);
1779             }
1780 
1781             if (node.getFirstChild() != null) {
1782                 for (final Iterator<HtmlElement> iterator = node.new DescendantHtmlElementsIterator();
1783                         iterator.hasNext();) {
1784                     final HtmlElement child = iterator.next();
1785                     if (child instanceof BaseFrameElement) {
1786                         frameElements_.add((BaseFrameElement) child);
1787                     }
1788                 }
1789             }
1790 
1791             if ("base".equals(node.getNodeName())) {
1792                 calculateBase();
1793             }
1794         }
1795         node.onAddedToPage();
1796     }
1797 
1798     /**
1799      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
1800      *
1801      * @param node the node that has just been removed from the tree
1802      */
1803     void notifyNodeRemoved(final DomNode node) {
1804         if (node instanceof HtmlElement) {
1805             removeMappedElement((HtmlElement) node, true, true);
1806 
1807             if (node instanceof BaseFrameElement) {
1808                 frameElements_.remove(node);
1809             }
1810             for (final HtmlElement child : node.getHtmlElementDescendants()) {
1811                 if (child instanceof BaseFrameElement) {
1812                     frameElements_.remove(child);
1813                 }
1814             }
1815 
1816             if ("base".equals(node.getNodeName())) {
1817                 calculateBase();
1818             }
1819         }
1820     }
1821 
1822     /**
1823      * Adds an element to the ID and name maps, if necessary.
1824      * @param element the element to be added to the ID and name maps
1825      * @param recurse indicates if children must be added too
1826      */
1827     void addMappedElement(final DomElement element, final boolean recurse) {
1828         if (isAncestorOf(element)) {
1829             addElement(idMap_, element, DomElement.ID_ATTRIBUTE, recurse);
1830             addElement(nameMap_, element, DomElement.NAME_ATTRIBUTE, recurse);
1831         }
1832     }
1833 
1834     private void addElement(final Map<String, MappedElementIndexEntry> map, final DomElement element,
1835             final String attribute, final boolean recurse) {
1836         final String value = element.getAttribute(attribute);
1837 
1838         if (ATTRIBUTE_NOT_DEFINED != value) {
1839             MappedElementIndexEntry elements = map.get(value);
1840             if (elements == null) {
1841                 elements = new MappedElementIndexEntry();
1842                 elements.add(element);
1843                 map.put(value, elements);
1844             }
1845             else {
1846                 elements.add(element);
1847             }
1848         }
1849         if (recurse) {
1850             // poor man's approach - we don't use getChildElements()
1851             // to avoid a bunch of object constructions
1852             DomNode nextChild = element.getFirstChild();
1853             while (nextChild != null) {
1854                 if (nextChild instanceof DomElement) {
1855                     addElement(map, (DomElement) nextChild, attribute, true);
1856                 }
1857                 nextChild = nextChild.getNextSibling();
1858             }
1859         }
1860     }
1861 
1862     /**
1863      * Removes an element and optionally its children from the ID and name maps, if necessary.
1864      * @param element the element to be removed from the ID and name maps
1865      * @param recurse indicates if children must be removed too
1866      * @param descendant indicates of the element was descendant of this HtmlPage, but now its parent might be null
1867      */
1868     void removeMappedElement(final DomElement element, final boolean recurse, final boolean descendant) {
1869         if (descendant || isAncestorOf(element)) {
1870             removeElement(idMap_, element, DomElement.ID_ATTRIBUTE, recurse);
1871             removeElement(nameMap_, element, DomElement.NAME_ATTRIBUTE, recurse);
1872         }
1873     }
1874 
1875     private void removeElement(final Map<String, MappedElementIndexEntry> map, final DomElement element,
1876             final String attribute, final boolean recurse) {
1877         final String value = element.getAttribute(attribute);
1878 
1879         if (ATTRIBUTE_NOT_DEFINED != value) {
1880             final MappedElementIndexEntry elements = map.remove(value);
1881             if (elements != null) {
1882                 elements.remove(element);
1883                 if (!elements.elements_.isEmpty()) {
1884                     map.put(value, elements);
1885                 }
1886             }
1887         }
1888         if (recurse) {
1889             for (final DomElement child : element.getChildElements()) {
1890                 removeElement(map, child, attribute, true);
1891             }
1892         }
1893     }
1894 
1895     /**
1896      * Indicates if the attribute name indicates that the owning element is mapped.
1897      * @param document the owning document
1898      * @param attributeName the name of the attribute to consider
1899      * @return {@code true} if the owning element should be mapped in its owning page
1900      */
1901     static boolean isMappedElement(final Document document, final String attributeName) {
1902         return document instanceof HtmlPage
1903             && (DomElement.NAME_ATTRIBUTE.equals(attributeName) || DomElement.ID_ATTRIBUTE.equals(attributeName));
1904     }
1905 
1906     private void calculateBase() {
1907         final List<HtmlElement> baseElements = getDocumentElement().getStaticElementsByTagName("base");
1908 
1909         base_ = null;
1910         for (final HtmlElement baseElement : baseElements) {
1911             if (baseElement instanceof HtmlBase) {
1912                 if (base_ != null) {
1913                     notifyIncorrectness("Multiple 'base' detected, only the first is used.");
1914                     break;
1915                 }
1916                 base_ = (HtmlBase) baseElement;
1917             }
1918         }
1919     }
1920 
1921     /**
1922      * Loads the content of the contained frames. This is done after the page is completely loaded, to allow script
1923      * contained in the frames to reference elements from the page located after the closing &lt;/frame&gt; tag.
1924      * @throws FailingHttpStatusCodeException if the server returns a failing status code AND the property
1925      *         {@link WebClientOptions#setThrowExceptionOnFailingStatusCode(boolean)} is set to {@code true}
1926      */
1927     void loadFrames() throws FailingHttpStatusCodeException {
1928         for (final BaseFrameElement frameElement : new ArrayList<>(frameElements_)) {
1929             // test if the frame should really be loaded:
1930             // if a script has already changed its content, it should be skipped
1931             // use == and not equals(...) to identify initial content (versus URL set to "about:blank")
1932             if (frameElement.getEnclosedWindow() != null
1933                     && UrlUtils.URL_ABOUT_BLANK == frameElement.getEnclosedPage().getUrl()
1934                     && !frameElement.isContentLoaded()) {
1935                 frameElement.loadInnerPage();
1936             }
1937         }
1938     }
1939 
1940     /**
1941      * Gives a basic representation for debugging purposes.
1942      * @return a basic representation
1943      */
1944     @Override
1945     public String toString() {
1946         final StringBuilder builder = new StringBuilder()
1947             .append("HtmlPage(")
1948             .append(getUrl())
1949             .append(")@")
1950             .append(hashCode());
1951         return builder.toString();
1952     }
1953 
1954     /**
1955      * Gets the meta tag for a given {@code http-equiv} value.
1956      * @param httpEquiv the {@code http-equiv} value
1957      * @return a list of {@link HtmlMeta}
1958      */
1959     protected List<HtmlMeta> getMetaTags(final String httpEquiv) {
1960         if (getDocumentElement() == null) {
1961             return Collections.emptyList(); // weird case, for instance if document.documentElement has been removed
1962         }
1963         final List<HtmlMeta> tags = getDocumentElement().getStaticElementsByTagName("meta");
1964         final List<HtmlMeta> foundTags = new ArrayList<>();
1965         for (final HtmlMeta htmlMeta : tags) {
1966             if (httpEquiv.equalsIgnoreCase(htmlMeta.getHttpEquivAttribute())) {
1967                 foundTags.add(htmlMeta);
1968             }
1969         }
1970         return foundTags;
1971     }
1972 
1973     /**
1974      * Creates a clone of this instance, and clears cached state to be not shared with the original.
1975      *
1976      * @return a clone of this instance
1977      */
1978     @Override
1979     protected HtmlPage clone() {
1980         final HtmlPage result = (HtmlPage) super.clone();
1981         result.elementWithFocus_ = null;
1982 
1983         result.idMap_ = new ConcurrentHashMap<>();
1984         result.nameMap_ = new ConcurrentHashMap<>();
1985 
1986         return result;
1987     }
1988 
1989     /**
1990      * {@inheritDoc}
1991      */
1992     @Override
1993     public HtmlPage cloneNode(final boolean deep) {
1994         // we need the ScriptObject clone before cloning the kids.
1995         final HtmlPage result = (HtmlPage) super.cloneNode(false);
1996         if (getWebClient().isJavaScriptEnabled()) {
1997             final HtmlUnitScriptable jsObjClone = getScriptableObject().clone();
1998             jsObjClone.setDomNode(result);
1999         }
2000 
2001         // if deep, clone the kids too, and re initialize parts of the clone
2002         if (deep) {
2003             // this was previously synchronized but that makes not sense, why
2004             // lock the source against a copy only one has a reference too,
2005             // because result is a local reference
2006             result.attributeListeners_ = null;
2007 
2008             result.selectionRanges_ = new ArrayList<>(3);
2009             // the original one is synchronized so we should do that here too, shouldn't we?
2010             result.afterLoadActions_ = Collections.synchronizedList(new ArrayList<>());
2011             result.frameElements_ = new ArrayList<>();
2012             for (DomNode child = getFirstChild(); child != null; child = child.getNextSibling()) {
2013                 result.appendChild(child.cloneNode(true));
2014             }
2015         }
2016         return result;
2017     }
2018 
2019     /**
2020      * Adds an HtmlAttributeChangeListener to the listener list.
2021      * The listener is registered for all attributes of all HtmlElements contained in this page.
2022      *
2023      * @param listener the attribute change listener to be added
2024      * @see #removeHtmlAttributeChangeListener(HtmlAttributeChangeListener)
2025      */
2026     public void addHtmlAttributeChangeListener(final HtmlAttributeChangeListener listener) {
2027         WebAssert.notNull("listener", listener);
2028         synchronized (lock_) {
2029             if (attributeListeners_ == null) {
2030                 attributeListeners_ = new LinkedHashSet<>();
2031             }
2032             attributeListeners_.add(listener);
2033         }
2034     }
2035 
2036     /**
2037      * Removes an HtmlAttributeChangeListener from the listener list.
2038      * This method should be used to remove HtmlAttributeChangeListener that were registered
2039      * for all attributes of all HtmlElements contained in this page.
2040      *
2041      * @param listener the attribute change listener to be removed
2042      * @see #addHtmlAttributeChangeListener(HtmlAttributeChangeListener)
2043      */
2044     public void removeHtmlAttributeChangeListener(final HtmlAttributeChangeListener listener) {
2045         WebAssert.notNull("listener", listener);
2046         synchronized (lock_) {
2047             if (attributeListeners_ != null) {
2048                 attributeListeners_.remove(listener);
2049             }
2050         }
2051     }
2052 
2053     /**
2054      * Notifies all registered listeners for the given event to add an attribute.
2055      * @param event the event to fire
2056      */
2057     void fireHtmlAttributeAdded(final HtmlAttributeChangeEvent event) {
2058         final List<HtmlAttributeChangeListener> listeners = safeGetAttributeListeners();
2059         if (listeners != null) {
2060             for (final HtmlAttributeChangeListener listener : listeners) {
2061                 listener.attributeAdded(event);
2062             }
2063         }
2064     }
2065 
2066     /**
2067      * Notifies all registered listeners for the given event to replace an attribute.
2068      * @param event the event to fire
2069      */
2070     void fireHtmlAttributeReplaced(final HtmlAttributeChangeEvent event) {
2071         final List<HtmlAttributeChangeListener> listeners = safeGetAttributeListeners();
2072         if (listeners != null) {
2073             for (final HtmlAttributeChangeListener listener : listeners) {
2074                 listener.attributeReplaced(event);
2075             }
2076         }
2077     }
2078 
2079     /**
2080      * Notifies all registered listeners for the given event to remove an attribute.
2081      * @param event the event to fire
2082      */
2083     void fireHtmlAttributeRemoved(final HtmlAttributeChangeEvent event) {
2084         final List<HtmlAttributeChangeListener> listeners = safeGetAttributeListeners();
2085         if (listeners != null) {
2086             for (final HtmlAttributeChangeListener listener : listeners) {
2087                 listener.attributeRemoved(event);
2088             }
2089         }
2090     }
2091 
2092     private List<HtmlAttributeChangeListener> safeGetAttributeListeners() {
2093         synchronized (lock_) {
2094             if (attributeListeners_ != null) {
2095                 return new ArrayList<>(attributeListeners_);
2096             }
2097             return null;
2098         }
2099     }
2100 
2101     /**
2102      * {@inheritDoc}
2103      */
2104     @Override
2105     protected void checkChildHierarchy(final org.w3c.dom.Node newChild) throws DOMException {
2106         if (newChild instanceof Element) {
2107             if (getDocumentElement() != null) {
2108                 throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR,
2109                     "The Document may only have a single child Element.");
2110             }
2111         }
2112         else if (newChild instanceof DocumentType) {
2113             if (getDoctype() != null) {
2114                 throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR,
2115                     "The Document may only have a single child DocumentType.");
2116             }
2117         }
2118         else if (!(newChild instanceof Comment || newChild instanceof ProcessingInstruction)) {
2119             throw new DOMException(DOMException.HIERARCHY_REQUEST_ERR,
2120                 "The Document may not have a child of this type: " + newChild.getNodeType());
2121         }
2122         super.checkChildHierarchy(newChild);
2123     }
2124 
2125     /**
2126      * Returns {@code true} if an HTML parser is operating on this page, adding content to it.
2127      * @return {@code true} if an HTML parser is operating on this page, adding content to it
2128      */
2129     public boolean isBeingParsed() {
2130         return parserCount_ > 0;
2131     }
2132 
2133     /**
2134      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2135      *
2136      * Called by the HTML parser to let the page know that it has started parsing some content for this page.
2137      */
2138     public void registerParsingStart() {
2139         parserCount_++;
2140     }
2141 
2142     /**
2143      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2144      *
2145      * Called by the HTML parser to let the page know that it has finished parsing some content for this page.
2146      */
2147     public void registerParsingEnd() {
2148         parserCount_--;
2149     }
2150 
2151     /**
2152      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2153      *
2154      * Returns {@code true} if an HTML parser is parsing a non-inline HTML snippet to add content
2155      * to this page. Non-inline content is content that is parsed for the page, but not in the
2156      * same stream as the page itself -- basically anything other than <code>document.write()</code>
2157      * or <code>document.writeln()</code>: <code>innerHTML</code>, <code>outerHTML</code>,
2158      * <code>document.createElement()</code>, etc.
2159      *
2160      * @return {@code true} if an HTML parser is parsing a non-inline HTML snippet to add content
2161      *         to this page
2162      */
2163     public boolean isParsingHtmlSnippet() {
2164         return snippetParserCount_ > 0;
2165     }
2166 
2167     /**
2168      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2169      *
2170      * Called by the HTML parser to let the page know that it has started parsing a non-inline HTML snippet.
2171      */
2172     public void registerSnippetParsingStart() {
2173         snippetParserCount_++;
2174     }
2175 
2176     /**
2177      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2178      *
2179      * Called by the HTML parser to let the page know that it has finished parsing a non-inline HTML snippet.
2180      */
2181     public void registerSnippetParsingEnd() {
2182         snippetParserCount_--;
2183     }
2184 
2185     /**
2186      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2187      *
2188      * Returns {@code true} if an HTML parser is parsing an inline HTML snippet to add content
2189      * to this page. Inline content is content inserted into the parser stream dynamically
2190      * while the page is being parsed (i.e. <code>document.write()</code> or <code>document.writeln()</code>).
2191      *
2192      * @return {@code true} if an HTML parser is parsing an inline HTML snippet to add content
2193      *         to this page
2194      */
2195     public boolean isParsingInlineHtmlSnippet() {
2196         return inlineSnippetParserCount_ > 0;
2197     }
2198 
2199     /**
2200      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2201      *
2202      * Called by the HTML parser to let the page know that it has started parsing an inline HTML snippet.
2203      */
2204     public void registerInlineSnippetParsingStart() {
2205         inlineSnippetParserCount_++;
2206     }
2207 
2208     /**
2209      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2210      *
2211      * Called by the HTML parser to let the page know that it has finished parsing an inline HTML snippet.
2212      */
2213     public void registerInlineSnippetParsingEnd() {
2214         inlineSnippetParserCount_--;
2215     }
2216 
2217     /**
2218      * Refreshes the page by sending the same parameters as previously sent to get this page.
2219      * @return the newly loaded page.
2220      * @throws IOException if an IO problem occurs
2221      */
2222     public Page refresh() throws IOException {
2223         return getWebClient().getPage(getWebResponse().getWebRequest());
2224     }
2225 
2226     /**
2227      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2228      * <p>
2229      * Parses the given string as would it belong to the content being parsed
2230      * at the current parsing position
2231      * </p>
2232      * @param string the HTML code to write in place
2233      */
2234     public void writeInParsedStream(final String string) {
2235         getDOMBuilder().pushInputString(string);
2236     }
2237 
2238     /**
2239      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2240      *
2241      * Sets the builder to allow page to send content from document.write(ln) calls.
2242      * @param htmlUnitDOMBuilder the builder
2243      */
2244     public void setDOMBuilder(final HTMLParserDOMBuilder htmlUnitDOMBuilder) {
2245         domBuilder_ = htmlUnitDOMBuilder;
2246     }
2247 
2248     /**
2249      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2250      *
2251      * Returns the current builder.
2252      * @return the current builder
2253      */
2254     public HTMLParserDOMBuilder getDOMBuilder() {
2255         return domBuilder_;
2256     }
2257 
2258     /**
2259      * <p>Returns all namespaces defined in the root element of this page.</p>
2260      * <p>The default namespace has a key of an empty string.</p>
2261      * @return all namespaces defined in the root element of this page
2262      */
2263     public Map<String, String> getNamespaces() {
2264         final org.w3c.dom.NamedNodeMap attributes = getDocumentElement().getAttributes();
2265         final Map<String, String> namespaces = new HashMap<>();
2266         for (int i = 0; i < attributes.getLength(); i++) {
2267             final Attr attr = (Attr) attributes.item(i);
2268             String name = attr.getName();
2269             if (name.startsWith("xmlns")) {
2270                 int startPos = 5;
2271                 if (name.length() > 5 && name.charAt(5) == ':') {
2272                     startPos = 6;
2273                 }
2274                 name = name.substring(startPos);
2275                 namespaces.put(name, attr.getValue());
2276             }
2277         }
2278         return namespaces;
2279     }
2280 
2281     /**
2282      * {@inheritDoc}
2283      */
2284     @Override
2285     public void setDocumentType(final DocumentType type) {
2286         super.setDocumentType(type);
2287     }
2288 
2289     /**
2290      * Saves the current page, with all images, to the specified location.
2291      * The default behavior removes all script elements.
2292      *
2293      * @param file file to write this page into
2294      * @throws IOException If an error occurs
2295      */
2296     public void save(final File file) throws IOException {
2297         new XmlSerializer().save(this, file);
2298     }
2299 
2300     /**
2301      * Returns whether the current page mode is in {@code quirks mode} or in {@code standards mode}.
2302      * @return true for {@code quirks mode}, false for {@code standards mode}
2303      */
2304     public boolean isQuirksMode() {
2305         return "BackCompat".equals(((HTMLDocument) getScriptableObject()).getCompatMode());
2306     }
2307 
2308     /**
2309      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2310      * {@inheritDoc}
2311      */
2312     @Override
2313     public boolean isAttachedToPage() {
2314         return true;
2315     }
2316 
2317     /**
2318      * {@inheritDoc}
2319      */
2320     @Override
2321     public boolean isHtmlPage() {
2322         return true;
2323     }
2324 
2325     /**
2326      * The base URL used to resolve relative URLs.
2327      * @return the base URL
2328      */
2329     public URL getBaseURL() {
2330         URL baseUrl;
2331         if (base_ == null) {
2332             baseUrl = getUrl();
2333             final WebWindow window = getEnclosingWindow();
2334             final boolean frame = window != null && window != window.getTopWindow();
2335             if (frame) {
2336                 final boolean frameSrcIsNotSet = baseUrl == UrlUtils.URL_ABOUT_BLANK;
2337                 final boolean frameSrcIsJs = "javascript".equals(baseUrl.getProtocol());
2338                 if (frameSrcIsNotSet || frameSrcIsJs) {
2339                     baseUrl = window.getTopWindow().getEnclosedPage().getWebResponse()
2340                         .getWebRequest().getUrl();
2341                 }
2342             }
2343             else if (baseUrl_ != null) {
2344                 baseUrl = baseUrl_;
2345             }
2346         }
2347         else {
2348             final String href = base_.getHrefAttribute().trim();
2349             if (StringUtils.isEmpty(href)) {
2350                 baseUrl = getUrl();
2351             }
2352             else {
2353                 final URL url = getUrl();
2354                 try {
2355                     if (href.startsWith("http://") || href.startsWith("https://")) {
2356                         baseUrl = new URL(href);
2357                     }
2358                     else if (href.startsWith("//")) {
2359                         baseUrl = new URL(String.format("%s:%s", url.getProtocol(), href));
2360                     }
2361                     else if (href.length() > 0 && href.charAt(0) == '/') {
2362                         final int port = Window.getPort(url);
2363                         baseUrl = new URL(String.format("%s://%s:%d%s", url.getProtocol(), url.getHost(), port, href));
2364                     }
2365                     else if (url.toString().endsWith("/")) {
2366                         baseUrl = new URL(String.format("%s%s", url, href));
2367                     }
2368                     else {
2369                         baseUrl = new URL(UrlUtils.resolveUrl(url, href));
2370                     }
2371                 }
2372                 catch (final MalformedURLException e) {
2373                     notifyIncorrectness("Invalid base url: \"" + href + "\", ignoring it");
2374                     baseUrl = url;
2375                 }
2376             }
2377         }
2378 
2379         return baseUrl;
2380     }
2381 
2382     /**
2383      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2384      *
2385      * Adds an {@link AutoCloseable}, which would be closed during the {@link #cleanUp()}.
2386      * @param autoCloseable the autoclosable
2387      */
2388     public void addAutoCloseable(final AutoCloseable autoCloseable) {
2389         if (autoCloseable == null) {
2390             return;
2391         }
2392 
2393         if (autoCloseableList_ == null) {
2394             autoCloseableList_ = new ArrayList<>();
2395         }
2396         autoCloseableList_.add(autoCloseable);
2397     }
2398 
2399     /**
2400      * {@inheritDoc}
2401      */
2402     @Override
2403     public boolean handles(final Event event) {
2404         if (Event.TYPE_BLUR.equals(event.getType()) || Event.TYPE_FOCUS.equals(event.getType())) {
2405             return true;
2406         }
2407         return super.handles(event);
2408     }
2409 
2410     /**
2411      * Sets the {@link ElementFromPointHandler}.
2412      * @param elementFromPointHandler the handler
2413      */
2414     public void setElementFromPointHandler(final ElementFromPointHandler elementFromPointHandler) {
2415         elementFromPointHandler_ = elementFromPointHandler;
2416     }
2417 
2418     /**
2419      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2420      *
2421      * Returns the element for the specified x coordinate and the specified y coordinate.
2422      *
2423      * @param x the x offset, in pixels
2424      * @param y the y offset, in pixels
2425      * @return the element for the specified x coordinate and the specified y coordinate
2426      */
2427     public HtmlElement getElementFromPoint(final int x, final int y) {
2428         if (elementFromPointHandler_ == null) {
2429             if (LOG.isWarnEnabled()) {
2430                 LOG.warn("ElementFromPointHandler was not specicifed for " + this);
2431             }
2432             if (x <= 0 || y <= 0) {
2433                 return null;
2434             }
2435             return getBody();
2436         }
2437         return elementFromPointHandler_.getElementFromPoint(this, x, y);
2438     }
2439 
2440     /**
2441      * Moves the focus to the specified element. This will trigger any relevant JavaScript
2442      * event handlers.
2443      *
2444      * @param newElement the element that will receive the focus, use {@code null} to remove focus from any element
2445      * @return true if the specified element now has the focus
2446      * @see #getFocusedElement()
2447      */
2448     public boolean setFocusedElement(final DomElement newElement) {
2449         return setFocusedElement(newElement, false);
2450     }
2451 
2452     /**
2453      * Moves the focus to the specified element. This will trigger any relevant JavaScript
2454      * event handlers.
2455      *
2456      * @param newElement the element that will receive the focus, use {@code null} to remove focus from any element
2457      * @param windowActivated - whether the enclosing window got focus resulting in specified element getting focus
2458      * @return true if the specified element now has the focus
2459      * @see #getFocusedElement()
2460      */
2461     public boolean setFocusedElement(final DomElement newElement, final boolean windowActivated) {
2462         if (elementWithFocus_ == newElement && !windowActivated) {
2463             // nothing to do
2464             return true;
2465         }
2466 
2467         final DomElement oldFocusedElement = elementWithFocus_;
2468         elementWithFocus_ = null;
2469 
2470         if (!windowActivated) {
2471             if (oldFocusedElement != null) {
2472                 oldFocusedElement.removeFocus();
2473                 oldFocusedElement.fireEvent(Event.TYPE_BLUR);
2474 
2475                 oldFocusedElement.fireEvent(Event.TYPE_FOCUS_OUT);
2476             }
2477         }
2478 
2479         elementWithFocus_ = newElement;
2480 
2481         // use newElement in the code below because element elementWithFocus_
2482         // might be changed by another thread
2483         if (newElement != null) {
2484             newElement.focus();
2485             newElement.fireEvent(Event.TYPE_FOCUS);
2486 
2487             newElement.fireEvent(Event.TYPE_FOCUS_IN);
2488         }
2489 
2490         // If a page reload happened as a result of the focus change then obviously this
2491         // element will not have the focus because its page has gone away.
2492         return this == getEnclosingWindow().getEnclosedPage();
2493     }
2494 
2495     /**
2496      * Returns the element with the focus or null if no element has the focus.
2497      * @return the element with focus or null
2498      * @see #setFocusedElement(DomElement)
2499      */
2500     public DomElement getFocusedElement() {
2501         return elementWithFocus_;
2502     }
2503 
2504     /**
2505      * <p><span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span></p>
2506      *
2507      * Sets the element with focus.
2508      * @param elementWithFocus the element with focus
2509      */
2510     public void setElementWithFocus(final DomElement elementWithFocus) {
2511         elementWithFocus_ = elementWithFocus;
2512     }
2513 
2514     /**
2515      * <p><span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span></p>
2516      *
2517      * @return the element with focus or the body
2518      */
2519     public HtmlElement getActiveElement() {
2520         final DomElement activeElement = getFocusedElement();
2521         if (activeElement instanceof HtmlElement) {
2522             return (HtmlElement) activeElement;
2523         }
2524 
2525         final HtmlElement body = getBody();
2526         if (body != null) {
2527             return body;
2528         }
2529         return null;
2530     }
2531 
2532     /**
2533      * <p><span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span></p>
2534      *
2535      * <p>Returns the page's current selection ranges.</p>
2536      *
2537      * @return the page's current selection ranges
2538      */
2539     public List<SimpleRange> getSelectionRanges() {
2540         return selectionRanges_;
2541     }
2542 
2543     /**
2544      * <p><span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span></p>
2545      *
2546      * <p>Makes the specified selection range the *only* selection range on this page.</p>
2547      *
2548      * @param selectionRange the selection range
2549      */
2550     public void setSelectionRange(final SimpleRange selectionRange) {
2551         selectionRanges_.clear();
2552         selectionRanges_.add(selectionRange);
2553     }
2554 
2555     /**
2556      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2557      *
2558      * Execute a Function in the given context.
2559      *
2560      * @param function the JavaScript Function to call
2561      * @param thisObject the "this" object to be used during invocation
2562      * @param args the arguments to pass into the call
2563      * @param htmlElementScope the HTML element for which this script is being executed
2564      *        This element will be the context during the JavaScript execution. If null,
2565      *        the context will default to the page.
2566      * @return a ScriptResult which will contain both the current page (which may be different than
2567      *        the previous page and a JavaScript result object.
2568      */
2569     public ScriptResult executeJavaScriptFunction(final Object function, final Object thisObject,
2570             final Object[] args, final DomNode htmlElementScope) {
2571         if (!getWebClient().isJavaScriptEnabled()) {
2572             return new ScriptResult(null);
2573         }
2574 
2575         return executeJavaScriptFunction((Function) function, (Scriptable) thisObject, args, htmlElementScope);
2576     }
2577 
2578     private ScriptResult executeJavaScriptFunction(final Function function, final Scriptable thisObject,
2579             final Object[] args, final DomNode htmlElementScope) {
2580 
2581         final JavaScriptEngine engine = (JavaScriptEngine) getWebClient().getJavaScriptEngine();
2582         final Object result = engine.callFunction(this, function, thisObject, args, htmlElementScope);
2583 
2584         return new ScriptResult(result);
2585     }
2586 
2587     private void writeObject(final ObjectOutputStream oos) throws IOException {
2588         oos.defaultWriteObject();
2589         oos.writeObject(originalCharset_ == null ? null : originalCharset_.name());
2590     }
2591 
2592     private void readObject(final ObjectInputStream ois) throws ClassNotFoundException, IOException {
2593         ois.defaultReadObject();
2594         final String charsetName = (String) ois.readObject();
2595         if (charsetName != null) {
2596             originalCharset_ = Charset.forName(charsetName);
2597         }
2598     }
2599 
2600     /**
2601      * {@inheritDoc}
2602      */
2603     @Override
2604     public void setNodeValue(final String value) {
2605         // Default behavior is to do nothing, overridden in some subclasses
2606     }
2607 
2608     /**
2609      * {@inheritDoc}
2610      */
2611     @Override
2612     public void setPrefix(final String prefix) {
2613         // Empty.
2614     }
2615 
2616     /**
2617      * {@inheritDoc}
2618      */
2619     @Override
2620     public void clearComputedStyles() {
2621         if (computedStylesCache_ != null) {
2622             computedStylesCache_.clear();
2623         }
2624     }
2625 
2626     /**
2627      * {@inheritDoc}
2628      */
2629     @Override
2630     public void clearComputedStyles(final DomElement element) {
2631         if (computedStylesCache_ != null) {
2632             computedStylesCache_.remove(element);
2633         }
2634     }
2635 
2636     /**
2637      * {@inheritDoc}
2638      */
2639     @Override
2640     public void clearComputedStylesUpToRoot(final DomElement element) {
2641         if (computedStylesCache_ != null) {
2642             computedStylesCache_.remove(element);
2643 
2644             DomNode parent = element.getParentNode();
2645             while (parent != null) {
2646                 computedStylesCache_.remove(parent);
2647                 parent = parent.getParentNode();
2648             }
2649         }
2650     }
2651 
2652     /**
2653      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2654      *
2655      * @param element the element to clear its cache
2656      * @param normalizedPseudo the pseudo attribute
2657      * @return the cached CSS2Properties object or null
2658      */
2659     public ComputedCssStyleDeclaration getStyleFromCache(final DomElement element,
2660             final String normalizedPseudo) {
2661         return getCssPropertiesCache().get(element, normalizedPseudo);
2662     }
2663 
2664     /**
2665      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2666      *
2667      * Caches a CSS2Properties object.
2668      * @param element the element to clear its cache
2669      * @param normalizedPseudo the pseudo attribute
2670      * @param style the CSS2Properties to cache
2671      */
2672     public void putStyleIntoCache(final DomElement element, final String normalizedPseudo,
2673             final ComputedCssStyleDeclaration style) {
2674         getCssPropertiesCache().put(element, normalizedPseudo, style);
2675     }
2676 
2677     /**
2678      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
2679      *
2680      * @return a list of all styles from this page (&lt;style&gt; and &lt;link rel=stylesheet&gt;).
2681      *         This returns an empty list if css support is disabled in the web client options.
2682      */
2683     public List<CssStyleSheet> getStyleSheets() {
2684         final List<CssStyleSheet> styles = new ArrayList<>();
2685         if (getWebClient().getOptions().isCssEnabled()) {
2686             for (final HtmlElement htmlElement : getHtmlElementDescendants()) {
2687                 if (htmlElement instanceof HtmlStyle) {
2688                     styles.add(((HtmlStyle) htmlElement).getSheet());
2689                     continue;
2690                 }
2691 
2692                 if (htmlElement instanceof HtmlLink) {
2693                     final HtmlLink link = (HtmlLink) htmlElement;
2694                     if (link.isStyleSheetLink()) {
2695                         styles.add(link.getSheet());
2696                     }
2697                 }
2698             }
2699         }
2700         return styles;
2701     }
2702 
2703     /**
2704      * @return the CSSPropertiesCache for this page
2705      */
2706     private ComputedStylesCache getCssPropertiesCache() {
2707         if (computedStylesCache_ == null) {
2708             computedStylesCache_ = new ComputedStylesCache();
2709 
2710             // maintain the style cache
2711             final DomHtmlAttributeChangeListenerImpl listener = new DomHtmlAttributeChangeListenerImpl();
2712             addDomChangeListener(listener);
2713             addHtmlAttributeChangeListener(listener);
2714         }
2715         return computedStylesCache_;
2716     }
2717 
2718     /**
2719      * <p>Listens for changes anywhere in the document and evicts cached computed styles whenever something relevant
2720      * changes. Note that the very lazy way of doing this (completely clearing the cache every time something happens)
2721      * results in very meager performance gains. In order to get good (but still correct) performance, we need to be
2722      * a little smarter.</p>
2723      *
2724      * <p>CSS 2.1 has the following <a href="http://www.w3.org/TR/CSS21/selector.html">selector types</a> (where "SN" is
2725      * shorthand for "the selected node"):</p>
2726      *
2727      * <ol>
2728      *   <li><em>Universal</em> (i.e. "*"): Affected by the removal of SN from the document.</li>
2729      *   <li><em>Type</em> (i.e. "div"): Affected by the removal of SN from the document.</li>
2730      *   <li><em>Descendant</em> (i.e. "div span"): Affected by changes to SN or to any of its ancestors.</li>
2731      *   <li><em>Child</em> (i.e. "div &gt; span"): Affected by changes to SN or to its parent.</li>
2732      *   <li><em>Adjacent Sibling</em> (i.e. "table + p"): Affected by changes to SN or its previous sibling.</li>
2733      *   <li><em>Attribute</em> (i.e. "div.up, div[class~=up]"): Affected by changes to an attribute of SN.</li>
2734      *   <li><em>ID</em> (i.e. "#header): Affected by changes to the <code>id</code> attribute of SN.</li>
2735      *   <li><em>Pseudo-Elements and Pseudo-Classes</em> (i.e. "p:first-child"): Affected by changes to parent.</li>
2736      * </ol>
2737      *
2738      * <p>Together, these rules dictate that the smart (but still lazy) way of removing elements from the computed style
2739      * cache is as follows -- whenever a node changes in any way, the cache needs to be cleared of styles for nodes
2740      * which:</p>
2741      *
2742      * <ul>
2743      *   <li>are actually the same node as the node that changed</li>
2744      *   <li>are siblings of the node that changed</li>
2745      *   <li>are descendants of the node that changed</li>
2746      * </ul>
2747      *
2748      * <p>Additionally, whenever a <code>style</code> node or a <code>link</code> node
2749      * with <code>rel=stylesheet</code> is added or
2750      * removed, all elements should be removed from the computed style cache.</p>
2751      */
2752     private class DomHtmlAttributeChangeListenerImpl implements DomChangeListener, HtmlAttributeChangeListener {
2753 
2754         /**
2755          * Ctor.
2756          */
2757         DomHtmlAttributeChangeListenerImpl() {
2758             super();
2759         }
2760 
2761         /**
2762          * {@inheritDoc}
2763          */
2764         @Override
2765         public void nodeAdded(final DomChangeEvent event) {
2766             nodeChanged(event.getChangedNode(), null);
2767         }
2768 
2769         /**
2770          * {@inheritDoc}
2771          */
2772         @Override
2773         public void nodeDeleted(final DomChangeEvent event) {
2774             nodeChanged(event.getChangedNode(), null);
2775         }
2776 
2777         /**
2778          * {@inheritDoc}
2779          */
2780         @Override
2781         public void attributeAdded(final HtmlAttributeChangeEvent event) {
2782             nodeChanged(event.getHtmlElement(), event.getName());
2783         }
2784 
2785         /**
2786          * {@inheritDoc}
2787          */
2788         @Override
2789         public void attributeRemoved(final HtmlAttributeChangeEvent event) {
2790             nodeChanged(event.getHtmlElement(), event.getName());
2791         }
2792 
2793         /**
2794          * {@inheritDoc}
2795          */
2796         @Override
2797         public void attributeReplaced(final HtmlAttributeChangeEvent event) {
2798             nodeChanged(event.getHtmlElement(), event.getName());
2799         }
2800 
2801         private void nodeChanged(final DomNode changedNode, final String attribName) {
2802             // If a stylesheet was changed, all of our calculations could be off; clear the cache.
2803             if (changedNode instanceof HtmlStyle) {
2804                 clearComputedStyles();
2805                 return;
2806             }
2807             if (changedNode instanceof HtmlLink) {
2808                 if (((HtmlLink) changedNode).isStyleSheetLink()) {
2809                     clearComputedStyles();
2810                     return;
2811                 }
2812             }
2813 
2814             // Apparently it wasn't a stylesheet that changed; be semi-smart about what we evict and when.
2815             // null means that a node was added/removed; we always have to take care of this for the parents
2816             final boolean clearParents = attribName == null || ATTRIBUTES_AFFECTING_PARENT.contains(attribName);
2817             if (computedStylesCache_ != null) {
2818                 computedStylesCache_.nodeChanged(changedNode, clearParents);
2819             }
2820         }
2821     }
2822 
2823     /**
2824      * Cache computed styles when possible, because their calculation is very expensive.
2825      * We use a weak hash map because we don't want this cache to be the only reason
2826      * nodes are kept around in the JVM, if all other references to them are gone.
2827      */
2828     private static final class ComputedStylesCache implements Serializable {
2829         private transient WeakHashMap<DomElement, Map<String, ComputedCssStyleDeclaration>>
2830                     computedStyles_ = new WeakHashMap<>();
2831 
2832         /**
2833          * Ctor.
2834          */
2835         ComputedStylesCache() {
2836             super();
2837         }
2838 
2839         public synchronized ComputedCssStyleDeclaration get(final DomElement element,
2840                 final String normalizedPseudo) {
2841             final Map<String, ComputedCssStyleDeclaration> elementMap = computedStyles_.get(element);
2842             if (elementMap != null) {
2843                 return elementMap.get(normalizedPseudo);
2844             }
2845             return null;
2846         }
2847 
2848         public synchronized void put(final DomElement element,
2849                 final String normalizedPseudo, final ComputedCssStyleDeclaration style) {
2850             final Map<String, ComputedCssStyleDeclaration>
2851                     elementMap = computedStyles_.computeIfAbsent(element, k -> new WeakHashMap<>());
2852             elementMap.put(normalizedPseudo, style);
2853         }
2854 
2855         public synchronized void nodeChanged(final DomNode changed, final boolean clearParents) {
2856             final Iterator<Map.Entry<DomElement, Map<String, ComputedCssStyleDeclaration>>>
2857                     i = computedStyles_.entrySet().iterator();
2858             while (i.hasNext()) {
2859                 final Map.Entry<DomElement, Map<String, ComputedCssStyleDeclaration>> entry = i.next();
2860                 final DomElement node = entry.getKey();
2861                 if (changed == node
2862                     || changed.getParentNode() == node.getParentNode()
2863                     || changed.isAncestorOf(node)
2864                     || clearParents && node.isAncestorOf(changed)) {
2865                     i.remove();
2866                 }
2867             }
2868 
2869             // maybe this is a better solution but i have to think a bit more about this
2870             //
2871             //            if (computedStyles_.isEmpty()) {
2872             //                return;
2873             //            }
2874             //
2875             //            // remove all siblings
2876             //            DomNode parent = changed.getParentNode();
2877             //            if (parent != null) {
2878             //                for (DomNode sibling : parent.getChildNodes()) {
2879             //                    computedStyles_.remove(sibling.getScriptableObject());
2880             //                }
2881             //
2882             //                if (clearParents) {
2883             //                    // remove all parents
2884             //                    while (parent != null) {
2885             //                        computedStyles_.remove(parent.getScriptableObject());
2886             //                        parent = parent.getParentNode();
2887             //                    }
2888             //                }
2889             //            }
2890             //
2891             //            // remove changed itself and all descendants
2892             //            computedStyles_.remove(changed.getScriptableObject());
2893             //            for (DomNode descendant : changed.getDescendants()) {
2894             //                computedStyles_.remove(descendant.getScriptableObject());
2895             //            }
2896         }
2897 
2898         public synchronized void clear() {
2899             computedStyles_.clear();
2900         }
2901 
2902         public synchronized Map<String, ComputedCssStyleDeclaration> remove(
2903                 final DomNode element) {
2904             return computedStyles_.remove(element);
2905         }
2906 
2907         private void readObject(final ObjectInputStream in) throws IOException, ClassNotFoundException {
2908             in.defaultReadObject();
2909             computedStyles_ = new WeakHashMap<>();
2910         }
2911     }
2912 
2913     private static final class MappedElementIndexEntry implements Serializable {
2914         private final ArrayList<DomElement> elements_;
2915         private boolean sorted_;
2916 
2917         MappedElementIndexEntry() {
2918             // we do not expect to many elements having the same id/name
2919             elements_ = new ArrayList<>(2);
2920             sorted_ = false;
2921         }
2922 
2923         void add(final DomElement element) {
2924             elements_.add(element);
2925             sorted_ = false;
2926         }
2927 
2928         DomElement first() {
2929             if (elements_.isEmpty()) {
2930                 return null;
2931             }
2932 
2933             if (sorted_) {
2934                 return elements_.get(0);
2935             }
2936 
2937             Collections.sort(elements_, DOCUMENT_POSITION_COMPERATOR);
2938             sorted_ = true;
2939 
2940             return elements_.get(0);
2941         }
2942 
2943         List<DomElement> elements() {
2944             if (sorted_ || elements_.isEmpty()) {
2945                 return elements_;
2946             }
2947 
2948             Collections.sort(elements_, DOCUMENT_POSITION_COMPERATOR);
2949             sorted_ = true;
2950 
2951             return elements_;
2952         }
2953 
2954         boolean remove(final DomElement element) {
2955             return elements_.remove(element);
2956         }
2957     }
2958 }