View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html;
16  
17  import java.io.IOException;
18  import java.net.MalformedURLException;
19  import java.net.URL;
20  import java.nio.charset.Charset;
21  import java.util.Map;
22  import java.util.Objects;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.htmlunit.FailingHttpStatusCodeException;
27  import org.htmlunit.FrameContentHandler;
28  import org.htmlunit.Page;
29  import org.htmlunit.SgmlPage;
30  import org.htmlunit.WebClient;
31  import org.htmlunit.WebClientOptions;
32  import org.htmlunit.WebRequest;
33  import org.htmlunit.WebWindow;
34  import org.htmlunit.javascript.AbstractJavaScriptEngine;
35  import org.htmlunit.javascript.PostponedAction;
36  import org.htmlunit.protocol.javascript.JavaScriptURLConnection;
37  import org.htmlunit.util.UrlUtils;
38  import org.w3c.dom.Attr;
39  
40  /**
41   * Base class for frame and iframe.
42   *
43   * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
44   * @author David K. Taylor
45   * @author <a href="mailto:cse@dynabean.de">Christian Sell</a>
46   * @author Marc Guillemot
47   * @author David D. Kilzer
48   * @author Stefan Anzinger
49   * @author Ahmed Ashour
50   * @author Dmitri Zoubkov
51   * @author Daniel Gredler
52   * @author Ronald Brill
53   * @author Frank Danek
54   * @author Lai Quang Duong
55   */
56  public abstract class BaseFrameElement extends HtmlElement {
57  
    /** Logger of this class. */
    private static final Log LOG = LogFactory.getLog(BaseFrameElement.class);
    /** Window hosting the content of this frame; left {@code null} by init() when there is no HtmlPage. */
    private FrameWindow enclosedWindow_;
    /** Becomes {@code true} once setContentLoaded() has been called. */
    private boolean contentLoaded_;
    /** Tells onAddedToPage() to trigger loadSrc() when the element gets added to a page. */
    private boolean loadSrcWhenAddedToPage_;
62  
63      /**
64       * Creates an instance of BaseFrame.
65       *
66       * @param qualifiedName the qualified name of the element type to instantiate
67       * @param page the HtmlPage that contains this element
68       * @param attributes the initial attributes
69       */
70      protected BaseFrameElement(final String qualifiedName, final SgmlPage page,
71              final Map<String, DomAttr> attributes) {
72          super(qualifiedName, page, attributes);
73  
74          init();
75  
76          if (null != page && page.isHtmlPage() && ((HtmlPage) page).isParsingHtmlSnippet()) {
77              // if created by the HTMLParser the src attribute is not set via setAttribute() or some other method but is
78              // part of the given attributes already.
79              final String src = getSrcAttribute();
80              if (ATTRIBUTE_NOT_DEFINED != src && !UrlUtils.ABOUT_BLANK.equals(src)) {
81                  loadSrcWhenAddedToPage_ = true;
82              }
83          }
84      }
85  
86      private void init() {
87          FrameWindow enclosedWindow = null;
88          try {
89              final HtmlPage htmlPage = getHtmlPageOrNull();
90              if (null != htmlPage) { // if loaded as part of XHR.responseXML, don't load content
91                  enclosedWindow = new FrameWindow(this);
92                  // put about:blank in the window to allow JS to run on this frame before the
93                  // real content is loaded
94                  final WebClient webClient = htmlPage.getWebClient();
95                  final HtmlPage temporaryPage = webClient.getPage(enclosedWindow, WebRequest.newAboutBlankRequest());
96                  temporaryPage.setReadyState(READY_STATE_LOADING);
97              }
98          }
99          catch (final FailingHttpStatusCodeException | IOException ignored) {
100             // should never occur
101         }
102         enclosedWindow_ = enclosedWindow;
103     }
104 
105     /**
106      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
107      *
108      * Called after the node for the {@code frame} or {@code iframe} has been added to the containing page.
109      * The node needs to be added first to allow JavaScript in the frame to see the frame in the parent.
110      * @throws FailingHttpStatusCodeException if the server returns a failing status code AND the property
111      *      {@link org.htmlunit.WebClientOptions#setThrowExceptionOnFailingStatusCode(boolean)} is
112      *      set to true
113      */
114 
115     public void loadInnerPage() throws FailingHttpStatusCodeException {
116         String source = getSrcAttribute();
117         if (source.isEmpty()) {
118             source = UrlUtils.ABOUT_BLANK;
119         }
120 
121         loadInnerPageIfPossible(source);
122 
123         final Page enclosedPage = getEnclosedPage();
124         if (enclosedPage != null && enclosedPage.isHtmlPage()) {
125             final HtmlPage htmlPage = (HtmlPage) enclosedPage;
126 
127             final AbstractJavaScriptEngine<?> jsEngine = htmlPage.getWebClient().getJavaScriptEngine();
128             if (jsEngine != null && jsEngine.isScriptRunning()) {
129                 final PostponedAction action = new PostponedAction(getPage(), "BaseFrame.loadInnerPage") {
130                     @Override
131                     public void execute() {
132                         htmlPage.setReadyState(READY_STATE_COMPLETE);
133                     }
134                 };
135                 jsEngine.addPostponedAction(action);
136             }
137             else {
138                 htmlPage.setReadyState(READY_STATE_COMPLETE);
139             }
140         }
141     }
142 
143     /**
144      * Indicates if the content specified by the {@code src} attribute has been loaded or not.
145      * The initial state of a frame contains an "about:blank" that is not loaded like
146      * something specified in {@code src} attribute.
147      * @return {@code false} if the frame is still in its initial state.
148      */
149     boolean isContentLoaded() {
150         return contentLoaded_;
151     }
152 
153     /**
154      * Changes the state of the {@code contentLoaded_} attribute to true.
155      * This is needed, if the content is set from javascript to avoid
156      * later overwriting from method org.htmlunit.html.HtmlPage.loadFrames().
157      */
158     void setContentLoaded() {
159         contentLoaded_ = true;
160     }
161 
162     /**
163      * @throws FailingHttpStatusCodeException if the server returns a failing status code AND the property
164      *      {@link WebClientOptions#setThrowExceptionOnFailingStatusCode(boolean)} is set to true
165      */
166     private void loadInnerPageIfPossible(final String src) throws FailingHttpStatusCodeException {
167         setContentLoaded();
168 
169         String source = src;
170         final SgmlPage page = getPage();
171         final WebClient webClient = page.getWebClient();
172         final FrameContentHandler handler = webClient.getFrameContentHandler();
173         if (null != handler && !handler.loadFrameDocument(this)) {
174             source = UrlUtils.ABOUT_BLANK;
175         }
176 
177         if (!source.isEmpty()) {
178             final URL url;
179             try {
180                 url = ((HtmlPage) page).getFullyQualifiedUrl(source);
181             }
182             catch (final MalformedURLException e) {
183                 notifyIncorrectness("Invalid src attribute of " + getTagName() + ": url=[" + source + "]. Ignored.");
184                 return;
185             }
186 
187             final URL pageUrl = page.getUrl();
188 
189             // accessing to local resource is forbidden for security reason
190             if (!"file".equals(pageUrl.getProtocol()) && "file".equals(url.getProtocol())) {
191                 notifyIncorrectness("Not allowed to load local resource: " + source);
192                 return;
193             }
194 
195             final Charset pageCharset = page.getCharset();
196             final WebRequest request = new WebRequest(url, pageCharset, pageUrl);
197 
198             if (isAlreadyLoadedByAncestor(url, request.getCharset())) {
199                 notifyIncorrectness("Recursive src attribute of " + getTagName() + ": url=[" + source + "]. Ignored.");
200                 return;
201             }
202 
203             // Use parent document's charset as container charset if same origin
204             // https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding
205             if (Objects.equals(pageUrl.getProtocol(), url.getProtocol())
206                     && Objects.equals(pageUrl.getAuthority(), url.getAuthority())) {
207                 request.setDefaultResponseContentCharset(pageCharset);
208             }
209 
210             try {
211                 webClient.getPage(enclosedWindow_, request);
212             }
213             catch (final IOException e) {
214                 if (LOG.isErrorEnabled()) {
215                     LOG.error("IOException when getting content for " + getTagName() + ": url=[" + url + "]", e);
216                 }
217             }
218         }
219     }
220 
221     /**
222      * Test if the provided URL is the one of the parents which would cause an infinite loop.
223      * @param url the URL to test
224      * @param charset the request charset
225      * @return {@code false} if no parent has already this URL
226      */
227     private boolean isAlreadyLoadedByAncestor(final URL url, final Charset charset) {
228         WebWindow window = getPage().getEnclosingWindow();
229         int nesting = 0;
230         while (window instanceof FrameWindow) {
231             nesting++;
232             if (nesting > 9) {
233                 return true;
234             }
235 
236             final URL encUrl = UrlUtils.encodeUrl(url, charset);
237             if (UrlUtils.sameFile(encUrl, window.getEnclosedPage().getUrl())) {
238                 return true;
239             }
240 
241             if (window == window.getParentWindow()) {
242                 // TODO: should getParentWindow() return null on top windows?
243                 window = null;
244             }
245             else {
246                 window = window.getParentWindow();
247             }
248         }
249         return false;
250     }
251 
252     /**
253      * Returns the value of the attribute {@code longdesc}. Refer to the
254      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
255      * documentation for details on the use of this attribute.
256      *
257      * @return the value of the attribute {@code longdesc} or an empty string if that attribute isn't defined
258      */
259     public final String getLongDescAttribute() {
260         return getAttributeDirect("longdesc");
261     }
262 
263     /**
264      * Returns the value of the attribute {@code name}. Refer to the
265      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
266      * documentation for details on the use of this attribute.
267      *
268      * @return the value of the attribute {@code name} or an empty string if that attribute isn't defined
269      */
270     public final String getNameAttribute() {
271         return getAttributeDirect(NAME_ATTRIBUTE);
272     }
273 
274     /**
275      * Sets the value of the {@code name} attribute.
276      *
277      * @param name the new window name
278      */
279     public final void setNameAttribute(final String name) {
280         setAttribute(NAME_ATTRIBUTE, name);
281     }
282 
283     /**
284      * Returns the value of the attribute {@code src}. Refer to the
285      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
286      * documentation for details on the use of this attribute.
287      *
288      * @return the value of the attribute {@code src} or an empty string if that attribute isn't defined
289      */
290     public final String getSrcAttribute() {
291         return getSrcAttributeNormalized();
292     }
293 
294     /**
295      * Returns the value of the attribute {@code frameborder}. Refer to the
296      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
297      * documentation for details on the use of this attribute.
298      *
299      * @return the value of the attribute {@code frameborder} or an empty string if that attribute isn't defined
300      */
301     public final String getFrameBorderAttribute() {
302         return getAttributeDirect("frameborder");
303     }
304 
305     /**
306      * Returns the value of the attribute {@code marginwidth}. Refer to the
307      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
308      * documentation for details on the use of this attribute.
309      *
310      * @return the value of the attribute {@code marginwidth} or an empty string if that attribute isn't defined
311      */
312     public final String getMarginWidthAttribute() {
313         return getAttributeDirect("marginwidth");
314     }
315 
316     /**
317      * Returns the value of the attribute {@code marginheight}. Refer to the
318      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
319      * documentation for details on the use of this attribute.
320      *
321      * @return the value of the attribute {@code marginheight} or an empty string if that attribute isn't defined
322      */
323     public final String getMarginHeightAttribute() {
324         return getAttributeDirect("marginheight");
325     }
326 
327     /**
328      * Returns the value of the attribute {@code noresize}. Refer to the
329      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
330      * documentation for details on the use of this attribute.
331      *
332      * @return the value of the attribute {@code noresize} or an empty string if that attribute isn't defined
333      */
334     public final String getNoResizeAttribute() {
335         return getAttributeDirect("noresize");
336     }
337 
338     /**
339      * Returns the value of the attribute {@code scrolling}. Refer to the
340      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
341      * documentation for details on the use of this attribute.
342      *
343      * @return the value of the attribute {@code scrolling} or an empty string if that attribute isn't defined
344      */
345     public final String getScrollingAttribute() {
346         return getAttributeDirect("scrolling");
347     }
348 
349     /**
350      * Returns the value of the attribute {@code onload}. This attribute is not
351      * actually supported by the HTML specification however it is supported
352      * by the popular browsers.
353      *
354      * @return the value of the attribute {@code onload} or an empty string if that attribute isn't defined
355      */
356     public final String getOnLoadAttribute() {
357         return getAttributeDirect("onload");
358     }
359 
360     /**
361      * Returns the currently loaded page in the enclosed window.
362      * This is a facility method for <code>getEnclosedWindow().getEnclosedPage()</code>.
363      * @see WebWindow#getEnclosedPage()
364      * @return the currently loaded page in the enclosed window, or {@code null} if no page has been loaded
365      */
366     public Page getEnclosedPage() {
367         return getEnclosedWindow().getEnclosedPage();
368     }
369 
370     /**
371      * Gets the window enclosed in this frame.
372      * @return the window enclosed in this frame
373      */
374     public FrameWindow getEnclosedWindow() {
375         return enclosedWindow_;
376     }
377 
378     /**
379      * Sets the value of the {@code src} attribute. Also loads the frame with the specified URL, if possible.
380      * @param attribute the new value of the {@code src} attribute
381      */
382     public final void setSrcAttribute(final String attribute) {
383         setAttribute(SRC_ATTRIBUTE, attribute);
384     }
385 
386     /**
387      * {@inheritDoc}
388      */
389     @Override
390     protected void setAttributeNS(final String namespaceURI, final String qualifiedName, String attributeValue,
391             final boolean notifyAttributeChangeListeners, final boolean notifyMutationObserver) {
392         final String qualifiedNameLC = org.htmlunit.util.StringUtils.toRootLowerCase(qualifiedName);
393         if (null != attributeValue && SRC_ATTRIBUTE.equals(qualifiedNameLC)) {
394             attributeValue = attributeValue.trim();
395         }
396 
397         super.setAttributeNS(namespaceURI, qualifiedNameLC, attributeValue, notifyAttributeChangeListeners,
398                 notifyMutationObserver);
399 
400         // do not use equals() here
401         // see HTMLIFrameElement2Test.documentCreateElement_onLoad_srcAboutBlank()
402         if (SRC_ATTRIBUTE.equals(qualifiedNameLC) && UrlUtils.ABOUT_BLANK != attributeValue) {
403             if (isAttachedToPage()) {
404                 loadSrc();
405             }
406             else {
407                 loadSrcWhenAddedToPage_ = true;
408             }
409         }
410     }
411 
412     /**
413      * {@inheritDoc}
414      */
415     @Override
416     public Attr setAttributeNode(final Attr attribute) {
417         final String qualifiedName = attribute.getName();
418         String attributeValue = null;
419         if (SRC_ATTRIBUTE.equals(qualifiedName)) {
420             attributeValue = attribute.getValue().trim();
421         }
422 
423         final Attr result = super.setAttributeNode(attribute);
424 
425         if (SRC_ATTRIBUTE.equals(qualifiedName) && !UrlUtils.ABOUT_BLANK.equals(attributeValue)) {
426             if (isAttachedToPage()) {
427                 loadSrc();
428             }
429             else {
430                 loadSrcWhenAddedToPage_ = true;
431             }
432         }
433 
434         return result;
435     }
436 
437     private void loadSrc() {
438         loadSrcWhenAddedToPage_ = false;
439         final String src = getSrcAttribute();
440 
441         // recreate a window if the old one was closed
442         if (enclosedWindow_.isClosed()) {
443             init();
444         }
445 
446         final AbstractJavaScriptEngine<?> jsEngine = getPage().getWebClient().getJavaScriptEngine();
447         // When src is set from a script, loading is postponed until script finishes
448         // in fact this implementation is probably wrong: JavaScript URL should be
449         // first evaluated and only loading, when any, should be postponed.
450         if (jsEngine == null || !jsEngine.isScriptRunning()
451                 || src.startsWith(JavaScriptURLConnection.JAVASCRIPT_PREFIX)) {
452             loadInnerPageIfPossible(src);
453         }
454         else {
455             final Page pageInFrame = getEnclosedPage();
456             final PostponedAction action = new PostponedAction(getPage(), "BaseFrame.loadSrc") {
457                 @Override
458                 public void execute() throws Exception {
459                     if (!src.isEmpty() && getSrcAttribute().equals(src)) {
460                         loadInnerPage();
461                     }
462                 }
463 
464                 @Override
465                 public boolean isStillAlive() {
466                     // skip if page in frame has already been changed
467                     return super.isStillAlive() && pageInFrame == getEnclosedPage();
468                 }
469             };
470             jsEngine.addPostponedAction(action);
471         }
472     }
473 
474     /**
475      * Creates a new {@link WebWindow} for the new clone.
476      * {@inheritDoc}
477      */
478     @Override
479     public DomNode cloneNode(final boolean deep) {
480         final BaseFrameElement clone = (BaseFrameElement) super.cloneNode(deep);
481         clone.init();
482         return clone;
483     }
484 
485     @Override
486     protected void onAddedToPage() {
487         super.onAddedToPage();
488 
489         if (loadSrcWhenAddedToPage_) {
490             loadSrc();
491         }
492     }
493 
494     @Override
495     public void remove() {
496         super.remove();
497         loadSrcWhenAddedToPage_ = true;
498         getEnclosedWindow().close();
499     }
500 
501     @Override
502     public final void removeAttribute(final String attributeName) {
503         super.removeAttribute(attributeName);
504 
505         // TODO find a better implementation without all the code duplication
506         if (isAttachedToPage()) {
507             loadSrcWhenAddedToPage_ = false;
508             final String src = getSrcAttribute();
509 
510             final AbstractJavaScriptEngine<?> jsEngine = getPage().getWebClient().getJavaScriptEngine();
511             // When src is set from a script, loading is postponed until script finishes
512             // in fact this implementation is probably wrong: JavaScript URL should be
513             // first evaluated and only loading, when any, should be postponed.
514             if (jsEngine == null || !jsEngine.isScriptRunning()) {
515                 loadInnerPageIfPossible(src);
516             }
517             else {
518                 final Page pageInFrame = getEnclosedPage();
519                 final PostponedAction action = new PostponedAction(getPage(), "BaseFrame.removeAttribute") {
520                     @Override
521                     public void execute() throws Exception {
522                         loadInnerPage();
523                     }
524 
525                     @Override
526                     public boolean isStillAlive() {
527                         // skip if page in frame has already been changed
528                         return super.isStillAlive() && pageInFrame == getEnclosedPage();
529                     }
530                 };
531                 jsEngine.addPostponedAction(action);
532             }
533         }
534         else {
535             loadSrcWhenAddedToPage_ = true;
536         }
537     }
538 }