View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html;
16  
17  import static org.htmlunit.BrowserVersionFeatures.ANCHOR_SEND_PING_REQUEST;
18  
19  import java.io.IOException;
20  import java.net.MalformedURLException;
21  import java.net.URL;
22  import java.util.Locale;
23  import java.util.Map;
24  
25  import org.apache.commons.lang3.ArrayUtils;
26  import org.apache.commons.lang3.StringUtils;
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.htmlunit.BrowserVersion;
30  import org.htmlunit.HttpHeader;
31  import org.htmlunit.HttpMethod;
32  import org.htmlunit.Page;
33  import org.htmlunit.SgmlPage;
34  import org.htmlunit.WebClient;
35  import org.htmlunit.WebRequest;
36  import org.htmlunit.WebWindow;
37  import org.htmlunit.javascript.host.event.Event;
38  import org.htmlunit.javascript.host.html.HTMLElement;
39  import org.htmlunit.protocol.javascript.JavaScriptURLConnection;
40  import org.htmlunit.util.UrlUtils;
41  
42  /**
43   * Wrapper for the HTML element "a".
44   *
45   * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
46   * @author David K. Taylor
47   * @author <a href="mailto:cse@dynabean.de">Christian Sell</a>
48   * @author Ahmed Ashour
49   * @author Dmitri Zoubkov
50   * @author Ronald Brill
51   * @author Frank Danek
52   * @author Lai Quang Duong
53   */
54  public class HtmlAnchor extends HtmlElement {
55  
56      private static final Log LOG = LogFactory.getLog(HtmlAnchor.class);
57  
58      /** The HTML tag represented by this element. */
59      public static final String TAG_NAME = "a";
60  
61      /**
62       * Creates a new instance.
63       *
64       * @param qualifiedName the qualified name of the element type to instantiate
65       * @param page the page that contains this element
66       * @param attributes the initial attributes
67       */
68      HtmlAnchor(final String qualifiedName, final SgmlPage page,
69              final Map<String, DomAttr> attributes) {
70          super(qualifiedName, page, attributes);
71      }
72  
73      /**
74       * {@inheritDoc}
75       */
76      @Override
77      @SuppressWarnings("unchecked")
78      public <P extends Page> P click(final Event event,
79              final boolean shiftKey, final boolean ctrlKey, final boolean altKey,
80              final boolean ignoreVisibility) throws IOException {
81          WebWindow oldWebWindow = null;
82          if (ctrlKey) {
83              oldWebWindow = ((HTMLElement) event.getSrcElement()).getDomNodeOrDie()
84                      .getPage().getWebClient().getCurrentWindow();
85          }
86  
87          P page = super.click(event, shiftKey, ctrlKey, altKey, ignoreVisibility);
88  
89          if (ctrlKey) {
90              page.getEnclosingWindow().getWebClient().setCurrentWindow(oldWebWindow);
91              page = (P) oldWebWindow.getEnclosedPage();
92          }
93  
94          return page;
95      }
96  
97      /**
98       * Same as {@link #doClickStateUpdate(boolean, boolean)}, except that it accepts an {@code href} suffix,
99       * needed when a click is performed on an image map to pass information on the click position.
100      *
101      * @param shiftKey {@code true} if SHIFT is pressed
102      * @param ctrlKey {@code true} if CTRL is pressed
103      * @param hrefSuffix the suffix to add to the anchor's {@code href} attribute
104      *        (for instance coordinates from an image map)
105      * @throws IOException if an IO error occurs
106      */
107     protected void doClickStateUpdate(final boolean shiftKey, final boolean ctrlKey, final String hrefSuffix)
108             throws IOException {
109         final String href = (getHrefAttribute() + hrefSuffix).trim();
110         if (LOG.isDebugEnabled()) {
111             final String w = getPage().getEnclosingWindow().getName();
112             LOG.debug("do click action in window '" + w + "', using href '" + href + "'");
113         }
114         if (ATTRIBUTE_NOT_DEFINED == getHrefAttribute()) {
115             return;
116         }
117         final String downloadAttribute = getDownloadAttribute();
118         HtmlPage page = (HtmlPage) getPage();
119         if (StringUtils.startsWithIgnoreCase(href, JavaScriptURLConnection.JAVASCRIPT_PREFIX)) {
120             final StringBuilder builder = new StringBuilder(href.length());
121             builder.append(JavaScriptURLConnection.JAVASCRIPT_PREFIX);
122             for (int i = JavaScriptURLConnection.JAVASCRIPT_PREFIX.length(); i < href.length(); i++) {
123                 final char ch = href.charAt(i);
124                 if (ch == '%' && i + 2 < href.length()) {
125                     final char ch1 = Character.toUpperCase(href.charAt(i + 1));
126                     final char ch2 = Character.toUpperCase(href.charAt(i + 2));
127                     if ((Character.isDigit(ch1) || ch1 >= 'A' && ch1 <= 'F')
128                             && (Character.isDigit(ch2) || ch2 >= 'A' && ch2 <= 'F')) {
129                         builder.append((char) Integer.parseInt(href.substring(i + 1, i + 3), 16));
130                         i += 2;
131                         continue;
132                     }
133                 }
134                 builder.append(ch);
135             }
136 
137             final String target;
138             if (shiftKey || ctrlKey || ATTRIBUTE_NOT_DEFINED != downloadAttribute) {
139                 target = WebClient.TARGET_BLANK;
140             }
141             else {
142                 target = page.getResolvedTarget(getTargetAttribute());
143             }
144             final WebWindow win = page.getWebClient().openTargetWindow(page.getEnclosingWindow(),
145                     target, WebClient.TARGET_SELF);
146             Page enclosedPage = win.getEnclosedPage();
147             if (enclosedPage == null) {
148                 win.getWebClient().getPage(win, WebRequest.newAboutBlankRequest());
149                 enclosedPage = win.getEnclosedPage();
150             }
151             if (enclosedPage != null && enclosedPage.isHtmlPage()) {
152                 page = (HtmlPage) enclosedPage;
153                 page.executeJavaScript(builder.toString(), "javascript url", getStartLineNumber());
154             }
155             return;
156         }
157 
158         final URL url = getTargetUrl(href, page);
159 
160         final WebClient webClient = page.getWebClient();
161         final BrowserVersion browser = webClient.getBrowserVersion();
162         if (ATTRIBUTE_NOT_DEFINED != getPingAttribute() && browser.hasFeature(ANCHOR_SEND_PING_REQUEST)) {
163             final URL pingUrl = getTargetUrl(getPingAttribute(), page);
164             final WebRequest pingRequest = new WebRequest(pingUrl, HttpMethod.POST);
165             pingRequest.setAdditionalHeader(HttpHeader.PING_FROM, page.getUrl().toExternalForm());
166             pingRequest.setAdditionalHeader(HttpHeader.PING_TO, url.toExternalForm());
167             pingRequest.setRequestBody("PING");
168             webClient.loadWebResponse(pingRequest);
169         }
170 
171         final WebRequest webRequest = new WebRequest(url, browser.getHtmlAcceptHeader(),
172                                                             browser.getAcceptEncodingHeader());
173         // use the page encoding even if this is a GET requests
174         webRequest.setCharset(page.getCharset());
175 
176         if (!relContainsNoreferrer()) {
177             webRequest.setRefererHeader(page.getUrl());
178         }
179 
180         if (LOG.isDebugEnabled()) {
181             LOG.debug(
182                     "Getting page for " + url.toExternalForm()
183                     + ", derived from href '" + href
184                     + "', using the originating URL "
185                     + page.getUrl());
186         }
187 
188         final String target;
189         if (shiftKey || ctrlKey
190                 || (webClient.getAttachmentHandler() == null
191                         && ATTRIBUTE_NOT_DEFINED != downloadAttribute)) {
192             target = WebClient.TARGET_BLANK;
193         }
194         else {
195             target = page.getResolvedTarget(getTargetAttribute());
196         }
197         page.getWebClient().download(page.getEnclosingWindow(), target, webRequest,
198                 true, false, (ATTRIBUTE_NOT_DEFINED != downloadAttribute) ? downloadAttribute : null, "Link click");
199     }
200 
201     private boolean relContainsNoreferrer() {
202         String rel = getRelAttribute();
203         if (rel != null) {
204             rel = rel.toLowerCase(Locale.ROOT);
205             return ArrayUtils.contains(org.htmlunit.util.StringUtils.splitAtBlank(rel), "noreferrer");
206         }
207         return false;
208     }
209 
210     /**
211      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
212      *
213      * @param href the href
214      * @param page the HtmlPage
215      * @return the calculated target url.
216      * @throws MalformedURLException if an IO error occurs
217      */
218     public static URL getTargetUrl(final String href, final HtmlPage page) throws MalformedURLException {
219         URL url = page.getFullyQualifiedUrl(href);
220         // fix for empty url
221         if (StringUtils.isEmpty(href)) {
222             url = UrlUtils.getUrlWithNewRef(url, null);
223         }
224         return url;
225     }
226 
227     /**
228      * {@inheritDoc}
229      */
230     @Override
231     protected boolean doClickStateUpdate(final boolean shiftKey, final boolean ctrlKey) throws IOException {
232         doClickStateUpdate(shiftKey, ctrlKey, "");
233         return false;
234     }
235 
236     /**
237      * Returns the value of the attribute {@code charset}. Refer to the
238      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
239      * documentation for details on the use of this attribute.
240      *
241      * @return the value of the attribute {@code charset} or an empty string if that attribute isn't defined
242      */
243     public final String getCharsetAttribute() {
244         return getAttributeDirect("charset");
245     }
246 
247     /**
248      * Returns the value of the attribute {@code type}. Refer to the
249      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
250      * documentation for details on the use of this attribute.
251      *
252      * @return the value of the attribute {@code type} or an empty string if that attribute isn't defined
253      */
254     public final String getTypeAttribute() {
255         return getAttributeDirect(TYPE_ATTRIBUTE);
256     }
257 
258     /**
259      * Returns the value of the attribute {@code name}. Refer to the
260      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
261      * documentation for details on the use of this attribute.
262      *
263      * @return the value of the attribute {@code name} or an empty string if that attribute isn't defined
264      */
265     public final String getNameAttribute() {
266         return getAttributeDirect(NAME_ATTRIBUTE);
267     }
268 
269     /**
270      * Returns the value of the attribute {@code href}. Refer to the
271      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
272      * documentation for details on the use of this attribute.
273      *
274      * @return the value of the attribute {@code href} or an empty string if that attribute isn't defined
275      */
276     public final String getHrefAttribute() {
277         return getAttributeDirect("href").trim();
278     }
279 
280     /**
281      * Returns the value of the attribute {@code hreflang}. Refer to the
282      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
283      * documentation for details on the use of this attribute.
284      *
285      * @return the value of the attribute {@code hreflang} or an empty string if that attribute isn't defined
286      */
287     public final String getHrefLangAttribute() {
288         return getAttributeDirect("hreflang");
289     }
290 
291     /**
292      * Returns the value of the attribute {@code rel}. Refer to the
293      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
294      * documentation for details on the use of this attribute.
295      *
296      * @return the value of the attribute {@code rel} or an empty string if that attribute isn't defined
297      */
298     public final String getRelAttribute() {
299         return getAttributeDirect("rel");
300     }
301 
302     /**
303      * Returns the value of the attribute {@code rev}. Refer to the
304      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
305      * documentation for details on the use of this attribute.
306      *
307      * @return the value of the attribute {@code rev} or an empty string if that attribute isn't defined
308      */
309     public final String getRevAttribute() {
310         return getAttributeDirect("rev");
311     }
312 
313     /**
314      * Returns the value of the attribute {@code accesskey}. Refer to the
315      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
316      * documentation for details on the use of this attribute.
317      *
318      * @return the value of the attribute {@code accesskey} or an empty string if that attribute isn't defined
319      */
320     public final String getAccessKeyAttribute() {
321         return getAttributeDirect("accesskey");
322     }
323 
324     /**
325      * Returns the value of the attribute {@code shape}. Refer to the
326      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
327      * documentation for details on the use of this attribute.
328      *
329      * @return the value of the attribute {@code shape} or an empty string if that attribute isn't defined
330      */
331     public final String getShapeAttribute() {
332         return getAttributeDirect("shape");
333     }
334 
335     /**
336      * Returns the value of the attribute {@code coords}. Refer to the
337      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
338      * documentation for details on the use of this attribute.
339      *
340      * @return the value of the attribute {@code coords} or an empty string if that attribute isn't defined
341      */
342     public final String getCoordsAttribute() {
343         return getAttributeDirect("coords");
344     }
345 
346     /**
347      * Returns the value of the attribute {@code tabindex}. Refer to the
348      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
349      * documentation for details on the use of this attribute.
350      *
351      * @return the value of the attribute {@code tabindex} or an empty string if that attribute isn't defined
352      */
353     public final String getTabIndexAttribute() {
354         return getAttributeDirect("tabindex");
355     }
356 
357     /**
358      * Returns the value of the attribute {@code onfocus}. Refer to the
359      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
360      * documentation for details on the use of this attribute.
361      *
362      * @return the value of the attribute {@code onfocus} or an empty string if that attribute isn't defined
363      */
364     public final String getOnFocusAttribute() {
365         return getAttributeDirect("onfocus");
366     }
367 
368     /**
369      * Returns the value of the attribute {@code onblur}. Refer to the
370      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
371      * documentation for details on the use of this attribute.
372      *
373      * @return the value of the attribute {@code onblur} or an empty string if that attribute isn't defined
374      */
375     public final String getOnBlurAttribute() {
376         return getAttributeDirect("onblur");
377     }
378 
379     /**
380      * Returns the value of the attribute {@code target}. Refer to the
381      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
382      * documentation for details on the use of this attribute.
383      *
384      * @return the value of the attribute {@code target} or an empty string if that attribute isn't defined
385      */
386     public final String getTargetAttribute() {
387         return getAttributeDirect("target");
388     }
389 
390     /**
391      * Open this link in a new window, much as web browsers do when you shift-click a link or use the context
392      * menu to open in a new window.
393      * <p>
394      * It should be noted that even web browsers will sometimes not give the expected result when using this
395      * method of following links. Links that have no real href and rely on JavaScript to do their work will
396      * fail.
397      *
398      * @return the page opened by this link, nested in a new {@link org.htmlunit.TopLevelWindow}
399      * @throws MalformedURLException if the href could not be converted to a valid URL
400      */
401     public final Page openLinkInNewWindow() throws MalformedURLException {
402         final URL target = ((HtmlPage) getPage()).getFullyQualifiedUrl(getHrefAttribute());
403         final String windowName = "HtmlAnchor.openLinkInNewWindow() target";
404         final WebWindow newWindow = getPage().getWebClient().openWindow(target, windowName);
405         return newWindow.getEnclosedPage();
406     }
407 
408     @Override
409     protected boolean isEmptyXmlTagExpanded() {
410         return true;
411     }
412 
413     /**
414      * {@inheritDoc}
415      */
416     @Override
417     public DisplayStyle getDefaultStyleDisplay() {
418         return DisplayStyle.INLINE;
419     }
420 
421     /**
422      * {@inheritDoc}
423      */
424     @Override
425     public boolean handles(final Event event) {
426         if (Event.TYPE_BLUR.equals(event.getType()) || Event.TYPE_FOCUS.equals(event.getType())) {
427             return true;
428         }
429         return super.handles(event);
430     }
431 
432     /**
433      * Returns the value of the attribute {@code ping}.
434      *
435      * @return the value of the attribute {@code ping}
436      */
437     public final String getPingAttribute() {
438         return getAttributeDirect("ping");
439     }
440 
441     /**
442      * Returns the value of the attribute {@code download}.
443      *
444      * @return the value of the attribute {@code download}
445      */
446     public final String getDownloadAttribute() {
447         return getAttributeDirect("download");
448     }
449 }