View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit;
16  
17  import java.io.IOException;
18  import java.io.ObjectInputStream;
19  import java.io.ObjectOutputStream;
20  import java.io.Serializable;
21  import java.net.IDN;
22  import java.net.MalformedURLException;
23  import java.net.URL;
24  import java.nio.charset.Charset;
25  import java.nio.charset.StandardCharsets;
26  import java.util.ArrayList;
27  import java.util.Collections;
28  import java.util.EnumSet;
29  import java.util.HashMap;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.Objects;
33  import java.util.Set;
34  import java.util.regex.Pattern;
35  
36  import org.apache.http.auth.Credentials;
37  import org.htmlunit.http.HttpUtils;
38  import org.htmlunit.httpclient.HtmlUnitUsernamePasswordCredentials;
39  import org.htmlunit.util.NameValuePair;
40  import org.htmlunit.util.UrlUtils;
41  
42  /**
43   * Parameter object for making web requests.
44   *
45   * @author Brad Clarke
46   * @author Hans Donner
47   * @author Ahmed Ashour
48   * @author Marc Guillemot
49   * @author Rodney Gitzel
50   * @author Ronald Brill
51   * @author Adam Afeltowicz
52   * @author Joerg Werner
53   * @author Michael Lueck
54   * @author Lai Quang Duong
55   * @author Kristof Neirynck
56   */
57  public class WebRequest implements Serializable {
58  
59      /**
60       * Enum to configure request creation.
61       */
62      public enum HttpHint {
63          /** Force to include the charset. */
64          IncludeCharsetInContentTypeHeader,
65  
66          /** Disable sending of stored cookies and receiving of new cookies. */
67          BlockCookies
68      }
69  
70      private static final Pattern DOT_PATTERN = Pattern.compile("/\\./");
71      private static final Pattern DOT_DOT_PATTERN = Pattern.compile("/(?!\\.\\.)[^/]*/\\.\\./");
72      private static final Pattern REMOVE_DOTS_PATTERN = Pattern.compile("^/(\\.\\.?/)*");
73  
74      private String url_; // String instead of java.net.URL because "about:blank" URLs don't serialize correctly
75      private String proxyHost_;
76      private int proxyPort_;
77      private String proxyScheme_;
78      private boolean isSocksProxy_;
79      private HttpMethod httpMethod_ = HttpMethod.GET;
80      private FormEncodingType encodingType_ = FormEncodingType.URL_ENCODED;
81      private Map<String, String> additionalHeaders_ = new HashMap<>();
82      private Credentials urlCredentials_;
83      private Credentials credentials_;
84      private int timeout_;
85      private transient Set<HttpHint> httpHints_;
86  
87      private transient Charset charset_ = StandardCharsets.ISO_8859_1;
88      // https://datatracker.ietf.org/doc/html/rfc6838#section-4.2.1
89      // private transient Charset defaultResponseContentCharset_ = StandardCharsets.UTF_8;
90      private transient Charset defaultResponseContentCharset_ = StandardCharsets.ISO_8859_1;
91  
92      /* These two are mutually exclusive; additionally, requestBody_ should only be set for POST requests. */
93      private List<NameValuePair> requestParameters_ = Collections.emptyList();
94      private String requestBody_;
95  
96      /**
97       * Instantiates a {@link WebRequest} for the specified URL.
98       * @param url the target URL
99       * @param acceptHeader the accept header to use
100      * @param acceptEncodingHeader the accept encoding header to use
101      */
102     public WebRequest(final URL url, final String acceptHeader, final String acceptEncodingHeader) {
103         setUrl(url);
104         if (acceptHeader != null) {
105             setAdditionalHeader(HttpHeader.ACCEPT, acceptHeader);
106         }
107         if (acceptEncodingHeader != null) {
108             setAdditionalHeader(HttpHeader.ACCEPT_ENCODING, acceptEncodingHeader);
109         }
110         timeout_ = -1;
111     }
112 
113     /**
114      * Instantiates a {@link WebRequest} for the specified URL.
115      * @param url the target URL
116      * @param charset the charset to use
117      * @param refererUrl the url be used by the referer header
118      */
119     public WebRequest(final URL url, final Charset charset, final URL refererUrl) {
120         setUrl(url);
121         setCharset(charset);
122         setRefererHeader(refererUrl);
123     }
124 
125     /**
126      * @return a new request for about:blank
127      */
128     public static WebRequest newAboutBlankRequest() {
129         return new WebRequest(UrlUtils.URL_ABOUT_BLANK, "*/*", "gzip, deflate");
130     }
131 
132     /**
133      * Instantiates a {@link WebRequest} for the specified URL.
134      * @param url the target URL
135      */
136     public WebRequest(final URL url) {
137         this(url, "*/*", "gzip, deflate");
138     }
139 
140     /**
141      * Instantiates a {@link WebRequest} for the specified URL using the specified HTTP submit method.
142      * @param url the target URL
143      * @param submitMethod the HTTP submit method to use
144      */
145     public WebRequest(final URL url, final HttpMethod submitMethod) {
146         this(url);
147         setHttpMethod(submitMethod);
148     }
149 
150     /**
151      * Returns the target URL.
152      * @return the target URL
153      */
154     public URL getUrl() {
155         return UrlUtils.toUrlSafe(url_);
156     }
157 
158     /**
159      * Sets the target URL. The URL may be simplified if needed (for instance eliminating
160      * irrelevant path portions like "/./").
161      * @param url the target URL
162      */
163     public void setUrl(URL url) {
164         if (url == null) {
165             url_ = null;
166             return;
167         }
168 
169         final String path = url.getPath();
170         if (path.isEmpty()) {
171             if (!url.getFile().isEmpty() || url.getProtocol().startsWith("http")) {
172                 url = buildUrlWithNewPath(url, "/");
173             }
174         }
175         else if (path.contains("/.")) {
176             url = buildUrlWithNewPath(url, removeDots(path));
177         }
178 
179         try {
180             final String idn = IDN.toASCII(url.getHost());
181             if (!idn.equals(url.getHost())) {
182                 url = UrlUtils.getUrlWithNewHost(url, idn);
183             }
184         }
185         catch (final Exception e) {
186             throw new IllegalArgumentException(
187                     "Cannot convert the hostname of URL: '" + url.toExternalForm() + "' to ASCII.", e);
188         }
189 
190         try {
191             url_ = UrlUtils.removeRedundantPort(url).toExternalForm();
192         }
193         catch (final MalformedURLException e) {
194             throw new RuntimeException("Cannot strip default port of URL: " + url.toExternalForm(), e);
195         }
196 
197         // http://john.smith:secret@localhost
198         final String userInfo = url.getUserInfo();
199         if (userInfo != null) {
200             final int splitPos = userInfo.indexOf(':');
201             if (splitPos == -1) {
202                 urlCredentials_ = new HtmlUnitUsernamePasswordCredentials(userInfo, new char[0]);
203             }
204             else {
205                 final String username = userInfo.substring(0, splitPos);
206                 final String password = userInfo.substring(splitPos + 1);
207                 urlCredentials_ = new HtmlUnitUsernamePasswordCredentials(username, password.toCharArray());
208             }
209         }
210     }
211 
212     /*
213      * Strip a URL string of "/./" and "/../" occurrences.
214      * <p>
215      * One trick here is to repeatedly create new matchers on a given
216      * pattern, so that we can see whether it needs to be re-applied;
217      * unfortunately .replaceAll() doesn't re-process its own output,
218      * so if we create a new match with a replacement, it is missed.
219      */
220     private static String removeDots(final String path) {
221         String newPath = path;
222 
223         // remove occurrences at the beginning
224         newPath = REMOVE_DOTS_PATTERN.matcher(newPath).replaceAll("/");
225         if ("/..".equals(newPath)) {
226             newPath = "/";
227         }
228 
229         // single dots have no effect, so just remove them
230         while (DOT_PATTERN.matcher(newPath).find()) {
231             newPath = DOT_PATTERN.matcher(newPath).replaceAll("/");
232         }
233 
234         // mid-path double dots should be removed WITH the previous subdirectory and replaced
235         //  with "/" BUT ONLY IF that subdirectory's not also ".." (a regex lookahead helps with this)
236         while (DOT_DOT_PATTERN.matcher(newPath).find()) {
237             newPath = DOT_DOT_PATTERN.matcher(newPath).replaceAll("/");
238         }
239 
240         return newPath;
241     }
242 
243     private static URL buildUrlWithNewPath(URL url, final String newPath) {
244         try {
245             url = UrlUtils.getUrlWithNewPath(url, newPath);
246         }
247         catch (final Exception e) {
248             throw new RuntimeException("Cannot change path of URL: " + url.toExternalForm(), e);
249         }
250         return url;
251     }
252 
253     /**
254      * Returns the proxy host to use.
255      * @return the proxy host to use
256      */
257     public String getProxyHost() {
258         return proxyHost_;
259     }
260 
261     /**
262      * Sets the proxy host to use.
263      * @param proxyHost the proxy host to use
264      */
265     public void setProxyHost(final String proxyHost) {
266         proxyHost_ = proxyHost;
267     }
268 
269     /**
270      * Returns the proxy port to use.
271      * @return the proxy port to use
272      */
273     public int getProxyPort() {
274         return proxyPort_;
275     }
276 
277     /**
278      * Sets the proxy port to use.
279      * @param proxyPort the proxy port to use
280      */
281     public void setProxyPort(final int proxyPort) {
282         proxyPort_ = proxyPort;
283     }
284 
285     /**
286      * Returns the proxy scheme to use.
287      * @return the proxy scheme to use
288      */
289     public String getProxyScheme() {
290         return proxyScheme_;
291     }
292 
293     /**
294      * Sets the proxy scheme to use.
295      * @param proxyScheme the proxy scheme to use
296      */
297     public void setProxyScheme(final String proxyScheme) {
298         proxyScheme_ = proxyScheme;
299     }
300 
301     /**
302      * Returns whether SOCKS proxy or not.
303      * @return whether SOCKS proxy or not
304      */
305     public boolean isSocksProxy() {
306         return isSocksProxy_;
307     }
308 
309     /**
310      * Sets whether SOCKS proxy or not.
311      * @param isSocksProxy whether SOCKS proxy or not
312      */
313     public void setSocksProxy(final boolean isSocksProxy) {
314         isSocksProxy_ = isSocksProxy;
315     }
316 
317     /**
318      * @return the timeout to use
319      */
320     public int getTimeout() {
321         return timeout_;
322     }
323 
324     /**
325      * Sets the timeout to use.
326      * @param timeout the timeout to use
327      */
328     public void setTimeout(final int timeout) {
329         timeout_ = timeout;
330     }
331 
332     /**
333      * Returns the form encoding type to use.
334      * @return the form encoding type to use
335      */
336     public FormEncodingType getEncodingType() {
337         return encodingType_;
338     }
339 
340     /**
341      * Sets the form encoding type to use.
342      * @param encodingType the form encoding type to use
343      */
344     public void setEncodingType(final FormEncodingType encodingType) {
345         encodingType_ = encodingType;
346     }
347 
348     /**
349      * <p>Retrieves the request parameters used. Similar to the servlet api function
350      * getParameterMap() this works depending on the request type and collects the
351      * url parameters and the body stuff.<br>
352      * The value is also normalized - null is converted to an empty string.</p>
353      * <p>In contrast to the servlet api this creates a separate KeyValuePair for every
354      * parameter. This means that pairs with the same name can be part of the list. The
355      * servlet api will return a string[] as value for the key in this case.<br>
356      * Additionally this method includes also the uploaded files for multipart post
357      * requests.</p>
358      *
359      * @return the request parameters to use
360      */
361     public List<NameValuePair> getParameters() {
362         // developer note:
363         // this has to be in sync with org.htmlunit.HttpWebConnection.makeHttpMethod(WebRequest, HttpClientBuilder)
364 
365         // developer note:
366         // the spring org.springframework.test.web.servlet.htmlunitHtmlUnitRequestBuilder uses
367         // this method and is sensitive to all the details of the current implementation.
368 
369         final List<NameValuePair> allParameters = new ArrayList<>(
370                 HttpUtils.parseUrlQuery(getUrl().getQuery(), getCharset()));
371 
372         // the servlet api ignores these parameters but to make spring happy we include them
373         final HttpMethod httpMethod = getHttpMethod();
374         if (httpMethod == HttpMethod.POST
375             || httpMethod == HttpMethod.PUT
376             || httpMethod == HttpMethod.PATCH
377             || httpMethod == HttpMethod.DELETE
378             || httpMethod == HttpMethod.OPTIONS) {
379             if (FormEncodingType.URL_ENCODED == getEncodingType()
380                 && httpMethod != HttpMethod.OPTIONS) {
381                 // spring ignores URL_ENCODED parameters for OPTIONS requests
382                 // getRequestParameters and getRequestBody are mutually exclusive
383                 if (getRequestBody() == null) {
384                     allParameters.addAll(getRequestParameters());
385                 }
386                 else {
387                     allParameters.addAll(HttpUtils.parseUrlQuery(getRequestBody(), getCharset()));
388                 }
389             }
390             else if (FormEncodingType.MULTIPART == getEncodingType()) {
391                 if (httpMethod == HttpMethod.POST) {
392                     allParameters.addAll(getRequestParameters());
393                 }
394                 else {
395                     // for PUT, PATCH, DELETE and OPTIONS spring moves the parameters up to the query
396                     // it doesn't replace the query
397                     allParameters.addAll(0, getRequestParameters());
398                 }
399             }
400         }
401 
402         return normalize(allParameters);
403     }
404 
405     private static List<NameValuePair> normalize(final List<NameValuePair> pairs) {
406         if (pairs == null || pairs.isEmpty()) {
407             return pairs;
408         }
409 
410         final List<NameValuePair> resultingPairs = new ArrayList<>();
411         for (final NameValuePair pair : pairs) {
412             resultingPairs.add(pair.normalized());
413         }
414 
415         return resultingPairs;
416     }
417 
418     /**
419      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
420      *
421      * Retrieves the request parameters to use. If set, these request parameters will overwrite any
422      * request parameters which may be present in the {@link #getUrl() URL}. Should not be used in
423      * combination with the {@link #setRequestBody(String) request body}.
424      * @return the request parameters to use
425      */
426     public List<NameValuePair> getRequestParameters() {
427         return requestParameters_;
428     }
429 
430     /**
431      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
432      *
433      * Sets the request parameters to use. If set, these request parameters will overwrite any request
434      * parameters which may be present in the {@link #getUrl() URL}. Should not be used in combination
435      * with the {@link #setRequestBody(String) request body}.
436      * @param requestParameters the request parameters to use
437      * @throws RuntimeException if the request body has already been set
438      */
439     public void setRequestParameters(final List<NameValuePair> requestParameters) throws RuntimeException {
440         if (requestBody_ != null) {
441             final String msg = "Trying to set the request parameters, but the request body has already been specified;"
442                              + "the two are mutually exclusive!";
443             throw new RuntimeException(msg);
444         }
445         requestParameters_ = requestParameters;
446     }
447 
448     /**
449      * Returns the body content to be submitted if this is a <code>POST</code> request. Ignored for all other request
450      * types. Should not be used in combination with {@link #setRequestParameters(List) request parameters}.
451      * @return the body content to be submitted if this is a <code>POST</code> request
452      */
453     public String getRequestBody() {
454         return requestBody_;
455     }
456 
457     /**
458      * Sets the body content to be submitted if this is a {@code POST}, {@code PUT} or {@code PATCH} request.
459      * Other request types result in {@link RuntimeException}.
460      * Should not be used in combination with {@link #setRequestParameters(List) request parameters}.
461      * @param requestBody the body content to be submitted if this is a {@code POST}, {@code PUT}
462      *        or {@code PATCH} request
463      * @throws RuntimeException if the request parameters have already been set
464      *                          or this is not a {@code POST}, {@code PUT} or {@code PATCH} request.
465      */
466     public void setRequestBody(final String requestBody) throws RuntimeException {
467         if (requestParameters_ != null && !requestParameters_.isEmpty()) {
468             final String msg = "Trying to set the request body, but the request parameters have already been specified;"
469                        + "the two are mutually exclusive!";
470             throw new RuntimeException(msg);
471         }
472         if (httpMethod_ != HttpMethod.POST
473                 && httpMethod_ != HttpMethod.PUT
474                 && httpMethod_ != HttpMethod.PATCH
475                 && httpMethod_ != HttpMethod.DELETE
476                 && httpMethod_ != HttpMethod.OPTIONS) {
477             final String msg = "The request body may only be set for POST, PUT, PATCH, DELETE or OPTIONS requests!";
478             throw new RuntimeException(msg);
479         }
480         requestBody_ = requestBody;
481     }
482 
483     /**
484      * Returns the HTTP submit method to use.
485      * @return the HTTP submit method to use
486      */
487     public HttpMethod getHttpMethod() {
488         return httpMethod_;
489     }
490 
491     /**
492      * Sets the HTTP submit method to use.
493      * @param submitMethod the HTTP submit method to use
494      */
495     public void setHttpMethod(final HttpMethod submitMethod) {
496         httpMethod_ = submitMethod;
497     }
498 
499     /**
500      * Returns the additional HTTP headers to use.
501      * @return the additional HTTP headers to use
502      */
503     public Map<String, String> getAdditionalHeaders() {
504         return additionalHeaders_;
505     }
506 
507     /**
508      * Sets the additional HTTP headers to use.
509      * @param additionalHeaders the additional HTTP headers to use
510      */
511     public void setAdditionalHeaders(final Map<String, String> additionalHeaders) {
512         additionalHeaders_ = additionalHeaders;
513     }
514 
515     /**
516      * Returns whether the specified header name is already included in the additional HTTP headers.
517      * @param name the name of the additional HTTP header
518      * @return true if the specified header name is included in the additional HTTP headers
519      */
520     public boolean isAdditionalHeader(final String name) {
521         for (final String key : additionalHeaders_.keySet()) {
522             if (name.equalsIgnoreCase(key)) {
523                 return true;
524             }
525         }
526         return false;
527     }
528 
529     /**
530      * Returns the header value associated with this name.
531      * @param name the name of the additional HTTP header
532      * @return the value or null
533      */
534     public String getAdditionalHeader(final String name) {
535         String newKey = name;
536         for (final String key : additionalHeaders_.keySet()) {
537             if (name.equalsIgnoreCase(key)) {
538                 newKey = key;
539                 break;
540             }
541         }
542         return additionalHeaders_.get(newKey);
543     }
544 
545     /**
546      * Sets the referer HTTP header - only if the provided url is valid.
547      * @param url the url for the referer HTTP header
548      */
549     public void setRefererHeader(final URL url) {
550         if (url == null || !url.getProtocol().startsWith("http")) {
551             return;
552         }
553 
554         try {
555             setAdditionalHeader(HttpHeader.REFERER, UrlUtils.getUrlWithoutRef(url).toExternalForm());
556         }
557         catch (final MalformedURLException ignored) {
558             // bad luck us the whole url from the pager
559         }
560     }
561 
562     /**
563      * Sets the referer HTTP header - only if the provided url is valid.
564      * @param url the url for the referer HTTP header
565      *
566      * @deprecated as of version 4.5.0; use {@link #setRefererHeader(URL)} instead
567      */
568     @Deprecated
569     public void setRefererlHeader(final URL url) {
570         setRefererHeader(url);
571     }
572 
573     /**
574      * Sets the specified name/value pair in the additional HTTP headers.
575      * @param name the name of the additional HTTP header
576      * @param value the value of the additional HTTP header
577      */
578     public void setAdditionalHeader(final String name, final String value) {
579         String newKey = name;
580         for (final String key : additionalHeaders_.keySet()) {
581             if (name.equalsIgnoreCase(key)) {
582                 newKey = key;
583                 break;
584             }
585         }
586         additionalHeaders_.put(newKey, value);
587     }
588 
589     /**
590      * Removed the specified name/value pair from the additional HTTP headers.
591      * @param name the name of the additional HTTP header
592      */
593     public void removeAdditionalHeader(String name) {
594         for (final String key : additionalHeaders_.keySet()) {
595             if (name.equalsIgnoreCase(key)) {
596                 name = key;
597                 break;
598             }
599         }
600         additionalHeaders_.remove(name);
601     }
602 
603     /**
604      * Returns the credentials to use.
605      * @return the credentials if set as part of the url
606      */
607     public Credentials getUrlCredentials() {
608         return urlCredentials_;
609     }
610 
611     /**
612      * Returns the credentials to use.
613      * @return the credentials if set from the external builder
614      */
615     public Credentials getCredentials() {
616         return credentials_;
617     }
618 
619     /**
620      * Sets the credentials to use.
621      * @param credentials the credentials to use
622      */
623     public void setCredentials(final Credentials credentials) {
624         credentials_ = credentials;
625     }
626 
627     /**
628      * Returns the character set to use to perform the request.
629      * @return the character set to use to perform the request
630      */
631     public Charset getCharset() {
632         return charset_;
633     }
634 
635     /**
636      * Sets the character set to use to perform the request. The default value
637      * is {@link java.nio.charset.StandardCharsets#ISO_8859_1}.
638      * @param charset the character set to use to perform the request
639      */
640     public void setCharset(final Charset charset) {
641         charset_ = charset;
642     }
643 
644     /**
645      * @return the default character set to use for the response when it does not specify one.
646      */
647     public Charset getDefaultResponseContentCharset() {
648         return defaultResponseContentCharset_;
649     }
650 
651     /**
652      * Sets the default character set to use for the response when it does not specify one.
653      * <p>
654      * Unless set, the default is {@link java.nio.charset.StandardCharsets#UTF_8}.
655      * @param defaultResponseContentCharset the default character set of the response
656      */
657     public void setDefaultResponseContentCharset(final Charset defaultResponseContentCharset) {
658         this.defaultResponseContentCharset_ = Objects.requireNonNull(defaultResponseContentCharset);
659     }
660 
661     /**
662      * @param hint the hint to check for
663      * @return true if the hint is enabled
664      */
665     public boolean hasHint(final HttpHint hint) {
666         if (httpHints_ == null) {
667             return false;
668         }
669         return httpHints_.contains(hint);
670     }
671 
672     /**
673      * Enables the hint.
674      * @param hint the hint to add
675      */
676     public void addHint(final HttpHint hint) {
677         if (httpHints_ == null) {
678             httpHints_ = EnumSet.noneOf(HttpHint.class);
679         }
680         httpHints_.add(hint);
681     }
682 
683     /**
684      * Returns a string representation of this object.
685      * @return a string representation of this object
686      */
687     @Override
688     public String toString() {
689         final StringBuilder builder = new StringBuilder(100)
690                 .append(getClass().getSimpleName())
691                 .append("[<url=\"").append(url_).append('"')
692                 .append(", ").append(httpMethod_)
693                 .append(", ").append(encodingType_)
694                 .append(", ").append(requestParameters_)
695                 .append(", ").append(additionalHeaders_)
696                 .append(", ").append(credentials_)
697                 .append(">]");
698         return builder.toString();
699     }
700 
701     private void writeObject(final ObjectOutputStream oos) throws IOException {
702         oos.defaultWriteObject();
703         oos.writeObject(charset_ == null ? null : charset_.name());
704         oos.writeObject(defaultResponseContentCharset_ == null ? null : defaultResponseContentCharset_.name());
705     }
706 
707     private void readObject(final ObjectInputStream ois) throws ClassNotFoundException, IOException {
708         ois.defaultReadObject();
709         final String charsetName = (String) ois.readObject();
710         if (charsetName != null) {
711             charset_ = Charset.forName(charsetName);
712         }
713         final String defaultResponseContentCharset = (String) ois.readObject();
714         if (defaultResponseContentCharset != null) {
715             defaultResponseContentCharset_ = Charset.forName(defaultResponseContentCharset);
716         }
717     }
718 }