View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.httpclient;
16  
17  import java.util.ArrayList;
18  import java.util.BitSet;
19  import java.util.Calendar;
20  import java.util.Collections;
21  import java.util.Comparator;
22  import java.util.Date;
23  import java.util.List;
24  import java.util.Locale;
25  
26  import org.apache.commons.lang3.StringUtils;
27  import org.apache.http.FormattedHeader;
28  import org.apache.http.Header;
29  import org.apache.http.HeaderElement;
30  import org.apache.http.NameValuePair;
31  import org.apache.http.ParseException;
32  import org.apache.http.client.utils.DateUtils;
33  import org.apache.http.cookie.Cookie;
34  import org.apache.http.cookie.CookieAttributeHandler;
35  import org.apache.http.cookie.CookieOrigin;
36  import org.apache.http.cookie.CookiePathComparator;
37  import org.apache.http.cookie.MalformedCookieException;
38  import org.apache.http.cookie.SM;
39  import org.apache.http.impl.cookie.BasicClientCookie;
40  import org.apache.http.impl.cookie.BasicCommentHandler;
41  import org.apache.http.impl.cookie.CookieSpecBase;
42  import org.apache.http.message.BasicHeader;
43  import org.apache.http.message.BasicHeaderElement;
44  import org.apache.http.message.BasicNameValuePair;
45  import org.apache.http.message.BufferedHeader;
46  import org.apache.http.message.ParserCursor;
47  import org.apache.http.message.TokenParser;
48  import org.apache.http.util.CharArrayBuffer;
49  import org.htmlunit.BrowserVersion;
50  
51  /**
52   * Customized BrowserCompatSpec for HtmlUnit.
53   * <p>
54   * Workaround for <a href="https://issues.apache.org/jira/browse/HTTPCLIENT-1006">HttpClient bug 1006</a>:
55   * quotes are wrongly removed in cookie's values.
56  
57   * Implementation is based on the HttpClient code.
58   *
59   * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
60   * @author Noboru Sinohara
61   * @author David D. Kilzer
62   * @author Marc Guillemot
63   * @author Brad Clarke
64   * @author Ahmed Ashour
65   * @author Nicolas Belisle
66   * @author Ronald Brill
67   * @author John J Murdoch
68   */
69  public class HtmlUnitBrowserCompatCookieSpec extends CookieSpecBase {
70  
71      /** The cookie name used for cookies with no name (HttpClient doesn't like empty names). */
72      public static final String EMPTY_COOKIE_NAME = "HTMLUNIT_EMPTY_COOKIE";
73  
74      /** Workaround for domain of local files. */
75      public static final String LOCAL_FILESYSTEM_DOMAIN = "LOCAL_FILESYSTEM";
76  
77      /**
78       * Comparator for sending cookies in right order.
79       * See specification:
80       * - RFC2109 (#4.3.4) http://www.ietf.org/rfc/rfc2109.txt
81       * - RFC2965 (#3.3.4) http://www.ietf.org/rfc/rfc2965.txt http://www.ietf.org/rfc/rfc2109.txt
82       */
83      private static final Comparator<Cookie> COOKIE_COMPARATOR = new CookiePathComparator();
84  
85      private static final NetscapeDraftHeaderParser DEFAULT_NETSCAPE_DRAFT_HEADER_PARSER
86                              = new NetscapeDraftHeaderParser();
87  
88      static final Date DATE_1_1_1970;
89  
90      static {
91          final Calendar calendar = Calendar.getInstance(Locale.ROOT);
92          calendar.setTimeZone(DateUtils.GMT);
93          calendar.set(1970, Calendar.JANUARY, 1, 0, 0, 0);
94          calendar.set(Calendar.MILLISECOND, 0);
95          DATE_1_1_1970 = calendar.getTime();
96      }
97  
98      /**
99       * Constructor.
100      *
101      * @param browserVersion the {@link BrowserVersion} to simulate
102      */
103     public HtmlUnitBrowserCompatCookieSpec(final BrowserVersion browserVersion) {
104         super(new HtmlUnitVersionAttributeHandler(),
105                 new HtmlUnitDomainHandler(browserVersion),
106                 new HtmlUnitPathHandler(),
107                 new HtmlUnitMaxAgeHandler(),
108                 new HtmlUnitSecureHandler(),
109                 new BasicCommentHandler(),
110                 new HtmlUnitExpiresHandler(browserVersion),
111                 new HtmlUnitHttpOnlyHandler(),
112                 new HtmlUnitSameSiteHandler());
113     }
114 
115     /**
116      * {@inheritDoc}
117      */
118     @Override
119     public List<Cookie> parse(Header header, final CookieOrigin origin) throws MalformedCookieException {
120         // first a hack to support empty headers
121         final String text = header.getValue();
122         int endPos = text.indexOf(';');
123         if (endPos < 0) {
124             endPos = text.indexOf('=');
125         }
126         else {
127             final int pos = text.indexOf('=');
128             if (pos > endPos) {
129                 endPos = -1;
130             }
131             else {
132                 endPos = pos;
133             }
134         }
135         if (endPos < 0) {
136             header = new BasicHeader(header.getName(), EMPTY_COOKIE_NAME + "=" + header.getValue());
137         }
138         else if (endPos == 0 || StringUtils.isBlank(text.substring(0, endPos))) {
139             header = new BasicHeader(header.getName(), EMPTY_COOKIE_NAME + header.getValue());
140         }
141 
142         final String headername = header.getName();
143         if (!SM.SET_COOKIE.equalsIgnoreCase(headername)) {
144             throw new MalformedCookieException("Unrecognized cookie header '" + header + "'");
145         }
146         final HeaderElement[] helems = header.getElements();
147         boolean versioned = false;
148         boolean netscape = false;
149         for (final HeaderElement helem: helems) {
150             if (helem.getParameterByName("version") != null) {
151                 versioned = true;
152             }
153             if (helem.getParameterByName("expires") != null) {
154                 netscape = true;
155             }
156         }
157 
158         final List<Cookie> cookies;
159         if (netscape || !versioned) {
160             // Need to parse the header again, because Netscape style cookies do not correctly
161             // support multiple header elements (comma cannot be treated as an element separator)
162             final CharArrayBuffer buffer;
163             final ParserCursor cursor;
164             if (header instanceof FormattedHeader) {
165                 buffer = ((FormattedHeader) header).getBuffer();
166                 cursor = new ParserCursor(
167                         ((FormattedHeader) header).getValuePos(),
168                         buffer.length());
169             }
170             else {
171                 final String s = header.getValue();
172                 if (s == null) {
173                     throw new MalformedCookieException("Header value is null");
174                 }
175                 buffer = new CharArrayBuffer(s.length());
176                 buffer.append(s);
177                 cursor = new ParserCursor(0, buffer.length());
178             }
179             final HeaderElement elem = DEFAULT_NETSCAPE_DRAFT_HEADER_PARSER.parseHeader(buffer, cursor);
180             final String name = elem.getName();
181             if (name == null || name.isEmpty()) {
182                 throw new MalformedCookieException("Cookie name may not be empty");
183             }
184             final String value = elem.getValue();
185             final BasicClientCookie cookie = new BasicClientCookie(name, value);
186             cookie.setPath(getDefaultPath(origin));
187             cookie.setDomain(getDefaultDomain(origin));
188 
189             // cycle through the parameters
190             final NameValuePair[] attribs = elem.getParameters();
191             for (int j = attribs.length - 1; j >= 0; j--) {
192                 final NameValuePair attrib = attribs[j];
193                 final String s = attrib.getName().toLowerCase(Locale.ROOT);
194                 cookie.setAttribute(s, attrib.getValue());
195                 final CookieAttributeHandler handler = findAttribHandler(s);
196                 if (handler != null) {
197                     handler.parse(cookie, attrib.getValue());
198                 }
199             }
200             // Override version for Netscape style cookies
201             if (netscape) {
202                 cookie.setVersion(0);
203             }
204             cookies = Collections.singletonList(cookie);
205         }
206         else {
207             cookies = parse(helems, origin);
208         }
209 
210         for (final Cookie c : cookies) {
211             // re-add quotes around value if parsing as incorrectly trimmed them
212             if (header.getValue().contains(c.getName() + "=\"" + c.getValue())) {
213                 ((BasicClientCookie) c).setValue('"' + c.getValue() + '"');
214             }
215         }
216         return cookies;
217     }
218 
219     @Override
220     public List<Header> formatCookies(final List<Cookie> cookies) {
221         cookies.sort(COOKIE_COMPARATOR);
222 
223         final CharArrayBuffer buffer = new CharArrayBuffer(20 * cookies.size());
224         buffer.append(SM.COOKIE);
225         buffer.append(": ");
226         for (int i = 0; i < cookies.size(); i++) {
227             final Cookie cookie = cookies.get(i);
228             if (i > 0) {
229                 buffer.append("; ");
230             }
231             final String cookieName = cookie.getName();
232             final String cookieValue = cookie.getValue();
233             if (cookie.getVersion() > 0 && !isQuoteEnclosed(cookieValue)) {
234                 HtmlUnitBrowserCompatCookieHeaderValueFormatter.INSTANCE.formatHeaderElement(
235                         buffer,
236                         new BasicHeaderElement(cookieName, cookieValue),
237                         false);
238             }
239             else {
240                 // Netscape style cookies do not support quoted values
241                 buffer.append(cookieName);
242                 buffer.append("=");
243                 if (cookieValue != null) {
244                     buffer.append(cookieValue);
245                 }
246             }
247         }
248         final List<Header> headers = new ArrayList<>(1);
249         headers.add(new BufferedHeader(buffer));
250         return headers;
251     }
252 
253     private static boolean isQuoteEnclosed(final String s) {
254         return s != null
255                 && s.length() > 1
256                 && '\"' == s.charAt(0)
257                 && '\"' == s.charAt(s.length() - 1);
258     }
259 
260     @Override
261     public int getVersion() {
262         return 0;
263     }
264 
265     @Override
266     public Header getVersionHeader() {
267         return null;
268     }
269 
270     @Override
271     public String toString() {
272         return "compatibility";
273     }
274 
275     private static final class NetscapeDraftHeaderParser {
276 
277         private static final char PARAM_DELIMITER = ';';
278 
279         // IMPORTANT!
280         // These private static variables must be treated as immutable and never exposed outside this class
281         private static final BitSet TOKEN_DELIMS = TokenParser.INIT_BITSET('=', PARAM_DELIMITER);
282         private static final BitSet VALUE_DELIMS = TokenParser.INIT_BITSET(PARAM_DELIMITER);
283 
284         private static final TokenParser TOKEN_PARSER = TokenParser.INSTANCE;
285 
286         HeaderElement parseHeader(final CharArrayBuffer buffer, final ParserCursor cursor) throws ParseException {
287             final NameValuePair nvp = parseNameValuePair(buffer, cursor);
288             final List<NameValuePair> params = new ArrayList<>();
289             while (!cursor.atEnd()) {
290                 final NameValuePair param = parseNameValuePair(buffer, cursor);
291                 params.add(param);
292             }
293 
294             return new BasicHeaderElement(nvp.getName(), nvp.getValue(),
295                     params.toArray(new NameValuePair[0]));
296         }
297 
298         private NameValuePair parseNameValuePair(final CharArrayBuffer buffer, final ParserCursor cursor) {
299             final String name = TOKEN_PARSER.parseToken(buffer, cursor, TOKEN_DELIMS);
300             if (cursor.atEnd()) {
301                 return new BasicNameValuePair(name, null);
302             }
303 
304             final int delim = buffer.charAt(cursor.getPos());
305             cursor.updatePos(cursor.getPos() + 1);
306             if (delim != '=') {
307                 return new BasicNameValuePair(name, null);
308             }
309 
310             final String value = TOKEN_PARSER.parseToken(buffer, cursor, VALUE_DELIMS);
311             if (!cursor.atEnd()) {
312                 cursor.updatePos(cursor.getPos() + 1);
313             }
314 
315             return new BasicNameValuePair(name, value);
316         }
317     }
318 }