/*
 * Copyright (c) 2002-2025 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15  package org.htmlunit;
16  
17  import static org.htmlunit.httpclient.HtmlUnitBrowserCompatCookieSpec.EMPTY_COOKIE_NAME;
18  import static org.junit.jupiter.api.Assertions.fail;
19  
20  import java.io.File;
21  import java.io.IOException;
22  import java.net.URL;
23  import java.net.URLDecoder;
24  import java.nio.charset.StandardCharsets;
25  import java.time.ZonedDateTime;
26  import java.time.format.DateTimeFormatter;
27  import java.time.temporal.ChronoUnit;
28  import java.util.ArrayList;
29  import java.util.Collections;
30  import java.util.Date;
31  import java.util.List;
32  import java.util.Locale;
33  
34  import org.apache.commons.lang3.SerializationUtils;
35  import org.htmlunit.html.HtmlPage;
36  import org.htmlunit.junit.annotation.Alerts;
37  import org.htmlunit.junit.annotation.HtmlUnitNYI;
38  import org.htmlunit.util.Cookie;
39  import org.junit.jupiter.api.Test;
40  
41  /**
42   * Tests for {@link WebClient} that run with BrowserRunner.
43   *
44   * @author Ahmed Ashour
45   * @author Ronald Brill
46   * @author Sven Strickroth
47   */
48  public class WebClient2Test extends SimpleWebTestCase {
49  
50      /**
51       * Test for 3151939. The Browser removes leading '/..' from the path.
52       * @throws Exception if something goes wrong
53       */
54      @Test
55      public void loadPage_HandleDoubleDotsAtRoot() throws Exception {
56          final String htmlContent = DOCTYPE_HTML
57              + "<html><head><title>foo</title></head><body>\n"
58              + "</body></html>";
59  
60          final WebClient client = getWebClient();
61  
62          final MockWebConnection webConnection = new MockWebConnection();
63          webConnection.setDefaultResponse(htmlContent);
64          client.setWebConnection(webConnection);
65  
66          HtmlPage page = client.getPage("http://www.somewhere.org/..");
67          assertEquals("http://www.somewhere.org/", page.getUrl());
68  
69          page = client.getPage("http://www.somewhere.org/../test");
70          assertEquals("http://www.somewhere.org/test", page.getUrl());
71  
72          // many
73          page = client.getPage("http://www.somewhere.org/../../..");
74          assertEquals("http://www.somewhere.org/", page.getUrl());
75      }
76  
77      /**
78       * Verifies that a WebClient can be serialized and deserialized before it has been used.
79       * @throws Exception if an error occurs
80       */
81      @Test
82      public void serialization_beforeUse() throws Exception {
83          final WebClient client = getWebClient();
84          final WebClient copy = clone(client);
85          assertNotNull(copy);
86      }
87  
88      /**
89       * Regression test for bug 2833433.
90       * @throws Exception if an error occurs
91       */
92      @Test
93      public void serialization_pageLoad() throws Exception {
94          final String page1Content = DOCTYPE_HTML + "<html><body>hello 1</body></html>";
95          try (WebClient client = getWebClient()) {
96              final HtmlPage page1 = loadPage(client, page1Content, null, URL_FIRST);
97              assertEquals("hello 1", page1.asNormalizedText());
98  
99              final String page2Content = DOCTYPE_HTML + "<html><body>hello 2</body></html>";
100             try (WebClient copy = clone(client)) {
101                 final HtmlPage page2 = loadPage(copy, page2Content, null, URL_SECOND);
102                 assertEquals("hello 2", page2.asNormalizedText());
103             }
104         }
105     }
106 
107     /**
108      * Regression test for bug 2836355.
109      * @throws Exception if an error occurs
110      */
111     @Test
112     public void serialization_withClickAfterwards() throws Exception {
113         final String html = DOCTYPE_HTML
114             + "<html><head>\n"
115             + "<script>\n"
116             + "  function foo() {\n"
117             + "    document.getElementById('mybox').innerHTML='hello world';\n"
118             + "    return false;\n"
119             + "  }\n"
120             + "</script></head>\n"
121             + "<body><div id='mybox'></div>\n"
122             + "<a href='#' onclick='foo()' id='clicklink'>say hello world</a>\n"
123             + "</body></html>";
124         final HtmlPage page = loadPageWithAlerts(html);
125         assertEquals("", page.getElementById("mybox").getTextContent());
126 
127         final WebClient clientCopy = clone(page.getWebClient());
128         final HtmlPage pageCopy = (HtmlPage) clientCopy.getCurrentWindow().getTopWindow().getEnclosedPage();
129         pageCopy.getHtmlElementById("clicklink").click();
130         assertEquals("hello world", pageCopy.getElementById("mybox").getTextContent());
131     }
132 
133     /**
134      * Background tasks that have been registered before the serialization should
135      * wake up and run normally after the deserialization.
136      * Until now (2.7-SNAPSHOT 17.09.09) HtmlUnit has probably never supported it.
137      * This is currently not requested and this test is just to document the current status.
138      * @throws Exception if an error occurs
139      */
140     @Test
141     @Alerts({"1", "1", "exiting"})
142     @HtmlUnitNYI(CHROME = {"1", "0", ""},
143             EDGE = {"1", "0", ""},
144             FF = {"1", "0", ""},
145             FF_ESR = {"1", "0", ""})
146     public void serialization_withJSBackgroundTasks() throws Exception {
147         final String html = DOCTYPE_HTML
148             + "<html><head>\n"
149             + "<script>\n"
150             + "  function foo() {\n"
151             + "    if (window.name == 'hello') {\n"
152             + "      alert('exiting');\n"
153             + "      clearInterval(intervalId);\n"
154             + "    }\n"
155             + "  }\n"
156             + "  var intervalId = setInterval(foo, 10);\n"
157             + "</script></head>\n"
158             + "<body></body></html>";
159 
160         final String[] expected = getExpectedAlerts();
161 
162         setExpectedAlerts();
163         final HtmlPage page = loadPageWithAlerts(html);
164         // verify that 1 background job exists
165         assertEquals(Integer.parseInt(expected[0]), page.getEnclosingWindow().getJobManager().getJobCount());
166 
167         final byte[] bytes = SerializationUtils.serialize(page);
168         page.getWebClient().close();
169 
170         // deserialize page and verify that 1 background job exists
171         final HtmlPage clonedPage = (HtmlPage) SerializationUtils.deserialize(bytes);
172         assertEquals(Integer.parseInt(expected[1]), clonedPage.getEnclosingWindow().getJobManager().getJobCount());
173 
174         // configure a new CollectingAlertHandler (in fact it has surely already one and we could get and cast it)
175         final List<String> collectedAlerts = Collections.synchronizedList(new ArrayList<String>());
176         final AlertHandler alertHandler = new CollectingAlertHandler(collectedAlerts);
177         clonedPage.getWebClient().setAlertHandler(alertHandler);
178 
179         // make some change in the page on which background script reacts
180         clonedPage.getEnclosingWindow().setName("hello");
181 
182         clonedPage.getWebClient().waitForBackgroundJavaScriptStartingBefore(100);
183         assertEquals(0, clonedPage.getEnclosingWindow().getJobManager().getJobCount());
184 
185         final String[] expectedAlerts = {expected[2]};
186         assertEquals(expectedAlerts, collectedAlerts);
187     }
188 
189     /**
190      * Regression test for bug 2812769.
191      * @throws Exception if an error occurs
192      */
193     @Test
194     @Alerts(DEFAULT = "en-US,en;q=0.9",
195             FF = "en-US,en;q=0.5",
196             FF_ESR = "en-US,en;q=0.5")
197     public void acceptLanguage() throws Exception {
198         final String html = DOCTYPE_HTML + "<html><body></body></html>";
199         loadPage(html);
200         assertEquals(getExpectedAlerts()[0],
201                 getMockWebConnection().getLastAdditionalHeaders().get(HttpHeader.ACCEPT_LANGUAGE));
202     }
203 
204     /**
205      * Regression test for bug 2812769.
206      * @throws Exception if an error occurs
207      */
208     @Test
209     public void acceptLanguageFr() throws Exception {
210         final String html = DOCTYPE_HTML + "<html><body></body></html>";
211 
212         final BrowserVersion frBrowser =
213                 new BrowserVersion.BrowserVersionBuilder(getBrowserVersion())
214                         .setAcceptLanguageHeader("fr")
215                         .build();
216 
217         setBrowserVersion(frBrowser);
218         loadPageWithAlerts(html);
219         // browsers are using different casing, but this is not relevant for this test
220         assertEquals("fr",
221                 getMockWebConnection().getLastAdditionalHeaders()
222                     .get(HttpHeader.ACCEPT_LANGUAGE).toLowerCase(Locale.ROOT));
223     }
224 
225     /**
226      * As of HtmlUnit-2.7-SNAPSHOT from 24.09.09, loading about:blank in a page didn't
227      * reinitialized the window host object.
228      * @throws Exception if an error occurs
229      */
230     @Test
231     public void newWindowScopeForAboutBlank() throws Exception {
232         final HtmlPage p = loadPage(DOCTYPE_HTML + "<html><body></body></html>");
233         p.executeJavaScript("top.foo = 'hello';");
234         final ScriptResult result = p.executeJavaScript("top.foo");
235         assertEquals("hello", result.getJavaScriptResult());
236 
237         final HtmlPage page2 = p.getWebClient().getPage("about:blank");
238         final ScriptResult result2 = page2.executeJavaScript("String(top.foo)");
239         assertEquals("undefined", result2.getJavaScriptResult());
240     }
241 
242   /**
243    * @throws Exception if the test fails
244    */
245     @Test
246     public void buildCookie() throws Exception {
247         checkCookie("", EMPTY_COOKIE_NAME, "", "/", false, null);
248         checkCookie("toto", EMPTY_COOKIE_NAME, "toto", "/", false, null);
249         checkCookie("toto=", "toto", "", "/", false, null);
250         checkCookie("toto=foo", "toto", "foo", "/", false, null);
251         checkCookie("toto=foo;secure", "toto", "foo", "/", true, null);
252         checkCookie("toto=foo;path=/myPath;secure", "toto", "foo", "/myPath", true, null);
253 
254         // Check that leading and trailing whitespaces are ignored
255         checkCookie("  toto", EMPTY_COOKIE_NAME, "toto", "/", false, null);
256         checkCookie("  = toto", EMPTY_COOKIE_NAME, "toto", "/", false, null);
257         checkCookie("   toto=foo;  path=/myPath  ; secure  ",
258               "toto", "foo", "/myPath", true, null);
259 
260         // Check that we accept reserved attribute names (e.g expires, domain) in any case
261         checkCookie("toto=foo; PATH=/myPath; SeCURE",
262               "toto", "foo", "/myPath", true, null);
263 
264         // Check that we are able to parse and set the expiration date correctly
265         final ZonedDateTime inOneYear = ZonedDateTime.now().plusYears(1).truncatedTo(ChronoUnit.SECONDS);
266         final String dateString = DateTimeFormatter.RFC_1123_DATE_TIME.format(inOneYear);
267         final Date date = Date.from(inOneYear.toInstant());
268         checkCookie("toto=foo; expires=" + dateString, "toto", "foo", "/", false, date);
269     }
270 
271     private void checkCookie(final String cookieString, final String name, final String value,
272             final String path, final boolean secure, final Date date) {
273 
274         final String domain = URL_FIRST.getHost();
275 
276         getWebClient().getCookieManager().clearCookies();
277         getWebClient().addCookie(cookieString, URL_FIRST, this);
278         final Cookie cookie = getWebClient().getCookieManager().getCookies().iterator().next();
279 
280         assertEquals(name, cookie.getName());
281         assertEquals(value, cookie.getValue());
282         assertEquals(path, cookie.getPath());
283         assertEquals(domain, cookie.getDomain());
284         assertEquals(secure, cookie.isSecure());
285         // special handling for null case, because Date cannot be compared using assertEquals
286         if (date == null || cookie.getExpires() == null) {
287             assertEquals(date, cookie.getExpires());
288         }
289         else {
290             assertEquals(date.toInstant(), cookie.getExpires().toInstant());
291         }
292     }
293 
294     /**
295      * @throws Exception if something goes wrong
296      */
297     @Test
298     @Alerts({"loadExtraContent started at Page 1", " loadExtraContent finished at Page 1"})
299     @HtmlUnitNYI(CHROME = {"loadExtraContent started at Page 1", " loadExtraContent finished at Page 2"},
300             EDGE = {"loadExtraContent started at Page 1", " loadExtraContent finished at Page 2"},
301             FF = {"loadExtraContent started at Page 1", " loadExtraContent finished at Page 2"},
302             FF_ESR = {"loadExtraContent started at Page 1", " loadExtraContent finished at Page 2"})
303     public void makeSureTheCurrentJobHasEndedBeforeReplaceWindowPage() throws Exception {
304         final String htmlContent1 = DOCTYPE_HTML
305             + "<html>\n"
306             + "<head>"
307             + "  <title>Page 1</title>\n"
308             + "</head>\n"
309             + "<body>\n"
310             + "  <script>\n"
311             + "    function loadExtraContent() {\n"
312             + "      window.name += 'loadExtraContent started at ' + window.document.title;"
313             + "      for (var i = 0; i < 7000; i++) {\n"
314             + "        try {\n"
315             + "          var p = document.createElement('p');\n"
316             + "          p.innerHTML = 'new content';\n"
317             + "          var body = document.querySelector('body');\n"
318             + "          if (body) { body.appendChild(p); }\n"
319             + "        } catch(e) {\n"
320             + "          var now = new Date().getTime();\n"
321             + "          while(new Date().getTime() < now + 100) { /* Do nothing */ }\n"
322             + "        }\n"
323             + "      }\n"
324             + "      window.name += ' loadExtraContent finished at ' + window.document.title;"
325             + "    }\n"
326 
327             + "    setTimeout(loadExtraContent, 1);"
328             + "  </script>\n"
329             + "</body>\n"
330             + "</html>";
331 
332         final String htmlContent2 = DOCTYPE_HTML
333             + "<html>\n"
334             + "<head>"
335             + "  <title>Page 2</title>\n"
336             + "</head>\n"
337             + "<body>\n"
338             + "  <h1>Page2</h1>\n"
339             + "  <p>This is page 2</p>\n"
340             + "</body>\n"
341             + "</html>";
342 
343         final WebClient client = getWebClient();
344 
345         final MockWebConnection webConnection = new MockWebConnection();
346         webConnection.setDefaultResponse(htmlContent1);
347         webConnection.setResponse(URL_SECOND, htmlContent2);
348         client.setWebConnection(webConnection);
349 
350         // Load page 1. Has a setTimeout(...) function
351         final HtmlPage page1 = client.getPage(URL_FIRST);
352         verify(() -> page1.getEnclosingWindow().getName(), getExpectedAlerts()[0]);
353 
354         // Immediately load page 2. Timeout function was triggered already
355         final HtmlPage page2 = client.getPage(URL_SECOND);
356         verify(() -> page1.getEnclosingWindow().getName(),
357                 getExpectedAlerts()[0] + getExpectedAlerts()[1], DEFAULT_WAIT_TIME.multipliedBy(4));
358 
359         // Fails: return 98 (about) instead of 1
360         // assertEquals(1, page.querySelectorAll("p").size());
361     }
362 
363     /**
364      * @throws Exception if something goes wrong
365      */
366     @Test
367     public void toLocaleLowerCase() throws Exception {
368         final String html = DOCTYPE_HTML
369             + "<html><head><script>\n"
370             + "  function doTest() {\n"
371             + "    window.document.title = '\\u0130'.toLocaleLowerCase();\n"
372             + "  }\n"
373             + "</script></head>"
374             + "<body onload='doTest()'>\n"
375             + "</body></html>";
376 
377         HtmlPage page = loadPage(html);
378         assertEquals("\u0069\u0307", page.getTitleText());
379 
380         releaseResources();
381         final BrowserVersion trBrowser =
382                 new BrowserVersion.BrowserVersionBuilder(getBrowserVersion())
383                         .setBrowserLanguage("tr")
384                         .build();
385 
386         setBrowserVersion(trBrowser);
387         page = loadPage(html);
388         assertEquals("\u0069", page.getTitleText());
389     }
390 
391     /**
392      * This is supported by reals browsers but not with HtmlUnit.
393      * @throws Exception if the test fails
394      */
395     @Test
396     public void localFile() throws Exception {
397         final URL url = getClass().getClassLoader().getResource("simple.html");
398         String file = URLDecoder.decode(url.getFile(), StandardCharsets.UTF_8.name());
399         if (file.startsWith("/") && file.contains(":")) {
400             // we have to remove the trailing slash to test the c:\.... case.
401             file = file.substring(1);
402         }
403 
404         assertTrue("File '" + file + "' does not exist", new File(file).exists());
405 
406         try (WebClient webClient = new WebClient(getBrowserVersion())) {
407             webClient.getPage(file);
408             fail("IOException expected");
409         }
410         catch (final IOException e) {
411             assertTrue(e.getMessage(),
412                     e.getMessage().startsWith("Unsupported protocol '")
413                     || e.getMessage().startsWith("no protocol: /"));
414         }
415     }
416 
417     /**
418      * @throws Exception if the test fails
419      */
420     @Test
421     @Alerts("titel - simple.html")
422     public void localFileFile() throws Exception {
423         final URL url = getClass().getClassLoader().getResource("simple.html");
424         String file = URLDecoder.decode(url.getFile(), StandardCharsets.UTF_8.name());
425         if (file.startsWith("/") && file.contains(":")) {
426             // we have to remove the trailing slash to test the c:\.... case.
427             file = file.substring(1);
428         }
429 
430         assertTrue("File '" + file + "' does not exist", new File(file).exists());
431 
432         try (WebClient webClient = new WebClient(getBrowserVersion())) {
433             final HtmlPage page = webClient.getPage("file://" + file);
434             assertEquals(getExpectedAlerts()[0], page.getTitleText());
435         }
436     }
437 
438     /**
439      * @throws Exception if the test fails
440      */
441     @Test
442     public void unknownProtocol() throws Exception {
443         try (WebClient webClient = new WebClient(getBrowserVersion())) {
444             final HtmlPage page = webClient.getPage("unknown://simple.html");
445             fail("IOException expected");
446         }
447         catch (final IOException e) {
448             assertEquals("Unsupported protocol 'unknown'", e.getMessage());
449         }
450     }
451 }