View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit;
16  
17  import static org.htmlunit.httpclient.HtmlUnitBrowserCompatCookieSpec.EMPTY_COOKIE_NAME;
18  import static org.junit.jupiter.api.Assertions.assertThrows;
19  import static org.junit.jupiter.api.Assertions.fail;
20  
21  import java.io.File;
22  import java.io.IOException;
23  import java.net.URL;
24  import java.net.URLDecoder;
25  import java.nio.charset.StandardCharsets;
26  import java.time.ZonedDateTime;
27  import java.time.format.DateTimeFormatter;
28  import java.time.temporal.ChronoUnit;
29  import java.util.ArrayList;
30  import java.util.Collections;
31  import java.util.Date;
32  import java.util.List;
33  import java.util.Locale;
34  
35  import org.apache.commons.lang3.SerializationUtils;
36  import org.htmlunit.html.HtmlPage;
37  import org.htmlunit.junit.annotation.Alerts;
38  import org.htmlunit.junit.annotation.HtmlUnitNYI;
39  import org.htmlunit.util.Cookie;
40  import org.junit.jupiter.api.Test;
41  
42  /**
43   * Tests for {@link WebClient} that run with BrowserRunner.
44   *
45   * @author Ahmed Ashour
46   * @author Ronald Brill
47   * @author Sven Strickroth
48   */
49  public class WebClient2Test extends SimpleWebTestCase {
50  
51      /**
52       * Test for 3151939. The Browser removes leading '/..' from the path.
53       * @throws Exception if something goes wrong
54       */
55      @Test
56      public void loadPage_HandleDoubleDotsAtRoot() throws Exception {
57          final String htmlContent = DOCTYPE_HTML
58              + "<html><head><title>foo</title></head><body>\n"
59              + "</body></html>";
60  
61          final WebClient client = getWebClient();
62  
63          final MockWebConnection webConnection = new MockWebConnection();
64          webConnection.setDefaultResponse(htmlContent);
65          client.setWebConnection(webConnection);
66  
67          HtmlPage page = client.getPage("http://www.somewhere.org/..");
68          assertEquals("http://www.somewhere.org/", page.getUrl());
69  
70          page = client.getPage("http://www.somewhere.org/../test");
71          assertEquals("http://www.somewhere.org/test", page.getUrl());
72  
73          // many
74          page = client.getPage("http://www.somewhere.org/../../..");
75          assertEquals("http://www.somewhere.org/", page.getUrl());
76      }
77  
78      /**
79       * Verifies that a WebClient can be serialized and deserialized before it has been used.
80       * @throws Exception if an error occurs
81       */
82      @Test
83      public void serialization_beforeUse() throws Exception {
84          final WebClient client = getWebClient();
85          final WebClient copy = clone(client);
86          assertNotNull(copy);
87      }
88  
89      /**
90       * Regression test for bug 2833433.
91       * @throws Exception if an error occurs
92       */
93      @Test
94      public void serialization_pageLoad() throws Exception {
95          final String page1Content = DOCTYPE_HTML + "<html><body>hello 1</body></html>";
96          try (WebClient client = getWebClient()) {
97              final HtmlPage page1 = loadPage(client, page1Content, null, URL_FIRST);
98              assertEquals("hello 1", page1.asNormalizedText());
99  
100             final String page2Content = DOCTYPE_HTML + "<html><body>hello 2</body></html>";
101             try (WebClient copy = clone(client)) {
102                 final HtmlPage page2 = loadPage(copy, page2Content, null, URL_SECOND);
103                 assertEquals("hello 2", page2.asNormalizedText());
104             }
105         }
106     }
107 
108     /**
109      * Regression test for bug 2836355.
110      * @throws Exception if an error occurs
111      */
112     @Test
113     public void serialization_withClickAfterwards() throws Exception {
114         final String html = DOCTYPE_HTML
115             + "<html><head>\n"
116             + "<script>\n"
117             + "  function foo() {\n"
118             + "    document.getElementById('mybox').innerHTML='hello world';\n"
119             + "    return false;\n"
120             + "  }\n"
121             + "</script></head>\n"
122             + "<body><div id='mybox'></div>\n"
123             + "<a href='#' onclick='foo()' id='clicklink'>say hello world</a>\n"
124             + "</body></html>";
125         final HtmlPage page = loadPageWithAlerts(html);
126         assertEquals("", page.getElementById("mybox").getTextContent());
127 
128         final WebClient clientCopy = clone(page.getWebClient());
129         final HtmlPage pageCopy = (HtmlPage) clientCopy.getCurrentWindow().getTopWindow().getEnclosedPage();
130         pageCopy.getHtmlElementById("clicklink").click();
131         assertEquals("hello world", pageCopy.getElementById("mybox").getTextContent());
132     }
133 
134     /**
135      * Background tasks that have been registered before the serialization should
136      * wake up and run normally after the deserialization.
137      * Until now (2.7-SNAPSHOT 17.09.09) HtmlUnit has probably never supported it.
138      * This is currently not requested and this test is just to document the current status.
139      * @throws Exception if an error occurs
140      */
141     @Test
142     @Alerts({"1", "1", "exiting"})
143     @HtmlUnitNYI(CHROME = {"1", "0", ""},
144             EDGE = {"1", "0", ""},
145             FF = {"1", "0", ""},
146             FF_ESR = {"1", "0", ""})
147     public void serialization_withJSBackgroundTasks() throws Exception {
148         final String html = DOCTYPE_HTML
149             + "<html><head>\n"
150             + "<script>\n"
151             + "  function foo() {\n"
152             + "    if (window.name == 'hello') {\n"
153             + "      alert('exiting');\n"
154             + "      clearInterval(intervalId);\n"
155             + "    }\n"
156             + "  }\n"
157             + "  var intervalId = setInterval(foo, 10);\n"
158             + "</script></head>\n"
159             + "<body></body></html>";
160 
161         final String[] expected = getExpectedAlerts();
162 
163         setExpectedAlerts();
164         final HtmlPage page = loadPageWithAlerts(html);
165         // verify that 1 background job exists
166         assertEquals(Integer.parseInt(expected[0]), page.getEnclosingWindow().getJobManager().getJobCount());
167 
168         final byte[] bytes = SerializationUtils.serialize(page);
169         page.getWebClient().close();
170 
171         // deserialize page and verify that 1 background job exists
172         final HtmlPage clonedPage = (HtmlPage) SerializationUtils.deserialize(bytes);
173         assertEquals(Integer.parseInt(expected[1]), clonedPage.getEnclosingWindow().getJobManager().getJobCount());
174 
175         // configure a new CollectingAlertHandler (in fact it has surely already one and we could get and cast it)
176         final List<String> collectedAlerts = Collections.synchronizedList(new ArrayList<String>());
177         final AlertHandler alertHandler = new CollectingAlertHandler(collectedAlerts);
178         clonedPage.getWebClient().setAlertHandler(alertHandler);
179 
180         // make some change in the page on which background script reacts
181         clonedPage.getEnclosingWindow().setName("hello");
182 
183         clonedPage.getWebClient().waitForBackgroundJavaScriptStartingBefore(100);
184         assertEquals(0, clonedPage.getEnclosingWindow().getJobManager().getJobCount());
185 
186         final String[] expectedAlerts = {expected[2]};
187         assertEquals(expectedAlerts, collectedAlerts);
188     }
189 
190     /**
191      * Regression test for bug 2812769.
192      * @throws Exception if an error occurs
193      */
194     @Test
195     @Alerts(DEFAULT = "en-US,en;q=0.9",
196             FF = "en-US,en;q=0.5",
197             FF_ESR = "en-US,en;q=0.5")
198     public void acceptLanguage() throws Exception {
199         final String html = DOCTYPE_HTML + "<html><body></body></html>";
200         loadPage(html);
201         assertEquals(getExpectedAlerts()[0],
202                 getMockWebConnection().getLastAdditionalHeaders().get(HttpHeader.ACCEPT_LANGUAGE));
203     }
204 
205     /**
206      * Regression test for bug 2812769.
207      * @throws Exception if an error occurs
208      */
209     @Test
210     public void acceptLanguageFr() throws Exception {
211         final String html = DOCTYPE_HTML + "<html><body></body></html>";
212 
213         final BrowserVersion frBrowser =
214                 new BrowserVersion.BrowserVersionBuilder(getBrowserVersion())
215                         .setAcceptLanguageHeader("fr")
216                         .build();
217 
218         setBrowserVersion(frBrowser);
219         loadPageWithAlerts(html);
220         // browsers are using different casing, but this is not relevant for this test
221         assertEquals("fr",
222                 getMockWebConnection().getLastAdditionalHeaders()
223                     .get(HttpHeader.ACCEPT_LANGUAGE).toLowerCase(Locale.ROOT));
224     }
225 
226     /**
227      * As of HtmlUnit-2.7-SNAPSHOT from 24.09.09, loading about:blank in a page didn't
228      * reinitialized the window host object.
229      * @throws Exception if an error occurs
230      */
231     @Test
232     public void newWindowScopeForAboutBlank() throws Exception {
233         final HtmlPage p = loadPage(DOCTYPE_HTML + "<html><body></body></html>");
234         p.executeJavaScript("top.foo = 'hello';");
235         final ScriptResult result = p.executeJavaScript("top.foo");
236         assertEquals("hello", result.getJavaScriptResult());
237 
238         final HtmlPage page2 = p.getWebClient().getPage("about:blank");
239         final ScriptResult result2 = page2.executeJavaScript("String(top.foo)");
240         assertEquals("undefined", result2.getJavaScriptResult());
241     }
242 
243   /**
244    * @throws Exception if the test fails
245    */
246     @Test
247     public void buildCookie() throws Exception {
248         checkCookie("", EMPTY_COOKIE_NAME, "", "/", false, null);
249         checkCookie("toto", EMPTY_COOKIE_NAME, "toto", "/", false, null);
250         checkCookie("toto=", "toto", "", "/", false, null);
251         checkCookie("toto=foo", "toto", "foo", "/", false, null);
252         checkCookie("toto=foo;secure", "toto", "foo", "/", true, null);
253         checkCookie("toto=foo;path=/myPath;secure", "toto", "foo", "/myPath", true, null);
254 
255         // Check that leading and trailing whitespaces are ignored
256         checkCookie("  toto", EMPTY_COOKIE_NAME, "toto", "/", false, null);
257         checkCookie("  = toto", EMPTY_COOKIE_NAME, "toto", "/", false, null);
258         checkCookie("   toto=foo;  path=/myPath  ; secure  ",
259               "toto", "foo", "/myPath", true, null);
260 
261         // Check that we accept reserved attribute names (e.g expires, domain) in any case
262         checkCookie("toto=foo; PATH=/myPath; SeCURE",
263               "toto", "foo", "/myPath", true, null);
264 
265         // Check that we are able to parse and set the expiration date correctly
266         final ZonedDateTime inOneYear = ZonedDateTime.now().plusYears(1).truncatedTo(ChronoUnit.SECONDS);
267         final String dateString = DateTimeFormatter.RFC_1123_DATE_TIME.format(inOneYear);
268         final Date date = Date.from(inOneYear.toInstant());
269         checkCookie("toto=foo; expires=" + dateString, "toto", "foo", "/", false, date);
270     }
271 
272     private void checkCookie(final String cookieString, final String name, final String value,
273             final String path, final boolean secure, final Date date) {
274 
275         final String domain = URL_FIRST.getHost();
276 
277         getWebClient().getCookieManager().clearCookies();
278         getWebClient().addCookie(cookieString, URL_FIRST, this);
279         final Cookie cookie = getWebClient().getCookieManager().getCookies().iterator().next();
280 
281         assertEquals(name, cookie.getName());
282         assertEquals(value, cookie.getValue());
283         assertEquals(path, cookie.getPath());
284         assertEquals(domain, cookie.getDomain());
285         assertEquals(secure, cookie.isSecure());
286         // special handling for null case, because Date cannot be compared using assertEquals
287         if (date == null || cookie.getExpires() == null) {
288             assertEquals(date, cookie.getExpires());
289         }
290         else {
291             assertEquals(date.toInstant(), cookie.getExpires().toInstant());
292         }
293     }
294 
295     /**
296      * @throws Exception if something goes wrong
297      */
298     @Test
299     @Alerts({"loadExtraContent started at Page 1", " loadExtraContent finished at Page 1"})
300     @HtmlUnitNYI(CHROME = {"loadExtraContent started at Page 1", " loadExtraContent finished at Page 2"},
301             EDGE = {"loadExtraContent started at Page 1", " loadExtraContent finished at Page 2"},
302             FF = {"loadExtraContent started at Page 1", " loadExtraContent finished at Page 2"},
303             FF_ESR = {"loadExtraContent started at Page 1", " loadExtraContent finished at Page 2"})
304     public void makeSureTheCurrentJobHasEndedBeforeReplaceWindowPage() throws Exception {
305         final String htmlContent1 = DOCTYPE_HTML
306             + "<html>\n"
307             + "<head>"
308             + "  <title>Page 1</title>\n"
309             + "</head>\n"
310             + "<body>\n"
311             + "  <script>\n"
312             + "    function loadExtraContent() {\n"
313             + "      window.name += 'loadExtraContent started at ' + window.document.title;"
314             + "      for (var i = 0; i < 7000; i++) {\n"
315             + "        try {\n"
316             + "          var p = document.createElement('p');\n"
317             + "          p.innerHTML = 'new content';\n"
318             + "          var body = document.querySelector('body');\n"
319             + "          if (body) { body.appendChild(p); }\n"
320             + "        } catch(e) {\n"
321             + "          var now = new Date().getTime();\n"
322             + "          while(new Date().getTime() < now + 100) { /* Do nothing */ }\n"
323             + "        }\n"
324             + "      }\n"
325             + "      window.name += ' loadExtraContent finished at ' + window.document.title;"
326             + "    }\n"
327 
328             + "    setTimeout(loadExtraContent, 1);"
329             + "  </script>\n"
330             + "</body>\n"
331             + "</html>";
332 
333         final String htmlContent2 = DOCTYPE_HTML
334             + "<html>\n"
335             + "<head>"
336             + "  <title>Page 2</title>\n"
337             + "</head>\n"
338             + "<body>\n"
339             + "  <h1>Page2</h1>\n"
340             + "  <p>This is page 2</p>\n"
341             + "</body>\n"
342             + "</html>";
343 
344         final WebClient client = getWebClient();
345 
346         final MockWebConnection webConnection = new MockWebConnection();
347         webConnection.setDefaultResponse(htmlContent1);
348         webConnection.setResponse(URL_SECOND, htmlContent2);
349         client.setWebConnection(webConnection);
350 
351         // Load page 1. Has a setTimeout(...) function
352         final HtmlPage page1 = client.getPage(URL_FIRST);
353         verify(() -> page1.getEnclosingWindow().getName(), getExpectedAlerts()[0]);
354 
355         // Immediately load page 2. Timeout function was triggered already
356         final HtmlPage page2 = client.getPage(URL_SECOND);
357         verify(() -> page1.getEnclosingWindow().getName(),
358                 getExpectedAlerts()[0] + getExpectedAlerts()[1], DEFAULT_WAIT_TIME.multipliedBy(4));
359 
360         // Fails: return 98 (about) instead of 1
361         // assertEquals(1, page.querySelectorAll("p").size());
362     }
363 
364     /**
365      * @throws Exception if something goes wrong
366      */
367     @Test
368     @Alerts({"loadExtraContent started at Page 1", " loadExtraContent finished at Page 1"})
369     public void buttonClickReachesPageWithJScompileError() throws Exception {
370         final String htmlStartPage = DOCTYPE_HTML
371             + "<html>\n"
372             + "<head>"
373             + "  <title>Page 1</title>\n"
374             + "</head>\n"
375             + "<body>\n"
376             + "  <form method='post' action='" + URL_SECOND + "'>\n"
377             + "    <input id='btnNext' type='submit' value='Next'>\n"
378             + "  </form>\n"
379             + "</body>\n"
380             + "</html>";
381 
382         final String htmlSecondPage = DOCTYPE_HTML
383             + "<html>\n"
384             + "<head>"
385             + "  <title>Page 2</title>\n"
386             + "  <script>\n"
387             + "    this script does not compile!"
388             + "  </script>\n"
389             + "</head>\n"
390             + "<body>\n"
391             + "  <h1>Page2</h1>\n"
392             + "  <p>This is page 2</p>\n"
393             + "</body>\n"
394             + "</html>";
395 
396         final WebClient client = getWebClient();
397 
398         final MockWebConnection webConnection = getMockWebConnection();
399         client.setWebConnection(webConnection);
400 
401         webConnection.setDefaultResponse(htmlStartPage);
402         webConnection.setResponse(URL_SECOND, htmlSecondPage);
403 
404         final HtmlPage page1 = client.getPage(URL_FIRST);
405         assertEquals("Page 1", page1.getTitleText());
406 
407         assertThrows(ScriptException.class, () -> page1.getElementById("btnNext").click());
408     }
409 
410     /**
411      * @throws Exception if something goes wrong
412      */
413     @Test
414     public void toLocaleLowerCase() throws Exception {
415         final String html = DOCTYPE_HTML
416             + "<html><head><script>\n"
417             + "  function doTest() {\n"
418             + "    window.document.title = '\\u0130'.toLocaleLowerCase();\n"
419             + "  }\n"
420             + "</script></head>"
421             + "<body onload='doTest()'>\n"
422             + "</body></html>";
423 
424         HtmlPage page = loadPage(html);
425         assertEquals("\u0069\u0307", page.getTitleText());
426 
427         releaseResources();
428         final BrowserVersion trBrowser =
429                 new BrowserVersion.BrowserVersionBuilder(getBrowserVersion())
430                         .setBrowserLanguage("tr")
431                         .build();
432 
433         setBrowserVersion(trBrowser);
434         page = loadPage(html);
435         assertEquals("\u0069", page.getTitleText());
436     }
437 
438     /**
439      * This is supported by reals browsers but not with HtmlUnit.
440      * @throws Exception if the test fails
441      */
442     @Test
443     public void localFile() throws Exception {
444         final URL url = getClass().getClassLoader().getResource("simple.html");
445         String file = URLDecoder.decode(url.getFile(), StandardCharsets.UTF_8.name());
446         if (file.startsWith("/") && file.contains(":")) {
447             // we have to remove the trailing slash to test the c:\.... case.
448             file = file.substring(1);
449         }
450 
451         assertTrue("File '" + file + "' does not exist", new File(file).exists());
452 
453         try (WebClient webClient = new WebClient(getBrowserVersion())) {
454             webClient.getPage(file);
455             fail("IOException expected");
456         }
457         catch (final IOException e) {
458             assertTrue(e.getMessage(),
459                     e.getMessage().startsWith("Unsupported protocol '")
460                     || e.getMessage().startsWith("no protocol: /"));
461         }
462     }
463 
464     /**
465      * @throws Exception if the test fails
466      */
467     @Test
468     @Alerts("titel - simple.html")
469     public void localFileFile() throws Exception {
470         final URL url = getClass().getClassLoader().getResource("simple.html");
471         String file = URLDecoder.decode(url.getFile(), StandardCharsets.UTF_8.name());
472         if (file.startsWith("/") && file.contains(":")) {
473             // we have to remove the trailing slash to test the c:\.... case.
474             file = file.substring(1);
475         }
476 
477         assertTrue("File '" + file + "' does not exist", new File(file).exists());
478 
479         try (WebClient webClient = new WebClient(getBrowserVersion())) {
480             final HtmlPage page = webClient.getPage("file://" + file);
481             assertEquals(getExpectedAlerts()[0], page.getTitleText());
482         }
483     }
484 
485     /**
486      * @throws Exception if the test fails
487      */
488     @Test
489     public void unknownProtocol() throws Exception {
490         try (WebClient webClient = new WebClient(getBrowserVersion())) {
491             final HtmlPage page = webClient.getPage("unknown://simple.html");
492             fail("IOException expected");
493         }
494         catch (final IOException e) {
495             assertEquals("Unsupported protocol 'unknown'", e.getMessage());
496         }
497     }
498 }