View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit;
16  
17  import static org.htmlunit.httpclient.HtmlUnitBrowserCompatCookieSpec.EMPTY_COOKIE_NAME;
18  import static org.junit.Assert.fail;
19  
20  import java.io.File;
21  import java.io.IOException;
22  import java.net.URL;
23  import java.net.URLDecoder;
24  import java.nio.charset.StandardCharsets;
25  import java.time.ZonedDateTime;
26  import java.time.format.DateTimeFormatter;
27  import java.time.temporal.ChronoUnit;
28  import java.util.ArrayList;
29  import java.util.Collections;
30  import java.util.Date;
31  import java.util.List;
32  import java.util.Locale;
33  
34  import org.apache.commons.lang3.SerializationUtils;
35  import org.htmlunit.html.HtmlPage;
36  import org.htmlunit.junit.BrowserRunner;
37  import org.htmlunit.junit.annotation.Alerts;
38  import org.htmlunit.junit.annotation.HtmlUnitNYI;
39  import org.htmlunit.junit.annotation.Retry;
40  import org.htmlunit.util.Cookie;
41  import org.junit.Test;
42  import org.junit.runner.RunWith;
43  
44  /**
45   * Tests for {@link WebClient} that run with BrowserRunner.
46   *
47   * @author Ahmed Ashour
48   * @author Ronald Brill
49   * @author Sven Strickroth
50   */
51  @RunWith(BrowserRunner.class)
52  public class WebClient2Test extends SimpleWebTestCase {
53  
54      /**
55       * Test for 3151939. The Browser removes leading '/..' from the path.
56       * @throws Exception if something goes wrong
57       */
58      @Test
59      public void loadPage_HandleDoubleDotsAtRoot() throws Exception {
60          final String htmlContent = DOCTYPE_HTML
61              + "<html><head><title>foo</title></head><body>\n"
62              + "</body></html>";
63  
64          final WebClient client = getWebClient();
65  
66          final MockWebConnection webConnection = new MockWebConnection();
67          webConnection.setDefaultResponse(htmlContent);
68          client.setWebConnection(webConnection);
69  
70          HtmlPage page = client.getPage("http://www.somewhere.org/..");
71          assertEquals("http://www.somewhere.org/", page.getUrl());
72  
73          page = client.getPage("http://www.somewhere.org/../test");
74          assertEquals("http://www.somewhere.org/test", page.getUrl());
75  
76          // many
77          page = client.getPage("http://www.somewhere.org/../../..");
78          assertEquals("http://www.somewhere.org/", page.getUrl());
79      }
80  
81      /**
82       * Verifies that a WebClient can be serialized and deserialized before it has been used.
83       * @throws Exception if an error occurs
84       */
85      @Test
86      public void serialization_beforeUse() throws Exception {
87          final WebClient client = getWebClient();
88          final WebClient copy = clone(client);
89          assertNotNull(copy);
90      }
91  
92      /**
93       * Regression test for bug 2833433.
94       * @throws Exception if an error occurs
95       */
96      @Test
97      public void serialization_pageLoad() throws Exception {
98          final String page1Content = DOCTYPE_HTML + "<html><body>hello 1</body></html>";
99          try (WebClient client = getWebClient()) {
100             final HtmlPage page1 = loadPage(client, page1Content, null, URL_FIRST);
101             assertEquals("hello 1", page1.asNormalizedText());
102 
103             final String page2Content = DOCTYPE_HTML + "<html><body>hello 2</body></html>";
104             try (WebClient copy = clone(client)) {
105                 final HtmlPage page2 = loadPage(copy, page2Content, null, URL_SECOND);
106                 assertEquals("hello 2", page2.asNormalizedText());
107             }
108         }
109     }
110 
111     /**
112      * Regression test for bug 2836355.
113      * @throws Exception if an error occurs
114      */
115     @Test
116     public void serialization_withClickAfterwards() throws Exception {
117         final String html = DOCTYPE_HTML
118             + "<html><head>\n"
119             + "<script>\n"
120             + "  function foo() {\n"
121             + "    document.getElementById('mybox').innerHTML='hello world';\n"
122             + "    return false;\n"
123             + "  }\n"
124             + "</script></head>\n"
125             + "<body><div id='mybox'></div>\n"
126             + "<a href='#' onclick='foo()' id='clicklink'>say hello world</a>\n"
127             + "</body></html>";
128         final HtmlPage page = loadPageWithAlerts(html);
129         assertEquals("", page.getElementById("mybox").getTextContent());
130 
131         final WebClient clientCopy = clone(page.getWebClient());
132         final HtmlPage pageCopy = (HtmlPage) clientCopy.getCurrentWindow().getTopWindow().getEnclosedPage();
133         pageCopy.getHtmlElementById("clicklink").click();
134         assertEquals("hello world", pageCopy.getElementById("mybox").getTextContent());
135     }
136 
137     /**
138      * Background tasks that have been registered before the serialization should
139      * wake up and run normally after the deserialization.
140      * Until now (2.7-SNAPSHOT 17.09.09) HtmlUnit has probably never supported it.
141      * This is currently not requested and this test is just to document the current status.
142      * @throws Exception if an error occurs
143      */
144     @Test
145     @Alerts({"1", "1", "exiting"})
146     @HtmlUnitNYI(CHROME = {"1", "0", ""},
147             EDGE = {"1", "0", ""},
148             FF = {"1", "0", ""},
149             FF_ESR = {"1", "0", ""})
150     public void serialization_withJSBackgroundTasks() throws Exception {
151         final String html = DOCTYPE_HTML
152             + "<html><head>\n"
153             + "<script>\n"
154             + "  function foo() {\n"
155             + "    if (window.name == 'hello') {\n"
156             + "      alert('exiting');\n"
157             + "      clearInterval(intervalId);\n"
158             + "    }\n"
159             + "  }\n"
160             + "  var intervalId = setInterval(foo, 10);\n"
161             + "</script></head>\n"
162             + "<body></body></html>";
163 
164         final String[] expected = getExpectedAlerts();
165 
166         setExpectedAlerts();
167         final HtmlPage page = loadPageWithAlerts(html);
168         // verify that 1 background job exists
169         assertEquals(Integer.parseInt(expected[0]), page.getEnclosingWindow().getJobManager().getJobCount());
170 
171         final byte[] bytes = SerializationUtils.serialize(page);
172         page.getWebClient().close();
173 
174         // deserialize page and verify that 1 background job exists
175         final HtmlPage clonedPage = (HtmlPage) SerializationUtils.deserialize(bytes);
176         assertEquals(Integer.parseInt(expected[1]), clonedPage.getEnclosingWindow().getJobManager().getJobCount());
177 
178         // configure a new CollectingAlertHandler (in fact it has surely already one and we could get and cast it)
179         final List<String> collectedAlerts = Collections.synchronizedList(new ArrayList<String>());
180         final AlertHandler alertHandler = new CollectingAlertHandler(collectedAlerts);
181         clonedPage.getWebClient().setAlertHandler(alertHandler);
182 
183         // make some change in the page on which background script reacts
184         clonedPage.getEnclosingWindow().setName("hello");
185 
186         clonedPage.getWebClient().waitForBackgroundJavaScriptStartingBefore(100);
187         assertEquals(0, clonedPage.getEnclosingWindow().getJobManager().getJobCount());
188 
189         final String[] expectedAlerts = {expected[2]};
190         assertEquals(expectedAlerts, collectedAlerts);
191     }
192 
193     /**
194      * Regression test for bug 2812769.
195      * @throws Exception if an error occurs
196      */
197     @Test
198     @Alerts(DEFAULT = "en-US,en;q=0.9",
199             FF = "en-US,en;q=0.5",
200             FF_ESR = "en-US,en;q=0.5")
201     public void acceptLanguage() throws Exception {
202         final String html = DOCTYPE_HTML + "<html><body></body></html>";
203         loadPage(html);
204         assertEquals(getExpectedAlerts()[0],
205                 getMockWebConnection().getLastAdditionalHeaders().get(HttpHeader.ACCEPT_LANGUAGE));
206     }
207 
208     /**
209      * Regression test for bug 2812769.
210      * @throws Exception if an error occurs
211      */
212     @Test
213     public void acceptLanguageFr() throws Exception {
214         final String html = DOCTYPE_HTML + "<html><body></body></html>";
215 
216         final BrowserVersion frBrowser =
217                 new BrowserVersion.BrowserVersionBuilder(getBrowserVersion())
218                         .setAcceptLanguageHeader("fr")
219                         .build();
220 
221         setBrowserVersion(frBrowser);
222         loadPageWithAlerts(html);
223         // browsers are using different casing, but this is not relevant for this test
224         assertEquals("fr",
225                 getMockWebConnection().getLastAdditionalHeaders()
226                     .get(HttpHeader.ACCEPT_LANGUAGE).toLowerCase(Locale.ROOT));
227     }
228 
229     /**
230      * As of HtmlUnit-2.7-SNAPSHOT from 24.09.09, loading about:blank in a page didn't
231      * reinitialized the window host object.
232      * @throws Exception if an error occurs
233      */
234     @Test
235     public void newWindowScopeForAboutBlank() throws Exception {
236         final HtmlPage p = loadPage(DOCTYPE_HTML + "<html><body></body></html>");
237         p.executeJavaScript("top.foo = 'hello';");
238         final ScriptResult result = p.executeJavaScript("top.foo");
239         assertEquals("hello", result.getJavaScriptResult());
240 
241         final HtmlPage page2 = p.getWebClient().getPage("about:blank");
242         final ScriptResult result2 = page2.executeJavaScript("String(top.foo)");
243         assertEquals("undefined", result2.getJavaScriptResult());
244     }
245 
246   /**
247    * @throws Exception if the test fails
248    */
249     @Test
250     public void buildCookie() throws Exception {
251         checkCookie("", EMPTY_COOKIE_NAME, "", "/", false, null);
252         checkCookie("toto", EMPTY_COOKIE_NAME, "toto", "/", false, null);
253         checkCookie("toto=", "toto", "", "/", false, null);
254         checkCookie("toto=foo", "toto", "foo", "/", false, null);
255         checkCookie("toto=foo;secure", "toto", "foo", "/", true, null);
256         checkCookie("toto=foo;path=/myPath;secure", "toto", "foo", "/myPath", true, null);
257 
258         // Check that leading and trailing whitespaces are ignored
259         checkCookie("  toto", EMPTY_COOKIE_NAME, "toto", "/", false, null);
260         checkCookie("  = toto", EMPTY_COOKIE_NAME, "toto", "/", false, null);
261         checkCookie("   toto=foo;  path=/myPath  ; secure  ",
262               "toto", "foo", "/myPath", true, null);
263 
264         // Check that we accept reserved attribute names (e.g expires, domain) in any case
265         checkCookie("toto=foo; PATH=/myPath; SeCURE",
266               "toto", "foo", "/myPath", true, null);
267 
268         // Check that we are able to parse and set the expiration date correctly
269         final ZonedDateTime inOneYear = ZonedDateTime.now().plusYears(1).truncatedTo(ChronoUnit.SECONDS);
270         final String dateString = DateTimeFormatter.RFC_1123_DATE_TIME.format(inOneYear);
271         final Date date = Date.from(inOneYear.toInstant());
272         checkCookie("toto=foo; expires=" + dateString, "toto", "foo", "/", false, date);
273     }
274 
275     private void checkCookie(final String cookieString, final String name, final String value,
276             final String path, final boolean secure, final Date date) {
277 
278         final String domain = URL_FIRST.getHost();
279 
280         getWebClient().getCookieManager().clearCookies();
281         getWebClient().addCookie(cookieString, URL_FIRST, this);
282         final Cookie cookie = getWebClient().getCookieManager().getCookies().iterator().next();
283 
284         assertEquals(name, cookie.getName());
285         assertEquals(value, cookie.getValue());
286         assertEquals(path, cookie.getPath());
287         assertEquals(domain, cookie.getDomain());
288         assertEquals(secure, cookie.isSecure());
289         // special handling for null case, because Date cannot be compared using assertEquals
290         if (date == null || cookie.getExpires() == null) {
291             assertEquals(date, cookie.getExpires());
292         }
293         else {
294             assertEquals(date.toInstant(), cookie.getExpires().toInstant());
295         }
296     }
297 
298     /**
299      * @throws Exception if something goes wrong
300      */
301     @Test
302     @Retry
303     @Alerts({"loadExtraContent started at Page 1", " loadExtraContent finished at Page 1"})
304     @HtmlUnitNYI(CHROME = {"loadExtraContent started at Page 1", " loadExtraContent finished at Page 2"},
305             EDGE = {"loadExtraContent started at Page 1", " loadExtraContent finished at Page 2"},
306             FF = {"loadExtraContent started at Page 1", " loadExtraContent finished at Page 2"},
307             FF_ESR = {"loadExtraContent started at Page 1", " loadExtraContent finished at Page 2"})
308     public void makeSureTheCurrentJobHasEndedBeforeReplaceWindowPage() throws Exception {
309         final String htmlContent1 = DOCTYPE_HTML
310             + "<html>\n"
311             + "<head>"
312             + "  <title>Page 1</title>\n"
313             + "</head>\n"
314             + "<body>\n"
315             + "  <script>\n"
316             + "    function loadExtraContent() {\n"
317             + "      window.name += 'loadExtraContent started at ' + window.document.title;"
318             + "      for (var i = 0; i < 7000; i++) {\n"
319             + "        try {\n"
320             + "          var p = document.createElement('p');\n"
321             + "          p.innerHTML = 'new content';\n"
322             + "          var body = document.querySelector('body');\n"
323             + "          if (body) { body.appendChild(p); }\n"
324             + "        } catch(e) {\n"
325             + "          var now = new Date().getTime();\n"
326             + "          while(new Date().getTime() < now + 100) { /* Do nothing */ }\n"
327             + "        }\n"
328             + "      }\n"
329             + "      window.name += ' loadExtraContent finished at ' + window.document.title;"
330             + "    }\n"
331 
332             + "    setTimeout(loadExtraContent, 1);"
333             + "  </script>\n"
334             + "</body>\n"
335             + "</html>";
336 
337         final String htmlContent2 = DOCTYPE_HTML
338             + "<html>\n"
339             + "<head>"
340             + "  <title>Page 2</title>\n"
341             + "</head>\n"
342             + "<body>\n"
343             + "  <h1>Page2</h1>\n"
344             + "  <p>This is page 2</p>\n"
345             + "</body>\n"
346             + "</html>";
347 
348         final WebClient client = getWebClient();
349 
350         final MockWebConnection webConnection = new MockWebConnection();
351         webConnection.setDefaultResponse(htmlContent1);
352         webConnection.setResponse(URL_SECOND, htmlContent2);
353         client.setWebConnection(webConnection);
354 
355         // Load page 1. Has a setTimeout(...) function
356         final HtmlPage page1 = client.getPage(URL_FIRST);
357         verify(() -> page1.getEnclosingWindow().getName(), getExpectedAlerts()[0]);
358 
359         // Immediately load page 2. Timeout function was triggered already
360         final HtmlPage page2 = client.getPage(URL_SECOND);
361         verify(() -> page1.getEnclosingWindow().getName(),
362                 getExpectedAlerts()[0] + getExpectedAlerts()[1], DEFAULT_WAIT_TIME.multipliedBy(4));
363 
364         // Fails: return 98 (about) instead of 1
365         // assertEquals(1, page.querySelectorAll("p").size());
366     }
367 
368     /**
369      * @throws Exception if something goes wrong
370      */
371     @Test
372     public void toLocaleLowerCase() throws Exception {
373         final String html = DOCTYPE_HTML
374             + "<html><head><script>\n"
375             + "  function doTest() {\n"
376             + "    window.document.title = '\\u0130'.toLocaleLowerCase();\n"
377             + "  }\n"
378             + "</script></head>"
379             + "<body onload='doTest()'>\n"
380             + "</body></html>";
381 
382         HtmlPage page = loadPage(html);
383         assertEquals("\u0069\u0307", page.getTitleText());
384 
385         releaseResources();
386         final BrowserVersion trBrowser =
387                 new BrowserVersion.BrowserVersionBuilder(getBrowserVersion())
388                         .setBrowserLanguage("tr")
389                         .build();
390 
391         setBrowserVersion(trBrowser);
392         page = loadPage(html);
393         assertEquals("\u0069", page.getTitleText());
394     }
395 
396     /**
397      * This is supported by reals browsers but not with HtmlUnit.
398      * @throws Exception if the test fails
399      */
400     @Test
401     public void localFile() throws Exception {
402         final URL url = getClass().getClassLoader().getResource("simple.html");
403         String file = URLDecoder.decode(url.getFile(), StandardCharsets.UTF_8);
404         if (file.startsWith("/") && file.contains(":")) {
405             // we have to remove the trailing slash to test the c:\.... case.
406             file = file.substring(1);
407         }
408 
409         assertTrue("File '" + file + "' does not exist", new File(file).exists());
410 
411         try (WebClient webClient = new WebClient(getBrowserVersion())) {
412             webClient.getPage(file);
413             fail("IOException expected");
414         }
415         catch (final IOException e) {
416             assertTrue(e.getMessage(),
417                     e.getMessage().startsWith("Unsupported protocol '")
418                     || e.getMessage().startsWith("no protocol: /"));
419         }
420     }
421 
422     /**
423      * @throws Exception if the test fails
424      */
425     @Test
426     @Alerts("titel - simple.html")
427     public void localFileFile() throws Exception {
428         final URL url = getClass().getClassLoader().getResource("simple.html");
429         String file = URLDecoder.decode(url.getFile(), StandardCharsets.UTF_8);
430         if (file.startsWith("/") && file.contains(":")) {
431             // we have to remove the trailing slash to test the c:\.... case.
432             file = file.substring(1);
433         }
434 
435         assertTrue("File '" + file + "' does not exist", new File(file).exists());
436 
437         try (WebClient webClient = new WebClient(getBrowserVersion())) {
438             final HtmlPage page = webClient.getPage("file://" + file);
439             assertEquals(getExpectedAlerts()[0], page.getTitleText());
440         }
441     }
442 
443     /**
444      * @throws Exception if the test fails
445      */
446     @Test
447     public void unknownProtocol() throws Exception {
448         try (WebClient webClient = new WebClient(getBrowserVersion())) {
449             final HtmlPage page = webClient.getPage("unknown://simple.html");
450             fail("IOException expected");
451         }
452         catch (final IOException e) {
453             assertEquals("Unsupported protocol 'unknown'", e.getMessage());
454         }
455     }
456 }