View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html;
16  
17  import static java.nio.charset.StandardCharsets.ISO_8859_1;
18  import static java.nio.charset.StandardCharsets.UTF_8;
19  
20  import java.io.File;
21  import java.io.FileInputStream;
22  import java.io.InputStream;
23  import java.net.URL;
24  import java.nio.file.Path;
25  import java.util.ArrayList;
26  import java.util.Collections;
27  import java.util.List;
28  
29  import org.apache.commons.io.FileUtils;
30  import org.apache.commons.io.IOUtils;
31  import org.apache.commons.text.RandomStringGenerator;
32  import org.htmlunit.CollectingAlertHandler;
33  import org.htmlunit.MockWebConnection;
34  import org.htmlunit.SimpleWebTestCase;
35  import org.htmlunit.WebClient;
36  import org.htmlunit.junit.annotation.Alerts;
37  import org.htmlunit.util.MimeType;
38  import org.junit.jupiter.api.Test;
39  import org.junit.jupiter.api.io.TempDir;
40  
41  /**
42   * Tests for {@link HtmlPage}.
43   *
44   * @author Ahmed Ashour
45   * @author Marc Guillemot
46   * @author Ronald Brill
47   */
48  public class HtmlPage2Test extends SimpleWebTestCase {
49  
50      @TempDir
51      static Path TEMP_DIR_;
52  
53      /**
54       * @throws Exception if the test fails
55       */
56      @Test
57      public void getFullQualifiedUrl_topWindow() throws Exception {
58          final String firstHtml = DOCTYPE_HTML
59              + "<html><head><title>first</title>\n"
60              + "<script>\n"
61              + "function init() {\n"
62              + "  var iframe = window.frames['f'];\n"
63              + "  iframe.document.write(\"<form name='form' action='" + URL_SECOND + "'>"
64              + "<input name='submit' type='submit'></form>\");\n"
65              + "  iframe.document.close();\n"
66              + "}\n"
67              + "</script></head>\n"
68              + "<body onload='init()'>\n"
69              + "  <iframe name='f'></iframe>\n"
70              + "</body></html>";
71          final String secondHtml = DOCTYPE_HTML
72              + "<html><head><title>second</title></head>\n"
73              + "<body><p>Form submitted successfully.</p></body></html>";
74  
75          final WebClient client = getWebClient();
76  
77          final MockWebConnection webConnection = new MockWebConnection();
78          webConnection.setResponse(URL_FIRST, firstHtml);
79          webConnection.setDefaultResponse(secondHtml);
80          client.setWebConnection(webConnection);
81  
82          final HtmlPage page = client.getPage(URL_FIRST);
83  
84          HtmlPage framePage = (HtmlPage) page.getFrameByName("f").getEnclosedPage();
85          final HtmlForm form = framePage.getFormByName("form");
86          final HtmlInput submit = form.getInputByName("submit");
87          framePage = submit.click();
88          assertEquals("Form submitted successfully.", framePage.getBody().asNormalizedText());
89      }
90  
91      /**
92       * @throws Exception if the test fails
93       */
94      @Test
95      @Alerts("Hello there")
96      public void save() throws Exception {
97          final String html = DOCTYPE_HTML + "<html><head><script src='" + URL_SECOND + "'>\n</script></head></html>";
98  
99          final String js = "alert('Hello there')";
100 
101         final WebClient webClient = getWebClient();
102         final MockWebConnection webConnection = new MockWebConnection();
103 
104         webConnection.setResponse(URL_FIRST, html);
105         webConnection.setResponse(URL_SECOND, js);
106         webClient.setWebConnection(webConnection);
107 
108         final List<String> collectedAlerts = new ArrayList<>();
109         webClient.setAlertHandler(new CollectingAlertHandler(collectedAlerts));
110 
111         final HtmlPage page = webClient.getPage(URL_FIRST);
112         assertEquals(getExpectedAlerts(), collectedAlerts);
113 
114         final HtmlScript sript = page.getFirstByXPath("//script");
115         assertEquals(URL_SECOND.toString(), sript.getSrcAttribute());
116 
117         final File tmpFolder = new File(TEMP_DIR_.toFile(), "hu");
118         tmpFolder.mkdir();
119         final File file = new File(tmpFolder, "hu_HtmlPageTest_save.html");
120         FileUtils.deleteQuietly(file);
121 
122         page.save(file);
123         assertTrue(file.exists());
124         assertTrue(file.isFile());
125         final String content = FileUtils.readFileToString(file, ISO_8859_1);
126         assertFalse(content.contains("<script"));
127 
128         assertEquals(URL_SECOND.toString(), sript.getSrcAttribute());
129     }
130 
131     /**
132      * @throws Exception if the test fails
133      */
134     @Test
135     public void save_image() throws Exception {
136         final String html = DOCTYPE_HTML + "<html><body><img src='" + URL_SECOND + "'></body></html>";
137 
138         final URL url = getClass().getClassLoader().getResource("testfiles/tiny-jpg.img");
139         final WebClient webClient = getWebClientWithMockWebConnection();
140         try (FileInputStream fis = new FileInputStream(new File(url.toURI()))) {
141             final byte[] directBytes = IOUtils.toByteArray(fis);
142             final MockWebConnection webConnection = getMockWebConnection();
143 
144             webConnection.setResponse(URL_FIRST, html);
145             webConnection.setResponse(URL_SECOND, directBytes, 200, "ok", "image/jpg", Collections.emptyList());
146         }
147 
148         final HtmlPage page = webClient.getPage(URL_FIRST);
149         final HtmlImage img = page.getFirstByXPath("//img");
150         assertEquals(URL_SECOND.toString(), img.getSrcAttribute());
151         final File tmpFolder = new File(TEMP_DIR_.toFile(), "hu");
152         tmpFolder.mkdir();
153         final File file = new File(tmpFolder, "hu_HtmlPageTest_save2.html");
154         FileUtils.deleteQuietly(file);
155         final File imgFile = new File(tmpFolder, "hu_HtmlPageTest_save2/second.jpeg");
156         FileUtils.deleteQuietly(imgFile);
157 
158         page.save(file);
159         assertTrue(file.exists());
160         assertTrue(file.isFile());
161         final byte[] loadedBytes = FileUtils.readFileToByteArray(imgFile);
162         assertTrue(loadedBytes.length > 0);
163         assertEquals(URL_SECOND.toString(), img.getSrcAttribute());
164     }
165 
166     /**
167      * As of 24.05.2011 an IOException was occurring when saving a page where
168      * the response to the request for an image was not an image.
169      * @throws Exception if the test fails
170      */
171     @Test
172     public void save_imageNotImage() throws Exception {
173         final String html = DOCTYPE_HTML + "<html><body><img src='foo.txt'></body></html>";
174 
175         final MockWebConnection webConnection = getMockWebConnection();
176 
177         webConnection.setDefaultResponse("hello", MimeType.TEXT_PLAIN);
178 
179         final HtmlPage page = loadPageWithAlerts(html);
180 
181         final File tmpFolder = new File(TEMP_DIR_.toFile(), "hu");
182         tmpFolder.mkdir();
183         final File file = new File(tmpFolder, "hu_save.html");
184         FileUtils.deleteQuietly(file);
185 
186         page.save(file);
187         assertTrue(file.exists());
188         assertTrue(file.isFile());
189 
190         final File imgFile = new File(tmpFolder, "hu_save/foo.txt");
191         assertEquals("hello", FileUtils.readFileToString(imgFile, UTF_8));
192     }
193 
194     /**
195      * @throws Exception if the test fails
196      */
197     @Test
198     public void save_image_without_src() throws Exception {
199         final String html = DOCTYPE_HTML + "<html><body><img></body></html>";
200 
201         final WebClient webClient = getWebClientWithMockWebConnection();
202         final MockWebConnection webConnection = getMockWebConnection();
203 
204         webConnection.setResponse(URL_FIRST, html);
205 
206         final HtmlPage page = webClient.getPage(URL_FIRST);
207         final File tmpFolder = new File(TEMP_DIR_.toFile(), "hu");
208         tmpFolder.mkdir();
209         final File file = new File(tmpFolder, "hu_HtmlPageTest_save3.html");
210         FileUtils.deleteQuietly(file);
211 
212         page.save(file);
213         assertTrue(file.exists());
214         assertTrue(file.isFile());
215 
216         final HtmlImage img = page.getFirstByXPath("//img");
217         assertEquals(DomElement.ATTRIBUTE_NOT_DEFINED, img.getSrcAttribute());
218     }
219 
220     /**
221      * @throws Exception if the test fails
222      */
223     @Test
224     public void save_image_empty_src() throws Exception {
225         final String html = DOCTYPE_HTML + "<html><body><img src=''></body></html>";
226 
227         final WebClient webClient = getWebClientWithMockWebConnection();
228         final MockWebConnection webConnection = getMockWebConnection();
229 
230         webConnection.setResponse(URL_FIRST, html);
231 
232         final HtmlPage page = webClient.getPage(URL_FIRST);
233         final File tmpFolder = new File(TEMP_DIR_.toFile(), "hu");
234         tmpFolder.mkdir();
235         final File file = new File(tmpFolder, "hu_HtmlPageTest_save3.html");
236         FileUtils.deleteQuietly(file);
237 
238         page.save(file);
239         assertTrue(file.exists());
240         assertTrue(file.isFile());
241 
242         final HtmlImage img = page.getFirstByXPath("//img");
243         assertEquals(DomElement.ATTRIBUTE_NOT_DEFINED, img.getSrcAttribute());
244     }
245 
246     /**
247      * @throws Exception if the test fails
248      */
249     @Test
250     public void save_frames() throws Exception {
251         final String mainContent = DOCTYPE_HTML
252             + "<html><head><title>First</title></head>\n"
253             + "<frameset cols='50%,*'>\n"
254             + "  <frame name='left' src='" + URL_SECOND + "' frameborder='1' />\n"
255             + "  <frame name='right' src='" + URL_THIRD + "' frameborder='1' />\n"
256             + "  <frame name='withoutsrc' />\n"
257             + "</frameset>\n"
258             + "</html>";
259         final String frameLeftContent = DOCTYPE_HTML
260             + "<html><head><title>Second</title></head><body>\n"
261             + "<iframe src='iframe.html'></iframe>\n"
262             + "<img src='img.jpg'>\n"
263             + "</body></html>";
264         final String frameRightContent = DOCTYPE_HTML
265                 + "<html><head><title>Third</title></head><body>frame right</body></html>";
266         final String iframeContent  = DOCTYPE_HTML
267                 + "<html><head><title>Iframe</title></head><body>iframe</body></html>";
268 
269         try (InputStream is = getClass().getClassLoader().getResourceAsStream("testfiles/tiny-jpg.img")) {
270             final byte[] directBytes = IOUtils.toByteArray(is);
271 
272             final MockWebConnection webConnection = getMockWebConnection();
273             webConnection.setResponse(URL_FIRST, mainContent);
274             webConnection.setResponse(URL_SECOND, frameLeftContent);
275             webConnection.setResponse(URL_THIRD, frameRightContent);
276             final URL urlIframe = new URL(URL_SECOND, "iframe.html");
277             webConnection.setResponse(urlIframe, iframeContent);
278 
279             final URL urlImage = new URL(URL_SECOND, "img.jpg");
280             webConnection.setResponse(urlImage, directBytes, 200, "ok", "image/jpg", Collections.emptyList());
281         }
282 
283         final WebClient webClient = getWebClientWithMockWebConnection();
284         final HtmlPage page = webClient.getPage(URL_FIRST);
285         final HtmlFrame leftFrame = page.getElementByName("left");
286         assertEquals(URL_SECOND.toString(), leftFrame.getSrcAttribute());
287         final File tmpFolder = new File(TEMP_DIR_.toFile(), "hu");
288         tmpFolder.mkdir();
289         final File file = new File(tmpFolder, "hu_HtmlPageTest_saveFrame.html");
290         FileUtils.deleteQuietly(file);
291         final File expectedLeftFrameFile = new File(tmpFolder, "hu_HtmlPageTest_saveFrame/second.html");
292         FileUtils.deleteQuietly(expectedLeftFrameFile);
293         final File expectedRightFrameFile = new File(tmpFolder, "hu_HtmlPageTest_saveFrame/third.html");
294         FileUtils.deleteQuietly(expectedRightFrameFile);
295         final File expectedIFrameFile = new File(tmpFolder, "hu_HtmlPageTest_saveFrame/second/iframe.html");
296         FileUtils.deleteQuietly(expectedIFrameFile);
297         final File expectedImgFile = new File(tmpFolder, "hu_HtmlPageTest_saveFrame/second/img.jpg");
298         FileUtils.deleteQuietly(expectedImgFile);
299         final File[] allFiles = {file, expectedLeftFrameFile, expectedImgFile, expectedIFrameFile,
300             expectedRightFrameFile};
301 
302         page.save(file);
303         for (final File f : allFiles) {
304             assertTrue(f.toString(), f.exists());
305             assertTrue(f.toString(), f.isFile());
306         }
307 
308         final byte[] loadedBytes = FileUtils.readFileToByteArray(expectedImgFile);
309         assertTrue(loadedBytes.length > 0);
310 
311         // ensure that saving the page hasn't changed the DOM
312         assertEquals(URL_SECOND.toString(), leftFrame.getSrcAttribute());
313     }
314 
315     /**
316      * @throws Exception if the test fails
317      */
318     @Test
319     public void save_css() throws Exception {
320         final String html = DOCTYPE_HTML
321             + "<html><head>\n"
322             + "<link rel='stylesheet' type='text/css' href='" + URL_SECOND + "'/></head></html>";
323 
324         final String css = "body {color: blue}";
325 
326         final WebClient webClient = getWebClientWithMockWebConnection();
327         final MockWebConnection webConnection = getMockWebConnection();
328 
329         webConnection.setResponse(URL_FIRST, html);
330         webConnection.setResponse(URL_SECOND, css);
331 
332         final HtmlPage page = webClient.getPage(URL_FIRST);
333         final HtmlLink cssLink = page.getFirstByXPath("//link");
334         assertEquals(URL_SECOND.toString(), cssLink.getHrefAttribute());
335 
336         final File tmpFolder = new File(TEMP_DIR_.toFile(), "hu");
337         tmpFolder.mkdir();
338         final File file = new File(tmpFolder, "hu_HtmlPageTest_save4.html");
339         FileUtils.deleteQuietly(file);
340         final File cssFile = new File(tmpFolder, "hu_HtmlPageTest_save4/second.css");
341         FileUtils.deleteQuietly(cssFile);
342 
343         page.save(file);
344         assertTrue(file.exists());
345         assertTrue(file.isFile());
346         assertEquals(css, FileUtils.readFileToString(cssFile, ISO_8859_1));
347 
348         assertEquals(URL_SECOND.toString(), cssLink.getHrefAttribute());
349     }
350 
351     /**
352      * @throws Exception if the test fails
353      */
354     @Test
355     public void save_css_without_href() throws Exception {
356         final String html = DOCTYPE_HTML
357             + "<html><head>\n"
358             + "<link rel='stylesheet' type='text/css' /></head></html>";
359 
360         final WebClient webClient = getWebClientWithMockWebConnection();
361         final MockWebConnection webConnection = getMockWebConnection();
362 
363         webConnection.setResponse(URL_FIRST, html);
364 
365         final HtmlPage page = webClient.getPage(URL_FIRST);
366         final File tmpFolder = new File(TEMP_DIR_.toFile(), "hu");
367         tmpFolder.mkdir();
368         final File file = new File(tmpFolder, "hu_HtmlPageTest_save5.html");
369         FileUtils.deleteQuietly(file);
370 
371         page.save(file);
372         assertTrue(file.exists());
373         assertTrue(file.isFile());
374 
375         final HtmlLink cssLink = page.getFirstByXPath("//link");
376         assertEquals(DomElement.ATTRIBUTE_NOT_DEFINED, cssLink.getHrefAttribute());
377     }
378 
379     /**
380      * @throws Exception if the test fails
381      */
382     @Test
383     public void save_css_empty_href() throws Exception {
384         final String html = DOCTYPE_HTML
385             + "<html><head>\n"
386             + "<link rel='stylesheet' type='text/css' href='' /></head></html>";
387 
388         final WebClient webClient = getWebClientWithMockWebConnection();
389         final MockWebConnection webConnection = getMockWebConnection();
390 
391         webConnection.setResponse(URL_FIRST, html);
392 
393         final HtmlPage page = webClient.getPage(URL_FIRST);
394         final File tmpFolder = new File(TEMP_DIR_.toFile(), "hu");
395         tmpFolder.mkdir();
396         final File file = new File(tmpFolder, "hu_HtmlPageTest_save5.html");
397         FileUtils.deleteQuietly(file);
398 
399         page.save(file);
400         assertTrue(file.exists());
401         assertTrue(file.isFile());
402 
403         final HtmlLink cssLink = page.getFirstByXPath("//link");
404         assertEquals(DomElement.ATTRIBUTE_NOT_DEFINED, cssLink.getHrefAttribute());
405     }
406 
407     /**
408      * This was producing java.io.IOException: File name too long as of HtmlUnit-2.9.
409      * Many file systems have a limit 255 byte for file names.
410      * @throws Exception if the test fails
411      */
412     @Test
413     public void saveShouldStripLongFileNames() throws Exception {
414         final RandomStringGenerator generator = new RandomStringGenerator.Builder().withinRange('a', 'z').get();
415         final String longName = generator.generate(500) + ".html";
416         final String html = DOCTYPE_HTML + "<html><body><iframe src='" + longName + "'></iframe></body></html>";
417 
418         final WebClient webClient = getWebClient();
419         final MockWebConnection webConnection = new MockWebConnection();
420 
421         webConnection.setDefaultResponse(DOCTYPE_HTML + "<html/>");
422         webConnection.setResponse(URL_FIRST, html);
423         webClient.setWebConnection(webConnection);
424 
425         final HtmlPage page = webClient.getPage(URL_FIRST);
426 
427         final File tmpFolder = new File(TEMP_DIR_.toFile(), "hu");
428         tmpFolder.mkdir();
429         final File file = new File(tmpFolder, "hu_HtmlPageTest_save.html");
430         FileUtils.deleteQuietly(file);
431 
432         page.save(file);
433         assertTrue(file.exists());
434         assertTrue(file.isFile());
435     }
436 
437     /**
438      * @throws Exception if the test fails
439      */
440     @Test
441     public void serialization_attributeListenerLock() throws Exception {
442         final String html = DOCTYPE_HTML
443             + "<html><head><script>\n"
444             + "function foo() {\n"
445             + "  document.getElementById('aframe').src = '" + URL_FIRST + "';\n"
446             + "  return false;\n"
447             + "}</script>\n"
448             + "<body><iframe src='about:blank' id='aframe'></iframe>\n"
449             + "<a href='#' onclick='foo()' id='link'>load iframe</a></body></html>";
450         final HtmlPage page = loadPageWithAlerts(html);
451         final WebClient copy = clone(page.getWebClient());
452         final HtmlPage copyPage = (HtmlPage) copy.getCurrentWindow().getTopWindow().getEnclosedPage();
453         copyPage.getHtmlElementById("link").click();
454         assertEquals(URL_FIRST.toExternalForm(), copyPage.getElementById("aframe").getAttribute("src"));
455     }
456 
457     /**
458      * @throws Exception if the test fails
459      */
460     @Test
461     public void save_emptyTextArea() throws Exception {
462         final String html = DOCTYPE_HTML
463             + "<html>\n"
464             + "<head/>\n"
465             + "<body>\n"
466             + "<textarea></textarea>\n"
467             + "</body>\n"
468             + "</html>";
469 
470         final HtmlPage page = loadPage(html);
471         final File tmpFolder = new File(System.getProperty("java.io.tmpdir"));
472         final File file = new File(tmpFolder, "hu_HtmlPage2Test_save_emptyTextArea.html");
473         try {
474             page.save(file);
475             assertTrue(file.exists());
476             assertTrue(file.isFile());
477             assertTrue(page.asXml().contains("</textarea>"));
478             assertTrue(FileUtils.readFileToString(file, ISO_8859_1).contains("</textarea>"));
479         }
480         finally {
481             assertTrue(file.delete());
482         }
483     }
484 }