View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html;
16  
17  import static java.nio.charset.StandardCharsets.ISO_8859_1;
18  import static java.nio.charset.StandardCharsets.UTF_8;
19  
20  import java.io.File;
21  import java.io.FileInputStream;
22  import java.io.InputStream;
23  import java.net.URL;
24  import java.util.ArrayList;
25  import java.util.Collections;
26  import java.util.List;
27  
28  import org.apache.commons.io.FileUtils;
29  import org.apache.commons.io.IOUtils;
30  import org.apache.commons.text.RandomStringGenerator;
31  import org.htmlunit.CollectingAlertHandler;
32  import org.htmlunit.MockWebConnection;
33  import org.htmlunit.SimpleWebTestCase;
34  import org.htmlunit.WebClient;
35  import org.htmlunit.junit.BrowserRunner;
36  import org.htmlunit.junit.annotation.Alerts;
37  import org.htmlunit.util.MimeType;
38  import org.junit.Rule;
39  import org.junit.Test;
40  import org.junit.rules.TemporaryFolder;
41  import org.junit.runner.RunWith;
42  
43  /**
44   * Tests for {@link HtmlPage}.
45   *
46   * @author Ahmed Ashour
47   * @author Marc Guillemot
48   * @author Ronald Brill
49   */
50  @RunWith(BrowserRunner.class)
51  public class HtmlPage2Test extends SimpleWebTestCase {
52  
    /**
     * JUnit rule handing out temporary folders; the save tests of this class call
     * {@code newFolder(...)} on it to obtain a directory to write their output into.
     * Has to be public due to JUnit's constraints for @Rule.
     */
    @Rule
    public final TemporaryFolder tmpFolderProvider_ = new TemporaryFolder();
59  
60      /**
61       * @throws Exception if the test fails
62       */
63      @Test
64      public void getFullQualifiedUrl_topWindow() throws Exception {
65          final String firstHtml = DOCTYPE_HTML
66              + "<html><head><title>first</title>\n"
67              + "<script>\n"
68              + "function init() {\n"
69              + "  var iframe = window.frames['f'];\n"
70              + "  iframe.document.write(\"<form name='form' action='" + URL_SECOND + "'>"
71              + "<input name='submit' type='submit'></form>\");\n"
72              + "  iframe.document.close();\n"
73              + "}\n"
74              + "</script></head>\n"
75              + "<body onload='init()'>\n"
76              + "  <iframe name='f'></iframe>\n"
77              + "</body></html>";
78          final String secondHtml = DOCTYPE_HTML
79              + "<html><head><title>second</title></head>\n"
80              + "<body><p>Form submitted successfully.</p></body></html>";
81  
82          final WebClient client = getWebClient();
83  
84          final MockWebConnection webConnection = new MockWebConnection();
85          webConnection.setResponse(URL_FIRST, firstHtml);
86          webConnection.setDefaultResponse(secondHtml);
87          client.setWebConnection(webConnection);
88  
89          final HtmlPage page = client.getPage(URL_FIRST);
90  
91          HtmlPage framePage = (HtmlPage) page.getFrameByName("f").getEnclosedPage();
92          final HtmlForm form = framePage.getFormByName("form");
93          final HtmlInput submit = form.getInputByName("submit");
94          framePage = submit.click();
95          assertEquals("Form submitted successfully.", framePage.getBody().asNormalizedText());
96      }
97  
98      /**
99       * @throws Exception if the test fails
100      */
101     @Test
102     @Alerts("Hello there")
103     public void save() throws Exception {
104         final String html = DOCTYPE_HTML + "<html><head><script src='" + URL_SECOND + "'>\n</script></head></html>";
105 
106         final String js = "alert('Hello there')";
107 
108         final WebClient webClient = getWebClient();
109         final MockWebConnection webConnection = new MockWebConnection();
110 
111         webConnection.setResponse(URL_FIRST, html);
112         webConnection.setResponse(URL_SECOND, js);
113         webClient.setWebConnection(webConnection);
114 
115         final List<String> collectedAlerts = new ArrayList<>();
116         webClient.setAlertHandler(new CollectingAlertHandler(collectedAlerts));
117 
118         final HtmlPage page = webClient.getPage(URL_FIRST);
119         assertEquals(getExpectedAlerts(), collectedAlerts);
120 
121         final HtmlScript sript = page.getFirstByXPath("//script");
122         assertEquals(URL_SECOND.toString(), sript.getSrcAttribute());
123 
124         final File tmpFolder = tmpFolderProvider_.newFolder("hu");
125         final File file = new File(tmpFolder, "hu_HtmlPageTest_save.html");
126         page.save(file);
127         assertTrue(file.exists());
128         assertTrue(file.isFile());
129         final String content = FileUtils.readFileToString(file, ISO_8859_1);
130         assertFalse(content.contains("<script"));
131 
132         assertEquals(URL_SECOND.toString(), sript.getSrcAttribute());
133     }
134 
135     /**
136      * @throws Exception if the test fails
137      */
138     @Test
139     public void save_image() throws Exception {
140         final String html = DOCTYPE_HTML + "<html><body><img src='" + URL_SECOND + "'></body></html>";
141 
142         final URL url = getClass().getClassLoader().getResource("testfiles/tiny-jpg.img");
143         final WebClient webClient = getWebClientWithMockWebConnection();
144         try (FileInputStream fis = new FileInputStream(new File(url.toURI()))) {
145             final byte[] directBytes = IOUtils.toByteArray(fis);
146             final MockWebConnection webConnection = getMockWebConnection();
147 
148             webConnection.setResponse(URL_FIRST, html);
149             webConnection.setResponse(URL_SECOND, directBytes, 200, "ok", "image/jpg", Collections.emptyList());
150         }
151 
152         final HtmlPage page = webClient.getPage(URL_FIRST);
153         final HtmlImage img = page.getFirstByXPath("//img");
154         assertEquals(URL_SECOND.toString(), img.getSrcAttribute());
155         final File tmpFolder = tmpFolderProvider_.newFolder("hu");
156         final File file = new File(tmpFolder, "hu_HtmlPageTest_save2.html");
157         final File imgFile = new File(tmpFolder, "hu_HtmlPageTest_save2/second.jpeg");
158         page.save(file);
159         assertTrue(file.exists());
160         assertTrue(file.isFile());
161         final byte[] loadedBytes = FileUtils.readFileToByteArray(imgFile);
162         assertTrue(loadedBytes.length > 0);
163         assertEquals(URL_SECOND.toString(), img.getSrcAttribute());
164     }
165 
166     /**
167      * As of 24.05.2011 an IOException was occurring when saving a page where
168      * the response to the request for an image was not an image.
169      * @throws Exception if the test fails
170      */
171     @Test
172     public void save_imageNotImage() throws Exception {
173         final String html = DOCTYPE_HTML + "<html><body><img src='foo.txt'></body></html>";
174 
175         final MockWebConnection webConnection = getMockWebConnection();
176 
177         webConnection.setDefaultResponse("hello", MimeType.TEXT_PLAIN);
178 
179         final HtmlPage page = loadPageWithAlerts(html);
180 
181         final File folder = tmpFolderProvider_.newFolder("hu");
182         final File file = new File(folder, "hu_save.html");
183         page.save(file);
184         assertTrue(file.exists());
185         assertTrue(file.isFile());
186 
187         final File imgFile = new File(folder, "hu_save/foo.txt");
188         assertEquals("hello", FileUtils.readFileToString(imgFile, UTF_8));
189     }
190 
191     /**
192      * @throws Exception if the test fails
193      */
194     @Test
195     public void save_image_without_src() throws Exception {
196         final String html = DOCTYPE_HTML + "<html><body><img></body></html>";
197 
198         final WebClient webClient = getWebClientWithMockWebConnection();
199         final MockWebConnection webConnection = getMockWebConnection();
200 
201         webConnection.setResponse(URL_FIRST, html);
202 
203         final HtmlPage page = webClient.getPage(URL_FIRST);
204         final File tmpFolder = tmpFolderProvider_.newFolder("hu");
205         final File file = new File(tmpFolder, "hu_HtmlPageTest_save3.html");
206         page.save(file);
207         assertTrue(file.exists());
208         assertTrue(file.isFile());
209 
210         final HtmlImage img = page.getFirstByXPath("//img");
211         assertEquals(DomElement.ATTRIBUTE_NOT_DEFINED, img.getSrcAttribute());
212     }
213 
214     /**
215      * @throws Exception if the test fails
216      */
217     @Test
218     public void save_image_empty_src() throws Exception {
219         final String html = DOCTYPE_HTML + "<html><body><img src=''></body></html>";
220 
221         final WebClient webClient = getWebClientWithMockWebConnection();
222         final MockWebConnection webConnection = getMockWebConnection();
223 
224         webConnection.setResponse(URL_FIRST, html);
225 
226         final HtmlPage page = webClient.getPage(URL_FIRST);
227         final File tmpFolder = tmpFolderProvider_.newFolder("hu");
228         final File file = new File(tmpFolder, "hu_HtmlPageTest_save3.html");
229         page.save(file);
230         assertTrue(file.exists());
231         assertTrue(file.isFile());
232 
233         final HtmlImage img = page.getFirstByXPath("//img");
234         assertEquals(DomElement.ATTRIBUTE_NOT_DEFINED, img.getSrcAttribute());
235     }
236 
237     /**
238      * @throws Exception if the test fails
239      */
240     @Test
241     public void save_frames() throws Exception {
242         final String mainContent = DOCTYPE_HTML
243             + "<html><head><title>First</title></head>\n"
244             + "<frameset cols='50%,*'>\n"
245             + "  <frame name='left' src='" + URL_SECOND + "' frameborder='1' />\n"
246             + "  <frame name='right' src='" + URL_THIRD + "' frameborder='1' />\n"
247             + "  <frame name='withoutsrc' />\n"
248             + "</frameset>\n"
249             + "</html>";
250         final String frameLeftContent = DOCTYPE_HTML
251             + "<html><head><title>Second</title></head><body>\n"
252             + "<iframe src='iframe.html'></iframe>\n"
253             + "<img src='img.jpg'>\n"
254             + "</body></html>";
255         final String frameRightContent = DOCTYPE_HTML
256                 + "<html><head><title>Third</title></head><body>frame right</body></html>";
257         final String iframeContent  = DOCTYPE_HTML
258                 + "<html><head><title>Iframe</title></head><body>iframe</body></html>";
259 
260         try (InputStream is = getClass().getClassLoader().getResourceAsStream("testfiles/tiny-jpg.img")) {
261             final byte[] directBytes = IOUtils.toByteArray(is);
262 
263             final MockWebConnection webConnection = getMockWebConnection();
264             webConnection.setResponse(URL_FIRST, mainContent);
265             webConnection.setResponse(URL_SECOND, frameLeftContent);
266             webConnection.setResponse(URL_THIRD, frameRightContent);
267             final URL urlIframe = new URL(URL_SECOND, "iframe.html");
268             webConnection.setResponse(urlIframe, iframeContent);
269 
270             final URL urlImage = new URL(URL_SECOND, "img.jpg");
271             webConnection.setResponse(urlImage, directBytes, 200, "ok", "image/jpg", Collections.emptyList());
272         }
273 
274         final WebClient webClient = getWebClientWithMockWebConnection();
275         final HtmlPage page = webClient.getPage(URL_FIRST);
276         final HtmlFrame leftFrame = page.getElementByName("left");
277         assertEquals(URL_SECOND.toString(), leftFrame.getSrcAttribute());
278         final File tmpFolder = tmpFolderProvider_.newFolder("hu");
279         final File file = new File(tmpFolder, "hu_HtmlPageTest_saveFrame.html");
280         final File expectedLeftFrameFile = new File(tmpFolder, "hu_HtmlPageTest_saveFrame/second.html");
281         final File expectedRightFrameFile = new File(tmpFolder, "hu_HtmlPageTest_saveFrame/third.html");
282         final File expectedIFrameFile = new File(tmpFolder, "hu_HtmlPageTest_saveFrame/second/iframe.html");
283         final File expectedImgFile = new File(tmpFolder, "hu_HtmlPageTest_saveFrame/second/img.jpg");
284         final File[] allFiles = {file, expectedLeftFrameFile, expectedImgFile, expectedIFrameFile,
285             expectedRightFrameFile};
286 
287         page.save(file);
288         for (final File f : allFiles) {
289             assertTrue(f.toString(), f.exists());
290             assertTrue(f.toString(), f.isFile());
291         }
292 
293         final byte[] loadedBytes = FileUtils.readFileToByteArray(expectedImgFile);
294         assertTrue(loadedBytes.length > 0);
295 
296         // ensure that saving the page hasn't changed the DOM
297         assertEquals(URL_SECOND.toString(), leftFrame.getSrcAttribute());
298     }
299 
300     /**
301      * @throws Exception if the test fails
302      */
303     @Test
304     public void save_css() throws Exception {
305         final String html = DOCTYPE_HTML
306             + "<html><head>\n"
307             + "<link rel='stylesheet' type='text/css' href='" + URL_SECOND + "'/></head></html>";
308 
309         final String css = "body {color: blue}";
310 
311         final WebClient webClient = getWebClientWithMockWebConnection();
312         final MockWebConnection webConnection = getMockWebConnection();
313 
314         webConnection.setResponse(URL_FIRST, html);
315         webConnection.setResponse(URL_SECOND, css);
316 
317         final HtmlPage page = webClient.getPage(URL_FIRST);
318         final HtmlLink cssLink = page.getFirstByXPath("//link");
319         assertEquals(URL_SECOND.toString(), cssLink.getHrefAttribute());
320 
321         final File tmpFolder = tmpFolderProvider_.newFolder("hu");
322         final File file = new File(tmpFolder, "hu_HtmlPageTest_save4.html");
323         final File cssFile = new File(tmpFolder, "hu_HtmlPageTest_save4/second.css");
324         page.save(file);
325         assertTrue(file.exists());
326         assertTrue(file.isFile());
327         assertEquals(css, FileUtils.readFileToString(cssFile, ISO_8859_1));
328 
329         assertEquals(URL_SECOND.toString(), cssLink.getHrefAttribute());
330     }
331 
332     /**
333      * @throws Exception if the test fails
334      */
335     @Test
336     public void save_css_without_href() throws Exception {
337         final String html = DOCTYPE_HTML
338             + "<html><head>\n"
339             + "<link rel='stylesheet' type='text/css' /></head></html>";
340 
341         final WebClient webClient = getWebClientWithMockWebConnection();
342         final MockWebConnection webConnection = getMockWebConnection();
343 
344         webConnection.setResponse(URL_FIRST, html);
345 
346         final HtmlPage page = webClient.getPage(URL_FIRST);
347         final File tmpFolder = tmpFolderProvider_.newFolder("hu");
348         final File file = new File(tmpFolder, "hu_HtmlPageTest_save5.html");
349         page.save(file);
350         assertTrue(file.exists());
351         assertTrue(file.isFile());
352 
353         final HtmlLink cssLink = page.getFirstByXPath("//link");
354         assertEquals(DomElement.ATTRIBUTE_NOT_DEFINED, cssLink.getHrefAttribute());
355     }
356 
357     /**
358      * @throws Exception if the test fails
359      */
360     @Test
361     public void save_css_empty_href() throws Exception {
362         final String html = DOCTYPE_HTML
363             + "<html><head>\n"
364             + "<link rel='stylesheet' type='text/css' href='' /></head></html>";
365 
366         final WebClient webClient = getWebClientWithMockWebConnection();
367         final MockWebConnection webConnection = getMockWebConnection();
368 
369         webConnection.setResponse(URL_FIRST, html);
370 
371         final HtmlPage page = webClient.getPage(URL_FIRST);
372         final File tmpFolder = tmpFolderProvider_.newFolder("hu");
373         final File file = new File(tmpFolder, "hu_HtmlPageTest_save5.html");
374         page.save(file);
375         assertTrue(file.exists());
376         assertTrue(file.isFile());
377 
378         final HtmlLink cssLink = page.getFirstByXPath("//link");
379         assertEquals(DomElement.ATTRIBUTE_NOT_DEFINED, cssLink.getHrefAttribute());
380     }
381 
382     /**
383      * This was producing java.io.IOException: File name too long as of HtmlUnit-2.9.
384      * Many file systems have a limit 255 byte for file names.
385      * @throws Exception if the test fails
386      */
387     @Test
388     public void saveShouldStripLongFileNames() throws Exception {
389         final RandomStringGenerator generator = new RandomStringGenerator.Builder().withinRange('a', 'z').get();
390         final String longName = generator.generate(500) + ".html";
391         final String html = DOCTYPE_HTML + "<html><body><iframe src='" + longName + "'></iframe></body></html>";
392 
393         final WebClient webClient = getWebClient();
394         final MockWebConnection webConnection = new MockWebConnection();
395 
396         webConnection.setDefaultResponse(DOCTYPE_HTML + "<html/>");
397         webConnection.setResponse(URL_FIRST, html);
398         webClient.setWebConnection(webConnection);
399 
400         final HtmlPage page = webClient.getPage(URL_FIRST);
401 
402         final File tmpFolder = tmpFolderProvider_.newFolder("hu");
403         final File file = new File(tmpFolder, "hu_HtmlPageTest_save.html");
404         page.save(file);
405         assertTrue(file.exists());
406         assertTrue(file.isFile());
407     }
408 
409     /**
410      * @throws Exception if the test fails
411      */
412     @Test
413     public void serialization_attributeListenerLock() throws Exception {
414         final String html = DOCTYPE_HTML
415             + "<html><head><script>\n"
416             + "function foo() {\n"
417             + "  document.getElementById('aframe').src = '" + URL_FIRST + "';\n"
418             + "  return false;\n"
419             + "}</script>\n"
420             + "<body><iframe src='about:blank' id='aframe'></iframe>\n"
421             + "<a href='#' onclick='foo()' id='link'>load iframe</a></body></html>";
422         final HtmlPage page = loadPageWithAlerts(html);
423         final WebClient copy = clone(page.getWebClient());
424         final HtmlPage copyPage = (HtmlPage) copy.getCurrentWindow().getTopWindow().getEnclosedPage();
425         copyPage.getHtmlElementById("link").click();
426         assertEquals(URL_FIRST.toExternalForm(), copyPage.getElementById("aframe").getAttribute("src"));
427     }
428 
429     /**
430      * @throws Exception if the test fails
431      */
432     @Test
433     public void save_emptyTextArea() throws Exception {
434         final String html = DOCTYPE_HTML
435             + "<html>\n"
436             + "<head/>\n"
437             + "<body>\n"
438             + "<textarea></textarea>\n"
439             + "</body>\n"
440             + "</html>";
441 
442         final HtmlPage page = loadPage(html);
443         final File tmpFolder = new File(System.getProperty("java.io.tmpdir"));
444         final File file = new File(tmpFolder, "hu_HtmlPage2Test_save_emptyTextArea.html");
445         try {
446             page.save(file);
447             assertTrue(file.exists());
448             assertTrue(file.isFile());
449             assertTrue(page.asXml().contains("</textarea>"));
450             assertTrue(FileUtils.readFileToString(file, ISO_8859_1).contains("</textarea>"));
451         }
452         finally {
453             assertTrue(file.delete());
454         }
455     }
456 }