View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html.xpath;
16  
17  import static org.junit.jupiter.api.Assertions.fail;
18  
19  import java.util.ArrayList;
20  import java.util.Arrays;
21  import java.util.List;
22  
23  import org.htmlunit.SimpleWebTestCase;
24  import org.htmlunit.html.DomNode;
25  import org.htmlunit.html.DomText;
26  import org.htmlunit.html.HtmlAnchor;
27  import org.htmlunit.html.HtmlBody;
28  import org.htmlunit.html.HtmlDivision;
29  import org.htmlunit.html.HtmlElement;
30  import org.htmlunit.html.HtmlPage;
31  import org.htmlunit.html.HtmlTableCell;
32  import org.junit.jupiter.api.Test;
33  
34  /**
35   * Tests for XPath evaluation on HtmlUnit DOM.
36   *
37   * @author Marc Guillemot
38   * @author Ahmed Ashour
39   * @author Ronald Brill
40   */
41  public class HtmlUnitXPathTest extends SimpleWebTestCase {
42  
43      /**
44       * Test evaluation of some simple paths.
45       * @throws Exception if test fails
46       */
47      @Test
48      public void simplePath() throws Exception {
49          final String content = DOCTYPE_HTML
50              + "<html><head><title>Test page</title></head>\n"
51              + "<body><a href='foo.html' id='myLink'>foo</a></body>\n"
52              + "</html>";
53  
54          final HtmlPage page = loadPage(content);
55          assertEquals(page.getDocumentElement(), page.getFirstByXPath("/html"));
56          assertEquals(page.getDocumentElement().getFirstChild(), page.getFirstByXPath("/html/head"));
57          assertEquals(page.getHtmlElementById("myLink"), page.getFirstByXPath("/html/body/a"));
58          assertEquals("Test page", ((DomText) page.getFirstByXPath("/html/head/title/text()")).getNodeValue());
59      }
60  
61      /**
62       * Test evaluation relative from elements other than the whole page.
63       * @throws Exception if test fails
64       */
65      @Test
66      public void xpathFromElement() throws Exception {
67          final String content = DOCTYPE_HTML
68              + "<html><head><title>Test page</title></head>\n"
69              + "<body><a href='foo.html' id='myLink'>foo</a></body>\n"
70              + "</html>";
71  
72          final HtmlPage page = loadPage(content);
73          final HtmlBody body = page.getFirstByXPath("/html/body");
74  
75          assertEquals(page.getHtmlElementById("myLink"), body.getFirstByXPath("./a"));
76      }
77  
78      /**
79       * Test that the elements are in the right order.
80       * @throws Exception if test fails
81       */
82      @Test
83      @SuppressWarnings("unchecked")
84      public void elementOrder() throws Exception {
85          final String content = DOCTYPE_HTML
86              + "<html><head><title>First</title><script>\n"
87              + "</script></head><body>\n"
88              + "</body></html>";
89  
90          final HtmlPage page = loadPage(content);
91          final List<?> list = page.getByXPath("//*");
92  
93          final String[] expected = {"html", "head", "title", "script", "body"};
94          final List<String> actualNames = new ArrayList<>();
95          for (final DomNode node : (List<DomNode>) list) {
96              actualNames.add(node.getNodeName());
97          }
98          assertEquals(expected, actualNames);
99      }
100 
101     /**
102      * Test evaluation of paths after they're changed through JavaScript.
103      * @throws Exception if test fails
104      */
105     @Test
106     public void whenJSChangesPage() throws Exception {
107         final String content = DOCTYPE_HTML
108             + "<html><head><title>foo</title><script>\n"
109             + "function addOption() {\n"
110             + "  var options = document.form1.select1.options;\n"
111             + "  var index = options.length;\n"
112             + "  options[index] = new Option('Four','value4');\n"
113             + "}</script>\n"
114             + "</head>\n"
115             + "<body>\n"
116             + "<p>hello world</p>\n"
117             + "<form name='form1'>\n"
118             + "  <select name='select1'>\n"
119             + "    <option name='option1' value='value1'>One</option>\n"
120             + "    <option name='option2' value='value2' selected>Two</option>\n"
121             + "    <option name='option3' value='value3'>Three</option>\n"
122             + "  </select>\n"
123             + "</form>\n"
124             + "<a href='javascript:addOption()'>add option</a>\n"
125             + "</body></html>";
126 
127         final HtmlPage page = loadPage(content);
128         assertEquals("foo", page.getTitleText());
129 
130         assertEquals(3, page.<Number>getFirstByXPath("count(//select[@name='select1']/option)").intValue());
131 
132         page.getAnchors().get(0).click();
133         assertEquals(4, page.<Number>getFirstByXPath("count(//select[@name='select1']/option)").intValue());
134     }
135 
136     /**
137      * Tests XPath where results are attributes.
138      * @throws Exception if test fails
139      */
140     @Test
141     @SuppressWarnings("unchecked")
142     public void listAttributesResult() throws Exception {
143         final String content = DOCTYPE_HTML
144             + "<html><body>\n"
145             + "<img src='1.png'>\n"
146             + "<img src='2.png'>\n"
147             + "<img src='3.png'>\n"
148             + "</body></html>";
149 
150         final HtmlPage page = loadPage(content);
151 
152         final List<?> nameList = page.getByXPath("//img/@src");
153         final List<?> valueList = new ArrayList<>(nameList);
154 
155         final String[] expectedNames = {"src", "src", "src"};
156 
157         final List<String> collectedNames = new ArrayList<>();
158         for (final DomNode node : (List<DomNode>) nameList) {
159             collectedNames.add(node.getNodeName());
160         }
161         assertEquals(expectedNames, collectedNames);
162 
163         final String[] expectedValues = {"1.png", "2.png", "3.png"};
164         final List<String> collectedValues = new ArrayList<>();
165         for (final DomNode node : (List<DomNode>) valueList) {
166             collectedValues.add(node.getNodeValue());
167         }
168         assertEquals(expectedValues, collectedValues);
169     }
170 
171     /**
172      * Test if option/text() is cleaned like other text().
173      * @throws Exception if test fails
174      */
175     @Test
176     public void optionText_getFirstByXPath() throws Exception {
177         final String content = DOCTYPE_HTML
178             + "<html><head><title>Test page</title></head>\n"
179             + "<body><form name='foo'>\n"
180             + "<select name='test'><option value='1'>foo&nbsp;and&nbsp;foo</option></select>\n"
181             + "</form></body></html>";
182 
183         final HtmlPage page = loadPage(content);
184         final String value = page.getFirstByXPath("string(//option)");
185         final int[] expectedValues = {102, 111, 111, 160, 97, 110, 100, 160, 102, 111, 111};
186         int index = 0;
187         for (final int v : expectedValues) {
188             if (value.codePointAt(index++) != v) {
189                 fail();
190             }
191         }
192     }
193 
194     /**
195      * Regression test for http://sourceforge.net/p/htmlunit/bugs/365/.
196      * @throws Exception if test fails
197      */
198     @Test
199     public void followingAxis() throws Exception {
200         final String content = DOCTYPE_HTML
201             + "<html><title>XPath tests</title><body>\n"
202             + "<table id='table1'>\n"
203             + "<tr id='tr1'>\n"
204             + "<td id='td11'>a3</td>\n"
205             + "<td id='td12'>c</td>\n"
206             + "</tr>\n"
207             + "<tr id='tr2'>\n"
208             + "<td id='td21'>a4</td>\n"
209             + "<td id='td22'>c</td>\n"
210             + "</tr>\n"
211             + "</table>\n"
212             + "</body></html>";
213 
214         final HtmlPage page = loadPage(content);
215         final HtmlElement td12 = page.getHtmlElementById("td12");
216         final HtmlElement tr2 = page.getHtmlElementById("tr2");
217         final HtmlElement td21 = page.getHtmlElementById("td21");
218         final HtmlElement td22 = page.getHtmlElementById("td22");
219         xpath(page, "//*[contains(.,'a4')]/following::td[.='c']", new Object[] {td22});
220 
221         xpath(page, "//body/following::*", new Object[] {});
222         xpath(page, "//html/following::*", new Object[] {});
223         xpath(page, "//table/following::*", new Object[] {});
224         xpath(page, "//td[@id='td11']/following::*", new Object[] {td12, tr2, td21, td22});
225     }
226 
227     private static void xpath(final HtmlPage page, final String xpathExpr, final Object[] expectedNodes) {
228         assertEquals(Arrays.asList(expectedNodes), page.getByXPath(xpathExpr));
229     }
230 
231     /**
232      * @throws Exception if test fails
233      */
234     @Test
235     public void id() throws Exception {
236         final String content = DOCTYPE_HTML
237             + "<html><head><title>foo</title></head>\n"
238             + "<body>\n"
239             + "<div>\n"
240             + "  <a href='link.html' id='test'>\n"
241             + "</div>\n"
242             + "</body></html>";
243 
244         final HtmlPage page = loadPage(content);
245 
246         final HtmlAnchor anchor = page.getHtmlElementById("test");
247         assertSame(anchor, page.getFirstByXPath("//a[@id='test']"));
248         assertSame(anchor, page.getFirstByXPath("//*[@id='test']"));
249 
250         assertNull(page.getFirstByXPath("//div[@id='doesNotExist']"));
251         assertNull(page.getFirstByXPath("id('doesNotExist')"));
252     }
253 
254     /**
255      * @throws Exception if test fails
256      */
257     @Test
258     public void changingAttributes() throws Exception {
259         final String content = DOCTYPE_HTML
260             + "<html><head><title>foo</title></head>\n"
261             + "<body>\n"
262             + "<div id='testDiv' title='foo'></div>\n"
263             + "</body></html>";
264 
265         final HtmlPage page = loadPage(content);
266         final HtmlDivision div = page.getHtmlElementById("testDiv");
267 
268         assertSame(div, page.getFirstByXPath("//*[@title = 'foo']"));
269         assertNull(page.getFirstByXPath("//*[@class = 'design']"));
270 
271         div.setAttribute("class", "design");
272         assertSame(div, page.getFirstByXPath("//*[@class = 'design']"));
273     }
274 
275     /**
276      * @throws Exception if test fails
277      */
278     @Test
279     public void specialAttribute() throws Exception {
280         final String content = DOCTYPE_HTML
281             + "<html><head></head>\n"
282             + "<body>\n"
283             + "  <table><tr>\n"
284             + "    <td id='myTd' ab='test' a_b='test' a-b='test' a.b='test'>@X</td></tr></table>\n"
285             + "</body></html>";
286 
287         final HtmlPage page = loadPage(content);
288 
289         final HtmlTableCell cell = page.getHtmlElementById("myTd");
290 
291         assertSame(cell, page.getFirstByXPath("//td[@ab= 'test']"));
292         assertSame(cell, page.getFirstByXPath("//td[@AB= 'test']"));
293 
294         assertSame(cell, page.getFirstByXPath("//td[@a_b= 'test']"));
295         assertSame(cell, page.getFirstByXPath("//td[@A_B= 'test']"));
296 
297         assertSame(cell, page.getFirstByXPath("//td[@a-b= 'test']"));
298         assertSame(cell, page.getFirstByXPath("//td[@A-B= 'test']"));
299 
300         assertSame(cell, page.getFirstByXPath("//td[@a.b= 'test']"));
301         assertSame(cell, page.getFirstByXPath("//td[@A.B= 'test']"));
302     }
303 
304     /**
305      * @throws Exception if test fails
306      */
307     @Test
308     public void specialAtInText() throws Exception {
309         final String content = DOCTYPE_HTML
310             + "<html><head></head>\n"
311             + "<body>\n"
312             + "  <table><tr><td id='myTd'>@X</td></tr></table>\n"
313             + "</body></html>";
314 
315         final HtmlPage page = loadPage(content);
316 
317         final HtmlTableCell cell = page.getHtmlElementById("myTd");
318         assertNull(page.getFirstByXPath("//td[text()='@x']"));
319         assertSame(cell, page.getFirstByXPath("//td[text()='@X']"));
320     }
321 
322     /**
323      * @throws Exception if test fails
324      */
325     @Test
326     public void specialBracesInText() throws Exception {
327         String content = DOCTYPE_HTML
328             + "<html><head></head>\n"
329             + "<body>\n"
330             + "  <table><tr><td id='myTd'>(X)</td></tr></table>\n"
331             + "</body></html>";
332 
333         HtmlPage page = loadPage(content);
334 
335         HtmlTableCell cell = page.getHtmlElementById("myTd");
336         assertNull(page.getFirstByXPath("//td[normalize-space()='(x)']"));
337         assertSame(cell, page.getFirstByXPath("//td[normalize-space()='(X)']"));
338 
339         content = DOCTYPE_HTML
340             + "<html><head></head>\n"
341             + "<body>\n"
342             + "  <table><tr><td id='myTd'>[X]</td></tr></table>\n"
343             + "</body></html>";
344 
345         page = loadPage(content);
346 
347         cell = page.getHtmlElementById("myTd");
348         assertNull(page.getFirstByXPath("//td[text()='[x]']"));
349         assertSame(cell, page.getFirstByXPath("//td[text()='[X]']"));
350     }
351 }