View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html.xpath;
16  
17  import static org.junit.Assert.fail;
18  
19  import java.util.ArrayList;
20  import java.util.Arrays;
21  import java.util.List;
22  
23  import org.htmlunit.SimpleWebTestCase;
24  import org.htmlunit.html.DomNode;
25  import org.htmlunit.html.DomText;
26  import org.htmlunit.html.HtmlAnchor;
27  import org.htmlunit.html.HtmlBody;
28  import org.htmlunit.html.HtmlDivision;
29  import org.htmlunit.html.HtmlElement;
30  import org.htmlunit.html.HtmlPage;
31  import org.htmlunit.html.HtmlTableCell;
32  import org.htmlunit.junit.BrowserRunner;
33  import org.junit.Test;
34  import org.junit.runner.RunWith;
35  
36  /**
37   * Tests for XPath evaluation on HtmlUnit DOM.
38   *
39   * @author Marc Guillemot
40   * @author Ahmed Ashour
41   * @author Ronald Brill
42   */
43  @RunWith(BrowserRunner.class)
44  public class HtmlUnitXPathTest extends SimpleWebTestCase {
45  
46      /**
47       * Test evaluation of some simple paths.
48       * @throws Exception if test fails
49       */
50      @Test
51      public void simplePath() throws Exception {
52          final String content = DOCTYPE_HTML
53              + "<html><head><title>Test page</title></head>\n"
54              + "<body><a href='foo.html' id='myLink'>foo</a></body>\n"
55              + "</html>";
56  
57          final HtmlPage page = loadPage(content);
58          assertEquals(page.getDocumentElement(), page.getFirstByXPath("/html"));
59          assertEquals(page.getDocumentElement().getFirstChild(), page.getFirstByXPath("/html/head"));
60          assertEquals(page.getHtmlElementById("myLink"), page.getFirstByXPath("/html/body/a"));
61          assertEquals("Test page", ((DomText) page.getFirstByXPath("/html/head/title/text()")).getNodeValue());
62      }
63  
64      /**
65       * Test evaluation relative from elements other than the whole page.
66       * @throws Exception if test fails
67       */
68      @Test
69      public void xpathFromElement() throws Exception {
70          final String content = DOCTYPE_HTML
71              + "<html><head><title>Test page</title></head>\n"
72              + "<body><a href='foo.html' id='myLink'>foo</a></body>\n"
73              + "</html>";
74  
75          final HtmlPage page = loadPage(content);
76          final HtmlBody body = page.getFirstByXPath("/html/body");
77  
78          assertEquals(page.getHtmlElementById("myLink"), body.getFirstByXPath("./a"));
79      }
80  
81      /**
82       * Test that the elements are in the right order.
83       * @throws Exception if test fails
84       */
85      @Test
86      @SuppressWarnings("unchecked")
87      public void elementOrder() throws Exception {
88          final String content = DOCTYPE_HTML
89              + "<html><head><title>First</title><script>\n"
90              + "</script></head><body>\n"
91              + "</body></html>";
92  
93          final HtmlPage page = loadPage(content);
94          final List<?> list = page.getByXPath("//*");
95  
96          final String[] expected = {"html", "head", "title", "script", "body"};
97          final List<String> actualNames = new ArrayList<>();
98          for (final DomNode node : (List<DomNode>) list) {
99              actualNames.add(node.getNodeName());
100         }
101         assertEquals(expected, actualNames);
102     }
103 
104     /**
105      * Test evaluation of paths after they're changed through JavaScript.
106      * @throws Exception if test fails
107      */
108     @Test
109     public void whenJSChangesPage() throws Exception {
110         final String content = DOCTYPE_HTML
111             + "<html><head><title>foo</title><script>\n"
112             + "function addOption() {\n"
113             + "  var options = document.form1.select1.options;\n"
114             + "  var index = options.length;\n"
115             + "  options[index] = new Option('Four','value4');\n"
116             + "}</script>\n"
117             + "</head>\n"
118             + "<body>\n"
119             + "<p>hello world</p>\n"
120             + "<form name='form1'>\n"
121             + "  <select name='select1'>\n"
122             + "    <option name='option1' value='value1'>One</option>\n"
123             + "    <option name='option2' value='value2' selected>Two</option>\n"
124             + "    <option name='option3' value='value3'>Three</option>\n"
125             + "  </select>\n"
126             + "</form>\n"
127             + "<a href='javascript:addOption()'>add option</a>\n"
128             + "</body></html>";
129 
130         final HtmlPage page = loadPage(content);
131         assertEquals("foo", page.getTitleText());
132 
133         assertEquals(3, page.<Number>getFirstByXPath("count(//select[@name='select1']/option)").intValue());
134 
135         page.getAnchors().get(0).click();
136         assertEquals(4, page.<Number>getFirstByXPath("count(//select[@name='select1']/option)").intValue());
137     }
138 
139     /**
140      * Tests XPath where results are attributes.
141      * @throws Exception if test fails
142      */
143     @Test
144     @SuppressWarnings("unchecked")
145     public void listAttributesResult() throws Exception {
146         final String content = DOCTYPE_HTML
147             + "<html><body>\n"
148             + "<img src='1.png'>\n"
149             + "<img src='2.png'>\n"
150             + "<img src='3.png'>\n"
151             + "</body></html>";
152 
153         final HtmlPage page = loadPage(content);
154 
155         final List<?> nameList = page.getByXPath("//img/@src");
156         final List<?> valueList = new ArrayList<>(nameList);
157 
158         final String[] expectedNames = {"src", "src", "src"};
159 
160         final List<String> collectedNames = new ArrayList<>();
161         for (final DomNode node : (List<DomNode>) nameList) {
162             collectedNames.add(node.getNodeName());
163         }
164         assertEquals(expectedNames, collectedNames);
165 
166         final String[] expectedValues = {"1.png", "2.png", "3.png"};
167         final List<String> collectedValues = new ArrayList<>();
168         for (final DomNode node : (List<DomNode>) valueList) {
169             collectedValues.add(node.getNodeValue());
170         }
171         assertEquals(expectedValues, collectedValues);
172     }
173 
174     /**
175      * Test if option/text() is cleaned like other text().
176      * @throws Exception if test fails
177      */
178     @Test
179     public void optionText_getFirstByXPath() throws Exception {
180         final String content = DOCTYPE_HTML
181             + "<html><head><title>Test page</title></head>\n"
182             + "<body><form name='foo'>\n"
183             + "<select name='test'><option value='1'>foo&nbsp;and&nbsp;foo</option></select>\n"
184             + "</form></body></html>";
185 
186         final HtmlPage page = loadPage(content);
187         final String value = page.getFirstByXPath("string(//option)");
188         final int[] expectedValues = {102, 111, 111, 160, 97, 110, 100, 160, 102, 111, 111};
189         int index = 0;
190         for (final int v : expectedValues) {
191             if (value.codePointAt(index++) != v) {
192                 fail();
193             }
194         }
195     }
196 
197     /**
198      * Regression test for http://sourceforge.net/p/htmlunit/bugs/365/.
199      * @throws Exception if test fails
200      */
201     @Test
202     public void followingAxis() throws Exception {
203         final String content = DOCTYPE_HTML
204             + "<html><title>XPath tests</title><body>\n"
205             + "<table id='table1'>\n"
206             + "<tr id='tr1'>\n"
207             + "<td id='td11'>a3</td>\n"
208             + "<td id='td12'>c</td>\n"
209             + "</tr>\n"
210             + "<tr id='tr2'>\n"
211             + "<td id='td21'>a4</td>\n"
212             + "<td id='td22'>c</td>\n"
213             + "</tr>\n"
214             + "</table>\n"
215             + "</body></html>";
216 
217         final HtmlPage page = loadPage(content);
218         final HtmlElement td12 = page.getHtmlElementById("td12");
219         final HtmlElement tr2 = page.getHtmlElementById("tr2");
220         final HtmlElement td21 = page.getHtmlElementById("td21");
221         final HtmlElement td22 = page.getHtmlElementById("td22");
222         xpath(page, "//*[contains(.,'a4')]/following::td[.='c']", new Object[] {td22});
223 
224         xpath(page, "//body/following::*", new Object[] {});
225         xpath(page, "//html/following::*", new Object[] {});
226         xpath(page, "//table/following::*", new Object[] {});
227         xpath(page, "//td[@id='td11']/following::*", new Object[] {td12, tr2, td21, td22});
228     }
229 
230     private static void xpath(final HtmlPage page, final String xpathExpr, final Object[] expectedNodes) {
231         assertEquals(Arrays.asList(expectedNodes), page.getByXPath(xpathExpr));
232     }
233 
234     /**
235      * @throws Exception if test fails
236      */
237     @Test
238     public void id() throws Exception {
239         final String content = DOCTYPE_HTML
240             + "<html><head><title>foo</title></head>\n"
241             + "<body>\n"
242             + "<div>\n"
243             + "  <a href='link.html' id='test'>\n"
244             + "</div>\n"
245             + "</body></html>";
246 
247         final HtmlPage page = loadPage(content);
248 
249         final HtmlAnchor anchor = page.getHtmlElementById("test");
250         assertSame(anchor, page.getFirstByXPath("//a[@id='test']"));
251         assertSame(anchor, page.getFirstByXPath("//*[@id='test']"));
252 
253         assertNull(page.getFirstByXPath("//div[@id='doesNotExist']"));
254         assertNull(page.getFirstByXPath("id('doesNotExist')"));
255     }
256 
257     /**
258      * @throws Exception if test fails
259      */
260     @Test
261     public void changingAttributes() throws Exception {
262         final String content = DOCTYPE_HTML
263             + "<html><head><title>foo</title></head>\n"
264             + "<body>\n"
265             + "<div id='testDiv' title='foo'></div>\n"
266             + "</body></html>";
267 
268         final HtmlPage page = loadPage(content);
269         final HtmlDivision div = page.getHtmlElementById("testDiv");
270 
271         assertSame(div, page.getFirstByXPath("//*[@title = 'foo']"));
272         assertNull(page.getFirstByXPath("//*[@class = 'design']"));
273 
274         div.setAttribute("class", "design");
275         assertSame(div, page.getFirstByXPath("//*[@class = 'design']"));
276     }
277 
278     /**
279      * @throws Exception if test fails
280      */
281     @Test
282     public void specialAttribute() throws Exception {
283         final String content = DOCTYPE_HTML
284             + "<html><head></head>\n"
285             + "<body>\n"
286             + "  <table><tr>\n"
287             + "    <td id='myTd' ab='test' a_b='test' a-b='test' a.b='test'>@X</td></tr></table>\n"
288             + "</body></html>";
289 
290         final HtmlPage page = loadPage(content);
291 
292         final HtmlTableCell cell = page.getHtmlElementById("myTd");
293 
294         assertSame(cell, page.getFirstByXPath("//td[@ab= 'test']"));
295         assertSame(cell, page.getFirstByXPath("//td[@AB= 'test']"));
296 
297         assertSame(cell, page.getFirstByXPath("//td[@a_b= 'test']"));
298         assertSame(cell, page.getFirstByXPath("//td[@A_B= 'test']"));
299 
300         assertSame(cell, page.getFirstByXPath("//td[@a-b= 'test']"));
301         assertSame(cell, page.getFirstByXPath("//td[@A-B= 'test']"));
302 
303         assertSame(cell, page.getFirstByXPath("//td[@a.b= 'test']"));
304         assertSame(cell, page.getFirstByXPath("//td[@A.B= 'test']"));
305     }
306 
307     /**
308      * @throws Exception if test fails
309      */
310     @Test
311     public void specialAtInText() throws Exception {
312         final String content = DOCTYPE_HTML
313             + "<html><head></head>\n"
314             + "<body>\n"
315             + "  <table><tr><td id='myTd'>@X</td></tr></table>\n"
316             + "</body></html>";
317 
318         final HtmlPage page = loadPage(content);
319 
320         final HtmlTableCell cell = page.getHtmlElementById("myTd");
321         assertNull(page.getFirstByXPath("//td[text()='@x']"));
322         assertSame(cell, page.getFirstByXPath("//td[text()='@X']"));
323     }
324 
325     /**
326      * @throws Exception if test fails
327      */
328     @Test
329     public void specialBracesInText() throws Exception {
330         String content = DOCTYPE_HTML
331             + "<html><head></head>\n"
332             + "<body>\n"
333             + "  <table><tr><td id='myTd'>(X)</td></tr></table>\n"
334             + "</body></html>";
335 
336         HtmlPage page = loadPage(content);
337 
338         HtmlTableCell cell = page.getHtmlElementById("myTd");
339         assertNull(page.getFirstByXPath("//td[normalize-space()='(x)']"));
340         assertSame(cell, page.getFirstByXPath("//td[normalize-space()='(X)']"));
341 
342         content = DOCTYPE_HTML
343             + "<html><head></head>\n"
344             + "<body>\n"
345             + "  <table><tr><td id='myTd'>[X]</td></tr></table>\n"
346             + "</body></html>";
347 
348         page = loadPage(content);
349 
350         cell = page.getHtmlElementById("myTd");
351         assertNull(page.getFirstByXPath("//td[text()='[x]']"));
352         assertSame(cell, page.getFirstByXPath("//td[text()='[X]']"));
353     }
354 }