View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html.xpath;
16  
17  import javax.xml.transform.ErrorListener;
18  import javax.xml.transform.TransformerException;
19  
20  import org.htmlunit.xpath.Expression;
21  import org.htmlunit.xpath.XPathContext;
22  import org.htmlunit.xpath.compiler.Compiler;
23  import org.htmlunit.xpath.compiler.FunctionTable;
24  import org.htmlunit.xpath.compiler.XPathParser;
25  import org.htmlunit.xpath.objects.XObject;
26  import org.htmlunit.xpath.res.XPATHErrorResources;
27  import org.htmlunit.xpath.res.XPATHMessages;
28  import org.htmlunit.xpath.xml.utils.DefaultErrorHandler;
29  import org.htmlunit.xpath.xml.utils.PrefixResolver;
30  import org.htmlunit.xpath.xml.utils.WrappedRuntimeException;
31  
32  /**
33   * XPath adapter implementation for HtmlUnit.
34   *
35   * @author Ahmed Ashour
36   * @author Ronald Brill
37   */
38  public class XPathAdapter {
39  
40      private enum STATE {
41          DEFAULT,
42          DOUBLE_QUOTED,
43          SINGLE_QUOTED,
44          ATTRIB
45      }
46  
47      private final Expression mainExp_;
48  
49      /**
50       * Constructor.
51       * @param exprString the XPath expression
52       * @param prefixResolver a prefix resolver to use to resolve prefixes to namespace URIs
53       * @param caseSensitive whether the attributes should be case-sensitive
54       * @throws TransformerException if a syntax or other error occurs
55       */
56      public XPathAdapter(final String exprString, final PrefixResolver prefixResolver, final boolean caseSensitive)
57                  throws TransformerException {
58  
59          final ErrorListener errorHandler = new DefaultErrorHandler();
60          final XPathParser parser = new XPathParser(errorHandler);
61          final Compiler compiler = new Compiler(errorHandler, new FunctionTable());
62  
63          final String expression = preProcessXPath(exprString, caseSensitive);
64          parser.initXPath(compiler, expression, prefixResolver);
65  
66          mainExp_ = compiler.compile(0);
67      }
68  
69      /**
70       * Pre-processes the specified case-insensitive XPath expression before passing it to the engine.
71       * The current implementation lower-cases the attribute name, and anything outside the brackets.
72       *
73       * @param xpath the XPath expression to pre-process
74       * @param caseSensitive whether or not the XPath expression should be case-sensitive
75       * @return the processed XPath expression
76       */
77      private static String preProcessXPath(final String xpath, final boolean caseSensitive) {
78          if (caseSensitive) {
79              return xpath;
80          }
81  
82          final char[] charArray = xpath.toCharArray();
83          STATE state = STATE.DEFAULT;
84  
85          final int length = charArray.length;
86          int insideBrackets = 0;
87          for (int i = 0; i < length; i++) {
88              final char ch = charArray[i];
89              switch (ch) {
90                  case '@':
91                      if (state == STATE.DEFAULT) {
92                          state = STATE.ATTRIB;
93                      }
94                      break;
95  
96                  case '"':
97                      if (state == STATE.DEFAULT || state == STATE.ATTRIB) {
98                          state = STATE.DOUBLE_QUOTED;
99                      }
100                     else if (state == STATE.DOUBLE_QUOTED) {
101                         state = STATE.DEFAULT;
102                     }
103                     break;
104 
105                 case '\'':
106                     if (state == STATE.DEFAULT || state == STATE.ATTRIB) {
107                         state = STATE.SINGLE_QUOTED;
108                     }
109                     else if (state == STATE.SINGLE_QUOTED) {
110                         state = STATE.DEFAULT;
111                     }
112                     break;
113 
114                 case '[':
115                 case '(':
116                     if (state == STATE.ATTRIB) {
117                         state = STATE.DEFAULT;
118                     }
119                     insideBrackets++;
120                     break;
121 
122                 case ']':
123                 case ')':
124                     if (state == STATE.ATTRIB) {
125                         state = STATE.DEFAULT;
126                     }
127                     insideBrackets--;
128                     break;
129 
130                 default:
131                     if (insideBrackets == 0
132                             && state != STATE.SINGLE_QUOTED
133                             && state != STATE.DOUBLE_QUOTED) {
134                         charArray[i] = Character.toLowerCase(ch);
135                     }
136                     else if (state == STATE.ATTRIB) {
137                         charArray[i] = Character.toLowerCase(ch);
138                     }
139 
140                     if (state == STATE.ATTRIB) {
141                         final boolean isValidAttribChar =
142                                 ('a' <= ch && ch <= 'z')
143                                 || ('A' <= ch && ch <= 'Z')
144                                 || ('0' <= ch && ch <= '9')
145                                 || ('\u00C0' <= ch && ch <= '\u00D6')
146                                 || ('\u00D8' <= ch && ch <= '\u00F6')
147                                 || ('\u00F8' <= ch && ch <= '\u02FF')
148                                 || ('\u0370' <= ch && ch <= '\u037D')
149                                 || ('\u037F' <= ch && ch <= '\u1FFF')
150                                 || ('\u200C' <= ch && ch <= '\u200D')
151                                 || ('\u2C00' <= ch && ch <= '\u2FEF')
152                                 || ('\u3001' <= ch && ch <= '\uD7FF')
153                                 || ('\uF900' <= ch && ch <= '\uFDCF')
154                                 || ('\uFDF0' <= ch && ch <= '\uFFFD')
155                                 // [#x10000-#xEFFFF]
156                                 || ('\u00B7' == ch)
157                                 || ('\u0300' <= ch && ch <= '\u036F')
158                                 || ('\u203F' <= ch && ch <= '\u2040')
159                                 || ('_' == ch)
160                                 || ('-' == ch)
161                                 || ('.' == ch);
162 
163                         if (!isValidAttribChar) {
164                             state = STATE.DEFAULT;
165                         }
166                     }
167             }
168         }
169         return new String(charArray);
170     }
171 
172     /**
173      * Given an expression and a context, evaluate the XPath and return the result.
174      *
175      * @param xpathContext the execution context
176      * @param contextNode the node that "." expresses
177      * @param namespaceContext the context in which namespaces in the XPath are supposed to be expanded
178      * @return the result of the XPath or null if callbacks are used
179      * @throws TransformerException if the error condition is severe enough to halt processing
180      */
181     @SuppressWarnings("PMD.PreserveStackTrace")
182     XObject execute(final XPathContext xpathContext, final int contextNode,
183         final PrefixResolver namespaceContext) throws TransformerException {
184         xpathContext.pushNamespaceContext(namespaceContext);
185 
186         xpathContext.pushCurrentNodeAndExpression(contextNode);
187 
188         XObject xobj = null;
189 
190         try {
191             xobj = mainExp_.execute(xpathContext);
192         }
193         catch (final TransformerException ex) {
194             ex.setLocator(mainExp_);
195             final ErrorListener el = xpathContext.getErrorListener();
196             if (null != el) {
197                 el.error(ex);
198             }
199             else {
200                 throw ex;
201             }
202         }
203         catch (final Exception e) {
204             Exception unwrapped = e;
205             while (unwrapped instanceof WrappedRuntimeException) {
206                 unwrapped = ((WrappedRuntimeException) unwrapped).getException();
207             }
208             String msg = unwrapped.getMessage();
209 
210             if (msg == null || msg.isEmpty()) {
211                 msg = XPATHMessages.createXPATHMessage(XPATHErrorResources.ER_XPATH_ERROR, null);
212             }
213             final TransformerException te = new TransformerException(msg, mainExp_, unwrapped);
214             final ErrorListener el = xpathContext.getErrorListener();
215             if (null != el) {
216                 el.fatalError(te);
217             }
218             else {
219                 throw te;
220             }
221         }
222         finally {
223             xpathContext.popNamespaceContext();
224             xpathContext.popCurrentNodeAndExpression();
225         }
226 
227         return xobj;
228     }
229 }