View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.javascript.host.dom;
16  
17  import java.io.IOException;
18  
19  import org.htmlunit.StringWebResponse;
20  import org.htmlunit.WebClient;
21  import org.htmlunit.WebResponse;
22  import org.htmlunit.WebWindow;
23  import org.htmlunit.html.HtmlPage;
24  import org.htmlunit.html.parser.HTMLParser;
25  import org.htmlunit.javascript.HtmlUnitScriptable;
26  import org.htmlunit.javascript.JavaScriptEngine;
27  import org.htmlunit.javascript.configuration.JsxClass;
28  import org.htmlunit.javascript.configuration.JsxConstructor;
29  import org.htmlunit.javascript.configuration.JsxFunction;
30  import org.htmlunit.javascript.host.Window;
31  import org.htmlunit.javascript.host.html.HTMLDocument;
32  import org.htmlunit.javascript.host.xml.XMLDocument;
33  import org.htmlunit.util.MimeType;
34  
35  /**
36   * A JavaScript object for {@code DOMParser}.
37   *
38   * @author Ahmed Ashour
39   * @author Frank Danek
40   * @author Ronald Brill
41   *
42   * @see <a href="http://www.w3.org/TR/DOM-Parsing/">W3C Spec</a>
43   * @see <a href="http://domparsing.spec.whatwg.org/">WhatWG Spec</a>
44   * @see <a href="https://developer.mozilla.org/en-US/docs/Web/API/DOMParser">Mozilla Developer Network</a>
45   * @see <a href="http://msdn.microsoft.com/en-us/library/ff975060.aspx">MSDN</a>
46   * @see <a href="http://www.xulplanet.com/references/objref/DOMParser.html">XUL Planet</a>
47   */
48  @JsxClass
49  public class DOMParser extends HtmlUnitScriptable {
50  
51      /**
52       * JavaScript constructor.
53       */
54      @JsxConstructor
55      public void jsConstructor() {
56          // nothing to do
57      }
58  
59      /**
60       * Parses the given Unicode string into a DOM document.
61       * @param str the Unicode string to be parsed
62       * @param type the MIME type of the string -
63       *        <code>text/html</code>, <code>text/xml</code>, <code>application/xml</code>,
64       *        <code>application/xhtml+xml</code>, <code>image/svg+xml</code>. Must not be {@code null}.
65       * @return the generated document
66       */
67      @JsxFunction
68      public Document parseFromString(final String str, final Object type) {
69          try {
70              final Document document = parseFromString(this, str, type);
71              if (document == null) {
72                  throw JavaScriptEngine.typeError("Invalid 'type' parameter: " + type);
73              }
74              return document;
75          }
76          catch (final IOException e) {
77              throw JavaScriptEngine.syntaxError("Parsing failed" + e.getMessage());
78          }
79      }
80  
81      /**
82       * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
83       *
84       * Parses the given Unicode string into a DOM document.
85       * @param scriptable the ScriptableObject this belongs to
86       * @param str the Unicode string to be parsed
87       * @param type the MIME type of the string -
88       *        <code>text/html</code>, <code>text/xml</code>, <code>application/xml</code>,
89       *        <code>application/xhtml+xml</code>, <code>image/svg+xml</code>. Must not be {@code null}.
90       * @return the generated document
91       * @throws IOException in case of error
92       */
93      public static Document parseFromString(final HtmlUnitScriptable scriptable, final String str, final Object type)
94                  throws IOException {
95          if (type == null || JavaScriptEngine.isUndefined(type)) {
96              throw JavaScriptEngine.typeError("Missing 'type' parameter");
97          }
98  
99          if (MimeType.TEXT_XML.equals(type)
100                 || MimeType.APPLICATION_XML.equals(type)
101                 || MimeType.APPLICATION_XHTML.equals(type)
102                 || "image/svg+xml".equals(type)) {
103             final XMLDocument document = new XMLDocument();
104             document.setParentScope(scriptable.getParentScope());
105             document.setPrototype(scriptable.getPrototype(XMLDocument.class));
106             document.loadXML(str);
107             return document;
108         }
109 
110         if (MimeType.TEXT_HTML.equals(type)) {
111             final WebWindow webWindow = scriptable.getWindow().getWebWindow();
112             final WebResponse webResponse = new StringWebResponse(str, webWindow.getEnclosedPage().getUrl());
113 
114             return parseHtmlDocument(scriptable, webResponse, webWindow);
115         }
116 
117         return null;
118     }
119 
120     /**
121      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
122      *
123      * Parses the given Unicode string into a DOM document.
124      * @param scriptable the ScriptableObject this belongs to
125      * @param webResponse the response to be parsed
126      * @param webWindow the window
127      * @return the generated document
128      * @throws IOException in case of error
129      */
130     public static Document parseHtmlDocument(final HtmlUnitScriptable scriptable, final WebResponse webResponse,
131                                 final WebWindow webWindow)
132                 throws IOException {
133         // a similar impl is in
134         // org.htmlunit.javascript.host.dom.DOMImplementation.createHTMLDocument(Object)
135         final HtmlPage page = new HtmlPage(webResponse, webWindow);
136         page.setEnclosingWindow(null);
137         final Window window = webWindow.getScriptableObject();
138 
139         // document knows the window but is not the windows document
140         final HTMLDocument document = new HTMLDocument();
141         document.setParentScope(window);
142         document.setPrototype(window.getPrototype(document.getClass()));
143         // document.setWindow(window);
144         document.setDomNode(page);
145 
146         final WebClient webClient = webWindow.getWebClient();
147         final HTMLParser htmlParser = webClient.getPageCreator().getHtmlParser();
148         htmlParser.parse(webResponse, page, false, true);
149         return page.getScriptableObject();
150     }
151 }