View Javadoc
1   /*
2    * Copyright (c) 2002-2026 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html.parser.neko;
16  
17  import static org.htmlunit.BrowserVersionFeatures.JS_SCRIPT_IN_TEMPLATE_EXECUTED_ON_ATTACH;
18  
19  import java.io.IOException;
20  import java.io.StringReader;
21  import java.net.URL;
22  import java.nio.charset.Charset;
23  import java.util.ArrayDeque;
24  import java.util.Deque;
25  
26  import org.htmlunit.BrowserVersion;
27  import org.htmlunit.ObjectInstantiationException;
28  import org.htmlunit.WebClient;
29  import org.htmlunit.WebResponse;
30  import org.htmlunit.cyberneko.HTMLConfiguration;
31  import org.htmlunit.cyberneko.HTMLElements;
32  import org.htmlunit.cyberneko.HTMLScanner;
33  import org.htmlunit.cyberneko.HTMLTagBalancingListener;
34  import org.htmlunit.cyberneko.xerces.parsers.AbstractSAXParser;
35  import org.htmlunit.cyberneko.xerces.xni.Augmentations;
36  import org.htmlunit.cyberneko.xerces.xni.QName;
37  import org.htmlunit.cyberneko.xerces.xni.XMLAttributes;
38  import org.htmlunit.cyberneko.xerces.xni.XMLString;
39  import org.htmlunit.cyberneko.xerces.xni.XNIException;
40  import org.htmlunit.cyberneko.xerces.xni.parser.XMLInputSource;
41  import org.htmlunit.cyberneko.xerces.xni.parser.XMLParserConfiguration;
42  import org.htmlunit.html.DomCDataSection;
43  import org.htmlunit.html.DomComment;
44  import org.htmlunit.html.DomDocumentType;
45  import org.htmlunit.html.DomElement;
46  import org.htmlunit.html.DomNode;
47  import org.htmlunit.html.DomText;
48  import org.htmlunit.html.ElementFactory;
49  import org.htmlunit.html.Html;
50  import org.htmlunit.html.HtmlBody;
51  import org.htmlunit.html.HtmlElement;
52  import org.htmlunit.html.HtmlForm;
53  import org.htmlunit.html.HtmlHiddenInput;
54  import org.htmlunit.html.HtmlImage;
55  import org.htmlunit.html.HtmlPage;
56  import org.htmlunit.html.HtmlSvg;
57  import org.htmlunit.html.HtmlTable;
58  import org.htmlunit.html.HtmlTableRow;
59  import org.htmlunit.html.HtmlTemplate;
60  import org.htmlunit.html.ScriptElement;
61  import org.htmlunit.html.SubmittableElement;
62  import org.htmlunit.html.XHtmlPage;
63  import org.htmlunit.html.parser.HTMLParser;
64  import org.htmlunit.html.parser.HTMLParserDOMBuilder;
65  import org.htmlunit.html.parser.HTMLParserListener;
66  import org.htmlunit.javascript.host.html.HTMLBodyElement;
67  import org.htmlunit.util.StringUtils;
68  import org.w3c.dom.Node;
69  import org.xml.sax.Attributes;
70  import org.xml.sax.ContentHandler;
71  import org.xml.sax.Locator;
72  import org.xml.sax.SAXException;
73  import org.xml.sax.ext.LexicalHandler;
74  
75  /**
76   * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
77   *
78   * The parser and DOM builder. This class subclasses Xerces's AbstractSAXParser and implements
79   * the ContentHandler interface. Thus all parser APIs are kept private. The ContentHandler methods
80   * consume SAX events to build the page DOM
81   *
82   * @author Christian Sell
83   * @author David K. Taylor
84   * @author Chris Erskine
85   * @author Ahmed Ashour
86   * @author Marc Guillemot
87   * @author Ethan Glasser-Camp
88   * @author Sudhan Moghe
89   * @author Ronald Brill
90   * @author Frank Danek
91   * @author Carsten Steul
92   * @author Ronny Shapiro
93   * @author Atsushi Nakagawa
94   */
95  final class HtmlUnitNekoDOMBuilder extends AbstractSAXParser
96          implements ContentHandler, LexicalHandler, HTMLTagBalancingListener, HTMLParserDOMBuilder {
97  
98      // cache Neko Elements for performance and memory efficiency
99      private static final HTMLElements HTMLELEMENTS = new HTMLElements();
100 
101     private enum HeadParsed { YES, SYNTHESIZED, NO }
102 
103     private final HTMLParser htmlParser_;
104     private final HtmlPage page_;
105 
106     private Locator locator_;
107     private final Deque<DomNode> stack_ = new ArrayDeque<>();
108 
109     /** Did the snippet tried to overwrite the start node? */
110     private boolean snippetStartNodeOverwritten_;
111     private final int initialSize_;
112     private DomNode currentNode_;
113     private final boolean createdByJavascript_;
114     private final XMLString characters_ = new XMLString();
115     private HtmlUnitNekoDOMBuilder.HeadParsed headParsed_ = HeadParsed.NO;
116     private HtmlElement body_;
117     private boolean lastTagWasSynthesized_;
118     private HtmlForm consumingForm_;
119     private boolean formEndingIsAdjusting_;
120     private boolean insideSvg_;
121     private boolean insideTemplate_;
122 
123     private static final String FEATURE_AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
124     private static final String FEATURE_PARSE_NOSCRIPT
125         = "http://cyberneko.org/html/features/parse-noscript-content";
126 
127     /**
128      * Parses and then inserts the specified HTML content into the HTML content currently being parsed.
129      * @param html the HTML content to push
130      */
131     @Override
132     public void pushInputString(final String html) {
133         page_.registerParsingStart();
134         page_.registerInlineSnippetParsingStart();
135         try {
136             final WebResponse webResponse = page_.getWebResponse();
137             final Charset charset = webResponse.getContentCharset();
138             final String url = webResponse.getWebRequest().getUrl().toString();
139             final XMLInputSource in = new XMLInputSource(null, url, null, new StringReader(html), charset.name());
140             ((HTMLConfiguration) parserConfiguration_).evaluateInputSource(in);
141         }
142         finally {
143             page_.registerParsingEnd();
144             page_.registerInlineSnippetParsingEnd();
145         }
146     }
147 
148     /**
149      * Creates a new builder for parsing the specified response contents.
150      * @param node the location at which to insert the new content
151      * @param url the page's URL
152      * @param createdByJavascript if true the (script) tag was created by javascript
153      */
154     HtmlUnitNekoDOMBuilder(final HTMLParser htmlParser, final WebClient webClient,
155             final DomNode node, final URL url, final String htmlContent, final boolean createdByJavascript) {
156         super(createConfiguration(webClient.getBrowserVersion()));
157 
158         htmlParser_ = htmlParser;
159         page_ = (HtmlPage) node.getPage();
160 
161         currentNode_ = node;
162         for (final Node ancestor : currentNode_.getAncestors()) {
163             stack_.push((DomNode) ancestor);
164         }
165         createdByJavascript_ = createdByJavascript;
166 
167         final HTMLParserListener listener = webClient.getHTMLParserListener();
168         final boolean reportErrors = listener != null;
169         if (reportErrors) {
170             parserConfiguration_.setErrorHandler(new HtmlUnitNekoHTMLErrorHandler(listener, url, htmlContent));
171         }
172 
173         try {
174             setFeature(FEATURE_AUGMENTATIONS, true);
175             setFeature("http://cyberneko.org/html/features/report-errors", reportErrors);
176             setFeature(FEATURE_PARSE_NOSCRIPT, !webClient.isJavaScriptEnabled());
177             setFeature(HTMLScanner.ALLOW_SELFCLOSING_IFRAME, false);
178 
179             setContentHandler(this);
180             setLexicalHandler(this); //comments and CDATA
181         }
182         catch (final SAXException e) {
183             throw new ObjectInstantiationException("unable to create HTML parser", e);
184         }
185         initialSize_ = stack_.size();
186     }
187 
188     /**
189      * Create the configuration depending on the simulated browser
190      * @return the configuration
191      */
192     private static XMLParserConfiguration createConfiguration(final BrowserVersion browserVersion) {
193         // HTMLElements.HTMLElementsWithCache are not thread safe
194         // because the cache is not synchronized
195         // we have to create a new one for each parser run
196         return new HTMLConfiguration(new HTMLElements.HTMLElementsWithCache(HTMLELEMENTS));
197     }
198 
199     /**
200      * {@inheritDoc}
201      */
202     @Override
203     public void setDocumentLocator(final Locator locator) {
204         locator_ = locator;
205     }
206 
207     /**
208      * {@inheritDoc}
209      */
210     @Override
211     public void startDocument() throws SAXException {
212         // nothing to do
213     }
214 
215     /** {@inheritDoc} */
216     @Override
217     public void startElement(final QName element, final XMLAttributes attributes, final Augmentations augs)
218         throws XNIException {
219         // augs might change so we store only the interesting part
220         lastTagWasSynthesized_ = augs.isSynthesized();
221         super.startElement(element, attributes, augs);
222     }
223 
224     /**
225      * {@inheritDoc}
226      */
227     @Override
228     public void startElement(String namespaceURI, final String localName, final String qName, final Attributes atts)
229         throws SAXException {
230 
231         if (snippetStartNodeOverwritten_) {
232             snippetStartNodeOverwritten_ = false;
233             return;
234         }
235         handleCharacters();
236 
237         final String tagLower = StringUtils.toRootLowerCase(localName);
238         if (page_.isParsingHtmlSnippet() && ("html".equals(tagLower) || "body".equals(tagLower))) {
239             // we have to push the current node on the stack to make sure
240             // the endElement call is able to remove a node from the stack
241             stack_.push(currentNode_);
242             return;
243         }
244 
245         if ("head".equals(tagLower)) {
246             if (headParsed_ == HeadParsed.YES || page_.isParsingHtmlSnippet()) {
247                 // we have to push the current node on the stack to make sure
248                 // the endElement call is able to remove a node from the stack
249                 stack_.push(currentNode_);
250                 return;
251             }
252 
253             headParsed_ = lastTagWasSynthesized_ ? HeadParsed.SYNTHESIZED : HeadParsed.YES;
254         }
255 
256         // If we're adding a body element, keep track of any temporary synthetic ones
257         // that we may have had to create earlier (for document.write(), for example).
258         HtmlBody oldBody = null;
259         final boolean isBodyTag = "body".equals(tagLower);
260         if (isBodyTag) {
261             final HtmlBody body = page_.getBody();
262             if (body != null) {
263                 oldBody = body;
264             }
265         }
266 
267         if (namespaceURI != null) {
268             namespaceURI = namespaceURI.trim();
269         }
270         // Add the new node.
271         if (!(page_ instanceof XHtmlPage) && Html.XHTML_NAMESPACE.equals(namespaceURI)) {
272             namespaceURI = null;
273         }
274 
275         final ElementFactory factory =
276                 htmlParser_.getElementFactory(page_, namespaceURI, qName, insideSvg_, false);
277         if (factory == HtmlUnitNekoHtmlParser.SVG_FACTORY) {
278             namespaceURI = Html.SVG_NAMESPACE;
279         }
280 
281         final DomElement newElement = factory.createElementNS(page_, namespaceURI, qName, atts);
282         newElement.setStartLocation(locator_.getLineNumber(), locator_.getColumnNumber());
283 
284         // parse can't replace everything as it does not buffer elements while parsing
285         addNodeToRightParent(currentNode_, newElement);
286 
287         if (newElement instanceof HtmlSvg) {
288             insideSvg_ = true;
289         }
290         else if (newElement instanceof HtmlTemplate) {
291             insideTemplate_ = true;
292         }
293 
294         // Forms own elements simply by enclosing source-wise rather than DOM parent-child relationship
295         // Forms without a </form> will keep consuming forever
296         else if (newElement instanceof HtmlForm form) {
297             consumingForm_ = form;
298             formEndingIsAdjusting_ = false;
299         }
300         else if (consumingForm_ != null) {
301             // If the current form enclosed a suitable element
302             if (newElement instanceof SubmittableElement) {
303                 // Let these be owned by the form
304                 if (((HtmlElement) newElement).getEnclosingForm() != consumingForm_) {
305                     ((HtmlElement) newElement).setOwningForm(consumingForm_);
306                 }
307             }
308         }
309 
310         // If we had an old synthetic body, and we just added a real body element, quietly
311         // remove the old body and move its children to the real body element we just added.
312         if (oldBody != null) {
313             oldBody.quietlyRemoveAndMoveChildrenTo(newElement);
314         }
315 
316         if (!insideSvg_ && isBodyTag) {
317             body_ = (HtmlElement) newElement;
318         }
319         else if (createdByJavascript_
320                 && newElement instanceof ScriptElement script
321                 && (!insideTemplate_
322                         || !page_.getWebClient().getBrowserVersion()
323                                 .hasFeature(JS_SCRIPT_IN_TEMPLATE_EXECUTED_ON_ATTACH))) {
324             script.markAsCreatedByDomParser();
325         }
326 
327         currentNode_ = newElement;
328         stack_.push(currentNode_);
329     }
330 
331     /**
332      * Adds the new node to the right parent that is not necessary the currentNode in case of
333      * malformed HTML code. The method tries to emulate the behavior of Firefox.
334      */
335     private void addNodeToRightParent(final DomNode currentNode, final DomElement newElement) {
336         final String currentNodeName = currentNode.getNodeName();
337         final String newNodeName = newElement.getNodeName();
338 
339         // First ensure table elements are housed correctly
340         if (isTableChild(newNodeName)) {
341             final DomNode parent =
342                     "table".equals(currentNodeName) ? currentNode : findElementOnStack("table");
343             appendChild(parent, newElement);
344             return;
345         }
346         if ("tr".equals(newNodeName)) {
347             final DomNode parent =
348                     isTableChild(currentNodeName) ? currentNode : findElementOnStack("tbody", "thead", "tfoot");
349             appendChild(parent, newElement);
350             return;
351         }
352         if (isTableCell(newNodeName)) {
353             final DomNode parent =
354                     "tr".equals(currentNodeName) ? currentNode : findElementOnStack("tr");
355             appendChild(parent, newElement);
356             return;
357         }
358 
359         // Next ensure non-table elements don't appear in tables
360         if ("table".equals(currentNodeName) || isTableChild(currentNodeName) || "tr".equals(currentNodeName)) {
361             if ("template".equals(newNodeName)) {
362                 currentNode.appendChild(newElement);
363             }
364 
365             // Scripts, forms, and styles are exempt
366             else if (!"colgroup".equals(currentNodeName)
367                     && ("script".equals(newNodeName)
368                         || "form".equals(newNodeName)
369                         || "style".equals(newNodeName))) {
370                 currentNode.appendChild(newElement);
371             }
372 
373             // These are good
374             else if ("col".equals(newNodeName) && "colgroup".equals(currentNodeName)) {
375                 currentNode.appendChild(newElement);
376             }
377             else if ("caption".equals(currentNodeName)) {
378                 currentNode.appendChild(newElement);
379             }
380             else if (newElement instanceof HtmlHiddenInput) {
381                 currentNode.appendChild(newElement);
382             }
383             else {
384                 // Move before the table
385                 final DomNode parent = findElementOnStack("table");
386                 parent.insertBefore(newElement);
387             }
388             return;
389         }
390 
391         if (formEndingIsAdjusting_ && "form".equals(currentNodeName)) {
392             // We cater to HTMLTagBalancer's shortcomings by moving this node out of the <form>
393             appendChild(currentNode.getParentNode(), newElement);
394             return;
395         }
396 
397         // Everything else
398         appendChild(currentNode, newElement);
399     }
400 
401     private DomNode findElementOnStack(final String searchedElementName) {
402         for (final DomNode node : stack_) {
403             if (searchedElementName.equals(node.getNodeName())) {
404                 return node;
405             }
406         }
407 
408         // this is surely wrong but at least it won't throw a NPE
409         return stack_.peek();
410     }
411 
412     private DomNode findElementOnStack(final String... searchedElementNames) {
413         for (final DomNode node : stack_) {
414             for (final String searchedElementName : searchedElementNames) {
415                 if (searchedElementName.equals(node.getNodeName())) {
416                     return node;
417                 }
418             }
419         }
420 
421         // this is surely wrong but at least it won't throw a NPE
422         return stack_.peek();
423     }
424 
425     private static boolean isTableChild(final String nodeName) {
426         if (nodeName == null || nodeName.length() < 5) {
427             return false;
428         }
429 
430         return "thead".equals(nodeName)
431                 || "tbody".equals(nodeName)
432                 || "tfoot".equals(nodeName)
433                 || "caption".equals(nodeName)
434                 || "colgroup".equals(nodeName);
435     }
436 
437     private static boolean isTableCell(final String nodeName) {
438         if (nodeName == null || nodeName.length() != 2) {
439             return false;
440         }
441         return "td".equals(nodeName) || "th".equals(nodeName);
442     }
443 
444     /** {@inheritDoc} */
445     @Override
446     public void endElement(final QName element, final Augmentations augs)
447         throws XNIException {
448         // augs might change so we store only the interesting part
449         lastTagWasSynthesized_ = augs.isSynthesized();
450         super.endElement(element, augs);
451     }
452 
453     /**
454      * {@inheritDoc}
455      */
456     @Override
457     public void endElement(final String namespaceURI, final String localName, final String qName)
458         throws SAXException {
459 
460         final String tagLower = StringUtils.toRootLowerCase(localName);
461 
462         handleCharacters();
463 
464         if (page_.isParsingHtmlSnippet()) {
465             if ("html".equals(tagLower) || "body".equals(tagLower)) {
466                 return;
467             }
468             if (stack_.size() == initialSize_) {
469                 // a <p> inside a <p> is valid for innerHTML processing
470                 // see HTMLParser2Test for more cases
471                 snippetStartNodeOverwritten_ = !StringUtils.equalsChar('p', tagLower);
472                 return;
473             }
474         }
475 
476         if ("svg".equals(tagLower)) {
477             insideSvg_ = false;
478         }
479         else if ("template".equals(tagLower)) {
480             insideTemplate_ = false;
481         }
482 
483         // this only avoids a problem when the stack is empty here
484         // but for this case we made the problem before - the balancing
485         // is broken already
486         if (stack_.isEmpty()) {
487             return;
488         }
489 
490         final DomNode previousNode = stack_.pop(); //remove currentElement from stack
491         previousNode.setEndLocation(locator_.getLineNumber(), locator_.getColumnNumber());
492 
493         if ("form".equals(tagLower) && !lastTagWasSynthesized_) {
494             // We get here if the </form> was on the same DOM tree depth as the <form> that started it,
495             // otherwise HTMLTagBalancer gives us the end through ignoredEndElement()
496             consumingForm_ = null;
497         }
498 
499         if (!stack_.isEmpty()) {
500             currentNode_ = stack_.peek();
501         }
502 
503         final boolean postponed = page_.isParsingInlineHtmlSnippet();
504         previousNode.onAllChildrenAddedToPage(postponed);
505     }
506 
507     /** {@inheritDoc} */
508     @Override
509     public void characters(final char[] ch, final int start, final int length) throws SAXException {
510         characters_.append(ch, start, length);
511     }
512 
513     /** {@inheritDoc} */
514     @Override
515     public void ignorableWhitespace(final char[] ch, final int start, final int length) throws SAXException {
516         characters_.append(ch, start, length);
517     }
518 
519     /**
520      * Picks up the character data accumulated so far and add it to the current element as a text node.
521      */
522     private void handleCharacters() {
523         // make the code easier to read because we remove a nesting level
524         if (characters_.length() == 0) {
525             return;
526         }
527 
528         // Use the normal behavior: append a text node for the accumulated text.
529         final String textValue = characters_.toString();
530         characters_.clear();
531 
532         if (StringUtils.isBlank(textValue)) {
533             appendChild(currentNode_, new DomText(page_, textValue));
534             return;
535         }
536 
537         // malformed HTML: </td>some text</tr> => text comes before the table
538         if (currentNode_ instanceof HtmlTableRow row) {
539             final HtmlTable enclosingTable = row.getEnclosingTable();
540             if (enclosingTable != null) { // may be null when called from Range.createContextualFragment
541                 if (enclosingTable.getPreviousSibling() instanceof DomText domText) {
542                     domText.setTextContent(domText.getWholeText() + textValue);
543                 }
544                 else {
545                     enclosingTable.insertBefore(new DomText(page_, textValue));
546                 }
547             }
548         }
549         else if (currentNode_ instanceof HtmlTable enclosingTable) {
550             if (enclosingTable.getPreviousSibling() instanceof DomText domText) {
551                 domText.setTextContent(domText.getWholeText() + textValue);
552             }
553             else {
554                 enclosingTable.insertBefore(new DomText(page_, textValue));
555             }
556         }
557         else if (currentNode_ instanceof HtmlImage) {
558             currentNode_.getParentNode().appendChild(new DomText(page_, textValue));
559         }
560         else {
561             appendChild(currentNode_, new DomText(page_, textValue));
562         }
563     }
564 
565     /** {@inheritDoc} */
566     @Override
567     public void endDocument() throws SAXException {
568         handleCharacters();
569         if (locator_ != null) {
570             page_.setEndLocation(locator_.getLineNumber(), locator_.getColumnNumber());
571         }
572     }
573 
574     /** {@inheritDoc} */
575     @Override
576     public void startPrefixMapping(final String prefix, final String uri) throws SAXException {
577         // nothing to do
578     }
579 
580     /** {@inheritDoc} */
581     @Override
582     public void endPrefixMapping(final String prefix) throws SAXException {
583         // nothing to do
584     }
585 
586     /** {@inheritDoc} */
587     @Override
588     public void processingInstruction(final String target, final String data) throws SAXException {
589         // nothing to do
590     }
591 
592     /** {@inheritDoc} */
593     @Override
594     public void skippedEntity(final String name) throws SAXException {
595         // nothing to do
596     }
597 
598     // LexicalHandler methods
599 
600     /** {@inheritDoc} */
601     @Override
602     public void comment(final char[] ch, final int start, final int length) {
603         handleCharacters();
604         final String data = new String(ch, start, length);
605         final DomComment comment = new DomComment(page_, data);
606         appendChild(currentNode_, comment);
607     }
608 
609     /** {@inheritDoc} */
610     @Override
611     public void endCDATA() {
612         final String data = characters_.toString();
613         characters_.clear();
614 
615         final DomCDataSection cdataSection = new DomCDataSection(page_, data);
616         appendChild(currentNode_, cdataSection);
617     }
618 
619     /** {@inheritDoc} */
620     @Override
621     public void endDTD() {
622         // nothing to do
623     }
624 
625     /** {@inheritDoc} */
626     @Override
627     public void endEntity(final String name) {
628         // nothing to do
629     }
630 
631     /** {@inheritDoc} */
632     @Override
633     public void startCDATA() {
634         handleCharacters();
635     }
636 
637     /** {@inheritDoc} */
638     @Override
639     public void startDTD(final String name, final String publicId, final String systemId) {
640         final DomDocumentType type = new DomDocumentType(page_, name, publicId, systemId);
641         page_.setDocumentType(type);
642 
643         final Node child;
644         child = type;
645         page_.appendChild(child);
646     }
647 
648     /** {@inheritDoc} */
649     @Override
650     public void startEntity(final String name) {
651         // nothing to do
652     }
653 
654     /**
655      * {@inheritDoc}
656      */
657     @Override
658     public void ignoredEndElement(final QName element, final Augmentations augs) {
659         // HTMLTagBalancer brings us here if </form> was found in the source on a different
660         // DOM tree depth (either above or below) to the <form> that started it
661         if ("form".equals(element.getLocalpart()) && consumingForm_ != null) {
662             consumingForm_ = null;
663 
664             if (findElementOnStack("table", "form") instanceof HtmlTable) {
665                 // The </form> just goes missing for these (really? just tables?)
666             }
667             else {
668                 /*
669                  * This </form> was ignored by HTMLTagBalancer as it generates its own
670                  * </form> at the end of the depth with the starting <form>.
671                  * e.g. This:
672                  * | <form>
673                  * |   <div>
674                  * |     </form> <!--ignored by HTMLTagBalancer-->
675                  * |   </div>
676                  * |   <input>
677                  *
678                  * is turned into:
679                  * | <form>
680                  * |   <div>
681                  * |   </div>
682                  * |   <input>
683                  * | </form> <!--synthesized by HTMLTagBalancer-->
684                  *
685                  * but this isn't suitable for us because </form> shouldn't be ignored but
686                  * rather moved directly behind the tree it's in to instead become:
687                  * | <form>
688                  * |   <div>
689                  * |   </div>
690                  * | </form> <!--moved out of div-->
691                  * | <input> <!--proceeding children are not part of form-->
692                  */
693                 // We cater for this by moving out nodes such as the <input> in the above
694                 // diagram out of the form
695                 formEndingIsAdjusting_ = true;
696             }
697         }
698     }
699 
700     /**
701      * {@inheritDoc}
702      */
703     @Override
704     public void ignoredStartElement(final QName elem, final XMLAttributes attrs, final Augmentations augs) {
705         // when multiple html/body elements are encountered, the attributes of the discarded
706         // elements are used when not previously defined
707         if (attrs != null && body_ != null) {
708             final String lp = elem.getLocalpart();
709             if (lp != null && lp.length() == 4) {
710                 if ("body".equalsIgnoreCase(lp)) {
711                     copyAttributes(body_, attrs);
712                 }
713                 else if ("html".equalsIgnoreCase(lp)) {
714                     final DomNode parent = body_.getParentNode();
715                     if (parent instanceof DomElement element) {
716                         copyAttributes(element, attrs);
717                     }
718                 }
719             }
720         }
721     }
722 
723     private static void copyAttributes(final DomElement to, final XMLAttributes attrs) {
724         final int length = attrs.getLength();
725 
726         for (int i = 0; i < length; i++) {
727             final String attrName = StringUtils.toRootLowerCase(attrs.getLocalName(i));
728             if (to.getAttributes().getNamedItem(attrName) == null) {
729                 to.setAttribute(attrName, attrs.getValue(i));
730                 if (attrName.startsWith("on") && to.getPage().getWebClient().isJavaScriptEngineEnabled()
731                         && to.getScriptableObject() instanceof HTMLBodyElement) {
732                     final HTMLBodyElement jsBody = to.getScriptableObject();
733                     jsBody.createEventHandlerFromAttribute(attrName, attrs.getValue(i));
734                 }
735             }
736         }
737     }
738 
739     /**
740      * {@inheritDoc}
741      */
742     @Override
743     public void parse(final XMLInputSource inputSource) throws XNIException, IOException {
744         final HTMLParserDOMBuilder oldBuilder = page_.getDOMBuilder();
745         page_.setDOMBuilder(this);
746         try {
747             super.parse(inputSource);
748         }
749         finally {
750             page_.setDOMBuilder(oldBuilder);
751         }
752     }
753 
754     private static void appendChild(final DomNode parent, final DomNode child) {
755         if (parent instanceof HtmlTemplate template) {
756             template.getContent().appendChild(child);
757             return;
758         }
759 
760         parent.appendChild(child);
761     }
762 }