View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html.impl;
16  
17  import java.io.Serializable;
18  import java.util.ArrayList;
19  import java.util.Collections;
20  import java.util.Iterator;
21  import java.util.List;
22  
23  import org.apache.commons.lang3.builder.EqualsBuilder;
24  import org.apache.commons.lang3.builder.HashCodeBuilder;
25  import org.htmlunit.SgmlPage;
26  import org.htmlunit.html.DomDocumentFragment;
27  import org.htmlunit.html.DomNode;
28  import org.htmlunit.html.DomNodeList;
29  import org.htmlunit.html.DomText;
30  import org.w3c.dom.DOMException;
31  import org.w3c.dom.DocumentFragment;
32  import org.w3c.dom.Node;
33  import org.w3c.dom.NodeList;
34  
35  /**
36   * Simple implementation of a Range.
37   *
38   * @author Marc Guillemot
39   * @author Daniel Gredler
40   * @author James Phillpotts
41   * @author Ahmed Ashour
42   * @author Ronald Brill
43   */
44  public class SimpleRange implements Serializable {
45  
46      /** The start (anchor) container. */
47      private DomNode startContainer_;
48  
49      /** The end (focus) container. */
50      private DomNode endContainer_;
51  
52      /**
53       * The start (anchor) offset; units are chars if the start container is a text node or an
54       * input element, DOM nodes otherwise.
55       */
56      private int startOffset_;
57  
58      /**
59       * The end (focus) offset; units are chars if the end container is a text node or an input
60       * element, DOM nodes otherwise.
61       */
62      private int endOffset_;
63  
64      /**
65       * Constructs a range without any content.
66       */
67      public SimpleRange() {
68          // Empty.
69      }
70  
71      /**
72       * Constructs a range for the specified element.
73       * @param node the node for the range
74       */
75      public SimpleRange(final DomNode node) {
76          startContainer_ = node;
77          endContainer_ = node;
78          startOffset_ = 0;
79          endOffset_ = getMaxOffset(node);
80      }
81  
82      /**
83       * Constructs a range for the provided element and start and end offset.
84       * @param node the node for the range
85       * @param offset the start and end offset
86       */
87      public SimpleRange(final DomNode node, final int offset) {
88          startContainer_ = node;
89          endContainer_ = node;
90          startOffset_ = offset;
91          endOffset_ = offset;
92      }
93  
94      /**
95       * Constructs a range for the provided elements and offsets.
96       * @param startNode the start node
97       * @param startOffset the start offset
98       * @param endNode the end node
99       * @param endOffset the end offset
100      */
101     public SimpleRange(final DomNode startNode, final int startOffset, final DomNode endNode, final int endOffset) {
102         startContainer_ = startNode;
103         endContainer_ = endNode;
104         startOffset_ = startOffset;
105         endOffset_ = endOffset;
106         if (startNode == endNode && startOffset > endOffset) {
107             endOffset_ = startOffset;
108         }
109     }
110 
111     /**
112      * Duplicates the contents of this.
113      * @return DocumentFragment that contains content equivalent to this
114      */
115     public DomDocumentFragment cloneContents() {
116         // Clone the common ancestor.
117         final DomNode ancestor = getCommonAncestorContainer();
118 
119         if (ancestor == null) {
120             return new DomDocumentFragment(null);
121         }
122         final DomNode ancestorClone = ancestor.cloneNode(true);
123 
124         // Find the start container and end container clones.
125         DomNode startClone = null;
126         DomNode endClone = null;
127         final DomNode start = startContainer_;
128         final DomNode end = endContainer_;
129         if (start == ancestor) {
130             startClone = ancestorClone;
131         }
132         if (end == ancestor) {
133             endClone = ancestorClone;
134         }
135         final Iterable<DomNode> descendants = ancestor.getDescendants();
136         if (startClone == null || endClone == null) {
137             final Iterator<DomNode> i = descendants.iterator();
138             final Iterator<DomNode> ci = ancestorClone.getDescendants().iterator();
139             while (i.hasNext()) {
140                 final DomNode e = i.next();
141                 final DomNode ce = ci.next();
142                 if (start == e) {
143                     startClone = ce;
144                 }
145                 else if (end == e) {
146                     endClone = ce;
147                     break;
148                 }
149             }
150         }
151 
152         // Do remove from end first so that it can't affect the offset values
153 
154         // Remove everything following the selection end from the clones.
155         if (endClone == null) {
156             throw new IllegalStateException("Unable to find end node clone.");
157         }
158         deleteAfter(endClone, endOffset_);
159         for (DomNode n = endClone; n != null; n = n.getParentNode()) {
160             while (n.getNextSibling() != null) {
161                 n.getNextSibling().remove();
162             }
163         }
164 
165         // Remove everything prior to the selection start from the clones.
166         if (startClone == null) {
167             throw new IllegalStateException("Unable to find start node clone.");
168         }
169         deleteBefore(startClone, startOffset_);
170         for (DomNode n = startClone; n != null; n = n.getParentNode()) {
171             while (n.getPreviousSibling() != null) {
172                 n.getPreviousSibling().remove();
173             }
174         }
175 
176         final SgmlPage page = ancestor.getPage();
177         final DomDocumentFragment fragment = new DomDocumentFragment(page);
178         if (start == end) {
179             fragment.appendChild(ancestorClone);
180         }
181         else {
182             for (final DomNode n : ancestorClone.getChildNodes()) {
183                 fragment.appendChild(n);
184             }
185         }
186         return fragment;
187     }
188 
189     /**
190      * Produces a new SimpleRange whose boundary-points are equal to the
191      * boundary-points of this.
192      * @return duplicated simple
193      */
194     public SimpleRange cloneRange() {
195         return new SimpleRange(startContainer_, startOffset_, endContainer_, endOffset_);
196     }
197 
198     /**
199      * Collapse this range onto one of its boundary-points.
200      * @param toStart if true, collapses the Range onto its start; else collapses it onto its end.
201      */
202     public void collapse(final boolean toStart) {
203         if (toStart) {
204             endContainer_ = startContainer_;
205             endOffset_ = startOffset_;
206         }
207         else {
208             startContainer_ = endContainer_;
209             startOffset_ = endOffset_;
210         }
211     }
212 
213     /**
214      * Removes the contents of this range from the containing document or
215      * document fragment without returning a reference to the removed
216      * content.
217      */
218     public void deleteContents() {
219         final DomNode ancestor = getCommonAncestorContainer();
220         if (ancestor != null) {
221             deleteContents(ancestor);
222         }
223     }
224 
225     private void deleteContents(final DomNode ancestor) {
226         final DomNode start;
227         final DomNode end;
228         if (isOffsetChars(startContainer_)) {
229             start = startContainer_;
230             String text = getText(start);
231             if (startOffset_ > -1 && startOffset_ < text.length()) {
232                 text = text.substring(0, startOffset_);
233             }
234             setText(start, text);
235         }
236         else if (startContainer_.getChildNodes().getLength() > startOffset_) {
237             start = (DomNode) startContainer_.getChildNodes().item(startOffset_);
238         }
239         else {
240             start = startContainer_.getNextSibling();
241         }
242         if (isOffsetChars(endContainer_)) {
243             end = endContainer_;
244             String text = getText(end);
245             if (endOffset_ > -1 && endOffset_ < text.length()) {
246                 text = text.substring(endOffset_);
247             }
248             setText(end, text);
249         }
250         else if (endContainer_.getChildNodes().getLength() > endOffset_) {
251             end = (DomNode) endContainer_.getChildNodes().item(endOffset_);
252         }
253         else {
254             end = endContainer_.getNextSibling();
255         }
256         boolean foundStart = false;
257         boolean started = false;
258         final Iterator<DomNode> i = ancestor.getDescendants().iterator();
259         while (i.hasNext()) {
260             final DomNode n = i.next();
261             if (n == end) {
262                 break;
263             }
264             if (n == start) {
265                 foundStart = true;
266             }
267             if (foundStart && (n != start || !isOffsetChars(startContainer_))) {
268                 started = true;
269             }
270             if (started && !n.isAncestorOf(end)) {
271                 i.remove();
272             }
273         }
274     }
275 
276     /**
277      * Moves the contents of a Range from the containing document or document
278      * fragment to a new DocumentFragment.
279      * @return DocumentFragment containing the extracted contents
280      * @throws DOMException in case of error
281      */
282     public DomDocumentFragment extractContents() throws DOMException {
283         final DomDocumentFragment fragment = cloneContents();
284 
285         // Remove everything inside the range from the original nodes.
286         deleteContents();
287 
288         // Build the document fragment using the cloned nodes, and return it.
289         return fragment;
290     }
291 
292     /**
293      * @return true if startContainer equals endContainer and
294      *         startOffset equals endOffset
295      * @throws DOMException in case of error
296      */
297     public boolean isCollapsed() throws DOMException {
298         return startContainer_ == endContainer_ && startOffset_ == endOffset_;
299     }
300 
301     /**
302      * @return the deepest common ancestor container of this range's two
303      *         boundary-points.
304      * @throws DOMException in case of error
305      */
306     public DomNode getCommonAncestorContainer() throws DOMException {
307         if (startContainer_ != null && endContainer_ != null) {
308             for (DomNode p1 = startContainer_; p1 != null; p1 = p1.getParentNode()) {
309                 for (DomNode p2 = endContainer_; p2 != null; p2 = p2.getParentNode()) {
310                     if (p1 == p2) {
311                         return p1;
312                     }
313                 }
314             }
315         }
316         return null;
317     }
318 
319     /**
320      * @return the Node within which this range ends
321      */
322     public DomNode getEndContainer() {
323         return endContainer_;
324     }
325 
326     /**
327      * @return offset within the ending node of this
328      */
329     public int getEndOffset() {
330         return endOffset_;
331     }
332 
333     /**
334      * @return the Node within which this range begins
335      */
336     public DomNode getStartContainer() {
337         return startContainer_;
338     }
339 
340     /**
341      * @return offset within the starting node of this
342      */
343     public int getStartOffset() {
344         return startOffset_;
345     }
346 
347     /**
348      * Inserts a node into the Document or DocumentFragment at the start of
349      * the Range. If the container is a Text node, this will be split at the
350      * start of the Range (as if the Text node's splitText method was
351      * performed at the insertion point) and the insertion will occur
352      * between the two resulting Text nodes. Adjacent Text nodes will not be
353      * automatically merged. If the node to be inserted is a
354      * DocumentFragment node, the children will be inserted rather than the
355      * DocumentFragment node itself.
356      * @param newNode The node to insert at the start of the Range
357      */
358     public void insertNode(final DomNode newNode) {
359         if (isOffsetChars(startContainer_)) {
360             final DomNode split = startContainer_.cloneNode(false);
361             String text = getText(startContainer_);
362             if (startOffset_ > -1 && startOffset_ < text.length()) {
363                 text = text.substring(0, startOffset_);
364             }
365             setText(startContainer_, text);
366             text = getText(split);
367             if (startOffset_ > -1 && startOffset_ < text.length()) {
368                 text = text.substring(startOffset_);
369             }
370             setText(split, text);
371             insertNodeOrDocFragment(startContainer_.getParentNode(), split, startContainer_.getNextSibling());
372             insertNodeOrDocFragment(startContainer_.getParentNode(), newNode, split);
373         }
374         else {
375             insertNodeOrDocFragment(startContainer_, newNode,
376                     (DomNode) startContainer_.getChildNodes().item(startOffset_));
377         }
378 
379         setStart(newNode, 0);
380     }
381 
382     private static void insertNodeOrDocFragment(final DomNode parent, final DomNode newNode, final DomNode refNode) {
383         if (newNode instanceof DocumentFragment) {
384             final DocumentFragment fragment = (DocumentFragment) newNode;
385 
386             final NodeList childNodes = fragment.getChildNodes();
387             while (childNodes.getLength() > 0) {
388                 final Node item = childNodes.item(0);
389                 parent.insertBefore(item, refNode);
390             }
391         }
392         else {
393             parent.insertBefore(newNode, refNode);
394         }
395     }
396 
397     /**
398      * Select a node and its contents.
399      * @param node The node to select.
400      */
401     public void selectNode(final DomNode node) {
402         startContainer_ = node;
403         startOffset_ = 0;
404         endContainer_ = node;
405         endOffset_ = getMaxOffset(node);
406     }
407 
408     /**
409      * Select the contents within a node.
410      * @param node Node to select from
411      */
412     public void selectNodeContents(final DomNode node) {
413         startContainer_ = node.getFirstChild();
414         startOffset_ = 0;
415         endContainer_ = node.getLastChild();
416         endOffset_ = getMaxOffset(node.getLastChild());
417     }
418 
419     /**
420      * Sets the attributes describing the end.
421      * @param refNode the refNode
422      * @param offset offset
423      */
424     public void setEnd(final DomNode refNode, final int offset) {
425         endContainer_ = refNode;
426         endOffset_ = offset;
427     }
428 
429     /**
430      * Sets the attributes describing the start.
431      * @param refNode the refNode
432      * @param offset offset
433      */
434     public void setStart(final DomNode refNode, final int offset) {
435         startContainer_ = refNode;
436         startOffset_ = offset;
437     }
438 
439     /**
440      * Reparents the contents of the Range to the given node and inserts the
441      * node at the position of the start of the Range.
442      * @param newParent The node to surround the contents with.
443      */
444     public void surroundContents(final DomNode newParent) {
445         newParent.appendChild(extractContents());
446         insertNode(newParent);
447         setStart(newParent, 0);
448         setEnd(newParent, getMaxOffset(newParent));
449     }
450 
451     /**
452      * {@inheritDoc}
453      */
454     @Override
455     public boolean equals(final Object obj) {
456         if (!(obj instanceof SimpleRange)) {
457             return false;
458         }
459         final SimpleRange other = (SimpleRange) obj;
460         return new EqualsBuilder()
461             .append(startContainer_, other.startContainer_)
462             .append(endContainer_, other.endContainer_)
463             .append(startOffset_, other.startOffset_)
464             .append(endOffset_, other.endOffset_).isEquals();
465     }
466 
467     /**
468      * {@inheritDoc}
469      */
470     @Override
471     public int hashCode() {
472         return new HashCodeBuilder()
473             .append(startContainer_)
474             .append(endContainer_)
475             .append(startOffset_)
476             .append(endOffset_).toHashCode();
477     }
478 
479     /**
480      * {@inheritDoc}
481      */
482     @Override
483     public String toString() {
484         final DomDocumentFragment fragment = cloneContents();
485         if (fragment.getPage() != null) {
486             return fragment.asNormalizedText();
487         }
488         return "";
489     }
490 
491     private static boolean isOffsetChars(final DomNode node) {
492         return node instanceof DomText || node instanceof SelectableTextInput;
493     }
494 
495     private static String getText(final DomNode node) {
496         if (node instanceof SelectableTextInput) {
497             return ((SelectableTextInput) node).getText();
498         }
499         return node.getTextContent();
500     }
501 
502     private static void setText(final DomNode node, final String text) {
503         if (node instanceof SelectableTextInput) {
504             ((SelectableTextInput) node).setText(text);
505         }
506         else {
507             node.setTextContent(text);
508         }
509     }
510 
511     private static void deleteBefore(final DomNode node, int offset) {
512         if (isOffsetChars(node)) {
513             String text = getText(node);
514             if (offset > -1 && offset < text.length()) {
515                 text = text.substring(offset);
516             }
517             else {
518                 text = "";
519             }
520             setText(node, text);
521         }
522         else {
523             final DomNodeList<DomNode> children = node.getChildNodes();
524             for (int i = 0; i < offset && i < children.getLength(); i++) {
525                 final DomNode child = children.get(i);
526                 child.remove();
527                 i--;
528                 offset--;
529             }
530         }
531     }
532 
533     private static void deleteAfter(final DomNode node, final int offset) {
534         if (isOffsetChars(node)) {
535             String text = getText(node);
536             if (offset > -1 && offset < text.length()) {
537                 text = text.substring(0, offset);
538                 setText(node, text);
539             }
540         }
541         else {
542             final DomNodeList<DomNode> children = node.getChildNodes();
543             for (int i = offset; i < children.getLength(); i++) {
544                 final DomNode child = children.get(i);
545                 child.remove();
546                 i--;
547             }
548         }
549     }
550 
551     private static int getMaxOffset(final DomNode node) {
552         return isOffsetChars(node) ? getText(node).length() : node.getChildNodes().getLength();
553     }
554 
555     /**
556      * @return a list with all nodes contained in this range
557      */
558     public List<DomNode> containedNodes() {
559         final DomNode ancestor = getCommonAncestorContainer();
560         if (ancestor == null) {
561             return Collections.EMPTY_LIST;
562         }
563 
564         final DomNode start;
565         final DomNode end;
566         if (isOffsetChars(startContainer_)) {
567             start = startContainer_;
568             String text = getText(start);
569             if (startOffset_ > -1 && startOffset_ < text.length()) {
570                 text = text.substring(0, startOffset_);
571             }
572             setText(start, text);
573         }
574         else if (startContainer_.getChildNodes().getLength() > startOffset_) {
575             start = (DomNode) startContainer_.getChildNodes().item(startOffset_);
576         }
577         else {
578             start = startContainer_.getNextSibling();
579         }
580         if (isOffsetChars(endContainer_)) {
581             end = endContainer_;
582             String text = getText(end);
583             if (endOffset_ > -1 && endOffset_ < text.length()) {
584                 text = text.substring(endOffset_);
585             }
586             setText(end, text);
587         }
588         else if (endContainer_.getChildNodes().getLength() > endOffset_) {
589             end = (DomNode) endContainer_.getChildNodes().item(endOffset_);
590         }
591         else {
592             end = endContainer_.getNextSibling();
593         }
594 
595         boolean foundStart = false;
596         boolean started = false;
597         final List<DomNode> nodes = new ArrayList<>();
598         for (final DomNode n : ancestor.getDescendants()) {
599             if (n == end) {
600                 break;
601             }
602             if (n == start) {
603                 foundStart = true;
604             }
605             if (foundStart && (n != start || !isOffsetChars(startContainer_))) {
606                 started = true;
607             }
608             if (started && !n.isAncestorOf(end)) {
609                 nodes.add(n);
610             }
611         }
612         return nodes;
613     }
614 }