View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html;
16  
17  import java.util.ArrayList;
18  import java.util.Collections;
19  import java.util.HashSet;
20  import java.util.Iterator;
21  import java.util.List;
22  import java.util.Map;
23  import java.util.NoSuchElementException;
24  
25  import org.htmlunit.ElementNotFoundException;
26  import org.htmlunit.SgmlPage;
27  
28  /**
29   * Wrapper for the HTML element "table".
30   *
31   * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
32   * @author David K. Taylor
33   * @author <a href="mailto:cse@dynabean.de">Christian Sell</a>
34   * @author Ahmed Ashour
35   * @author Ronald Brill
36   * @author Frank Danek
37   */
38  public class HtmlTable extends HtmlElement {
39  
40      /** The HTML tag represented by this element. */
41      public static final String TAG_NAME = "table";
42  
43      /**
44       * Creates an instance.
45       *
46       * @param qualifiedName the qualified name of the element type to instantiate
47       * @param page the page that contains this element
48       * @param attributes the initial attributes
49       */
50      HtmlTable(final String qualifiedName, final SgmlPage page,
51              final Map<String, DomAttr> attributes) {
52          super(qualifiedName, page, attributes);
53      }
54  
55      /**
56       * Returns the first cell that matches the specified row and column, searching left to right, top to bottom.
57       * <p>This method returns different values than getRow(rowIndex).getCell(cellIndex) because this takes cellspan
58       * and rowspan into account.<br>
59       * This means, a cell with colspan='2' consumes two columns; a cell with rowspan='3' consumes three rows. The
60       * index is based on the 'background' model of the table; if you have a row like<br>
61       * &lt;td&gt;cell1&lt;/td&gt; &lt;td colspan='2'&gt;cell2&lt;/td&gt; then this row is treated as a row with
62       * three cells.<br>
63       * <p>
64       * <code>
65       * getCellAt(rowIndex, 0).asText() returns "cell1";<br>
66       * getCellAt(rowIndex, 1).asText() returns "cell2";<br>
67       * getCellAt(rowIndex, 2).asText() returns "cell2"; and<br>
68       * getCellAt(rowIndex, 3).asText() returns null;
69       * </code>
70       * </p>
71       *
72       * @param rowIndex the row index
73       * @param columnIndex the column index
74       * @return the HtmlTableCell at that location or null if there are no cells at that location
75       */
76      public final HtmlTableCell getCellAt(final int rowIndex, final int columnIndex) {
77          final RowIterator rowIterator = getRowIterator();
78          final HashSet<Position> occupied = new HashSet<>();
79          int row = 0;
80          for (final HtmlTableRow htmlTableRow : rowIterator) {
81              final HtmlTableRow.CellIterator cellIterator = htmlTableRow.getCellIterator();
82              int col = 0;
83              for (final HtmlTableCell cell : cellIterator) {
84                  while (occupied.contains(new Position(row, col))) {
85                      col++;
86                  }
87                  final int nextRow = row + cell.getRowSpan();
88                  if (row <= rowIndex && nextRow > rowIndex) {
89                      final int nextCol = col + cell.getColumnSpan();
90                      if (col <= columnIndex && nextCol > columnIndex) {
91                          return cell;
92                      }
93                  }
94                  if (cell.getRowSpan() > 1 || cell.getColumnSpan() > 1) {
95                      for (int i = 0; i < cell.getRowSpan(); i++) {
96                          for (int j = 0; j < cell.getColumnSpan(); j++) {
97                              occupied.add(new Position(row + i, col + j));
98                          }
99                      }
100                 }
101                 col++;
102             }
103             row++;
104         }
105         return null;
106     }
107 
108     /**
109      * @return an iterator over all the HtmlTableRow objects
110      */
111     private RowIterator getRowIterator() {
112         return new RowIterator();
113     }
114 
115     /**
116      * @return an immutable list containing all the HtmlTableRow objects
117      * @see #getRowIterator
118      */
119     public List<HtmlTableRow> getRows() {
120         final List<HtmlTableRow> result = new ArrayList<>();
121         for (final HtmlTableRow row : getRowIterator()) {
122             result.add(row);
123         }
124         return Collections.unmodifiableList(result);
125     }
126 
127     /**
128      * @param index the 0-based index of the row
129      * @return the HtmlTableRow at the given index
130      * @throws IndexOutOfBoundsException if there is no row at the given index
131      * @see #getRowIterator
132      */
133     public HtmlTableRow getRow(final int index) throws IndexOutOfBoundsException {
134         int count = 0;
135         for (final HtmlTableRow row : getRowIterator()) {
136             if (count == index) {
137                 return row;
138             }
139             count++;
140         }
141         throw new IndexOutOfBoundsException("No row found for index " + index + ".");
142     }
143 
144     /**
145      * Computes the number of rows in this table. Note that the count is computed dynamically
146      * by iterating over all rows.
147      *
148      * @return the number of rows in this table
149      */
150     public final int getRowCount() {
151         int count = 0;
152         for (final RowIterator iterator = getRowIterator(); iterator.hasNext(); iterator.next()) {
153             count++;
154         }
155         return count;
156     }
157 
158     /**
159      * Finds and return the row with the specified id.
160      *
161      * @param id the id of the row
162      * @return the row with the specified id
163      * @exception ElementNotFoundException If the row cannot be found.
164      */
165     public final HtmlTableRow getRowById(final String id) throws ElementNotFoundException {
166         for (final HtmlTableRow row : getRowIterator()) {
167             if (row.getId().equals(id)) {
168                 return row;
169             }
170         }
171         throw new ElementNotFoundException("tr", DomElement.ID_ATTRIBUTE, id);
172     }
173 
174     /**
175      * Returns the table caption text or an empty string if a caption wasn't specified.
176      *
177      * @return the caption text
178      */
179     public String getCaptionText() {
180         for (final DomElement element : getChildElements()) {
181             if (element instanceof HtmlCaption) {
182                 return element.asNormalizedText();
183             }
184         }
185         return null;
186     }
187 
188     /**
189      * Returns the table header or null if a header wasn't specified.
190      *
191      * @return the table header
192      */
193     public HtmlTableHeader getHeader() {
194         for (final DomElement element : getChildElements()) {
195             if (element instanceof HtmlTableHeader) {
196                 return (HtmlTableHeader) element;
197             }
198         }
199         return null;
200     }
201 
202     /**
203      * Returns the table footer or null if a footer wasn't specified.
204      *
205      * @return the table footer
206      */
207     public HtmlTableFooter getFooter() {
208         for (final DomElement element : getChildElements()) {
209             if (element instanceof HtmlTableFooter) {
210                 return (HtmlTableFooter) element;
211             }
212         }
213         return null;
214     }
215 
216     /**
217      * Returns a list of tables bodies defined in this table. If no bodies were defined
218      * then an empty list will be returned.
219      *
220      * @return a list of {@link HtmlTableBody} objects
221      */
222     public List<HtmlTableBody> getBodies() {
223         final List<HtmlTableBody> bodies = new ArrayList<>();
224         for (final DomElement element : getChildElements()) {
225             if (element instanceof HtmlTableBody) {
226                 bodies.add((HtmlTableBody) element);
227             }
228         }
229         return bodies;
230     }
231 
232     /**
233      * Returns the value of the attribute {@code summary}. Refer to the
234      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
235      * documentation for details on the use of this attribute.
236      *
237      * @return the value of the attribute {@code summary}
238      *         or an empty string if that attribute isn't defined.
239      */
240     public final String getSummaryAttribute() {
241         return getAttributeDirect("summary");
242     }
243 
244     /**
245      * Returns the value of the attribute {@code width}. Refer to the
246      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
247      * documentation for details on the use of this attribute.
248      *
249      * @return the value of the attribute {@code width}
250      *         or an empty string if that attribute isn't defined.
251      */
252     public final String getWidthAttribute() {
253         return getAttributeDirect("width");
254     }
255 
256     /**
257      * Returns the value of the attribute {@code border}. Refer to the
258      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
259      * documentation for details on the use of this attribute.
260      *
261      * @return the value of the attribute {@code border}
262      *         or an empty string if that attribute isn't defined.
263      */
264     public final String getBorderAttribute() {
265         return getAttributeDirect("border");
266     }
267 
268     /**
269      * Returns the value of the attribute {@code frame}. Refer to the
270      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
271      * documentation for details on the use of this attribute.
272      *
273      * @return the value of the attribute {@code frame}
274      *         or an empty string if that attribute isn't defined.
275      */
276     public final String getFrameAttribute() {
277         return getAttributeDirect("frame");
278     }
279 
280     /**
281      * Returns the value of the attribute {@code rules}. Refer to the
282      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
283      * documentation for details on the use of this attribute.
284      *
285      * @return the value of the attribute {@code rules}
286      *         or an empty string if that attribute isn't defined.
287      */
288     public final String getRulesAttribute() {
289         return getAttributeDirect("rules");
290     }
291 
292     /**
293      * Returns the value of the attribute {@code cellspacing}. Refer to the
294      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
295      * documentation for details on the use of this attribute.
296      *
297      * @return the value of the attribute {@code cellspacing}
298      *         or an empty string if that attribute isn't defined.
299      */
300     public final String getCellSpacingAttribute() {
301         return getAttributeDirect("cellspacing");
302     }
303 
304     /**
305      * Returns the value of the attribute {@code cellpadding}. Refer to the
306      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
307      * documentation for details on the use of this attribute.
308      *
309      * @return the value of the attribute {@code cellpadding}
310      *         or an empty string if that attribute isn't defined.
311      */
312     public final String getCellPaddingAttribute() {
313         return getAttributeDirect("cellpadding");
314     }
315 
316     /**
317      * Returns the value of the attribute {@code align}. Refer to the
318      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
319      * documentation for details on the use of this attribute.
320      *
321      * @return the value of the attribute {@code align}
322      *         or an empty string if that attribute isn't defined.
323      */
324     public final String getAlignAttribute() {
325         return getAttributeDirect("align");
326     }
327 
328     /**
329      * Returns the value of the attribute {@code bgcolor}. Refer to the
330      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
331      * documentation for details on the use of this attribute.
332      *
333      * @return the value of the attribute {@code bgcolor}
334      *         or an empty string if that attribute isn't defined.
335      */
336     public final String getBgcolorAttribute() {
337         return getAttributeDirect("bgcolor");
338     }
339 
340     /**
341      * An iterator that moves over all rows in this table. The iterator will also
342      * enter into nested row group elements (header, footer and body).
343      */
344     private class RowIterator implements Iterator<HtmlTableRow>, Iterable<HtmlTableRow> {
345         private HtmlTableRow nextRow_;
346         private TableRowGroup currentGroup_;
347 
348         /** Creates a new instance. */
349         RowIterator() {
350             setNextRow(getFirstChild());
351         }
352 
353         /**
354          * @return {@code true} if there are more rows available
355          */
356         @Override
357         public boolean hasNext() {
358             return nextRow_ != null;
359         }
360 
361         /**
362          * @return the next row from this iterator
363          * @throws NoSuchElementException if no more rows are available
364          */
365         @Override
366         public HtmlTableRow next() throws NoSuchElementException {
367             return nextRow();
368         }
369 
370         /**
371          * Removes the current row from the underlying table.
372          */
373         @Override
374         public void remove() {
375             if (nextRow_ == null) {
376                 throw new IllegalStateException();
377             }
378             final DomNode sibling = nextRow_.getPreviousSibling();
379             if (sibling != null) {
380                 sibling.remove();
381             }
382         }
383 
384         /**
385          * @return the next row from this iterator
386          * @throws NoSuchElementException if no more rows are available
387          */
388         public HtmlTableRow nextRow() throws NoSuchElementException {
389             if (nextRow_ != null) {
390                 final HtmlTableRow result = nextRow_;
391                 setNextRow(nextRow_.getNextSibling());
392                 return result;
393             }
394             throw new NoSuchElementException();
395         }
396 
397         /**
398          * Sets the internal position to the next row, starting at the given node.
399          * @param node the node to mark as the next row; if this is not a row, the
400          *        next reachable row will be marked.
401          */
402         private void setNextRow(final DomNode node) {
403             nextRow_ = null;
404             for (DomNode next = node; next != null; next = next.getNextSibling()) {
405                 if (next instanceof HtmlTableRow) {
406                     nextRow_ = (HtmlTableRow) next;
407                     return;
408                 }
409                 else if (currentGroup_ == null && next instanceof TableRowGroup) {
410                     currentGroup_ = (TableRowGroup) next;
411                     setNextRow(next.getFirstChild());
412                     return;
413                 }
414             }
415             if (currentGroup_ != null) {
416                 final DomNode group = currentGroup_;
417                 currentGroup_ = null;
418                 setNextRow(group.getNextSibling());
419             }
420         }
421 
422         @Override
423         public Iterator<HtmlTableRow> iterator() {
424             return this;
425         }
426     }
427 
428     /**
429      * {@inheritDoc}
430      * @return {@code true} as browsers ignore self closing <code>table</code> tags.
431      */
432     @Override
433     protected boolean isEmptyXmlTagExpanded() {
434         return true;
435     }
436 
437     /**
438      * {@inheritDoc}
439      */
440     @Override
441     public DisplayStyle getDefaultStyleDisplay() {
442         return DisplayStyle.TABLE;
443     }
444 
445     private static final class Position {
446 
447         private final int posX_;
448         private final int posY_;
449 
450         Position(final int x, final int y) {
451             posX_ = x;
452             posY_ = y;
453         }
454 
455         @Override
456         public int hashCode() {
457             final int prime = 31;
458             int result = 1;
459             result = prime * result + posX_;
460             result = prime * result + posY_;
461             return result;
462         }
463 
464         @Override
465         public boolean equals(final Object obj) {
466             if (this == obj) {
467                 return true;
468             }
469             if (obj == null) {
470                 return false;
471             }
472             if (getClass() != obj.getClass()) {
473                 return false;
474             }
475 
476             final Position other = (Position) obj;
477             if (posX_ != other.posX_) {
478                 return false;
479             }
480             if (posY_ != other.posY_) {
481                 return false;
482             }
483 
484             return true;
485         }
486     }
487 }