View Javadoc
1   /*
2    * Copyright (c) 2002-2026 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html;
16  
17  import java.util.ArrayList;
18  import java.util.Collections;
19  import java.util.HashSet;
20  import java.util.Iterator;
21  import java.util.List;
22  import java.util.Map;
23  import java.util.NoSuchElementException;
24  
25  import org.htmlunit.ElementNotFoundException;
26  import org.htmlunit.SgmlPage;
27  import org.htmlunit.util.geometry.Point2D;
28  
29  /**
30   * Wrapper for the HTML element "table".
31   *
32   * @author Mike Bowler
33   * @author David K. Taylor
34   * @author Christian Sell
35   * @author Ahmed Ashour
36   * @author Ronald Brill
37   * @author Frank Danek
38   */
39  public class HtmlTable extends HtmlElement {
40  
    /** The HTML tag name ({@code "table"}) represented by this element. */
    public static final String TAG_NAME = "table";
43  
    /**
     * Creates an instance.
     * <p>The constructor is package-private and only forwards its arguments
     * to the superclass constructor.</p>
     *
     * @param qualifiedName the qualified name of the element type to instantiate
     * @param page the page that contains this element
     * @param attributes the initial attributes
     */
    HtmlTable(final String qualifiedName, final SgmlPage page,
            final Map<String, DomAttr> attributes) {
        super(qualifiedName, page, attributes);
    }
55  
56      /**
57       * Returns the first cell that matches the specified row and column, searching left to right, top to bottom.
58       * <p>This method returns different values than getRow(rowIndex).getCell(cellIndex) because this takes cellspan
59       * and rowspan into account.<br>
60       * This means, a cell with colspan='2' consumes two columns; a cell with rowspan='3' consumes three rows. The
61       * index is based on the 'background' model of the table; if you have a row like<br>
62       * &lt;td&gt;cell1&lt;/td&gt; &lt;td colspan='2'&gt;cell2&lt;/td&gt; then this row is treated as a row with
63       * three cells.<br>
64       * <p>
65       * <code>
66       * getCellAt(rowIndex, 0).asText() returns "cell1";<br>
67       * getCellAt(rowIndex, 1).asText() returns "cell2";<br>
68       * getCellAt(rowIndex, 2).asText() returns "cell2"; and<br>
69       * getCellAt(rowIndex, 3).asText() returns null;
70       * </code>
71       * </p>
72       *
73       * @param rowIndex the row index
74       * @param columnIndex the column index
75       * @return the HtmlTableCell at that location or null if there are no cells at that location
76       */
77      public final HtmlTableCell getCellAt(final int rowIndex, final int columnIndex) {
78          final RowIterator rowIterator = getRowIterator();
79          final HashSet<Point2D> occupied = new HashSet<>();
80          int row = 0;
81          for (final HtmlTableRow htmlTableRow : rowIterator) {
82              final HtmlTableRow.CellIterator cellIterator = htmlTableRow.getCellIterator();
83              int col = 0;
84              for (final HtmlTableCell cell : cellIterator) {
85                  while (occupied.contains(new Point2D(row, col))) {
86                      col++;
87                  }
88                  final int nextRow = row + cell.getRowSpan();
89                  if (row <= rowIndex && nextRow > rowIndex) {
90                      final int nextCol = col + cell.getColumnSpan();
91                      if (col <= columnIndex && nextCol > columnIndex) {
92                          return cell;
93                      }
94                  }
95                  final int rowSpan = cell.getRowSpan();
96                  final int columnSpan = cell.getColumnSpan();
97                  if (rowSpan > 1 || columnSpan > 1) {
98                      for (int i = 0; i < rowSpan; i++) {
99                          for (int j = 0; j < columnSpan; j++) {
100                             occupied.add(new Point2D(row + i, col + j));
101                         }
102                     }
103                 }
104                 col++;
105             }
106             row++;
107         }
108         return null;
109     }
110 
    /**
     * Creates a new {@link RowIterator} for this table. A new instance is created on
     * every call.
     *
     * @return an iterator over all the HtmlTableRow objects
     */
    private RowIterator getRowIterator() {
        return new RowIterator();
    }
117 
118     /**
119      * @return an immutable list containing all the HtmlTableRow objects
120      * @see #getRowIterator
121      */
122     public List<HtmlTableRow> getRows() {
123         final List<HtmlTableRow> result = new ArrayList<>();
124         for (final HtmlTableRow row : getRowIterator()) {
125             result.add(row);
126         }
127         return Collections.unmodifiableList(result);
128     }
129 
130     /**
131      * @param index the 0-based index of the row
132      * @return the HtmlTableRow at the given index
133      * @throws IndexOutOfBoundsException if there is no row at the given index
134      * @see #getRowIterator
135      */
136     public HtmlTableRow getRow(final int index) throws IndexOutOfBoundsException {
137         int count = 0;
138         for (final HtmlTableRow row : getRowIterator()) {
139             if (count == index) {
140                 return row;
141             }
142             count++;
143         }
144         throw new IndexOutOfBoundsException("No row found for index " + index + ".");
145     }
146 
147     /**
148      * Computes the number of rows in this table. Note that the count is computed dynamically
149      * by iterating over all rows.
150      *
151      * @return the number of rows in this table
152      */
153     public final int getRowCount() {
154         int count = 0;
155         for (final RowIterator iterator = getRowIterator(); iterator.hasNext(); iterator.next()) {
156             count++;
157         }
158         return count;
159     }
160 
161     /**
162      * Finds and return the row with the specified id.
163      *
164      * @param id the id of the row
165      * @return the row with the specified id
166      * @exception ElementNotFoundException If the row cannot be found.
167      */
168     public final HtmlTableRow getRowById(final String id) throws ElementNotFoundException {
169         for (final HtmlTableRow row : getRowIterator()) {
170             if (row.getId().equals(id)) {
171                 return row;
172             }
173         }
174         throw new ElementNotFoundException("tr", DomElement.ID_ATTRIBUTE, id);
175     }
176 
177     /**
178      * Returns the table caption text or an empty string if a caption wasn't specified.
179      *
180      * @return the caption text
181      */
182     public String getCaptionText() {
183         for (final DomElement element : getChildElements()) {
184             if (element instanceof HtmlCaption) {
185                 return element.asNormalizedText();
186             }
187         }
188         return null;
189     }
190 
191     /**
192      * Returns the table header or null if a header wasn't specified.
193      *
194      * @return the table header
195      */
196     public HtmlTableHeader getHeader() {
197         for (final DomElement element : getChildElements()) {
198             if (element instanceof HtmlTableHeader header) {
199                 return header;
200             }
201         }
202         return null;
203     }
204 
205     /**
206      * Returns the table footer or null if a footer wasn't specified.
207      *
208      * @return the table footer
209      */
210     public HtmlTableFooter getFooter() {
211         for (final DomElement element : getChildElements()) {
212             if (element instanceof HtmlTableFooter footer) {
213                 return footer;
214             }
215         }
216         return null;
217     }
218 
219     /**
220      * Returns a list of tables bodies defined in this table. If no bodies were defined
221      * then an empty list will be returned.
222      *
223      * @return a list of {@link HtmlTableBody} objects
224      */
225     public List<HtmlTableBody> getBodies() {
226         final List<HtmlTableBody> bodies = new ArrayList<>();
227         for (final DomElement element : getChildElements()) {
228             if (element instanceof HtmlTableBody body) {
229                 bodies.add(body);
230             }
231         }
232         return bodies;
233     }
234 
    /**
     * Returns the value of the attribute {@code summary}. Refer to the
     * <a href="https://www.w3.org/TR/html401/">HTML 4.01</a>
     * documentation for details on the use of this attribute.
     * <p>The value is obtained via {@code getAttributeDirect("summary")}.</p>
     *
     * @return the value of the attribute {@code summary}
     *         or an empty string if that attribute isn't defined.
     */
    public final String getSummaryAttribute() {
        return getAttributeDirect("summary");
    }
246 
    /**
     * Returns the value of the attribute {@code width}. Refer to the
     * <a href="https://www.w3.org/TR/html401/">HTML 4.01</a>
     * documentation for details on the use of this attribute.
     * <p>The value is obtained via {@code getAttributeDirect("width")}.</p>
     *
     * @return the value of the attribute {@code width}
     *         or an empty string if that attribute isn't defined.
     */
    public final String getWidthAttribute() {
        return getAttributeDirect("width");
    }
258 
    /**
     * Returns the value of the attribute {@code border}. Refer to the
     * <a href="https://www.w3.org/TR/html401/">HTML 4.01</a>
     * documentation for details on the use of this attribute.
     * <p>The value is obtained via {@code getAttributeDirect("border")}.</p>
     *
     * @return the value of the attribute {@code border}
     *         or an empty string if that attribute isn't defined.
     */
    public final String getBorderAttribute() {
        return getAttributeDirect("border");
    }
270 
    /**
     * Returns the value of the attribute {@code frame}. Refer to the
     * <a href="https://www.w3.org/TR/html401/">HTML 4.01</a>
     * documentation for details on the use of this attribute.
     * <p>The value is obtained via {@code getAttributeDirect("frame")}.</p>
     *
     * @return the value of the attribute {@code frame}
     *         or an empty string if that attribute isn't defined.
     */
    public final String getFrameAttribute() {
        return getAttributeDirect("frame");
    }
282 
    /**
     * Returns the value of the attribute {@code rules}. Refer to the
     * <a href="https://www.w3.org/TR/html401/">HTML 4.01</a>
     * documentation for details on the use of this attribute.
     * <p>The value is obtained via {@code getAttributeDirect("rules")}.</p>
     *
     * @return the value of the attribute {@code rules}
     *         or an empty string if that attribute isn't defined.
     */
    public final String getRulesAttribute() {
        return getAttributeDirect("rules");
    }
294 
    /**
     * Returns the value of the attribute {@code cellspacing}. Refer to the
     * <a href="https://www.w3.org/TR/html401/">HTML 4.01</a>
     * documentation for details on the use of this attribute.
     * <p>The value is obtained via {@code getAttributeDirect("cellspacing")}.</p>
     *
     * @return the value of the attribute {@code cellspacing}
     *         or an empty string if that attribute isn't defined.
     */
    public final String getCellSpacingAttribute() {
        return getAttributeDirect("cellspacing");
    }
306 
    /**
     * Returns the value of the attribute {@code cellpadding}. Refer to the
     * <a href="https://www.w3.org/TR/html401/">HTML 4.01</a>
     * documentation for details on the use of this attribute.
     * <p>The value is obtained via {@code getAttributeDirect("cellpadding")}.</p>
     *
     * @return the value of the attribute {@code cellpadding}
     *         or an empty string if that attribute isn't defined.
     */
    public final String getCellPaddingAttribute() {
        return getAttributeDirect("cellpadding");
    }
318 
    /**
     * Returns the value of the attribute {@code align}. Refer to the
     * <a href="https://www.w3.org/TR/html401/">HTML 4.01</a>
     * documentation for details on the use of this attribute.
     * <p>The value is obtained via {@code getAttributeDirect("align")}.</p>
     *
     * @return the value of the attribute {@code align}
     *         or an empty string if that attribute isn't defined.
     */
    public final String getAlignAttribute() {
        return getAttributeDirect("align");
    }
330 
    /**
     * Returns the value of the attribute {@code bgcolor}. Refer to the
     * <a href="https://www.w3.org/TR/html401/">HTML 4.01</a>
     * documentation for details on the use of this attribute.
     * <p>The value is obtained via {@code getAttributeDirect("bgcolor")}.</p>
     *
     * @return the value of the attribute {@code bgcolor}
     *         or an empty string if that attribute isn't defined.
     */
    public final String getBgcolorAttribute() {
        return getAttributeDirect("bgcolor");
    }
342 
343     /**
344      * An iterator that moves over all rows in this table. The iterator will also
345      * enter into nested row group elements (header, footer and body).
346      */
347     private class RowIterator implements Iterator<HtmlTableRow>, Iterable<HtmlTableRow> {
348         private HtmlTableRow nextRow_;
349         private TableRowGroup currentGroup_;
350 
351         /** Creates a new instance. */
352         RowIterator() {
353             setNextRow(getFirstChild());
354         }
355 
356         /**
357          * @return {@code true} if there are more rows available
358          */
359         @Override
360         public boolean hasNext() {
361             return nextRow_ != null;
362         }
363 
364         /**
365          * @return the next row from this iterator
366          * @throws NoSuchElementException if no more rows are available
367          */
368         @Override
369         public HtmlTableRow next() throws NoSuchElementException {
370             return nextRow();
371         }
372 
373         /**
374          * Removes the current row from the underlying table.
375          */
376         @Override
377         public void remove() {
378             if (nextRow_ == null) {
379                 throw new IllegalStateException();
380             }
381             final DomNode sibling = nextRow_.getPreviousSibling();
382             if (sibling != null) {
383                 sibling.remove();
384             }
385         }
386 
387         /**
388          * @return the next row from this iterator
389          * @throws NoSuchElementException if no more rows are available
390          */
391         public HtmlTableRow nextRow() throws NoSuchElementException {
392             if (nextRow_ != null) {
393                 final HtmlTableRow result = nextRow_;
394                 setNextRow(nextRow_.getNextSibling());
395                 return result;
396             }
397             throw new NoSuchElementException();
398         }
399 
400         /**
401          * Sets the internal position to the next row, starting at the given node.
402          * @param node the node to mark as the next row; if this is not a row, the
403          *        next reachable row will be marked.
404          */
405         private void setNextRow(final DomNode node) {
406             nextRow_ = null;
407             for (DomNode next = node; next != null; next = next.getNextSibling()) {
408                 if (next instanceof HtmlTableRow row) {
409                     nextRow_ = row;
410                     return;
411                 }
412                 else if (currentGroup_ == null && next instanceof TableRowGroup group) {
413                     currentGroup_ = group;
414                     setNextRow(next.getFirstChild());
415                     return;
416                 }
417             }
418             if (currentGroup_ != null) {
419                 final DomNode group = currentGroup_;
420                 currentGroup_ = null;
421                 setNextRow(group.getNextSibling());
422             }
423         }
424 
425         @Override
426         public Iterator<HtmlTableRow> iterator() {
427             return this;
428         }
429     }
430 
    /**
     * {@inheritDoc}
     * <p>This implementation always answers {@code true}.</p>
     * @return {@code true} as browsers ignore self closing <code>table</code> tags.
     */
    @Override
    protected boolean isEmptyXmlTagExpanded() {
        return true;
    }
439 
    /**
     * {@inheritDoc}
     * @return {@link DisplayStyle#TABLE}
     */
    @Override
    public DisplayStyle getDefaultStyleDisplay() {
        return DisplayStyle.TABLE;
    }
446     }
447 }