View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html;
16  
17  import java.util.ArrayList;
18  import java.util.Collections;
19  import java.util.HashSet;
20  import java.util.Iterator;
21  import java.util.List;
22  import java.util.Map;
23  import java.util.NoSuchElementException;
24  
25  import org.htmlunit.ElementNotFoundException;
26  import org.htmlunit.SgmlPage;
27  
28  /**
29   * Wrapper for the HTML element "table".
30   *
31   * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
32   * @author David K. Taylor
33   * @author <a href="mailto:cse@dynabean.de">Christian Sell</a>
34   * @author Ahmed Ashour
35   * @author Ronald Brill
36   * @author Frank Danek
37   */
38  public class HtmlTable extends HtmlElement {
39  
40      /** The HTML tag represented by this element. */
41      public static final String TAG_NAME = "table";
42  
43      /**
44       * Creates an instance.
45       *
46       * @param qualifiedName the qualified name of the element type to instantiate
47       * @param page the page that contains this element
48       * @param attributes the initial attributes
49       */
50      HtmlTable(final String qualifiedName, final SgmlPage page,
51              final Map<String, DomAttr> attributes) {
52          super(qualifiedName, page, attributes);
53      }
54  
55      /**
56       * Returns the first cell that matches the specified row and column, searching left to right, top to bottom.
57       * <p>This method returns different values than getRow(rowIndex).getCell(cellIndex) because this takes cellspan
58       * and rowspan into account.<br>
59       * This means, a cell with colspan='2' consumes two columns; a cell with rowspan='3' consumes three rows. The
60       * index is based on the 'background' model of the table; if you have a row like<br>
61       * &lt;td&gt;cell1&lt;/td&gt; &lt;td colspan='2'&gt;cell2&lt;/td&gt; then this row is treated as a row with
62       * three cells.<br>
63       * <p>
64       * <code>
65       * getCellAt(rowIndex, 0).asText() returns "cell1";<br>
66       * getCellAt(rowIndex, 1).asText() returns "cell2";<br>
67       * getCellAt(rowIndex, 2).asText() returns "cell2"; and<br>
68       * getCellAt(rowIndex, 3).asText() returns null;
69       * </code>
70       * </p>
71       *
72       * @param rowIndex the row index
73       * @param columnIndex the column index
74       * @return the HtmlTableCell at that location or null if there are no cells at that location
75       */
76      public final HtmlTableCell getCellAt(final int rowIndex, final int columnIndex) {
77          final RowIterator rowIterator = getRowIterator();
78          final HashSet<Position> occupied = new HashSet<>();
79          int row = 0;
80          for (final HtmlTableRow htmlTableRow : rowIterator) {
81              final HtmlTableRow.CellIterator cellIterator = htmlTableRow.getCellIterator();
82              int col = 0;
83              for (final HtmlTableCell cell : cellIterator) {
84                  while (occupied.contains(new Position(row, col))) {
85                      col++;
86                  }
87                  final int nextRow = row + cell.getRowSpan();
88                  if (row <= rowIndex && nextRow > rowIndex) {
89                      final int nextCol = col + cell.getColumnSpan();
90                      if (col <= columnIndex && nextCol > columnIndex) {
91                          return cell;
92                      }
93                  }
94                  final int rowSpan = cell.getRowSpan();
95                  final int columnSpan = cell.getColumnSpan();
96                  if (rowSpan > 1 || columnSpan > 1) {
97                      for (int i = 0; i < rowSpan; i++) {
98                          for (int j = 0; j < columnSpan; j++) {
99                              occupied.add(new Position(row + i, col + j));
100                         }
101                     }
102                 }
103                 col++;
104             }
105             row++;
106         }
107         return null;
108     }
109 
110     /**
111      * @return an iterator over all the HtmlTableRow objects
112      */
113     private RowIterator getRowIterator() {
114         return new RowIterator();
115     }
116 
117     /**
118      * @return an immutable list containing all the HtmlTableRow objects
119      * @see #getRowIterator
120      */
121     public List<HtmlTableRow> getRows() {
122         final List<HtmlTableRow> result = new ArrayList<>();
123         for (final HtmlTableRow row : getRowIterator()) {
124             result.add(row);
125         }
126         return Collections.unmodifiableList(result);
127     }
128 
129     /**
130      * @param index the 0-based index of the row
131      * @return the HtmlTableRow at the given index
132      * @throws IndexOutOfBoundsException if there is no row at the given index
133      * @see #getRowIterator
134      */
135     public HtmlTableRow getRow(final int index) throws IndexOutOfBoundsException {
136         int count = 0;
137         for (final HtmlTableRow row : getRowIterator()) {
138             if (count == index) {
139                 return row;
140             }
141             count++;
142         }
143         throw new IndexOutOfBoundsException("No row found for index " + index + ".");
144     }
145 
146     /**
147      * Computes the number of rows in this table. Note that the count is computed dynamically
148      * by iterating over all rows.
149      *
150      * @return the number of rows in this table
151      */
152     public final int getRowCount() {
153         int count = 0;
154         for (final RowIterator iterator = getRowIterator(); iterator.hasNext(); iterator.next()) {
155             count++;
156         }
157         return count;
158     }
159 
160     /**
161      * Finds and return the row with the specified id.
162      *
163      * @param id the id of the row
164      * @return the row with the specified id
165      * @exception ElementNotFoundException If the row cannot be found.
166      */
167     public final HtmlTableRow getRowById(final String id) throws ElementNotFoundException {
168         for (final HtmlTableRow row : getRowIterator()) {
169             if (row.getId().equals(id)) {
170                 return row;
171             }
172         }
173         throw new ElementNotFoundException("tr", DomElement.ID_ATTRIBUTE, id);
174     }
175 
176     /**
177      * Returns the table caption text or an empty string if a caption wasn't specified.
178      *
179      * @return the caption text
180      */
181     public String getCaptionText() {
182         for (final DomElement element : getChildElements()) {
183             if (element instanceof HtmlCaption) {
184                 return element.asNormalizedText();
185             }
186         }
187         return null;
188     }
189 
190     /**
191      * Returns the table header or null if a header wasn't specified.
192      *
193      * @return the table header
194      */
195     public HtmlTableHeader getHeader() {
196         for (final DomElement element : getChildElements()) {
197             if (element instanceof HtmlTableHeader) {
198                 return (HtmlTableHeader) element;
199             }
200         }
201         return null;
202     }
203 
204     /**
205      * Returns the table footer or null if a footer wasn't specified.
206      *
207      * @return the table footer
208      */
209     public HtmlTableFooter getFooter() {
210         for (final DomElement element : getChildElements()) {
211             if (element instanceof HtmlTableFooter) {
212                 return (HtmlTableFooter) element;
213             }
214         }
215         return null;
216     }
217 
218     /**
219      * Returns a list of tables bodies defined in this table. If no bodies were defined
220      * then an empty list will be returned.
221      *
222      * @return a list of {@link HtmlTableBody} objects
223      */
224     public List<HtmlTableBody> getBodies() {
225         final List<HtmlTableBody> bodies = new ArrayList<>();
226         for (final DomElement element : getChildElements()) {
227             if (element instanceof HtmlTableBody) {
228                 bodies.add((HtmlTableBody) element);
229             }
230         }
231         return bodies;
232     }
233 
234     /**
235      * Returns the value of the attribute {@code summary}. Refer to the
236      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
237      * documentation for details on the use of this attribute.
238      *
239      * @return the value of the attribute {@code summary}
240      *         or an empty string if that attribute isn't defined.
241      */
242     public final String getSummaryAttribute() {
243         return getAttributeDirect("summary");
244     }
245 
246     /**
247      * Returns the value of the attribute {@code width}. Refer to the
248      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
249      * documentation for details on the use of this attribute.
250      *
251      * @return the value of the attribute {@code width}
252      *         or an empty string if that attribute isn't defined.
253      */
254     public final String getWidthAttribute() {
255         return getAttributeDirect("width");
256     }
257 
258     /**
259      * Returns the value of the attribute {@code border}. Refer to the
260      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
261      * documentation for details on the use of this attribute.
262      *
263      * @return the value of the attribute {@code border}
264      *         or an empty string if that attribute isn't defined.
265      */
266     public final String getBorderAttribute() {
267         return getAttributeDirect("border");
268     }
269 
270     /**
271      * Returns the value of the attribute {@code frame}. Refer to the
272      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
273      * documentation for details on the use of this attribute.
274      *
275      * @return the value of the attribute {@code frame}
276      *         or an empty string if that attribute isn't defined.
277      */
278     public final String getFrameAttribute() {
279         return getAttributeDirect("frame");
280     }
281 
282     /**
283      * Returns the value of the attribute {@code rules}. Refer to the
284      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
285      * documentation for details on the use of this attribute.
286      *
287      * @return the value of the attribute {@code rules}
288      *         or an empty string if that attribute isn't defined.
289      */
290     public final String getRulesAttribute() {
291         return getAttributeDirect("rules");
292     }
293 
294     /**
295      * Returns the value of the attribute {@code cellspacing}. Refer to the
296      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
297      * documentation for details on the use of this attribute.
298      *
299      * @return the value of the attribute {@code cellspacing}
300      *         or an empty string if that attribute isn't defined.
301      */
302     public final String getCellSpacingAttribute() {
303         return getAttributeDirect("cellspacing");
304     }
305 
306     /**
307      * Returns the value of the attribute {@code cellpadding}. Refer to the
308      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
309      * documentation for details on the use of this attribute.
310      *
311      * @return the value of the attribute {@code cellpadding}
312      *         or an empty string if that attribute isn't defined.
313      */
314     public final String getCellPaddingAttribute() {
315         return getAttributeDirect("cellpadding");
316     }
317 
318     /**
319      * Returns the value of the attribute {@code align}. Refer to the
320      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
321      * documentation for details on the use of this attribute.
322      *
323      * @return the value of the attribute {@code align}
324      *         or an empty string if that attribute isn't defined.
325      */
326     public final String getAlignAttribute() {
327         return getAttributeDirect("align");
328     }
329 
330     /**
331      * Returns the value of the attribute {@code bgcolor}. Refer to the
332      * <a href="http://www.w3.org/TR/html401/">HTML 4.01</a>
333      * documentation for details on the use of this attribute.
334      *
335      * @return the value of the attribute {@code bgcolor}
336      *         or an empty string if that attribute isn't defined.
337      */
338     public final String getBgcolorAttribute() {
339         return getAttributeDirect("bgcolor");
340     }
341 
342     /**
343      * An iterator that moves over all rows in this table. The iterator will also
344      * enter into nested row group elements (header, footer and body).
345      */
346     private class RowIterator implements Iterator<HtmlTableRow>, Iterable<HtmlTableRow> {
347         private HtmlTableRow nextRow_;
348         private TableRowGroup currentGroup_;
349 
350         /** Creates a new instance. */
351         RowIterator() {
352             setNextRow(getFirstChild());
353         }
354 
355         /**
356          * @return {@code true} if there are more rows available
357          */
358         @Override
359         public boolean hasNext() {
360             return nextRow_ != null;
361         }
362 
363         /**
364          * @return the next row from this iterator
365          * @throws NoSuchElementException if no more rows are available
366          */
367         @Override
368         public HtmlTableRow next() throws NoSuchElementException {
369             return nextRow();
370         }
371 
372         /**
373          * Removes the current row from the underlying table.
374          */
375         @Override
376         public void remove() {
377             if (nextRow_ == null) {
378                 throw new IllegalStateException();
379             }
380             final DomNode sibling = nextRow_.getPreviousSibling();
381             if (sibling != null) {
382                 sibling.remove();
383             }
384         }
385 
386         /**
387          * @return the next row from this iterator
388          * @throws NoSuchElementException if no more rows are available
389          */
390         public HtmlTableRow nextRow() throws NoSuchElementException {
391             if (nextRow_ != null) {
392                 final HtmlTableRow result = nextRow_;
393                 setNextRow(nextRow_.getNextSibling());
394                 return result;
395             }
396             throw new NoSuchElementException();
397         }
398 
399         /**
400          * Sets the internal position to the next row, starting at the given node.
401          * @param node the node to mark as the next row; if this is not a row, the
402          *        next reachable row will be marked.
403          */
404         private void setNextRow(final DomNode node) {
405             nextRow_ = null;
406             for (DomNode next = node; next != null; next = next.getNextSibling()) {
407                 if (next instanceof HtmlTableRow) {
408                     nextRow_ = (HtmlTableRow) next;
409                     return;
410                 }
411                 else if (currentGroup_ == null && next instanceof TableRowGroup) {
412                     currentGroup_ = (TableRowGroup) next;
413                     setNextRow(next.getFirstChild());
414                     return;
415                 }
416             }
417             if (currentGroup_ != null) {
418                 final DomNode group = currentGroup_;
419                 currentGroup_ = null;
420                 setNextRow(group.getNextSibling());
421             }
422         }
423 
424         @Override
425         public Iterator<HtmlTableRow> iterator() {
426             return this;
427         }
428     }
429 
430     /**
431      * {@inheritDoc}
432      * @return {@code true} as browsers ignore self closing <code>table</code> tags.
433      */
434     @Override
435     protected boolean isEmptyXmlTagExpanded() {
436         return true;
437     }
438 
439     /**
440      * {@inheritDoc}
441      */
442     @Override
443     public DisplayStyle getDefaultStyleDisplay() {
444         return DisplayStyle.TABLE;
445     }
446 
447     private static final class Position {
448 
449         private final int posX_;
450         private final int posY_;
451 
452         Position(final int x, final int y) {
453             posX_ = x;
454             posY_ = y;
455         }
456 
457         @Override
458         public int hashCode() {
459             final int prime = 31;
460             int result = 1;
461             result = prime * result + posX_;
462             result = prime * result + posY_;
463             return result;
464         }
465 
466         @Override
467         public boolean equals(final Object obj) {
468             if (this == obj) {
469                 return true;
470             }
471             if (obj == null) {
472                 return false;
473             }
474             if (getClass() != obj.getClass()) {
475                 return false;
476             }
477 
478             final Position other = (Position) obj;
479             if (posX_ != other.posX_) {
480                 return false;
481             }
482             if (posY_ != other.posY_) {
483                 return false;
484             }
485 
486             return true;
487         }
488     }
489 }