1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.html;
16
17 import java.io.Serializable;
18
19 import org.w3c.dom.DOMException;
20 import org.w3c.dom.Node;
21 import org.w3c.dom.traversal.NodeFilter;
22
23
24
25
26
27
28
29
30
31
32
33
34
35 public class HtmlDomTreeWalker implements Serializable {
36
37 private final DomNode root_;
38 private DomNode currentNode_;
39 private final int whatToShow_;
40 private final NodeFilter filter_;
41 private final boolean expandEntityReferences_;
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58 public HtmlDomTreeWalker(final DomNode root, final int whatToShow, final NodeFilter filter,
59 final boolean expandEntityReferences) throws DOMException {
60 if (root == null) {
61 throw new IllegalArgumentException("root must not be null");
62 }
63 root_ = root;
64 whatToShow_ = whatToShow;
65 filter_ = filter;
66 expandEntityReferences_ = expandEntityReferences;
67 currentNode_ = root_;
68 }
69
70
71
72
73
74 public DomNode getRoot() {
75 return root_;
76 }
77
78
79
80
81
82 public int getWhatToShow() {
83 return whatToShow_;
84 }
85
86
87
88
89
90 public NodeFilter getFilter() {
91 return filter_;
92 }
93
94
95
96
97
98 public boolean getExpandEntityReferences() {
99 return expandEntityReferences_;
100 }
101
102
103
104
105
106 public DomNode getCurrentNode() {
107 return currentNode_;
108 }
109
110
111
112
113
114
115 public void setCurrentNode(final Node currentNode) throws DOMException {
116 if (currentNode == null) {
117 throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
118 "currentNode cannot be set to null");
119 }
120 currentNode_ = (DomNode) currentNode;
121 }
122
123
124
125
126
127 public DomNode nextNode() {
128 final DomNode leftChild = getEquivalentLogical(currentNode_.getFirstChild(), false);
129 if (leftChild != null) {
130 currentNode_ = leftChild;
131 return leftChild;
132 }
133 final DomNode rightSibling = getEquivalentLogical(currentNode_.getNextSibling(), false);
134 if (rightSibling != null) {
135 currentNode_ = rightSibling;
136 return rightSibling;
137 }
138
139 final DomNode uncle = getFirstUncleNode(currentNode_);
140 if (uncle != null) {
141 currentNode_ = uncle;
142 return uncle;
143 }
144
145 return null;
146 }
147
148
149
150
151
152 private DomNode getFirstUncleNode(final DomNode n) {
153 if (n == root_ || n == null) {
154 return null;
155 }
156
157 final DomNode parent = n.getParentNode();
158 if (parent == null) {
159 return null;
160 }
161
162 final DomNode uncle = getEquivalentLogical(parent.getNextSibling(), false);
163 if (uncle != null) {
164 return uncle;
165 }
166
167 return getFirstUncleNode(parent);
168 }
169
170
171
172
173
174
175
176
177
178
179
180 private DomNode getEquivalentLogical(final DomNode n, final boolean lookLeft) {
181
182 if (n == null) {
183 return null;
184 }
185 if (isNodeVisible(n)) {
186 return n;
187 }
188
189
190 if (isNodeSkipped(n)) {
191 final DomNode child;
192 if (lookLeft) {
193 child = getEquivalentLogical(n.getLastChild(), lookLeft);
194 }
195 else {
196 child = getEquivalentLogical(n.getFirstChild(), lookLeft);
197 }
198
199 if (child != null) {
200 return child;
201 }
202 }
203
204
205
206 return getSibling(n, lookLeft);
207 }
208
209
210
211
212 private boolean isNodeVisible(final Node n) {
213 if (acceptNode(n) == NodeFilter.FILTER_ACCEPT) {
214 if (filter_ == null || filter_.acceptNode(n) == NodeFilter.FILTER_ACCEPT) {
215 return expandEntityReferences_ || n.getParentNode() == null
216 || n.getParentNode().getNodeType() != Node.ENTITY_REFERENCE_NODE;
217 }
218 }
219 return false;
220 }
221
222
223
224
225
226
227
228
229
230 private short acceptNode(final Node n) {
231 final int flag = getFlagForNode(n);
232
233 if ((whatToShow_ & flag) != 0) {
234 return NodeFilter.FILTER_ACCEPT;
235 }
236
237 return NodeFilter.FILTER_SKIP;
238 }
239
240
241
242
243
244
245
246
247
248 public static int getFlagForNode(final Node node) {
249 switch (node.getNodeType()) {
250 case Node.ELEMENT_NODE:
251 return NodeFilter.SHOW_ELEMENT;
252 case Node.ATTRIBUTE_NODE:
253 return NodeFilter.SHOW_ATTRIBUTE;
254 case Node.TEXT_NODE:
255 return NodeFilter.SHOW_TEXT;
256 case Node.CDATA_SECTION_NODE:
257 return NodeFilter.SHOW_CDATA_SECTION;
258 case Node.ENTITY_REFERENCE_NODE:
259 return NodeFilter.SHOW_ENTITY_REFERENCE;
260 case Node.ENTITY_NODE:
261 return NodeFilter.SHOW_ENTITY;
262 case Node.PROCESSING_INSTRUCTION_NODE:
263 return NodeFilter.SHOW_PROCESSING_INSTRUCTION;
264 case Node.COMMENT_NODE:
265 return NodeFilter.SHOW_COMMENT;
266 case Node.DOCUMENT_NODE:
267 return NodeFilter.SHOW_DOCUMENT;
268 case Node.DOCUMENT_TYPE_NODE:
269 return NodeFilter.SHOW_DOCUMENT_TYPE;
270 case Node.DOCUMENT_FRAGMENT_NODE:
271 return NodeFilter.SHOW_DOCUMENT_FRAGMENT;
272 case Node.NOTATION_NODE:
273 return NodeFilter.SHOW_NOTATION;
274 default:
275 return 0;
276 }
277 }
278
279
280 private boolean isNodeSkipped(final Node n) {
281 return !isNodeVisible(n) && !isNodeRejected(n);
282 }
283
284
285 private boolean isNodeRejected(final Node n) {
286 if (acceptNode(n) == NodeFilter.FILTER_REJECT) {
287 return true;
288 }
289 if (filter_ != null && filter_.acceptNode(n) == NodeFilter.FILTER_REJECT) {
290 return true;
291 }
292 return !expandEntityReferences_ && n.getParentNode() != null
293 && n.getParentNode().getNodeType() == Node.ENTITY_REFERENCE_NODE;
294 }
295
296
297 private DomNode getSibling(final DomNode n, final boolean lookLeft) {
298 if (n == null) {
299 return null;
300 }
301
302 if (isNodeVisible(n)) {
303 return null;
304 }
305
306 final DomNode sibling;
307 if (lookLeft) {
308 sibling = n.getPreviousSibling();
309 }
310 else {
311 sibling = n.getNextSibling();
312 }
313
314 if (sibling == null) {
315
316 if (n == root_) {
317 return null;
318 }
319 return getSibling(n.getParentNode(), lookLeft);
320
321 }
322 return getEquivalentLogical(sibling, lookLeft);
323 }
324
325
326
327
328
329 public DomNode nextSibling() {
330 if (currentNode_ == root_) {
331 return null;
332 }
333
334 final DomNode newNode = getEquivalentLogical(currentNode_.getNextSibling(), false);
335
336 if (newNode != null) {
337 currentNode_ = newNode;
338 }
339
340 return newNode;
341 }
342
343
344
345
346
347 public DomNode parentNode() {
348 if (currentNode_ == root_) {
349 return null;
350 }
351
352 DomNode newNode = currentNode_;
353
354 do {
355 newNode = newNode.getParentNode();
356 }
357 while (newNode != null && !isNodeVisible(newNode) && newNode != root_);
358
359 if (newNode == null || !isNodeVisible(newNode)) {
360 return null;
361 }
362 currentNode_ = newNode;
363 return newNode;
364 }
365
366
367
368
369
370 public DomNode previousSibling() {
371 if (currentNode_ == root_) {
372 return null;
373 }
374
375 final DomNode newNode = getEquivalentLogical(currentNode_.getPreviousSibling(), true);
376
377 if (newNode != null) {
378 currentNode_ = newNode;
379 }
380
381 return newNode;
382 }
383
384
385
386
387
388 public DomNode lastChild() {
389 final DomNode newNode = getEquivalentLogical(currentNode_.getLastChild(), true);
390
391 if (newNode != null) {
392 currentNode_ = newNode;
393 }
394
395 return newNode;
396 }
397
398
399
400
401
402 public DomNode previousNode() {
403 final DomNode newNode = getPreviousNode(currentNode_);
404
405 if (newNode != null) {
406 currentNode_ = newNode;
407 }
408
409 return newNode;
410 }
411
412
413
414
415
416 private DomNode getPreviousNode(final DomNode n) {
417 if (n == root_) {
418 return null;
419 }
420 final DomNode left = getEquivalentLogical(n.getPreviousSibling(), true);
421 if (left == null) {
422 final DomNode parent = n.getParentNode();
423 if (parent == null) {
424 return null;
425 }
426 if (isNodeVisible(parent)) {
427 return parent;
428 }
429 }
430
431 DomNode follow = left;
432 if (follow != null) {
433 while (follow.hasChildNodes()) {
434 final DomNode toFollow = getEquivalentLogical(follow.getLastChild(), true);
435 if (toFollow == null) {
436 break;
437 }
438 follow = toFollow;
439 }
440 }
441 return follow;
442 }
443
444
445
446
447
448 public DomNode firstChild() {
449 final DomNode newNode = getEquivalentLogical(currentNode_.getFirstChild(), false);
450
451 if (newNode != null) {
452 currentNode_ = newNode;
453 }
454
455 return newNode;
456 }
457 }