001package org.jsoup.nodes; 002 003import org.jsoup.helper.Validate; 004import org.jsoup.internal.QuietAppendable; 005import org.jsoup.internal.StringUtil; 006import org.jsoup.parser.ParseSettings; 007import org.jsoup.select.NodeFilter; 008import org.jsoup.select.NodeVisitor; 009import org.jspecify.annotations.Nullable; 010 011import java.io.IOException; 012import java.util.ArrayList; 013import java.util.Arrays; 014import java.util.Collections; 015import java.util.Iterator; 016import java.util.LinkedList; 017import java.util.List; 018import java.util.function.Consumer; 019import java.util.stream.Stream; 020 021/** 022 The base, abstract Node model. {@link Element}, {@link Document}, {@link Comment}, {@link TextNode}, et al., 023 are instances of Node. 024 025 @author Jonathan Hedley, jonathan@hedley.net */ 026public abstract class Node implements Cloneable { 027 static final List<Node> EmptyNodes = Collections.emptyList(); 028 static final String EmptyString = ""; 029 @Nullable Element parentNode; // Nodes don't always have parents 030 int siblingIndex; 031 032 /** 033 * Default constructor. Doesn't set up base uri, children, or attributes; use with caution. 034 */ 035 protected Node() { 036 } 037 038 /** 039 Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof). 040 @return node name 041 */ 042 public abstract String nodeName(); 043 044 /** 045 Get the normalized name of this node. For node types other than Element, this is the same as {@link #nodeName()}. 046 For an Element, will be the lower-cased tag name. 047 @return normalized node name 048 @since 1.15.4. 049 */ 050 public String normalName() { 051 return nodeName(); 052 } 053 054 /** 055 Get the node's value. For a TextNode, the whole text; for a Comment, the comment data; for an Element, 056 wholeOwnText. Returns "" if there is no value. 057 @return the node's value 058 */ 059 public String nodeValue() { 060 return ""; 061 } 062 063 /** 064 Test if this node has the specified normalized name, in any namespace. 065 * @param normalName a normalized element name (e.g. {@code div}). 066 * @return true if the element's normal name matches exactly 067 * @since 1.17.2 068 */ 069 public boolean nameIs(String normalName) { 070 return normalName().equals(normalName); 071 } 072 073 /** 074 Test if this node's parent has the specified normalized name. 075 * @param normalName a normalized name (e.g. {@code div}). 076 * @return true if the parent element's normal name matches exactly 077 * @since 1.17.2 078 */ 079 public boolean parentNameIs(String normalName) { 080 return parentNode != null && parentNode.normalName().equals(normalName); 081 } 082 083 /** 084 Test if this node's parent is an Element with the specified normalized name and namespace. 085 * @param normalName a normalized element name (e.g. {@code div}). 086 * @param namespace the namespace 087 * @return true if the parent element's normal name matches exactly, and that element is in the specified namespace 088 * @since 1.17.2 089 */ 090 public boolean parentElementIs(String normalName, String namespace) { 091 return parentNode != null && parentNode instanceof Element 092 && ((Element) parentNode).elementIs(normalName, namespace); 093 } 094 095 /** 096 * Check if this Node has an actual Attributes object. 097 */ 098 protected abstract boolean hasAttributes(); 099 100 /** 101 Checks if this node has a parent. Nodes won't have parents if (e.g.) they are newly created and not added as a child 102 to an existing node, or if they are a {@link #shallowClone()}. In such cases, {@link #parent()} will return {@code null}. 103 @return if this node has a parent. 104 */ 105 public boolean hasParent() { 106 return parentNode != null; 107 } 108 109 /** 110 * Get an attribute's value by its key. <b>Case insensitive</b> 111 * <p> 112 * To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs:</b></code>, 113 * which is a shortcut to the {@link #absUrl} method. 114 * </p> 115 * E.g.: 116 * <blockquote><code>String url = a.attr("abs:href");</code></blockquote> 117 * 118 * @param attributeKey The attribute key. 119 * @return The attribute, or empty string if not present (to avoid nulls). 120 * @see #attributes() 121 * @see #hasAttr(String) 122 * @see #absUrl(String) 123 */ 124 public String attr(String attributeKey) { 125 Validate.notNull(attributeKey); 126 if (!hasAttributes()) 127 return EmptyString; 128 129 String val = attributes().getIgnoreCase(attributeKey); 130 if (val.length() > 0) 131 return val; 132 else if (attributeKey.startsWith("abs:")) 133 return absUrl(attributeKey.substring("abs:".length())); 134 else return ""; 135 } 136 137 /** 138 * Get each of the Element's attributes. 139 * @return attributes (which implements Iterable, with the same order as presented in the original HTML). 140 */ 141 public abstract Attributes attributes(); 142 143 /** 144 Get the number of attributes that this Node has. 145 @return the number of attributes 146 @since 1.14.2 147 */ 148 public int attributesSize() { 149 // added so that we can test how many attributes exist without implicitly creating the Attributes object 150 return hasAttributes() ? attributes().size() : 0; 151 } 152 153 /** 154 * Set an attribute (key=value). If the attribute already exists, it is replaced. The attribute key comparison is 155 * <b>case insensitive</b>. The key will be set with case sensitivity as set in the parser settings. 156 * @param attributeKey The attribute key. 157 * @param attributeValue The attribute value. 158 * @return this (for chaining) 159 */ 160 public Node attr(String attributeKey, String attributeValue) { 161 Document doc = ownerDocument(); 162 ParseSettings settings = doc != null ? doc.parser().settings() : ParseSettings.htmlDefault; 163 attributeKey = settings.normalizeAttribute(attributeKey); 164 attributes().putIgnoreCase(attributeKey, attributeValue); 165 return this; 166 } 167 168 /** 169 * Test if this Node has an attribute. <b>Case insensitive</b>. 170 * @param attributeKey The attribute key to check. 171 * @return true if the attribute exists, false if not. 172 */ 173 public boolean hasAttr(String attributeKey) { 174 Validate.notNull(attributeKey); 175 if (!hasAttributes()) 176 return false; 177 178 if (attributeKey.startsWith("abs:")) { 179 String key = attributeKey.substring("abs:".length()); 180 if (attributes().hasKeyIgnoreCase(key) && !absUrl(key).isEmpty()) 181 return true; 182 } 183 return attributes().hasKeyIgnoreCase(attributeKey); 184 } 185 186 /** 187 * Remove an attribute from this node. 188 * @param attributeKey The attribute to remove. 189 * @return this (for chaining) 190 */ 191 public Node removeAttr(String attributeKey) { 192 Validate.notNull(attributeKey); 193 if (hasAttributes()) 194 attributes().removeIgnoreCase(attributeKey); 195 return this; 196 } 197 198 /** 199 * Clear (remove) each of the attributes in this node. 200 * @return this, for chaining 201 */ 202 public Node clearAttributes() { 203 if (hasAttributes()) { 204 Iterator<Attribute> it = attributes().iterator(); 205 while (it.hasNext()) { 206 it.next(); 207 it.remove(); 208 } 209 } 210 return this; 211 } 212 213 /** 214 Get the base URI that applies to this node. Will return an empty string if not defined. Used to make relative links 215 absolute. 216 217 @return base URI 218 @see #absUrl 219 */ 220 public abstract String baseUri(); 221 222 /** 223 * Set the baseUri for just this node (not its descendants), if this Node tracks base URIs. 224 * @param baseUri new URI 225 */ 226 protected abstract void doSetBaseUri(String baseUri); 227 228 /** 229 Update the base URI of this node and all of its descendants. 230 @param baseUri base URI to set 231 */ 232 public void setBaseUri(final String baseUri) { 233 Validate.notNull(baseUri); 234 doSetBaseUri(baseUri); 235 } 236 237 /** 238 * Get an absolute URL from a URL attribute that may be relative (such as an <code><a href></code> or 239 * <code><img src></code>). 240 * <p> 241 * E.g.: <code>String absUrl = linkEl.absUrl("href");</code> 242 * </p> 243 * <p> 244 * If the attribute value is already absolute (i.e. it starts with a protocol, like 245 * <code>http://</code> or <code>https://</code> etc), and it successfully parses as a URL, the attribute is 246 * returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made 247 * absolute using that. 248 * </p> 249 * <p> 250 * As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, e.g.: 251 * <code>String absUrl = linkEl.attr("abs:href");</code> 252 * </p> 253 * 254 * @param attributeKey The attribute key 255 * @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or 256 * could not be made successfully into a URL. 257 * @see #attr 258 * @see java.net.URL#URL(java.net.URL, String) 259 */ 260 public String absUrl(String attributeKey) { 261 Validate.notEmpty(attributeKey); 262 if (!(hasAttributes() && attributes().hasKeyIgnoreCase(attributeKey))) // not using hasAttr, so that we don't recurse down hasAttr->absUrl 263 return ""; 264 265 return StringUtil.resolve(baseUri(), attributes().getIgnoreCase(attributeKey)); 266 } 267 268 protected abstract List<Node> ensureChildNodes(); 269 270 /** 271 Get a child node by its 0-based index. 272 @param index index of child node 273 @return the child node at this index. 274 @throws IndexOutOfBoundsException if the index is out of bounds. 275 */ 276 public Node childNode(int index) { 277 return ensureChildNodes().get(index); 278 } 279 280 /** 281 Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes 282 themselves can be manipulated. 283 @return list of children. If no children, returns an empty list. 284 */ 285 public List<Node> childNodes() { 286 if (childNodeSize() == 0) 287 return EmptyNodes; 288 289 List<Node> children = ensureChildNodes(); 290 List<Node> rewrap = new ArrayList<>(children.size()); // wrapped so that looping and moving will not throw a CME as the source changes 291 rewrap.addAll(children); 292 return Collections.unmodifiableList(rewrap); 293 } 294 295 /** 296 * Returns a deep copy of this node's children. Changes made to these nodes will not be reflected in the original 297 * nodes 298 * @return a deep copy of this node's children 299 */ 300 public List<Node> childNodesCopy() { 301 final List<Node> nodes = ensureChildNodes(); 302 final ArrayList<Node> children = new ArrayList<>(nodes.size()); 303 for (Node node : nodes) { 304 children.add(node.clone()); 305 } 306 return children; 307 } 308 309 /** 310 * Get the number of child nodes that this node holds. 311 * @return the number of child nodes that this node holds. 312 */ 313 public abstract int childNodeSize(); 314 315 protected Node[] childNodesAsArray() { 316 return ensureChildNodes().toArray(new Node[0]); 317 } 318 319 /** 320 * Delete all this node's children. 321 * @return this node, for chaining 322 */ 323 public abstract Node empty(); 324 325 /** 326 Gets this node's parent node. This is always an Element. 327 @return parent node; or null if no parent. 328 @see #hasParent() 329 @see #parentElement(); 330 */ 331 public @Nullable Node parent() { 332 return parentNode; 333 } 334 335 /** 336 Gets this node's parent Element. 337 @return parent element; or null if this node has no parent. 338 @see #hasParent() 339 @since 1.21.1 340 */ 341 public @Nullable Element parentElement() { 342 return parentNode; 343 } 344 345 /** 346 Gets this node's parent node. Not overridable by extending classes, so useful if you really just need the Node type. 347 @return parent node; or null if no parent. 348 */ 349 public @Nullable final Node parentNode() { 350 return parentNode; 351 } 352 353 /** 354 * Get this node's root node; that is, its topmost ancestor. If this node is the top ancestor, returns {@code this}. 355 * @return topmost ancestor. 356 */ 357 public Node root() { 358 Node node = this; 359 while (node.parentNode != null) 360 node = node.parentNode; 361 return node; 362 } 363 364 /** 365 * Gets the Document associated with this Node. 366 * @return the Document associated with this Node, or null if there is no such Document. 367 */ 368 public @Nullable Document ownerDocument() { 369 Node node = this; 370 while (node != null) { 371 if (node instanceof Document) return (Document) node; 372 node = node.parentNode; 373 } 374 return null; 375 } 376 377 /** 378 * Remove (delete) this node from the DOM tree. If this node has children, they are also removed. If this node is 379 * an orphan, nothing happens. 380 */ 381 public void remove() { 382 if (parentNode != null) 383 parentNode.removeChild(this); 384 } 385 386 /** 387 * Insert the specified HTML into the DOM before this node (as a preceding sibling). 388 * @param html HTML to add before this node 389 * @return this node, for chaining 390 * @see #after(String) 391 */ 392 public Node before(String html) { 393 addSiblingHtml(siblingIndex(), html); 394 return this; 395 } 396 397 /** 398 * Insert the specified node into the DOM before this node (as a preceding sibling). 399 * @param node to add before this node 400 * @return this node, for chaining 401 * @see #after(Node) 402 */ 403 public Node before(Node node) { 404 Validate.notNull(node); 405 Validate.notNull(parentNode); 406 407 // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add 408 if (node.parentNode == parentNode) node.remove(); 409 410 parentNode.addChildren(siblingIndex(), node); 411 return this; 412 } 413 414 /** 415 * Insert the specified HTML into the DOM after this node (as a following sibling). 416 * @param html HTML to add after this node 417 * @return this node, for chaining 418 * @see #before(String) 419 */ 420 public Node after(String html) { 421 addSiblingHtml(siblingIndex() + 1, html); 422 return this; 423 } 424 425 /** 426 * Insert the specified node into the DOM after this node (as a following sibling). 427 * @param node to add after this node 428 * @return this node, for chaining 429 * @see #before(Node) 430 */ 431 public Node after(Node node) { 432 Validate.notNull(node); 433 Validate.notNull(parentNode); 434 435 // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add 436 if (node.parentNode == parentNode) node.remove(); 437 438 parentNode.addChildren(siblingIndex() + 1, node); 439 return this; 440 } 441 442 private void addSiblingHtml(int index, String html) { 443 Validate.notNull(html); 444 Validate.notNull(parentNode); 445 446 Element context = parentNode instanceof Element ? (Element) parentNode : null; 447 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri()); 448 parentNode.addChildren(index, nodes.toArray(new Node[0])); 449 } 450 451 /** 452 Wrap the supplied HTML around this node. 453 454 @param html HTML to wrap around this node, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. If 455 the input HTML does not parse to a result starting with an Element, this will be a no-op. 456 @return this node, for chaining. 457 */ 458 public Node wrap(String html) { 459 Validate.notEmpty(html); 460 461 // Parse context - parent (because wrapping), this, or null 462 Element context = 463 parentNode != null && parentNode instanceof Element ? (Element) parentNode : 464 this instanceof Element ? (Element) this : 465 null; 466 List<Node> wrapChildren = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri()); 467 Node wrapNode = wrapChildren.get(0); 468 if (!(wrapNode instanceof Element)) // nothing to wrap with; noop 469 return this; 470 471 Element wrap = (Element) wrapNode; 472 Element deepest = getDeepChild(wrap); 473 if (parentNode != null) 474 parentNode.replaceChild(this, wrap); 475 deepest.addChildren(this); // side effect of tricking wrapChildren to lose first 476 477 // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder 478 if (wrapChildren.size() > 0) { 479 //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here) 480 for (int i = 0; i < wrapChildren.size(); i++) { 481 Node remainder = wrapChildren.get(i); 482 // if no parent, this could be the wrap node, so skip 483 if (wrap == remainder) 484 continue; 485 486 if (remainder.parentNode != null) 487 remainder.parentNode.removeChild(remainder); 488 wrap.after(remainder); 489 } 490 } 491 return this; 492 } 493 494 /** 495 * Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping 496 * the node but keeping its children. 497 * <p> 498 * For example, with the input html: 499 * </p> 500 * <p>{@code <div>One <span>Two <b>Three</b></span></div>}</p> 501 * Calling {@code element.unwrap()} on the {@code span} element will result in the html: 502 * <p>{@code <div>One Two <b>Three</b></div>}</p> 503 * and the {@code "Two "} {@link TextNode} being returned. 504 * 505 * @return the first child of this node, after the node has been unwrapped. @{code Null} if the node had no children. 506 * @see #remove() 507 * @see #wrap(String) 508 */ 509 public @Nullable Node unwrap() { 510 Validate.notNull(parentNode); 511 Node firstChild = firstChild(); 512 parentNode.addChildren(siblingIndex(), this.childNodesAsArray()); 513 this.remove(); 514 515 return firstChild; 516 } 517 518 private static Element getDeepChild(Element el) { 519 Element child = el.firstElementChild(); 520 while (child != null) { 521 el = child; 522 child = child.firstElementChild(); 523 } 524 return el; 525 } 526 527 /** 528 * Replace this node in the DOM with the supplied node. 529 * @param in the node that will replace the existing node. 530 */ 531 public void replaceWith(Node in) { 532 Validate.notNull(in); 533 if (parentNode == null) parentNode = in.parentNode; // allows old to have been temp removed before replacing 534 Validate.notNull(parentNode); 535 parentNode.replaceChild(this, in); 536 } 537 538 protected void setParentNode(Node parentNode) { 539 Validate.notNull(parentNode); 540 if (this.parentNode != null) 541 this.parentNode.removeChild(this); 542 assert parentNode instanceof Element; 543 this.parentNode = (Element) parentNode; 544 } 545 546 protected void replaceChild(Node out, Node in) { 547 Validate.isTrue(out.parentNode == this); 548 Validate.notNull(in); 549 if (out == in) return; // no-op self replacement 550 551 if (in.parentNode != null) 552 in.parentNode.removeChild(in); 553 554 final int index = out.siblingIndex(); 555 ensureChildNodes().set(index, in); 556 assert this instanceof Element; 557 in.parentNode = (Element) this; 558 in.setSiblingIndex(index); 559 out.parentNode = null; 560 } 561 562 protected void removeChild(Node out) { 563 Validate.isTrue(out.parentNode == this); 564 Element el = (Element) this; 565 if (el.hasValidChildren()) // can remove by index 566 ensureChildNodes().remove(out.siblingIndex); 567 else 568 ensureChildNodes().remove(out); // iterates, but potentially not every one 569 570 el.invalidateChildren(); 571 out.parentNode = null; 572 } 573 574 protected void addChildren(Node... children) { 575 //most used. short circuit addChildren(int), which hits reindex children and array copy 576 final List<Node> nodes = ensureChildNodes(); 577 578 for (Node child: children) { 579 reparentChild(child); 580 nodes.add(child); 581 child.setSiblingIndex(nodes.size()-1); 582 } 583 } 584 585 protected void addChildren(int index, Node... children) { 586 // todo clean up all these and use the list, not the var array. just need to be careful when iterating the incoming (as we are removing as we go) 587 Validate.notNull(children); 588 if (children.length == 0) return; 589 final List<Node> nodes = ensureChildNodes(); 590 591 // fast path - if used as a wrap (index=0, children = child[0].parent.children - do inplace 592 final Node firstParent = children[0].parent(); 593 if (firstParent != null && firstParent.childNodeSize() == children.length) { 594 boolean sameList = true; 595 final List<Node> firstParentNodes = firstParent.ensureChildNodes(); 596 // identity check contents to see if same 597 int i = children.length; 598 while (i-- > 0) { 599 if (children[i] != firstParentNodes.get(i)) { 600 sameList = false; 601 break; 602 } 603 } 604 if (sameList) { // moving, so OK to empty firstParent and short-circuit 605 firstParent.empty(); 606 nodes.addAll(index, Arrays.asList(children)); 607 i = children.length; 608 assert this instanceof Element; 609 while (i-- > 0) { 610 children[i].parentNode = (Element) this; 611 } 612 ((Element) this).invalidateChildren(); 613 return; 614 } 615 } 616 617 Validate.noNullElements(children); 618 for (Node child : children) { 619 reparentChild(child); 620 } 621 nodes.addAll(index, Arrays.asList(children)); 622 ((Element) this).invalidateChildren(); 623 } 624 625 protected void reparentChild(Node child) { 626 child.setParentNode(this); 627 } 628 629 /** 630 Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not 631 include this node (a node is not a sibling of itself). 632 @return node siblings. If the node has no parent, returns an empty list. 633 */ 634 public List<Node> siblingNodes() { 635 if (parentNode == null) 636 return Collections.emptyList(); 637 638 List<Node> nodes = parentNode.ensureChildNodes(); 639 List<Node> siblings = new ArrayList<>(nodes.size() - 1); 640 for (Node node: nodes) 641 if (node != this) 642 siblings.add(node); 643 return siblings; 644 } 645 646 /** 647 Get this node's next sibling. 648 @return next sibling, or {@code null} if this is the last sibling 649 */ 650 public @Nullable Node nextSibling() { 651 if (parentNode == null) 652 return null; // root 653 654 final List<Node> siblings = parentNode.ensureChildNodes(); 655 final int index = siblingIndex() + 1; 656 if (siblings.size() > index) { 657 Node node = siblings.get(index); 658 assert (node.siblingIndex == index); // sanity test that invalidations haven't missed 659 return node; 660 } else 661 return null; 662 } 663 664 /** 665 Get this node's previous sibling. 666 @return the previous sibling, or @{code null} if this is the first sibling 667 */ 668 public @Nullable Node previousSibling() { 669 if (parentNode == null) 670 return null; // root 671 672 if (siblingIndex() > 0) 673 return parentNode.ensureChildNodes().get(siblingIndex-1); 674 else 675 return null; 676 } 677 678 /** 679 * Get the list index of this node in its node sibling list. E.g. if this is the first node 680 * sibling, returns 0. 681 * @return position in node sibling list 682 * @see org.jsoup.nodes.Element#elementSiblingIndex() 683 */ 684 public int siblingIndex() { 685 if (parentNode != null && !parentNode.childNodes.validChildren) 686 parentNode.reindexChildren(); 687 688 return siblingIndex; 689 } 690 691 protected void setSiblingIndex(int siblingIndex) { 692 this.siblingIndex = siblingIndex; 693 } 694 695 /** 696 Gets the first child node of this node, or {@code null} if there is none. This could be any Node type, such as an 697 Element, TextNode, Comment, etc. Use {@link Element#firstElementChild()} to get the first Element child. 698 @return the first child node, or null if there are no children. 699 @see Element#firstElementChild() 700 @see #lastChild() 701 @since 1.15.2 702 */ 703 public @Nullable Node firstChild() { 704 if (childNodeSize() == 0) return null; 705 return ensureChildNodes().get(0); 706 } 707 708 /** 709 Gets the last child node of this node, or {@code null} if there is none. 710 @return the last child node, or null if there are no children. 711 @see Element#lastElementChild() 712 @see #firstChild() 713 @since 1.15.2 714 */ 715 public @Nullable Node lastChild() { 716 final int size = childNodeSize(); 717 if (size == 0) return null; 718 List<Node> children = ensureChildNodes(); 719 return children.get(size - 1); 720 } 721 722 /** 723 Gets the first sibling of this node. That may be this node. 724 725 @return the first sibling node 726 @since 1.21.1 727 */ 728 public Node firstSibling() { 729 if (parentNode != null) { 730 //noinspection DataFlowIssue 731 return parentNode.firstChild(); 732 } else 733 return this; // orphan is its own first sibling 734 } 735 736 /** 737 Gets the last sibling of this node. That may be this node. 738 739 @return the last sibling (aka the parent's last child) 740 @since 1.21.1 741 */ 742 public Node lastSibling() { 743 if (parentNode != null) { 744 //noinspection DataFlowIssue (not nullable, would be this if no other sibs) 745 return parentNode.lastChild(); 746 } else 747 return this; 748 } 749 750 /** 751 Gets the next sibling Element of this node. E.g., if a {@code div} contains two {@code p}s, the 752 {@code nextElementSibling} of the first {@code p} is the second {@code p}. 753 <p>This is similar to {@link #nextSibling()}, but specifically finds only Elements.</p> 754 755 @return the next element, or null if there is no next element 756 @see #previousElementSibling() 757 */ 758 public @Nullable Element nextElementSibling() { 759 Node next = this; 760 while ((next = next.nextSibling()) != null) { 761 if (next instanceof Element) return (Element) next; 762 } 763 return null; 764 } 765 766 /** 767 Gets the previous Element sibling of this node. 768 769 @return the previous element, or null if there is no previous element 770 @see #nextElementSibling() 771 */ 772 public @Nullable Element previousElementSibling() { 773 Node prev = this; 774 while ((prev = prev.previousSibling()) != null) { 775 if (prev instanceof Element) return (Element) prev; 776 } 777 return null; 778 } 779 780 /** 781 * Perform a depth-first traversal through this node and its descendants. 782 * @param nodeVisitor the visitor callbacks to perform on each node 783 * @return this node, for chaining 784 */ 785 public Node traverse(NodeVisitor nodeVisitor) { 786 Validate.notNull(nodeVisitor); 787 nodeVisitor.traverse(this); 788 return this; 789 } 790 791 /** 792 Perform the supplied action on this Node and each of its descendants, during a depth-first traversal. Nodes may be 793 inspected, changed, added, replaced, or removed. 794 @param action the function to perform on the node 795 @return this Node, for chaining 796 @see Element#forEach(Consumer) 797 */ 798 public Node forEachNode(Consumer<? super Node> action) { 799 Validate.notNull(action); 800 nodeStream().forEach(action); 801 return this; 802 } 803 804 /** 805 * Perform a depth-first controllable traversal through this node and its descendants. 806 * @param nodeFilter the filter callbacks to perform on each node 807 * @return this node, for chaining 808 */ 809 public Node filter(NodeFilter nodeFilter) { 810 Validate.notNull(nodeFilter); 811 nodeFilter.traverse(this); 812 return this; 813 } 814 815 /** 816 Returns a Stream of this Node and all of its descendant Nodes. The stream has document order. 817 @return a stream of all nodes. 818 @see Element#stream() 819 @since 1.17.1 820 */ 821 public Stream<Node> nodeStream() { 822 return NodeUtils.stream(this, Node.class); 823 } 824 825 /** 826 Returns a Stream of this and descendant nodes, containing only nodes of the specified type. The stream has document 827 order. 828 @return a stream of nodes filtered by type. 829 @see Element#stream() 830 @since 1.17.1 831 */ 832 public <T extends Node> Stream<T> nodeStream(Class<T> type) { 833 return NodeUtils.stream(this, type); 834 } 835 836 /** 837 Get the outer HTML of this node. For example, on a {@code p} element, may return {@code <p>Para</p>}. 838 @return outer HTML 839 @see Element#html() 840 @see Element#text() 841 */ 842 public String outerHtml() { 843 StringBuilder sb = StringUtil.borrowBuilder(); 844 outerHtml(QuietAppendable.wrap(sb)); 845 return StringUtil.releaseBuilder(sb); 846 } 847 848 protected void outerHtml(Appendable accum) { 849 outerHtml(QuietAppendable.wrap(accum)); 850 } 851 852 protected void outerHtml(QuietAppendable accum) { 853 Printer printer = Printer.printerFor(this, accum); 854 printer.traverse(this); 855 } 856 857 /** 858 Get the outer HTML of this node. 859 860 @param accum accumulator to place HTML into 861 @param out 862 */ 863 abstract void outerHtmlHead(final QuietAppendable accum, final Document.OutputSettings out); 864 865 abstract void outerHtmlTail(final QuietAppendable accum, final Document.OutputSettings out); 866 867 /** 868 Write this node and its children to the given {@link Appendable}. 869 870 @param appendable the {@link Appendable} to write to. 871 @return the supplied {@link Appendable}, for chaining. 872 @throws org.jsoup.SerializationException if the appendable throws an IOException. 873 */ 874 public <T extends Appendable> T html(T appendable) { 875 outerHtml(appendable); 876 return appendable; 877 } 878 879 /** 880 Get the source range (start and end positions) in the original input source from which this node was parsed. 881 Position tracking must be enabled prior to parsing the content. For an Element, this will be the positions of the 882 start tag. 883 @return the range for the start of the node, or {@code untracked} if its range was not tracked. 884 @see org.jsoup.parser.Parser#setTrackPosition(boolean) 885 @see Range#isImplicit() 886 @see Element#endSourceRange() 887 @see Attributes#sourceRange(String name) 888 @since 1.15.2 889 */ 890 public Range sourceRange() { 891 return Range.of(this, true); 892 } 893 894 /** 895 * Gets this node's outer HTML. 896 * @return outer HTML. 897 * @see #outerHtml() 898 */ 899 @Override 900 public String toString() { 901 return outerHtml(); 902 } 903 904 /** @deprecated internal method moved into Printer; will be removed in a future version */ 905 @Deprecated 906 protected void indent(Appendable accum, int depth, Document.OutputSettings out) throws IOException { 907 accum.append('\n').append(StringUtil.padding(depth * out.indentAmount(), out.maxPaddingWidth())); 908 } 909 910 /** 911 * Check if this node is the same instance of another (object identity test). 912 * <p>For a node value equality check, see {@link #hasSameValue(Object)}</p> 913 * @param o other object to compare to 914 * @return true if the content of this node is the same as the other 915 * @see Node#hasSameValue(Object) 916 */ 917 @Override 918 public boolean equals(@Nullable Object o) { 919 // implemented just so that javadoc is clear this is an identity test 920 return this == o; 921 } 922 923 /** 924 Provides a hashCode for this Node, based on its object identity. Changes to the Node's content will not impact the 925 result. 926 @return an object identity based hashcode for this Node 927 */ 928 @Override 929 public int hashCode() { 930 // implemented so that javadoc and scanners are clear this is an identity test 931 return super.hashCode(); 932 } 933 934 /** 935 * Check if this node has the same content as another node. A node is considered the same if its name, attributes and content match the 936 * other node; particularly its position in the tree does not influence its similarity. 937 * @param o other object to compare to 938 * @return true if the content of this node is the same as the other 939 */ 940 public boolean hasSameValue(@Nullable Object o) { 941 if (this == o) return true; 942 if (o == null || getClass() != o.getClass()) return false; 943 944 return this.outerHtml().equals(((Node) o).outerHtml()); 945 } 946 947 /** 948 Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings. 949 <p><ul> 950 <li>If this node is a {@link LeafNode}, the clone will have no parent.</li> 951 <li>If this node is an {@link Element}, the clone will have a simple owning {@link Document} to retain the 952 configured output settings and parser.</li> 953 </ul></p> 954 <p>The cloned node may be adopted into another Document or node structure using 955 {@link Element#appendChild(Node)}.</p> 956 957 @return a stand-alone cloned node, including clones of any children 958 @see #shallowClone() 959 */ 960 @SuppressWarnings("MethodDoesntCallSuperMethod") 961 // because it does call super.clone in doClone - analysis just isn't following 962 @Override 963 public Node clone() { 964 Node thisClone = doClone(null); // splits for orphan 965 966 // Queue up nodes that need their children cloned (BFS). 967 final LinkedList<Node> nodesToProcess = new LinkedList<>(); 968 nodesToProcess.add(thisClone); 969 970 while (!nodesToProcess.isEmpty()) { 971 Node currParent = nodesToProcess.remove(); 972 973 final int size = currParent.childNodeSize(); 974 for (int i = 0; i < size; i++) { 975 final List<Node> childNodes = currParent.ensureChildNodes(); 976 Node childClone = childNodes.get(i).doClone(currParent); 977 childNodes.set(i, childClone); 978 nodesToProcess.add(childClone); 979 } 980 } 981 982 return thisClone; 983 } 984 985 /** 986 * Create a stand-alone, shallow copy of this node. None of its children (if any) will be cloned, and it will have 987 * no parent or sibling nodes. 988 * @return a single independent copy of this node 989 * @see #clone() 990 */ 991 public Node shallowClone() { 992 return doClone(null); 993 } 994 995 /* 996 * Return a clone of the node using the given parent (which can be null). 997 * Not a deep copy of children. 998 */ 999 protected Node doClone(@Nullable Node parent) { 1000 assert parent == null || parent instanceof Element; 1001 Node clone; 1002 1003 try { 1004 clone = (Node) super.clone(); 1005 } catch (CloneNotSupportedException e) { 1006 throw new RuntimeException(e); 1007 } 1008 1009 clone.parentNode = (Element) parent; // can be null, to create an orphan split 1010 clone.siblingIndex = parent == null ? 0 : siblingIndex(); 1011 // if not keeping the parent, shallowClone the ownerDocument to preserve its settings 1012 if (parent == null && !(this instanceof Document)) { 1013 Document doc = ownerDocument(); 1014 if (doc != null) { 1015 Document docClone = doc.shallowClone(); 1016 clone.parentNode = docClone; 1017 docClone.ensureChildNodes().add(clone); 1018 } 1019 } 1020 1021 return clone; 1022 } 1023}