package org.jsoup.nodes;

import org.jsoup.helper.Validate;
import org.jsoup.internal.QuietAppendable;
import org.jsoup.internal.StringUtil;
import org.jsoup.parser.ParseSettings;
import org.jsoup.select.NodeFilter;
import org.jsoup.select.NodeVisitor;
import org.jspecify.annotations.Nullable;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.function.Consumer;
import java.util.stream.Stream;

/**
 The base, abstract Node model. {@link Element}, {@link Document}, {@link Comment}, {@link TextNode}, et al.,
 are instances of Node.

 @author Jonathan Hedley, jonathan@hedley.net */
public abstract class Node implements Cloneable {
    static final List<Node> EmptyNodes = Collections.emptyList();
    static final String EmptyString = "";
    @Nullable Element parentNode; // Nodes don't always have parents
    int siblingIndex;

    /**
     * Default constructor. Doesn't set up base uri, children, or attributes; use with caution.
     */
    protected Node() {
    }

    /**
     Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof).
     @return node name
     */
    public abstract String nodeName();

    /**
     Get the normalized name of this node. For node types other than Element, this is the same as {@link #nodeName()}.
     For an Element, will be the lower-cased tag name.
     @return normalized node name
     @since 1.15.4
     */
    public String normalName() {
        return nodeName();
    }

    /**
     Get the node's value. For a TextNode, the whole text; for a Comment, the comment data; for an Element,
     wholeOwnText. Returns "" if there is no value.
     @return the node's value
     */
    public String nodeValue() {
        return "";
    }

    /**
     Test if this node has the specified normalized name, in any namespace.
     * @param normalName a normalized element name (e.g. {@code div}).
     * @return true if the element's normal name matches exactly
     * @since 1.17.2
     */
    public boolean nameIs(String normalName) {
        return normalName().equals(normalName);
    }

    /**
     Test if this node's parent has the specified normalized name.
     * @param normalName a normalized name (e.g. {@code div}).
     * @return true if the parent element's normal name matches exactly
     * @since 1.17.2
     */
    public boolean parentNameIs(String normalName) {
        return parentNode != null && parentNode.normalName().equals(normalName);
    }

    /**
     Test if this node's parent is an Element with the specified normalized name and namespace.
     * @param normalName a normalized element name (e.g. {@code div}).
     * @param namespace the namespace
     * @return true if the parent element's normal name matches exactly, and that element is in the specified namespace
     * @since 1.17.2
     */
    public boolean parentElementIs(String normalName, String namespace) {
        // parentNode is declared as an Element, so only the null check is required
        return parentNode != null && parentNode.elementIs(normalName, namespace);
    }

    /**
     * Check if this Node has an actual Attributes object.
     */
    protected abstract boolean hasAttributes();

    /**
     Checks if this node has a parent. Nodes won't have parents if (e.g.) they are newly created and not added as a child
     to an existing node, or if they are a {@link #shallowClone()}. In such cases, {@link #parent()} will return {@code null}.
     @return if this node has a parent.
     */
    public boolean hasParent() {
        return parentNode != null;
    }

    /**
     * Get an attribute's value by its key. <b>Case insensitive</b>
     * <p>
     * To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs:</b></code>,
     * which is a shortcut to the {@link #absUrl} method.
     * </p>
     * E.g.:
     * <blockquote><code>String url = a.attr("abs:href");</code></blockquote>
     *
     * @param attributeKey The attribute key.
     * @return The attribute, or empty string if not present (to avoid nulls).
     * @see #attributes()
     * @see #hasAttr(String)
     * @see #absUrl(String)
     */
    public String attr(String attributeKey) {
        Validate.notNull(attributeKey);
        if (!hasAttributes())
            return EmptyString;

        String val = attributes().getIgnoreCase(attributeKey);
        if (val.length() > 0)
            return val;
        else if (attributeKey.startsWith("abs:")) // no literal "abs:xxx" attribute; resolve "xxx" against the base URI
            return absUrl(attributeKey.substring("abs:".length()));
        else return EmptyString;
    }

    /**
     * Get each of the Element's attributes.
     * @return attributes (which implements Iterable, with the same order as presented in the original HTML).
     */
    public abstract Attributes attributes();

    /**
     Get the number of attributes that this Node has.
     @return the number of attributes
     @since 1.14.2
     */
    public int attributesSize() {
        // added so that we can test how many attributes exist without implicitly creating the Attributes object
        return hasAttributes() ? attributes().size() : 0;
    }

    /**
     * Set an attribute (key=value). If the attribute already exists, it is replaced. The attribute key comparison is
     * <b>case insensitive</b>. The key will be set with case sensitivity as set in the parser settings.
     * @param attributeKey The attribute key.
     * @param attributeValue The attribute value.
     * @return this (for chaining)
     */
    public Node attr(String attributeKey, String attributeValue) {
        Document doc = ownerDocument();
        // without an owning Document there are no parser settings to consult, so fall back to the HTML defaults
        ParseSettings settings = doc != null ? doc.parser().settings() : ParseSettings.htmlDefault;
        attributeKey = settings.normalizeAttribute(attributeKey);
        attributes().putIgnoreCase(attributeKey, attributeValue);
        return this;
    }

    /**
     * Test if this Node has an attribute. <b>Case insensitive</b>.
     * @param attributeKey The attribute key to check.
     * @return true if the attribute exists, false if not.
     */
    public boolean hasAttr(String attributeKey) {
        Validate.notNull(attributeKey);
        if (!hasAttributes())
            return false;

        if (attributeKey.startsWith("abs:")) {
            String key = attributeKey.substring("abs:".length());
            if (attributes().hasKeyIgnoreCase(key) && !absUrl(key).isEmpty())
                return true;
        }
        // otherwise (or if the abs: form did not resolve), look for the key exactly as supplied
        return attributes().hasKeyIgnoreCase(attributeKey);
    }

    /**
     * Remove an attribute from this node.
     * @param attributeKey The attribute to remove.
     * @return this (for chaining)
     */
    public Node removeAttr(String attributeKey) {
        Validate.notNull(attributeKey);
        if (hasAttributes())
            attributes().removeIgnoreCase(attributeKey);
        return this;
    }

    /**
     * Clear (remove) each of the attributes in this node.
     * @return this, for chaining
     */
    public Node clearAttributes() {
        if (hasAttributes()) {
            Iterator<Attribute> it = attributes().iterator();
            while (it.hasNext()) {
                it.next();
                it.remove();
            }
        }
        return this;
    }

    /**
     Get the base URI that applies to this node. Will return an empty string if not defined. Used to make relative links
     absolute.

     @return base URI
     @see #absUrl
     */
    public abstract String baseUri();

    /**
     * Set the baseUri for just this node (not its descendants), if this Node tracks base URIs.
     * @param baseUri new URI
     */
    protected abstract void doSetBaseUri(String baseUri);

    /**
     Update the base URI of this node and all of its descendants.
     <p>This method itself only validates the argument and delegates to {@link #doSetBaseUri(String)}; any effect on
     descendants comes from how the implementing classes resolve their base URI (not visible in this class).</p>
     @param baseUri base URI to set
     */
    public void setBaseUri(final String baseUri) {
        Validate.notNull(baseUri);
        doSetBaseUri(baseUri);
    }

    /**
     * Get an absolute URL from a URL attribute that may be relative (such as an {@code <a href>} or
     * {@code <img src>}).
     * <p>
     * E.g.: <code>String absUrl = linkEl.absUrl("href");</code>
     * </p>
     * <p>
     * If the attribute value is already absolute (i.e. it starts with a protocol, like
     * <code>http://</code> or <code>https://</code> etc), and it successfully parses as a URL, the attribute is
     * returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made
     * absolute using that.
     * </p>
     * <p>
     * As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, e.g.:
     * <code>String absUrl = linkEl.attr("abs:href");</code>
     * </p>
     *
     * @param attributeKey The attribute key
     * @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or
     * could not be made successfully into a URL.
     * @see #attr
     * @see java.net.URL#URL(java.net.URL, String)
     */
    public String absUrl(String attributeKey) {
        Validate.notEmpty(attributeKey);
        if (!(hasAttributes() && attributes().hasKeyIgnoreCase(attributeKey))) // not using hasAttr, so that we don't recurse down hasAttr->absUrl
            return "";

        return StringUtil.resolve(baseUri(), attributes().getIgnoreCase(attributeKey));
    }

    protected abstract List<Node> ensureChildNodes();

    /**
     Get a child node by its 0-based index.
     @param index index of child node
     @return the child node at this index.
     @throws IndexOutOfBoundsException if the index is out of bounds.
     */
    public Node childNode(int index) {
        return ensureChildNodes().get(index);
    }

    /**
     Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes
     themselves can be manipulated.
     @return list of children. If no children, returns an empty list.
     */
    public List<Node> childNodes() {
        if (childNodeSize() == 0)
            return EmptyNodes;

        // copied so that looping and moving will not throw a CME as the source changes
        List<Node> rewrap = new ArrayList<>(ensureChildNodes());
        return Collections.unmodifiableList(rewrap);
    }

    /**
     * Returns a deep copy of this node's children. Changes made to these nodes will not be reflected in the original
     * nodes
     * @return a deep copy of this node's children
     */
    public List<Node> childNodesCopy() {
        final List<Node> nodes = ensureChildNodes();
        final ArrayList<Node> children = new ArrayList<>(nodes.size());
        for (Node node : nodes) {
            children.add(node.clone());
        }
        return children;
    }

    /**
     * Get the number of child nodes that this node holds.
     * @return the number of child nodes that this node holds.
     */
    public abstract int childNodeSize();

    protected Node[] childNodesAsArray() {
        return ensureChildNodes().toArray(new Node[0]);
    }

    /**
     * Delete all this node's children.
     * @return this node, for chaining
     */
    public abstract Node empty();

    /**
     Gets this node's parent node. This is always an Element.
     @return parent node; or null if no parent.
     @see #hasParent()
     @see #parentElement()
     */
    public @Nullable Node parent() {
        return parentNode;
    }

    /**
     Gets this node's parent Element.
     @return parent element; or null if this node has no parent.
     @see #hasParent()
     @since 1.21.1
     */
    public @Nullable Element parentElement() {
        return parentNode;
    }

    /**
     Gets this node's parent node. Not overridable by extending classes, so useful if you really just need the Node type.
     @return parent node; or null if no parent.
     */
    public @Nullable final Node parentNode() {
        return parentNode;
    }

    /**
     * Get this node's root node; that is, its topmost ancestor. If this node is the top ancestor, returns {@code this}.
     * @return topmost ancestor.
     */
    public Node root() {
        Node node = this;
        while (node.parentNode != null)
            node = node.parentNode;
        return node;
    }

    /**
     * Gets the Document associated with this Node.
     * @return the Document associated with this Node, or null if there is no such Document.
     */
    public @Nullable Document ownerDocument() {
        Node root = root();
        return (root instanceof Document) ? (Document) root : null;
    }

    /**
     * Remove (delete) this node from the DOM tree. If this node has children, they are also removed. If this node is
     * an orphan, nothing happens.
     */
    public void remove() {
        if (parentNode != null)
            parentNode.removeChild(this);
    }

    /**
     * Insert the specified HTML into the DOM before this node (as a preceding sibling).
     * @param html HTML to add before this node
     * @return this node, for chaining
     * @see #after(String)
     */
    public Node before(String html) {
        addSiblingHtml(siblingIndex, html);
        return this;
    }

    /**
     * Insert the specified node into the DOM before this node (as a preceding sibling). Inserting a node before
     * itself is a no-op.
     * @param node to add before this node
     * @return this node, for chaining
     * @see #after(Node)
     */
    public Node before(Node node) {
        Validate.notNull(node);
        Validate.notNull(parentNode);

        // inserting a node relative to itself: removing it first would orphan this node and leave no parent to add to
        if (node == this) return this;

        // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add
        if (node.parentNode == parentNode) node.remove();

        parentNode.addChildren(siblingIndex, node);
        return this;
    }

    /**
     * Insert the specified HTML into the DOM after this node (as a following sibling).
     * @param html HTML to add after this node
     * @return this node, for chaining
     * @see #before(String)
     */
    public Node after(String html) {
        addSiblingHtml(siblingIndex + 1, html);
        return this;
    }

    /**
     * Insert the specified node into the DOM after this node (as a following sibling). Inserting a node after
     * itself is a no-op.
     * @param node to add after this node
     * @return this node, for chaining
     * @see #before(Node)
     */
    public Node after(Node node) {
        Validate.notNull(node);
        Validate.notNull(parentNode);

        // inserting a node relative to itself: removing it first would orphan this node and leave no parent to add to
        if (node == this) return this;

        // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add
        if (node.parentNode == parentNode) node.remove();

        parentNode.addChildren(siblingIndex + 1, node);
        return this;
    }

    /**
     * Parses the HTML as a fragment in the context of this node's parent, and inserts the resulting nodes into the
     * parent's children at the given index. This node must have a parent.
     */
    private void addSiblingHtml(int index, String html) {
        Validate.notNull(html);
        Validate.notNull(parentNode);

        Element context = parentNode; // validated above; the parent is always an Element
        List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri());
        parentNode.addChildren(index, nodes.toArray(new Node[0]));
    }

    /**
     Wrap the supplied HTML around this node.

     @param html HTML to wrap around this node, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. If
     the input HTML does not parse to a result starting with an Element, this will be a no-op.
     @return this node, for chaining.
     */
    public Node wrap(String html) {
        Validate.notEmpty(html);

        // Parse context - parent (because wrapping), this, or null
        Element context =
            parentNode != null ? parentNode :
                this instanceof Element ? (Element) this :
                    null;
        List<Node> wrapChildren = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri());
        if (wrapChildren.isEmpty()) // the parse produced no nodes at all; noop, as documented, rather than failing on get(0)
            return this;
        Node wrapNode = wrapChildren.get(0);
        if (!(wrapNode instanceof Element)) // nothing to wrap with; noop
            return this;

        Element wrap = (Element) wrapNode;
        Element deepest = getDeepChild(wrap);
        if (parentNode != null)
            parentNode.replaceChild(this, wrap);
        deepest.addChildren(this); // side effect of tricking wrapChildren to lose first

        // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder
        //noinspection ForLoopReplaceableByForEach (because it allocates an Iterator which is wasteful here)
        for (int i = 0; i < wrapChildren.size(); i++) {
            Node remainder = wrapChildren.get(i);
            // if no parent, this could be the wrap node, so skip
            if (wrap == remainder)
                continue;

            if (remainder.parentNode != null)
                remainder.parentNode.removeChild(remainder);
            wrap.after(remainder);
        }
        return this;
    }

    /**
     * Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping
     * the node but keeping its children.
     * <p>
     * For example, with the input html:
     * </p>
     * <p>{@code <div>One <span>Two <b>Three</b></span></div>}</p>
     * Calling {@code element.unwrap()} on the {@code span} element will result in the html:
     * <p>{@code <div>One Two <b>Three</b></div>}</p>
     * and the {@code "Two "} {@link TextNode} being returned.
     *
     * @return the first child of this node, after the node has been unwrapped. {@code null} if the node had no children.
     * @see #remove()
     * @see #wrap(String)
     */
    public @Nullable Node unwrap() {
        Validate.notNull(parentNode);
        Node firstChild = firstChild(); // captured before the children are moved out
        parentNode.addChildren(siblingIndex, this.childNodesAsArray());
        this.remove();

        return firstChild;
    }

    /**
     * Follows the chain of first element children down from the supplied element, returning the last element reached
     * (which is the supplied element itself if it has no element children).
     */
    private static Element getDeepChild(Element el) {
        Element child = el.firstElementChild();
        while (child != null) {
            el = child;
            child = child.firstElementChild();
        }
        return el;
    }

    /**
     * Replace this node in the DOM with the supplied node.
     * @param in the node that will replace the existing node.
     */
    public void replaceWith(Node in) {
        Validate.notNull(in);
        if (parentNode == null) parentNode = in.parentNode; // allows old to have been temp removed before replacing
        Validate.notNull(parentNode);
        parentNode.replaceChild(this, in);
    }

    /**
     * Sets this node's parent, first detaching this node from its current parent (if any). Does not add this node to
     * the new parent's child list.
     * @param parentNode the new parent; must not be null, and is expected to be an Element
     */
    protected void setParentNode(Node parentNode) {
        Validate.notNull(parentNode);
        if (this.parentNode != null)
            this.parentNode.removeChild(this);
        assert parentNode instanceof Element;
        this.parentNode = (Element) parentNode;
    }

    /**
     * Replaces the child {@code out} of this node with {@code in}, at the same sibling index. {@code in} is first
     * removed from its current parent (if any), and {@code out} is left without a parent.
     * @param out the existing child to replace; must be a child of this node
     * @param in the replacement node
     */
    protected void replaceChild(Node out, Node in) {
        Validate.isTrue(out.parentNode == this);
        Validate.notNull(in);
        if (out == in) return; // no-op self replacement

        if (in.parentNode != null)
            in.parentNode.removeChild(in);

        final int index = out.siblingIndex;
        ensureChildNodes().set(index, in);
        assert this instanceof Element;
        in.parentNode = (Element) this;
        in.setSiblingIndex(index);
        out.parentNode = null;
    }

    /**
     * Removes the supplied child from this node's child list, reindexes the following siblings, and clears the
     * child's parent.
     * @param out the child to remove; must be a child of this node
     */
    protected void removeChild(Node out) {
        Validate.isTrue(out.parentNode == this);
        final int index = out.siblingIndex;
        ensureChildNodes().remove(index);
        reindexChildren(index);
        out.parentNode = null;
    }

    /**
     * Appends the supplied nodes to the end of this node's child list, reparenting each.
     * @param children nodes to append
     */
    protected void addChildren(Node... children) {
        //most used. short circuit addChildren(int), which hits reindex children and array copy
        final List<Node> nodes = ensureChildNodes();

        for (Node child: children) {
            reparentChild(child);
            nodes.add(child);
            child.setSiblingIndex(nodes.size()-1);
        }
    }

    /**
     * Inserts the supplied nodes into this node's child list at the given index, reparenting each and reindexing the
     * siblings from that index on.
     * @param index 0-based position in the child list at which to insert
     * @param children nodes to insert; if empty, nothing happens
     */
    protected void addChildren(int index, Node... children) {
        Validate.notNull(children);
        if (children.length == 0) {
            return;
        }
        final List<Node> nodes = ensureChildNodes();

        // fast path - if used as a wrap (index=0, children = child[0].parent.children - do inplace
        final Node firstParent = children[0].parent();
        if (firstParent != null && firstParent.childNodeSize() == children.length) {
            boolean sameList = true;
            final List<Node> firstParentNodes = firstParent.ensureChildNodes();
            // identity check contents to see if same
            int i = children.length;
            while (i-- > 0) {
                if (children[i] != firstParentNodes.get(i)) {
                    sameList = false;
                    break;
                }
            }
            if (sameList) { // moving, so OK to empty firstParent and short-circuit
                boolean wasEmpty = childNodeSize() == 0;
                firstParent.empty();
                nodes.addAll(index, Arrays.asList(children));
                i = children.length;
                assert this instanceof Element;
                while (i-- > 0) {
                    children[i].parentNode = (Element) this;
                }
                if (!(wasEmpty && children[0].siblingIndex == 0)) // skip reindexing if we just moved
                    reindexChildren(index);
                return;
            }
        }

        Validate.noNullElements(children);
        for (Node child : children) {
            reparentChild(child);
        }
        nodes.addAll(index, Arrays.asList(children));
        reindexChildren(index);
    }

    /**
     * Makes this node the parent of the supplied child (detaching the child from any previous parent).
     * @param child the node to reparent
     */
    protected void reparentChild(Node child) {
        child.setParentNode(this);
    }

    /**
     * Resets the sibling index of each child from {@code start} to the end of the child list, so that it matches the
     * child's position in the list.
     */
    private void reindexChildren(int start) {
        final int size = childNodeSize();
        if (size == 0) return;
        final List<Node> childNodes = ensureChildNodes();
        for (int i = start; i < size; i++) {
            childNodes.get(i).setSiblingIndex(i);
        }
    }

    /**
     Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not
     include this node (a node is not a sibling of itself).
     @return node siblings. If the node has no parent, returns an empty list.
     */
    public List<Node> siblingNodes() {
        if (parentNode == null)
            return Collections.emptyList();

        List<Node> nodes = parentNode.ensureChildNodes();
        List<Node> siblings = new ArrayList<>(nodes.size() - 1);
        for (Node node: nodes)
            if (node != this)
                siblings.add(node);
        return siblings;
    }

    /**
     Get this node's next sibling.
     @return next sibling, or {@code null} if this is the last sibling
     */
    public @Nullable Node nextSibling() {
        if (parentNode == null)
            return null; // root

        final List<Node> siblings = parentNode.ensureChildNodes();
        final int index = siblingIndex+1;
        if (siblings.size() > index)
            return siblings.get(index);
        else
            return null;
    }

    /**
     Get this node's previous sibling.
     @return the previous sibling, or {@code null} if this is the first sibling
     */
    public @Nullable Node previousSibling() {
        if (parentNode == null)
            return null; // root

        if (siblingIndex > 0)
            return parentNode.ensureChildNodes().get(siblingIndex-1);
        else
            return null;
    }

    /**
     * Get the list index of this node in its node sibling list. E.g. if this is the first node
     * sibling, returns 0.
     * @return position in node sibling list
     * @see org.jsoup.nodes.Element#elementSiblingIndex()
     */
    public int siblingIndex() {
        return siblingIndex;
    }

    protected void setSiblingIndex(int siblingIndex) {
        this.siblingIndex = siblingIndex;
    }

    /**
     Gets the first child node of this node, or {@code null} if there is none. This could be any Node type, such as an
     Element, TextNode, Comment, etc. Use {@link Element#firstElementChild()} to get the first Element child.
     @return the first child node, or null if there are no children.
     @see Element#firstElementChild()
     @see #lastChild()
     @since 1.15.2
     */
    public @Nullable Node firstChild() {
        if (childNodeSize() == 0) return null;
        return ensureChildNodes().get(0);
    }

    /**
     Gets the last child node of this node, or {@code null} if there is none.
     @return the last child node, or null if there are no children.
     @see Element#lastElementChild()
     @see #firstChild()
     @since 1.15.2
     */
    public @Nullable Node lastChild() {
        final int size = childNodeSize();
        if (size == 0) return null;
        List<Node> children = ensureChildNodes();
        return children.get(size - 1);
    }

    /**
     Gets the first sibling of this node. That may be this node.

     @return the first sibling node
     @since 1.21.1
     */
    public Node firstSibling() {
        if (parentNode != null) {
            //noinspection DataFlowIssue
            return parentNode.firstChild();
        } else
            return this; // orphan is its own first sibling
    }

    /**
     Gets the last sibling of this node. That may be this node.

     @return the last sibling (aka the parent's last child)
     @since 1.21.1
     */
    public Node lastSibling() {
        if (parentNode != null) {
            //noinspection DataFlowIssue (not nullable, would be this if no other sibs)
            return parentNode.lastChild();
        } else
            return this;
    }

    /**
     Gets the next sibling Element of this node. E.g., if a {@code div} contains two {@code p}s, the
     {@code nextElementSibling} of the first {@code p} is the second {@code p}.
     <p>This is similar to {@link #nextSibling()}, but specifically finds only Elements.</p>

     @return the next element, or null if there is no next element
     @see #previousElementSibling()
     */
    public @Nullable Element nextElementSibling() {
        Node next = this;
        while ((next = next.nextSibling()) != null) {
            if (next instanceof Element) return (Element) next;
        }
        return null;
    }

    /**
     Gets the previous Element sibling of this node.

     @return the previous element, or null if there is no previous element
     @see #nextElementSibling()
     */
    public @Nullable Element previousElementSibling() {
        Node prev = this;
        while ((prev = prev.previousSibling()) != null) {
            if (prev instanceof Element) return (Element) prev;
        }
        return null;
    }

    /**
     * Perform a depth-first traversal through this node and its descendants.
     * @param nodeVisitor the visitor callbacks to perform on each node
     * @return this node, for chaining
     */
    public Node traverse(NodeVisitor nodeVisitor) {
        Validate.notNull(nodeVisitor);
        nodeVisitor.traverse(this);
        return this;
    }

    /**
     Perform the supplied action on this Node and each of its descendants, during a depth-first traversal. Nodes may be
     inspected, changed, added, replaced, or removed.
     @param action the function to perform on the node
     @return this Node, for chaining
     @see Element#forEach(Consumer)
     */
    public Node forEachNode(Consumer<? super Node> action) {
        Validate.notNull(action);
        nodeStream().forEach(action);
        return this;
    }

    /**
     * Perform a depth-first controllable traversal through this node and its descendants.
     * @param nodeFilter the filter callbacks to perform on each node
     * @return this node, for chaining
     */
    public Node filter(NodeFilter nodeFilter) {
        Validate.notNull(nodeFilter);
        nodeFilter.traverse(this);
        return this;
    }

    /**
     Returns a Stream of this Node and all of its descendant Nodes. The stream has document order.
     @return a stream of all nodes.
     @see Element#stream()
     @since 1.17.1
     */
    public Stream<Node> nodeStream() {
        return NodeUtils.stream(this, Node.class);
    }

    /**
     Returns a Stream of this and descendant nodes, containing only nodes of the specified type. The stream has document
     order.
     @param type the node type to retain in the stream
     @return a stream of nodes filtered by type.
     @see Element#stream()
     @since 1.17.1
     */
    public <T extends Node> Stream<T> nodeStream(Class<T> type) {
        return NodeUtils.stream(this, type);
    }

    /**
     Get the outer HTML of this node. For example, on a {@code p} element, may return {@code <p>Para</p>}.
     @return outer HTML
     @see Element#html()
     @see Element#text()
     */
    public String outerHtml() {
        StringBuilder sb = StringUtil.borrowBuilder();
        outerHtml(QuietAppendable.wrap(sb));
        return StringUtil.releaseBuilder(sb);
    }

    protected void outerHtml(Appendable accum) {
        outerHtml(QuietAppendable.wrap(accum));
    }

    protected void outerHtml(QuietAppendable accum) {
        Printer printer = Printer.printerFor(this, accum);
        printer.traverse(this);
    }

    /**
     Get the outer HTML of this node.

     @param accum accumulator to place HTML into
     @param out the document output settings to apply
     */
    abstract void outerHtmlHead(final QuietAppendable accum, final Document.OutputSettings out);

    // NOTE(review): counterpart to outerHtmlHead; presumably writes the closing portion of the node - confirm in implementing classes
    abstract void outerHtmlTail(final QuietAppendable accum, final Document.OutputSettings out);

    /**
     Write this node and its children to the given {@link Appendable}.

     @param appendable the {@link Appendable} to write to.
     @return the supplied {@link Appendable}, for chaining.
     @throws org.jsoup.SerializationException if the appendable throws an IOException.
     */
    public <T extends Appendable> T html(T appendable) {
        outerHtml(appendable);
        return appendable;
    }

    /**
     Get the source range (start and end positions) in the original input source from which this node was parsed.
     Position tracking must be enabled prior to parsing the content. For an Element, this will be the positions of the
     start tag.
     @return the range for the start of the node, or {@code untracked} if its range was not tracked.
     @see org.jsoup.parser.Parser#setTrackPosition(boolean)
     @see Range#isImplicit()
     @see Element#endSourceRange()
     @see Attributes#sourceRange(String name)
     @since 1.15.2
     */
    public Range sourceRange() {
        return Range.of(this, true);
    }

    /**
     * Gets this node's outer HTML.
     * @return outer HTML.
     * @see #outerHtml()
     */
    @Override
    public String toString() {
        return outerHtml();
    }

    /** @deprecated internal method moved into Printer; will be removed in a future version */
    @Deprecated
    protected void indent(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
        accum.append('\n').append(StringUtil.padding(depth * out.indentAmount(), out.maxPaddingWidth()));
    }

    /**
     * Check if this node is the same instance of another (object identity test).
     * <p>For a node value equality check, see {@link #hasSameValue(Object)}</p>
     * @param o other object to compare to
     * @return true if the other object is this very same node instance
     * @see Node#hasSameValue(Object)
     */
    @Override
    public boolean equals(@Nullable Object o) {
        // implemented just so that javadoc is clear this is an identity test
        return this == o;
    }

    /**
     Provides a hashCode for this Node, based on its object identity. Changes to the Node's content will not impact the
     result.
     @return an object identity based hashcode for this Node
     */
    @Override
    public int hashCode() {
        // implemented so that javadoc and scanners are clear this is an identity test
        return super.hashCode();
    }

    /**
     * Check if this node has the same content as another node. A node is considered the same if its name, attributes and content match the
     * other node; particularly its position in the tree does not influence its similarity.
     * @param o other object to compare to
     * @return true if the content of this node is the same as the other
     */
    public boolean hasSameValue(@Nullable Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        return this.outerHtml().equals(((Node) o).outerHtml());
    }

    /**
     Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings.
     <ul>
     <li>If this node is a {@link LeafNode}, the clone will have no parent.</li>
     <li>If this node is an {@link Element}, the clone will have a simple owning {@link Document} to retain the
     configured output settings and parser.</li>
     </ul>
     <p>The cloned node may be adopted into another Document or node structure using
     {@link Element#appendChild(Node)}.</p>

     @return a stand-alone cloned node, including clones of any children
     @see #shallowClone()
     */
    @SuppressWarnings("MethodDoesntCallSuperMethod")
    // because it does call super.clone in doClone - analysis just isn't following
    @Override
    public Node clone() {
        Node thisClone = doClone(null); // splits for orphan

        // Queue up nodes that need their children cloned (BFS).
        final LinkedList<Node> nodesToProcess = new LinkedList<>();
        nodesToProcess.add(thisClone);

        while (!nodesToProcess.isEmpty()) {
            Node currParent = nodesToProcess.remove();

            final int size = currParent.childNodeSize();
            for (int i = 0; i < size; i++) {
                // fetched inside the loop, so ensureChildNodes() is never called on a node that reports no children
                final List<Node> childNodes = currParent.ensureChildNodes();
                Node childClone = childNodes.get(i).doClone(currParent);
                childNodes.set(i, childClone);
                nodesToProcess.add(childClone);
            }
        }

        return thisClone;
    }

    /**
     * Create a stand-alone, shallow copy of this node. None of its children (if any) will be cloned, and it will have
     * no parent or sibling nodes.
     * @return a single independent copy of this node
     * @see #clone()
     */
    public Node shallowClone() {
        return doClone(null);
    }

    /*
     * Return a clone of the node using the given parent (which can be null).
     * Not a deep copy of children.
     */
    protected Node doClone(@Nullable Node parent) {
        assert parent == null || parent instanceof Element;
        Node clone;

        try {
            clone = (Node) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }

        clone.parentNode = (Element) parent; // can be null, to create an orphan split
        clone.siblingIndex = parent == null ? 0 : siblingIndex;
        // if not keeping the parent, shallowClone the ownerDocument to preserve its settings
        if (parent == null && !(this instanceof Document)) {
            Document doc = ownerDocument();
            if (doc != null) {
                Document docClone = doc.shallowClone();
                clone.parentNode = docClone;
                docClone.ensureChildNodes().add(clone);
            }
        }

        return clone;
    }
}