package org.jsoup.nodes;

import org.jsoup.helper.Validate;
import org.jsoup.internal.StringUtil;
import org.jsoup.parser.ParseSettings;
import org.jsoup.select.NodeFilter;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.jspecify.annotations.Nullable;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.function.Consumer;
import java.util.stream.Stream;

/**
 The base, abstract Node model. {@link Element}, {@link Document}, {@link Comment}, {@link TextNode}, et al.,
 are instances of Node.

 @author Jonathan Hedley, jonathan@hedley.net */
public abstract class Node implements Cloneable {
    static final List<Node> EmptyNodes = Collections.emptyList();
    static final String EmptyString = "";
    @Nullable Node parentNode; // Nodes don't always have parents
    int siblingIndex;

    /**
     * Default constructor. Doesn't set up base uri, children, or attributes; use with caution.
     */
    protected Node() {
    }

    /**
     Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof).
     @return node name
     */
    public abstract String nodeName();

    /**
     Get the normalized name of this node. For node types other than Element, this is the same as {@link #nodeName()}.
     For an Element, will be the lower-cased tag name.
     @return normalized node name
     @since 1.15.4.
     */
    public String normalName() {
        return nodeName();
    }

    /**
     Test if this node has the specified normalized name, in any namespace.
     * @param normalName a normalized element name (e.g. {@code div}).
     * @return true if the element's normal name matches exactly
     * @since 1.17.2
     */
    public boolean nameIs(String normalName) {
        return normalName().equals(normalName);
    }

    /**
     Test if this node's parent has the specified normalized name.
     * @param normalName a normalized name (e.g. {@code div}).
     * @return true if the parent element's normal name matches exactly
     * @since 1.17.2
     */
    public boolean parentNameIs(String normalName) {
        return parentNode != null && parentNode.normalName().equals(normalName);
    }

    /**
     Test if this node's parent is an Element with the specified normalized name and namespace.
     * @param normalName a normalized element name (e.g. {@code div}).
     * @param namespace the namespace
     * @return true if the parent element's normal name matches exactly, and that element is in the specified namespace
     * @since 1.17.2
     */
    public boolean parentElementIs(String normalName, String namespace) {
        // instanceof is false for null, so no separate null check is required
        return parentNode instanceof Element
            && ((Element) parentNode).elementIs(normalName, namespace);
    }

    /**
     * Check if this Node has an actual Attributes object.
     */
    protected abstract boolean hasAttributes();

    /**
     Checks if this node has a parent. Nodes won't have parents if (e.g.) they are newly created and not added as a child
     to an existing node, or if they are a {@link #shallowClone()}. In such cases, {@link #parent()} will return {@code null}.
     @return if this node has a parent.
     */
    public boolean hasParent() {
        return parentNode != null;
    }

    /**
     * Get an attribute's value by its key. <b>Case insensitive</b>
     * <p>
     * To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs:</b></code>,
     * which is a shortcut to the {@link #absUrl} method.
     * </p>
     * E.g.:
     * <blockquote><code>String url = a.attr("abs:href");</code></blockquote>
     *
     * @param attributeKey The attribute key.
     * @return The attribute, or empty string if not present (to avoid nulls).
     * @see #attributes()
     * @see #hasAttr(String)
     * @see #absUrl(String)
     */
    public String attr(String attributeKey) {
        Validate.notNull(attributeKey);
        if (!hasAttributes())
            return EmptyString;

        String val = attributes().getIgnoreCase(attributeKey);
        if (!val.isEmpty())
            return val;
        else if (attributeKey.startsWith("abs:")) // no literal "abs:xyz" attribute; resolve "xyz" against the base URI
            return absUrl(attributeKey.substring("abs:".length()));
        else return EmptyString;
    }

    /**
     * Get each of the Element's attributes.
     * @return attributes (which implements Iterable, with the same order as presented in the original HTML).
     */
    public abstract Attributes attributes();

    /**
     Get the number of attributes that this Node has.
     @return the number of attributes
     @since 1.14.2
     */
    public int attributesSize() {
        // added so that we can test how many attributes exist without implicitly creating the Attributes object
        return hasAttributes() ? attributes().size() : 0;
    }

    /**
     * Set an attribute (key=value). If the attribute already exists, it is replaced. The attribute key comparison is
     * <b>case insensitive</b>. The key will be set with case sensitivity as set in the parser settings.
     * @param attributeKey The attribute key.
     * @param attributeValue The attribute value.
     * @return this (for chaining)
     */
    public Node attr(String attributeKey, String attributeValue) {
        Document doc = ownerDocument();
        // without an owning document, fall back to the default HTML parse settings
        ParseSettings settings = doc != null ? doc.parser().settings() : ParseSettings.htmlDefault;
        attributeKey = settings.normalizeAttribute(attributeKey);
        attributes().putIgnoreCase(attributeKey, attributeValue);
        return this;
    }

    /**
     * Test if this Node has an attribute. <b>Case insensitive</b>.
     * @param attributeKey The attribute key to check.
     * @return true if the attribute exists, false if not.
     */
    public boolean hasAttr(String attributeKey) {
        Validate.notNull(attributeKey);
        if (!hasAttributes())
            return false;

        if (attributeKey.startsWith("abs:")) {
            String key = attributeKey.substring("abs:".length());
            if (attributes().hasKeyIgnoreCase(key) && !absUrl(key).isEmpty())
                return true;
        }
        // otherwise (or if the abs: form could not be resolved), look for the key exactly as supplied
        return attributes().hasKeyIgnoreCase(attributeKey);
    }

    /**
     * Remove an attribute from this node.
     * @param attributeKey The attribute to remove.
     * @return this (for chaining)
     */
    public Node removeAttr(String attributeKey) {
        Validate.notNull(attributeKey);
        if (hasAttributes())
            attributes().removeIgnoreCase(attributeKey);
        return this;
    }

    /**
     * Clear (remove) each of the attributes in this node.
     * @return this, for chaining
     */
    public Node clearAttributes() {
        if (hasAttributes()) {
            Iterator<Attribute> it = attributes().iterator();
            while (it.hasNext()) {
                it.next();
                it.remove();
            }
        }
        return this;
    }

    /**
     Get the base URI that applies to this node. Will return an empty string if not defined. Used to make relative links
     absolute.

     @return base URI
     @see #absUrl
     */
    public abstract String baseUri();

    /**
     * Set the baseUri for just this node (not its descendants), if this Node tracks base URIs.
     * @param baseUri new URI
     */
    protected abstract void doSetBaseUri(String baseUri);

    /**
     Update the base URI of this node and all of its descendants.
     @param baseUri base URI to set
     */
    public void setBaseUri(final String baseUri) {
        Validate.notNull(baseUri);
        doSetBaseUri(baseUri);
    }

    /**
     * Get an absolute URL from a URL attribute that may be relative (such as an {@code <a href>} or
     * {@code <img src>}).
     * <p>
     * E.g.: <code>String absUrl = linkEl.absUrl("href");</code>
     * </p>
     * <p>
     * If the attribute value is already absolute (i.e. it starts with a protocol, like
     * <code>http://</code> or <code>https://</code> etc), and it successfully parses as a URL, the attribute is
     * returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made
     * absolute using that.
     * </p>
     * <p>
     * As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, e.g.:
     * <code>String absUrl = linkEl.attr("abs:href");</code>
     * </p>
     *
     * @param attributeKey The attribute key
     * @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or
     * could not be made successfully into a URL.
     * @see #attr
     * @see java.net.URL#URL(java.net.URL, String)
     */
    public String absUrl(String attributeKey) {
        Validate.notEmpty(attributeKey);
        if (!(hasAttributes() && attributes().hasKeyIgnoreCase(attributeKey))) // not using hasAttr, so that we don't recurse down hasAttr->absUrl
            return EmptyString;

        return StringUtil.resolve(baseUri(), attributes().getIgnoreCase(attributeKey));
    }

    protected abstract List<Node> ensureChildNodes();

    /**
     Get a child node by its 0-based index.
     @param index index of child node
     @return the child node at this index.
     @throws IndexOutOfBoundsException if the index is out of bounds.
     */
    public Node childNode(int index) {
        return ensureChildNodes().get(index);
    }

    /**
     Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes
     themselves can be manipulated.
     @return list of children. If no children, returns an empty list.
     */
    public List<Node> childNodes() {
        if (childNodeSize() == 0)
            return EmptyNodes;

        List<Node> children = ensureChildNodes();
        List<Node> rewrap = new ArrayList<>(children.size()); // wrapped so that looping and moving will not throw a CME as the source changes
        rewrap.addAll(children);
        return Collections.unmodifiableList(rewrap);
    }

    /**
     * Returns a deep copy of this node's children. Changes made to these nodes will not be reflected in the original
     * nodes
     * @return a deep copy of this node's children
     */
    public List<Node> childNodesCopy() {
        final List<Node> nodes = ensureChildNodes();
        final ArrayList<Node> children = new ArrayList<>(nodes.size());
        for (Node node : nodes) {
            children.add(node.clone());
        }
        return children;
    }

    /**
     * Get the number of child nodes that this node holds.
     * @return the number of child nodes that this node holds.
     */
    public abstract int childNodeSize();

    protected Node[] childNodesAsArray() {
        return ensureChildNodes().toArray(new Node[0]);
    }

    /**
     * Delete all this node's children.
     * @return this node, for chaining
     */
    public abstract Node empty();


    /**
     Gets this node's parent node.
     @return parent node; or null if no parent.
     @see #hasParent()
     */
    public @Nullable Node parent() {
        return parentNode;
    }

    /**
     Gets this node's parent node. Not overridable by extending classes, so useful if you really just need the Node type.
     @return parent node; or null if no parent.
     */
    public @Nullable final Node parentNode() {
        return parentNode;
    }

    /**
     * Get this node's root node; that is, its topmost ancestor. If this node is the top ancestor, returns {@code this}.
     * @return topmost ancestor.
     */
    public Node root() {
        Node node = this;
        while (node.parentNode != null)
            node = node.parentNode;
        return node;
    }

    /**
     * Gets the Document associated with this Node.
     * @return the Document associated with this Node, or null if there is no such Document.
     */
    public @Nullable Document ownerDocument() {
        Node root = root();
        return (root instanceof Document) ? (Document) root : null;
    }

    /**
     * Remove (delete) this node from the DOM tree. If this node has children, they are also removed. If this node is
     * an orphan, nothing happens.
     */
    public void remove() {
        if (parentNode != null)
            parentNode.removeChild(this);
    }

    /**
     * Insert the specified HTML into the DOM before this node (as a preceding sibling).
     * @param html HTML to add before this node
     * @return this node, for chaining
     * @see #after(String)
     */
    public Node before(String html) {
        addSiblingHtml(siblingIndex, html);
        return this;
    }

    /**
     * Insert the specified node into the DOM before this node (as a preceding sibling).
     * @param node to add before this node
     * @return this node, for chaining
     * @see #after(Node)
     */
    public Node before(Node node) {
        Validate.notNull(node);
        Validate.notNull(parentNode);

        // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add
        if (node.parentNode == parentNode) node.remove();

        parentNode.addChildren(siblingIndex, node);
        return this;
    }

    /**
     * Insert the specified HTML into the DOM after this node (as a following sibling).
     * @param html HTML to add after this node
     * @return this node, for chaining
     * @see #before(String)
     */
    public Node after(String html) {
        addSiblingHtml(siblingIndex + 1, html);
        return this;
    }

    /**
     * Insert the specified node into the DOM after this node (as a following sibling).
     * @param node to add after this node
     * @return this node, for chaining
     * @see #before(Node)
     */
    public Node after(Node node) {
        Validate.notNull(node);
        Validate.notNull(parentNode);

        // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add
        if (node.parentNode == parentNode) node.remove();

        parentNode.addChildren(siblingIndex + 1, node);
        return this;
    }

    /**
     * Parse the supplied HTML as a fragment (in the context of the parent, when that is an Element) and insert the
     * resulting nodes into the parent at the given child index.
     */
    private void addSiblingHtml(int index, String html) {
        Validate.notNull(html);
        Validate.notNull(parentNode);

        Element context = parentNode instanceof Element ? (Element) parentNode : null;
        List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri());
        parentNode.addChildren(index, nodes.toArray(new Node[0]));
    }

    /**
     Wrap the supplied HTML around this node.

     @param html HTML to wrap around this node, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. If
     the input HTML parses to no nodes at all, or does not parse to a result starting with an Element, this will be a
     no-op.
     @return this node, for chaining.
     */
    public Node wrap(String html) {
        Validate.notEmpty(html);

        // Parse context - parent (because wrapping), this, or null
        Element context =
            parentNode instanceof Element ? (Element) parentNode :
                this instanceof Element ? (Element) this :
                    null;
        List<Node> wrapChildren = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri());
        if (wrapChildren.isEmpty()) // the input produced no nodes; guard so get(0) can't throw. noop
            return this;
        Node wrapNode = wrapChildren.get(0);
        if (!(wrapNode instanceof Element)) // nothing to wrap with; noop
            return this;

        Element wrap = (Element) wrapNode;
        Element deepest = getDeepChild(wrap);
        if (parentNode != null)
            parentNode.replaceChild(this, wrap);
        deepest.addChildren(this); // side effect of tricking wrapChildren to lose first

        // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder
        //noinspection ForLoopReplaceableByForEach (because it allocates an Iterator which is wasteful here)
        for (int i = 0; i < wrapChildren.size(); i++) {
            Node remainder = wrapChildren.get(i);
            // if no parent, this could be the wrap node, so skip
            if (wrap == remainder)
                continue;

            if (remainder.parentNode != null)
                remainder.parentNode.removeChild(remainder);
            wrap.after(remainder);
        }
        return this;
    }

    /**
     * Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping
     * the node but keeping its children.
     * <p>
     * For example, with the input html:
     * </p>
     * <p>{@code <div>One <span>Two <b>Three</b></span></div>}</p>
     * Calling {@code element.unwrap()} on the {@code span} element will result in the html:
     * <p>{@code <div>One Two <b>Three</b></div>}</p>
     * and the {@code "Two "} {@link TextNode} being returned.
     *
     * @return the first child of this node, after the node has been unwrapped. {@code null} if the node had no children.
     * @see #remove()
     * @see #wrap(String)
     */
    public @Nullable Node unwrap() {
        Validate.notNull(parentNode);
        Node firstChild = firstChild();
        parentNode.addChildren(siblingIndex, this.childNodesAsArray());
        this.remove();

        return firstChild;
    }

    /**
     * Follow the chain of first element children down from {@code el}, returning the last Element reached (which is
     * {@code el} itself if it has no element children).
     */
    private static Element getDeepChild(Element el) {
        Element child = el.firstElementChild();
        while (child != null) {
            el = child;
            child = child.firstElementChild();
        }
        return el;
    }

    /**
     * Replace this node in the DOM with the supplied node.
     * @param in the node that will replace the existing node.
     */
    public void replaceWith(Node in) {
        Validate.notNull(in);
        if (parentNode == null) parentNode = in.parentNode; // allows old to have been temp removed before replacing
        Validate.notNull(parentNode);
        parentNode.replaceChild(this, in);
    }

    /**
     * Set this node's parent, first detaching this node from its current parent (if it has one). Does not add this
     * node to the new parent's child list.
     */
    protected void setParentNode(Node parentNode) {
        Validate.notNull(parentNode);
        if (this.parentNode != null)
            this.parentNode.removeChild(this);
        this.parentNode = parentNode;
    }

    /**
     * Swap the child {@code out} (which must be a child of this node) for {@code in}, at the same sibling position.
     * {@code in} is first detached from any parent it has; {@code out} is left without a parent.
     */
    protected void replaceChild(Node out, Node in) {
        Validate.isTrue(out.parentNode == this);
        Validate.notNull(in);
        if (out == in) return; // no-op self replacement

        if (in.parentNode != null)
            in.parentNode.removeChild(in);

        // read the index only after `in` was detached: if `in` was an earlier sibling, `out` has just been reindexed
        final int index = out.siblingIndex;
        ensureChildNodes().set(index, in);
        in.parentNode = this;
        in.setSiblingIndex(index);
        out.parentNode = null;
    }

    /**
     * Remove the child {@code out} (which must be a child of this node) from the child list, reindex the following
     * siblings, and clear the removed node's parent.
     */
    protected void removeChild(Node out) {
        Validate.isTrue(out.parentNode == this);
        final int index = out.siblingIndex;
        ensureChildNodes().remove(index);
        reindexChildren(index);
        out.parentNode = null;
    }

    /**
     * Append the supplied nodes to the end of this node's child list, reparenting each to this node.
     */
    protected void addChildren(Node... children) {
        //most used. short circuit addChildren(int), which hits reindex children and array copy
        final List<Node> nodes = ensureChildNodes();

        for (Node child: children) {
            reparentChild(child);
            nodes.add(child);
            child.setSiblingIndex(nodes.size()-1);
        }
    }

    /**
     * Insert the supplied nodes into this node's child list at the given index, reparenting each to this node and
     * reindexing the affected siblings.
     */
    protected void addChildren(int index, Node... children) {
        Validate.notNull(children);
        if (children.length == 0) {
            return;
        }
        final List<Node> nodes = ensureChildNodes();

        // fast path - if used as a wrap (index=0, children = child[0].parent.children - do inplace
        final Node firstParent = children[0].parent();
        if (firstParent != null && firstParent.childNodeSize() == children.length) {
            boolean sameList = true;
            final List<Node> firstParentNodes = firstParent.ensureChildNodes();
            // identity check contents to see if same
            int i = children.length;
            while (i-- > 0) {
                if (children[i] != firstParentNodes.get(i)) {
                    sameList = false;
                    break;
                }
            }
            if (sameList) { // moving, so OK to empty firstParent and short-circuit
                boolean wasEmpty = childNodeSize() == 0;
                firstParent.empty();
                nodes.addAll(index, Arrays.asList(children));
                i = children.length;
                while (i-- > 0) {
                    children[i].parentNode = this;
                }
                if (!(wasEmpty && children[0].siblingIndex == 0)) // skip reindexing if we just moved
                    reindexChildren(index);
                return;
            }
        }

        Validate.noNullElements(children);
        for (Node child : children) {
            reparentChild(child);
        }
        nodes.addAll(index, Arrays.asList(children));
        reindexChildren(index);
    }

    protected void reparentChild(Node child) {
        child.setParentNode(this);
    }

    /**
     * Reset the sibling index of each child from position {@code start} to the end of the child list.
     */
    private void reindexChildren(int start) {
        final int size = childNodeSize();
        if (size == 0) return;
        final List<Node> childNodes = ensureChildNodes();
        for (int i = start; i < size; i++) {
            childNodes.get(i).setSiblingIndex(i);
        }
    }

    /**
     Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not
     include this node (a node is not a sibling of itself).
     @return node siblings. If the node has no parent, returns an empty list.
     */
    public List<Node> siblingNodes() {
        if (parentNode == null)
            return Collections.emptyList();

        List<Node> nodes = parentNode.ensureChildNodes();
        List<Node> siblings = new ArrayList<>(nodes.size() - 1);
        for (Node node: nodes)
            if (node != this)
                siblings.add(node);
        return siblings;
    }

    /**
     Get this node's next sibling.
     @return next sibling, or {@code null} if this is the last sibling
     */
    public @Nullable Node nextSibling() {
        if (parentNode == null)
            return null; // root

        final List<Node> siblings = parentNode.ensureChildNodes();
        final int index = siblingIndex+1;
        if (siblings.size() > index)
            return siblings.get(index);
        else
            return null;
    }

    /**
     Get this node's previous sibling.
     @return the previous sibling, or {@code null} if this is the first sibling
     */
    public @Nullable Node previousSibling() {
        if (parentNode == null)
            return null; // root

        if (siblingIndex > 0)
            return parentNode.ensureChildNodes().get(siblingIndex-1);
        else
            return null;
    }

    /**
     * Get the list index of this node in its node sibling list. E.g. if this is the first node
     * sibling, returns 0.
     * @return position in node sibling list
     * @see org.jsoup.nodes.Element#elementSiblingIndex()
     */
    public int siblingIndex() {
        return siblingIndex;
    }

    protected void setSiblingIndex(int siblingIndex) {
        this.siblingIndex = siblingIndex;
    }

    /**
     Gets the first child node of this node, or {@code null} if there is none. This could be any Node type, such as an
     Element, TextNode, Comment, etc. Use {@link Element#firstElementChild()} to get the first Element child.
     @return the first child node, or null if there are no children.
     @see Element#firstElementChild()
     @see #lastChild()
     @since 1.15.2
     */
    public @Nullable Node firstChild() {
        if (childNodeSize() == 0) return null;
        return ensureChildNodes().get(0);
    }

    /**
     Gets the last child node of this node, or {@code null} if there is none.
     @return the last child node, or null if there are no children.
     @see Element#lastElementChild()
     @see #firstChild()
     @since 1.15.2
     */
    public @Nullable Node lastChild() {
        final int size = childNodeSize();
        if (size == 0) return null;
        List<Node> children = ensureChildNodes();
        return children.get(size - 1);
    }

    /**
     * Perform a depth-first traversal through this node and its descendants.
     * @param nodeVisitor the visitor callbacks to perform on each node
     * @return this node, for chaining
     */
    public Node traverse(NodeVisitor nodeVisitor) {
        Validate.notNull(nodeVisitor);
        NodeTraversor.traverse(nodeVisitor, this);
        return this;
    }

    /**
     Perform the supplied action on this Node and each of its descendants, during a depth-first traversal. Nodes may be
     inspected, changed, added, replaced, or removed.
     @param action the function to perform on the node
     @return this Node, for chaining
     @see Element#forEach(Consumer)
     */
    public Node forEachNode(Consumer<? super Node> action) {
        Validate.notNull(action);
        nodeStream().forEach(action);
        return this;
    }

    /**
     * Perform a depth-first filtered traversal through this node and its descendants.
     * @param nodeFilter the filter callbacks to perform on each node
     * @return this node, for chaining
     */
    public Node filter(NodeFilter nodeFilter) {
        Validate.notNull(nodeFilter);
        NodeTraversor.filter(nodeFilter, this);
        return this;
    }

    /**
     Returns a Stream of this Node and all of its descendant Nodes. The stream has document order.
     @return a stream of all nodes.
     @see Element#stream()
     @since 1.17.1
     */
    public Stream<Node> nodeStream() {
        return NodeUtils.stream(this, Node.class);
    }

    /**
     Returns a Stream of this and descendant nodes, containing only nodes of the specified type. The stream has document
     order.
     @param type the node type to retain in the stream
     @param <T> the node type to retain in the stream
     @return a stream of nodes filtered by type.
     @see Element#stream()
     @since 1.17.1
     */
    public <T extends Node> Stream<T> nodeStream(Class<T> type) {
        return NodeUtils.stream(this, type);
    }

    /**
     Get the outer HTML of this node. For example, on a {@code p} element, may return {@code <p>Para</p>}.
     @return outer HTML
     @see Element#html()
     @see Element#text()
     */
    public String outerHtml() {
        StringBuilder accum = StringUtil.borrowBuilder();
        outerHtml(accum);
        return StringUtil.releaseBuilder(accum);
    }

    /**
     * Write the outer HTML of this node to the supplied accumulator, by traversing this node with a
     * {@code Printer}.
     */
    protected void outerHtml(Appendable accum) {
        Printer printer = Printer.printerFor(this, accum);
        NodeTraversor.traverse(printer, this);
    }

    /**
     Get the outer HTML of this node.

     @param accum accumulator to place HTML into
     @param out the document output settings
     @throws IOException if appending to the given accumulator fails.
     */
    abstract void outerHtmlHead(final Appendable accum, final Document.OutputSettings out) throws IOException;

    abstract void outerHtmlTail(final Appendable accum, final Document.OutputSettings out) throws IOException;

    /**
     * Write this node and its children to the given {@link Appendable}.
     *
     * @param appendable the {@link Appendable} to write to.
     * @param <T> the type of the supplied {@link Appendable}
     * @return the supplied {@link Appendable}, for chaining.
     */
    public <T extends Appendable> T html(T appendable) {
        outerHtml(appendable);
        return appendable;
    }

    /**
     Get the source range (start and end positions) in the original input source from which this node was parsed.
     Position tracking must be enabled prior to parsing the content. For an Element, this will be the positions of the
     start tag.
     @return the range for the start of the node, or {@code untracked} if its range was not tracked.
     @see org.jsoup.parser.Parser#setTrackPosition(boolean)
     @see Range#isImplicit()
     @see Element#endSourceRange()
     @see Attributes#sourceRange(String name)
     @since 1.15.2
     */
    public Range sourceRange() {
        return Range.of(this, true);
    }

    /** Test if this node is the first child, or first following blank text. */
    final boolean isEffectivelyFirst() {
        if (siblingIndex == 0) return true;
        if (siblingIndex == 1) {
            final Node prev = previousSibling();
            return prev instanceof TextNode && (((TextNode) prev).isBlank());
        }
        return false;
    }

    /**
     * Gets this node's outer HTML.
     * @return outer HTML.
     * @see #outerHtml()
     */
    @Override
    public String toString() {
        return outerHtml();
    }

    /**
     * Append a newline followed by padding for the given depth, sized from the output settings' indent amount and
     * maximum padding width.
     */
    protected void indent(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
        accum.append('\n').append(StringUtil.padding(depth * out.indentAmount(), out.maxPaddingWidth()));
    }

    /**
     * Check if this node is the same instance of another (object identity test).
     * <p>For a node value equality check, see {@link #hasSameValue(Object)}</p>
     * @param o other object to compare to
     * @return true if the other object is this same node instance
     * @see Node#hasSameValue(Object)
     */
    @Override
    public boolean equals(@Nullable Object o) {
        // implemented just so that javadoc is clear this is an identity test
        return this == o;
    }

    /**
     Provides a hashCode for this Node, based on its object identity. Changes to the Node's content will not impact the
     result.
     @return an object identity based hashcode for this Node
     */
    @Override
    public int hashCode() {
        // implemented so that javadoc and scanners are clear this is an identity test
        return super.hashCode();
    }

    /**
     * Check if this node has the same content as another node. A node is considered the same if its name, attributes and content match the
     * other node; particularly its position in the tree does not influence its similarity.
     * @param o other object to compare to
     * @return true if the content of this node is the same as the other
     */
    public boolean hasSameValue(@Nullable Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        return this.outerHtml().equals(((Node) o).outerHtml());
    }

    /**
     Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings.
     <ul>
     <li>If this node is a {@link LeafNode}, the clone will have no parent.</li>
     <li>If this node is an {@link Element}, the clone will have a simple owning {@link Document} to retain the
     configured output settings and parser.</li>
     </ul>
     <p>The cloned node may be adopted into another Document or node structure using
     {@link Element#appendChild(Node)}.</p>

     @return a stand-alone cloned node, including clones of any children
     @see #shallowClone()
     */
    @SuppressWarnings("MethodDoesntCallSuperMethod")
    // because it does call super.clone in doClone - analysis just isn't following
    @Override
    public Node clone() {
        Node thisClone = doClone(null); // splits for orphan

        // Queue up nodes that need their children cloned (BFS).
        final LinkedList<Node> nodesToProcess = new LinkedList<>();
        nodesToProcess.add(thisClone);

        while (!nodesToProcess.isEmpty()) {
            Node currParent = nodesToProcess.remove();

            final int size = currParent.childNodeSize();
            for (int i = 0; i < size; i++) {
                final List<Node> childNodes = currParent.ensureChildNodes();
                // replace each child in the cloned parent's list with its own clone, then queue it for its children
                Node childClone = childNodes.get(i).doClone(currParent);
                childNodes.set(i, childClone);
                nodesToProcess.add(childClone);
            }
        }

        return thisClone;
    }

    /**
     * Create a stand-alone, shallow copy of this node. None of its children (if any) will be cloned, and it will have
     * no parent or sibling nodes.
     * @return a single independent copy of this node
     * @see #clone()
     */
    public Node shallowClone() {
        return doClone(null);
    }

    /*
     * Return a clone of the node using the given parent (which can be null).
     * Not a deep copy of children.
     */
    protected Node doClone(@Nullable Node parent) {
        Node clone;

        try {
            clone = (Node) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }

        clone.parentNode = parent; // can be null, to create an orphan split
        clone.siblingIndex = parent == null ? 0 : siblingIndex;
        // if not keeping the parent, shallowClone the ownerDocument to preserve its settings
        if (parent == null && !(this instanceof Document)) {
            Document doc = ownerDocument();
            if (doc != null) {
                Document docClone = doc.shallowClone();
                clone.parentNode = docClone;
                docClone.ensureChildNodes().add(clone);
            }
        }

        return clone;
    }
}