package org.jsoup.nodes;

import org.jsoup.SerializationException;
import org.jsoup.helper.Validate;
import org.jsoup.internal.StringUtil;
import org.jsoup.select.NodeFilter;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.jspecify.annotations.Nullable;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.function.Consumer;
import java.util.stream.Stream;

/**
 The base, abstract Node model. {@link Element}, {@link Document}, {@link Comment}, {@link TextNode}, et al.,
 are instances of Node.

 @author Jonathan Hedley, jonathan@hedley.net */
public abstract class Node implements Cloneable {
    static final List<Node> EmptyNodes = Collections.emptyList();
    static final String EmptyString = "";
    @Nullable Node parentNode; // Nodes don't always have parents
    int siblingIndex;

    /**
     * Default constructor. Doesn't set up base uri, children, or attributes; use with caution.
     */
    protected Node() {
    }

    /**
     Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof).
     @return node name
     */
    public abstract String nodeName();

    /**
     Get the normalized name of this node. For node types other than Element, this is the same as {@link #nodeName()}.
     For an Element, will be the lower-cased tag name.
     @return normalized node name
     @since 1.15.4.
     */
    public String normalName() {
        return nodeName();
    }

    /**
     Test if this node has the specified normalized name, in any namespace.
     @param normalName a normalized element name (e.g. {@code div}).
     @return true if the element's normal name matches exactly
     @since 1.17.2
     */
    public boolean nameIs(String normalName) {
        return normalName().equals(normalName);
    }

    /**
     Test if this node's parent has the specified normalized name.
     @param normalName a normalized name (e.g. {@code div}).
     @return true if the parent element's normal name matches exactly
     @since 1.17.2
     */
    public boolean parentNameIs(String normalName) {
        return parentNode != null && parentNode.normalName().equals(normalName);
    }

    /**
     Test if this node's parent is an Element with the specified normalized name and namespace.
     @param normalName a normalized element name (e.g. {@code div}).
     @param namespace the namespace
     @return true if the parent element's normal name matches exactly, and that element is in the specified namespace
     @since 1.17.2
     */
    public boolean parentElementIs(String normalName, String namespace) {
        // instanceof is false for null, so no separate null check is needed
        return parentNode instanceof Element
            && ((Element) parentNode).elementIs(normalName, namespace);
    }

    /**
     * Check if this Node has an actual Attributes object.
     */
    protected abstract boolean hasAttributes();

    /**
     Checks if this node has a parent. Nodes won't have parents if (e.g.) they are newly created and not added as a child
     to an existing node, or if they are a {@link #shallowClone()}. In such cases, {@link #parent()} will return {@code null}.
     @return if this node has a parent.
     */
    public boolean hasParent() {
        return parentNode != null;
    }

    /**
     * Get an attribute's value by its key. <b>Case insensitive</b>
     * <p>
     * To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs:</b></code>,
     * which is a shortcut to the {@link #absUrl} method.
     * </p>
     * E.g.:
     * <blockquote><code>String url = a.attr("abs:href");</code></blockquote>
     *
     * @param attributeKey The attribute key.
     * @return The attribute, or empty string if not present (to avoid nulls).
     * @see #attributes()
     * @see #hasAttr(String)
     * @see #absUrl(String)
     */
    public String attr(String attributeKey) {
        Validate.notNull(attributeKey);
        if (!hasAttributes())
            return EmptyString;

        String val = attributes().getIgnoreCase(attributeKey);
        if (!val.isEmpty())
            return val;
        // no literal attribute with this key; an "abs:" prefix requests the resolved URL of the unprefixed key
        if (attributeKey.startsWith("abs:"))
            return absUrl(attributeKey.substring("abs:".length()));
        return EmptyString;
    }

    /**
     * Get each of the Element's attributes.
     * @return attributes (which implements Iterable, with the same order as presented in the original HTML).
     */
    public abstract Attributes attributes();

    /**
     Get the number of attributes that this Node has.
     @return the number of attributes
     @since 1.14.2
     */
    public int attributesSize() {
        // added so that we can test how many attributes exist without implicitly creating the Attributes object
        return hasAttributes() ? attributes().size() : 0;
    }

    /**
     * Set an attribute (key=value). If the attribute already exists, it is replaced. The attribute key comparison is
     * <b>case insensitive</b>. The key will be set with case sensitivity as set in the parser settings.
     * @param attributeKey The attribute key.
     * @param attributeValue The attribute value.
     * @return this (for chaining)
     */
    public Node attr(String attributeKey, String attributeValue) {
        attributeKey = NodeUtils.parser(this).settings().normalizeAttribute(attributeKey);
        attributes().putIgnoreCase(attributeKey, attributeValue);
        return this;
    }

    /**
     * Test if this Node has an attribute. <b>Case insensitive</b>.
     * @param attributeKey The attribute key to check.
     * @return true if the attribute exists, false if not.
     */
    public boolean hasAttr(String attributeKey) {
        Validate.notNull(attributeKey);
        if (!hasAttributes())
            return false;

        if (attributeKey.startsWith("abs:")) {
            String key = attributeKey.substring("abs:".length());
            if (attributes().hasKeyIgnoreCase(key) && !absUrl(key).isEmpty())
                return true;
        }
        // fall through: an attribute may literally be named with the given key (including any "abs:" prefix)
        return attributes().hasKeyIgnoreCase(attributeKey);
    }

    /**
     * Remove an attribute from this node.
     * @param attributeKey The attribute to remove.
     * @return this (for chaining)
     */
    public Node removeAttr(String attributeKey) {
        Validate.notNull(attributeKey);
        if (hasAttributes())
            attributes().removeIgnoreCase(attributeKey);
        return this;
    }

    /**
     * Clear (remove) each of the attributes in this node.
     * @return this, for chaining
     */
    public Node clearAttributes() {
        if (hasAttributes()) {
            Iterator<Attribute> it = attributes().iterator();
            while (it.hasNext()) {
                it.next();
                it.remove();
            }
        }
        return this;
    }

    /**
     Get the base URI that applies to this node. Will return an empty string if not defined. Used to make relative links
     absolute.

     @return base URI
     @see #absUrl
     */
    public abstract String baseUri();

    /**
     * Set the baseUri for just this node (not its descendants), if this Node tracks base URIs.
     * @param baseUri new URI
     */
    protected abstract void doSetBaseUri(String baseUri);

    /**
     Update the base URI of this node and all of its descendants.
     @param baseUri base URI to set
     */
    public void setBaseUri(final String baseUri) {
        Validate.notNull(baseUri);
        doSetBaseUri(baseUri);
    }

    /**
     * Get an absolute URL from a URL attribute that may be relative (such as an <code>&lt;a href&gt;</code> or
     * <code>&lt;img src&gt;</code>).
     * <p>
     * E.g.: <code>String absUrl = linkEl.absUrl("href");</code>
     * </p>
     * <p>
     * If the attribute value is already absolute (i.e. it starts with a protocol, like
     * <code>http://</code> or <code>https://</code> etc), and it successfully parses as a URL, the attribute is
     * returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made
     * absolute using that.
     * </p>
     * <p>
     * As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, e.g.:
     * <code>String absUrl = linkEl.attr("abs:href");</code>
     * </p>
     *
     * @param attributeKey The attribute key
     * @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or
     * could not be made successfully into a URL.
     * @see #attr
     * @see java.net.URL#URL(java.net.URL, String)
     */
    public String absUrl(String attributeKey) {
        Validate.notEmpty(attributeKey);
        // not using hasAttr, so that we don't recurse down hasAttr->absUrl
        if (!(hasAttributes() && attributes().hasKeyIgnoreCase(attributeKey)))
            return EmptyString;

        return StringUtil.resolve(baseUri(), attributes().getIgnoreCase(attributeKey));
    }

    /**
     * Get the list that holds this node's children. Methods in this class modify the returned list directly
     * (add, set, remove), so implementations are expected to return the live list and not a copy.
     * @return the list of child nodes
     */
    protected abstract List<Node> ensureChildNodes();

    /**
     Get a child node by its 0-based index.
     @param index index of child node
     @return the child node at this index.
     @throws IndexOutOfBoundsException if the index is out of bounds.
     */
    public Node childNode(int index) {
        return ensureChildNodes().get(index);
    }

    /**
     Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes
     themselves can be manipulated.
     @return list of children. If no children, returns an empty list.
     */
    public List<Node> childNodes() {
        if (childNodeSize() == 0)
            return EmptyNodes;

        List<Node> children = ensureChildNodes();
        // wrapped so that looping and moving will not throw a CME as the source changes
        List<Node> rewrap = new ArrayList<>(children.size());
        rewrap.addAll(children);
        return Collections.unmodifiableList(rewrap);
    }

    /**
     * Returns a deep copy of this node's children. Changes made to these nodes will not be reflected in the original
     * nodes
     * @return a deep copy of this node's children
     */
    public List<Node> childNodesCopy() {
        final List<Node> nodes = ensureChildNodes();
        final ArrayList<Node> children = new ArrayList<>(nodes.size());
        for (Node node : nodes) {
            children.add(node.clone());
        }
        return children;
    }

    /**
     * Get the number of child nodes that this node holds.
     * @return the number of child nodes that this node holds.
     */
    public abstract int childNodeSize();

    /**
     * Get this node's children as a newly allocated array.
     * @return an array holding the current child nodes, in order
     */
    protected Node[] childNodesAsArray() {
        return ensureChildNodes().toArray(new Node[0]);
    }

    /**
     * Delete all this node's children.
     * @return this node, for chaining
     */
    public abstract Node empty();

    /**
     Gets this node's parent node.
     @return parent node; or null if no parent.
     @see #hasParent()
     */
    public @Nullable Node parent() {
        return parentNode;
    }

    /**
     Gets this node's parent node. Not overridable by extending classes, so useful if you really just need the Node type.
     @return parent node; or null if no parent.
     */
    public final @Nullable Node parentNode() {
        return parentNode;
    }

    /**
     * Get this node's root node; that is, its topmost ancestor. If this node is the top ancestor, returns {@code this}.
     * @return topmost ancestor.
     */
    public Node root() {
        Node node = this;
        while (node.parentNode != null)
            node = node.parentNode;
        return node;
    }

    /**
     * Gets the Document associated with this Node.
     * @return the Document associated with this Node, or null if there is no such Document.
     */
    public @Nullable Document ownerDocument() {
        Node root = root();
        return (root instanceof Document) ? (Document) root : null;
    }

    /**
     * Remove (delete) this node from the DOM tree. If this node has children, they are also removed. If this node is
     * an orphan, nothing happens.
     */
    public void remove() {
        if (parentNode != null)
            parentNode.removeChild(this);
    }

    /**
     * Insert the specified HTML into the DOM before this node (as a preceding sibling).
     * @param html HTML to add before this node
     * @return this node, for chaining
     * @see #after(String)
     */
    public Node before(String html) {
        addSiblingHtml(siblingIndex, html);
        return this;
    }

    /**
     * Insert the specified node into the DOM before this node (as a preceding sibling).
     * @param node to add before this node
     * @return this node, for chaining
     * @see #after(Node)
     */
    public Node before(Node node) {
        Validate.notNull(node);
        Validate.notNull(parentNode);

        // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add
        if (node.parentNode == parentNode) node.remove();

        parentNode.addChildren(siblingIndex, node);
        return this;
    }

    /**
     * Insert the specified HTML into the DOM after this node (as a following sibling).
     * @param html HTML to add after this node
     * @return this node, for chaining
     * @see #before(String)
     */
    public Node after(String html) {
        addSiblingHtml(siblingIndex + 1, html);
        return this;
    }

    /**
     * Insert the specified node into the DOM after this node (as a following sibling).
     * @param node to add after this node
     * @return this node, for chaining
     * @see #before(Node)
     */
    public Node after(Node node) {
        Validate.notNull(node);
        Validate.notNull(parentNode);

        // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add
        if (node.parentNode == parentNode) node.remove();

        parentNode.addChildren(siblingIndex + 1, node);
        return this;
    }

    /**
     * Parse the HTML as a fragment (using the parent as context when it is an Element) and insert the resulting
     * nodes into this node's parent at the given child index. Requires that this node has a parent.
     * @param index index in the parent's child list at which to insert
     * @param html HTML to parse and insert
     */
    private void addSiblingHtml(int index, String html) {
        Validate.notNull(html);
        Validate.notNull(parentNode);

        Element context = parentNode instanceof Element ? (Element) parentNode : null;
        List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri());
        parentNode.addChildren(index, nodes.toArray(new Node[0]));
    }

    /**
     Wrap the supplied HTML around this node.

     @param html HTML to wrap around this node, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. If
     the input HTML does not parse to a result starting with an Element (including when it parses to no nodes at
     all), this will be a no-op.
     @return this node, for chaining.
     */
    public Node wrap(String html) {
        Validate.notEmpty(html);

        // Parse context - parent (because wrapping), this, or null
        Element context =
            parentNode instanceof Element ? (Element) parentNode :
                this instanceof Element ? (Element) this :
                    null;
        List<Node> wrapChildren = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri());
        if (wrapChildren.isEmpty()) // nothing was parsed (guards the get(0) below); noop
            return this;
        Node wrapNode = wrapChildren.get(0);
        if (!(wrapNode instanceof Element)) // nothing to wrap with; noop
            return this;

        Element wrap = (Element) wrapNode;
        Element deepest = getDeepChild(wrap);
        if (parentNode != null)
            parentNode.replaceChild(this, wrap);
        deepest.addChildren(this); // side effect of tricking wrapChildren to lose first

        // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder
        //noinspection ForLoopReplaceableByForEach (because it allocates an Iterator which is wasteful here)
        for (int i = 0; i < wrapChildren.size(); i++) {
            Node remainder = wrapChildren.get(i);
            // if no parent, this could be the wrap node, so skip
            if (wrap == remainder)
                continue;

            if (remainder.parentNode != null)
                remainder.parentNode.removeChild(remainder);
            wrap.after(remainder);
        }
        return this;
    }

    /**
     * Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping
     * the node but keeping its children.
     * <p>
     * For example, with the input html:
     * </p>
     * <p>{@code <div>One <span>Two <b>Three</b></span></div>}</p>
     * Calling {@code element.unwrap()} on the {@code span} element will result in the html:
     * <p>{@code <div>One Two <b>Three</b></div>}</p>
     * and the {@code "Two "} {@link TextNode} being returned.
     *
     * @return the first child of this node, after the node has been unwrapped. {@code null} if the node had no children.
     * @see #remove()
     * @see #wrap(String)
     */
    public @Nullable Node unwrap() {
        Validate.notNull(parentNode);
        Node firstChild = firstChild();
        parentNode.addChildren(siblingIndex, this.childNodesAsArray());
        this.remove();

        return firstChild;
    }

    /**
     * Follow the chain of first element children down from the given element.
     * @param el the element to start from
     * @return the last element reached by repeatedly taking the first element child; {@code el} itself if it has
     * no element children
     */
    private static Element getDeepChild(Element el) {
        Element child = el.firstElementChild();
        while (child != null) {
            el = child;
            child = child.firstElementChild();
        }
        return el;
    }

    void nodelistChanged() {
        // Element overrides this to clear its shadow children elements
    }

    /**
     * Replace this node in the DOM with the supplied node.
     * @param in the node that will replace the existing node.
     */
    public void replaceWith(Node in) {
        Validate.notNull(in);
        if (parentNode == null) parentNode = in.parentNode; // allows old to have been temp removed before replacing
        Validate.notNull(parentNode);
        parentNode.replaceChild(this, in);
    }

    /**
     * Set this node's parent, first removing this node from its current parent's child list (if it has a parent).
     * Does not add this node to the new parent's child list.
     * @param parentNode the new parent; must not be null
     */
    protected void setParentNode(Node parentNode) {
        Validate.notNull(parentNode);
        if (this.parentNode != null)
            this.parentNode.removeChild(this);
        this.parentNode = parentNode;
    }

    /**
     * Replace one of this node's children with another node, in the same position. The incoming node is first
     * removed from its current parent, if any. A self-replacement ({@code out == in}) is a no-op.
     * @param out the existing child to replace; its parent must be this node
     * @param in the node to put in its place
     */
    protected void replaceChild(Node out, Node in) {
        Validate.isTrue(out.parentNode == this);
        Validate.notNull(in);
        if (out == in) return; // no-op self replacement

        if (in.parentNode != null)
            in.parentNode.removeChild(in);

        final int index = out.siblingIndex;
        ensureChildNodes().set(index, in);
        in.parentNode = this;
        in.setSiblingIndex(index);
        out.parentNode = null;
    }

    /**
     * Remove a child from this node: takes it out of the child list, reindexes the siblings that followed it, and
     * clears its parent reference.
     * @param out the child to remove; its parent must be this node
     */
    protected void removeChild(Node out) {
        Validate.isTrue(out.parentNode == this);
        final int index = out.siblingIndex;
        ensureChildNodes().remove(index);
        reindexChildren(index);
        out.parentNode = null;
    }

    /**
     * Append the supplied nodes to the end of this node's children, reparenting each to this node.
     * @param children nodes to append
     */
    protected void addChildren(Node... children) {
        //most used. short circuit addChildren(int), which hits reindex children and array copy
        final List<Node> nodes = ensureChildNodes();

        for (Node child : children) {
            reparentChild(child);
            nodes.add(child);
            child.setSiblingIndex(nodes.size() - 1);
        }
    }

    /**
     * Insert the supplied nodes into this node's children at the given index, reparenting each to this node and
     * reindexing the children from that index on. An empty array is a no-op.
     * @param index index in the child list at which to insert
     * @param children nodes to insert; must not be null, nor contain null elements
     */
    protected void addChildren(int index, Node... children) {
        Validate.notNull(children);
        if (children.length == 0) {
            return;
        }
        final List<Node> nodes = ensureChildNodes();

        // fast path - if used as a wrap (index=0, children = child[0].parent.children - do inplace
        final Node firstParent = children[0].parent();
        if (firstParent != null && firstParent.childNodeSize() == children.length) {
            boolean sameList = true;
            final List<Node> firstParentNodes = firstParent.ensureChildNodes();
            // identity check contents to see if same
            int i = children.length;
            while (i-- > 0) {
                if (children[i] != firstParentNodes.get(i)) {
                    sameList = false;
                    break;
                }
            }
            if (sameList) { // moving, so OK to empty firstParent and short-circuit
                boolean wasEmpty = childNodeSize() == 0;
                firstParent.empty();
                nodes.addAll(index, Arrays.asList(children));
                i = children.length;
                while (i-- > 0) {
                    children[i].parentNode = this;
                }
                if (!(wasEmpty && children[0].siblingIndex == 0)) // skip reindexing if we just moved
                    reindexChildren(index);
                return;
            }
        }

        Validate.noNullElements(children);
        for (Node child : children) {
            reparentChild(child);
        }
        nodes.addAll(index, Arrays.asList(children));
        reindexChildren(index);
    }

    /**
     * Make this node the parent of the supplied child (via {@link #setParentNode(Node)}, which detaches the child
     * from any current parent). Does not add the child to this node's child list.
     * @param child the node to reparent
     */
    protected void reparentChild(Node child) {
        child.setParentNode(this);
    }

    /**
     * Reset the sibling index of each child from {@code start} to the end of the child list, so that each index
     * matches the child's position in the list.
     * @param start index of the first child to reindex
     */
    private void reindexChildren(int start) {
        final int size = childNodeSize();
        if (size == 0) return;
        final List<Node> childNodes = ensureChildNodes();
        for (int i = start; i < size; i++) {
            childNodes.get(i).setSiblingIndex(i);
        }
    }

    /**
     Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not
     include this node (a node is not a sibling of itself).
     @return node siblings. If the node has no parent, returns an empty list.
     */
    public List<Node> siblingNodes() {
        if (parentNode == null)
            return Collections.emptyList();

        List<Node> nodes = parentNode.ensureChildNodes();
        List<Node> siblings = new ArrayList<>(nodes.size() - 1);
        for (Node node : nodes)
            if (node != this)
                siblings.add(node);
        return siblings;
    }

    /**
     Get this node's next sibling.
     @return next sibling, or {@code null} if this is the last sibling
     */
    public @Nullable Node nextSibling() {
        if (parentNode == null)
            return null; // root

        final List<Node> siblings = parentNode.ensureChildNodes();
        final int index = siblingIndex + 1;
        if (siblings.size() > index)
            return siblings.get(index);
        else
            return null;
    }

    /**
     Get this node's previous sibling.
     @return the previous sibling, or {@code null} if this is the first sibling
     */
    public @Nullable Node previousSibling() {
        if (parentNode == null)
            return null; // root

        if (siblingIndex > 0)
            return parentNode.ensureChildNodes().get(siblingIndex - 1);
        else
            return null;
    }

    /**
     * Get the list index of this node in its node sibling list. E.g. if this is the first node
     * sibling, returns 0.
     * @return position in node sibling list
     * @see org.jsoup.nodes.Element#elementSiblingIndex()
     */
    public int siblingIndex() {
        return siblingIndex;
    }

    /**
     * Set the recorded position of this node in its parent's child list. Only updates the stored index; it does
     * not move the node.
     * @param siblingIndex the new index
     */
    protected void setSiblingIndex(int siblingIndex) {
        this.siblingIndex = siblingIndex;
    }

    /**
     Gets the first child node of this node, or {@code null} if there is none. This could be any Node type, such as an
     Element, TextNode, Comment, etc. Use {@link Element#firstElementChild()} to get the first Element child.
     @return the first child node, or null if there are no children.
     @see Element#firstElementChild()
     @see #lastChild()
     @since 1.15.2
     */
    public @Nullable Node firstChild() {
        if (childNodeSize() == 0) return null;
        return ensureChildNodes().get(0);
    }

    /**
     Gets the last child node of this node, or {@code null} if there is none.
     @return the last child node, or null if there are no children.
     @see Element#lastElementChild()
     @see #firstChild()
     @since 1.15.2
     */
    public @Nullable Node lastChild() {
        final int size = childNodeSize();
        if (size == 0) return null;
        List<Node> children = ensureChildNodes();
        return children.get(size - 1);
    }

    /**
     * Perform a depth-first traversal through this node and its descendants.
     * @param nodeVisitor the visitor callbacks to perform on each node
     * @return this node, for chaining
     */
    public Node traverse(NodeVisitor nodeVisitor) {
        Validate.notNull(nodeVisitor);
        NodeTraversor.traverse(nodeVisitor, this);
        return this;
    }

    /**
     Perform the supplied action on this Node and each of its descendants, during a depth-first traversal. Nodes may be
     inspected, changed, added, replaced, or removed.
     @param action the function to perform on the node
     @return this Node, for chaining
     @see Element#forEach(Consumer)
     */
    public Node forEachNode(Consumer<? super Node> action) {
        Validate.notNull(action);
        nodeStream().forEach(action);
        return this;
    }

    /**
     * Perform a depth-first filtered traversal through this node and its descendants.
     * @param nodeFilter the filter callbacks to perform on each node
     * @return this node, for chaining
     */
    public Node filter(NodeFilter nodeFilter) {
        Validate.notNull(nodeFilter);
        NodeTraversor.filter(nodeFilter, this);
        return this;
    }

    /**
     Returns a Stream of this Node and all of its descendant Nodes. The stream has document order.
     @return a stream of all nodes.
     @see Element#stream()
     @since 1.17.1
     */
    public Stream<Node> nodeStream() {
        return NodeUtils.stream(this, Node.class);
    }

    /**
     Returns a Stream of this and descendant nodes, containing only nodes of the specified type. The stream has document
     order.
     @param type the class of nodes to include in the stream
     @param <T> the node type
     @return a stream of nodes filtered by type.
     @see Element#stream()
     @since 1.17.1
     */
    public <T extends Node> Stream<T> nodeStream(Class<T> type) {
        return NodeUtils.stream(this, type);
    }

    /**
     Get the outer HTML of this node. For example, on a {@code p} element, may return {@code <p>Para</p>}.
     @return outer HTML
     @see Element#html()
     @see Element#text()
     */
    public String outerHtml() {
        StringBuilder accum = StringUtil.borrowBuilder();
        outerHtml(accum);
        return StringUtil.releaseBuilder(accum);
    }

    /**
     * Write the outer HTML of this node and its descendants to the accumulator, by traversing with a visitor that
     * calls each node's head and tail writers.
     * @param accum accumulator to place HTML into
     * @throws SerializationException if appending to the accumulator fails with an IOException
     */
    protected void outerHtml(Appendable accum) {
        NodeTraversor.traverse(new OuterHtmlVisitor(accum, NodeUtils.outputSettings(this)), this);
    }

    /**
     Get the outer HTML of this node.
     @param accum accumulator to place HTML into
     @param depth the depth passed by the traversal for this node
     @param out the output settings to use
     @throws IOException if appending to the given accumulator fails.
     */
    abstract void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException;

    /**
     Write the trailing part of this node's outer HTML; called from the traversal's tail callback.
     @param accum accumulator to place HTML into
     @param depth the depth passed by the traversal for this node
     @param out the output settings to use
     @throws IOException if appending to the given accumulator fails.
     */
    abstract void outerHtmlTail(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException;

    /**
     * Write this node and its children to the given {@link Appendable}.
     *
     * @param appendable the {@link Appendable} to write to.
     * @param <T> the type of the Appendable
     * @return the supplied {@link Appendable}, for chaining.
     */
    public <T extends Appendable> T html(T appendable) {
        outerHtml(appendable);
        return appendable;
    }

    /**
     Get the source range (start and end positions) in the original input source from which this node was parsed.
     Position tracking must be enabled prior to parsing the content. For an Element, this will be the positions of the
     start tag.
     @return the range for the start of the node, or {@code untracked} if its range was not tracked.
     @see org.jsoup.parser.Parser#setTrackPosition(boolean)
     @see Range#isImplicit()
     @see Element#endSourceRange()
     @see Attributes#sourceRange(String name)
     @since 1.15.2
     */
    public Range sourceRange() {
        return Range.of(this, true);
    }

    /** Test if this node is the first child, or first following blank text. */
    final boolean isEffectivelyFirst() {
        if (siblingIndex == 0) return true;
        if (siblingIndex == 1) {
            final Node prev = previousSibling();
            return prev instanceof TextNode && (((TextNode) prev).isBlank());
        }
        return false;
    }

    /**
     * Gets this node's outer HTML.
     * @return outer HTML.
     * @see #outerHtml()
     */
    @Override
    public String toString() {
        return outerHtml();
    }

    /**
     * Append a newline followed by indentation padding to the accumulator. The padding is obtained from
     * {@code StringUtil.padding}, given {@code depth * out.indentAmount()} and {@code out.maxPaddingWidth()}.
     * @param accum accumulator to write to
     * @param depth the depth of the node being written
     * @param out the output settings supplying the indent amount and maximum padding width
     * @throws IOException if appending to the given accumulator fails.
     */
    protected void indent(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
        accum.append('\n').append(StringUtil.padding(depth * out.indentAmount(), out.maxPaddingWidth()));
    }

    /**
     * Check if this node is the same instance of another (object identity test).
     * <p>For a node value equality check, see {@link #hasSameValue(Object)}</p>
     * @param o other object to compare to
     * @return true if the other object is this same node instance
     * @see Node#hasSameValue(Object)
     */
    @Override
    public boolean equals(@Nullable Object o) {
        // implemented just so that javadoc is clear this is an identity test
        return this == o;
    }

    /**
     Provides a hashCode for this Node, based on its object identity. Changes to the Node's content will not impact the
     result.
     @return an object identity based hashcode for this Node
     */
    @Override
    public int hashCode() {
        // implemented so that javadoc and scanners are clear this is an identity test
        return super.hashCode();
    }

    /**
     * Check if this node has the same content as another node. A node is considered the same if its name, attributes and content match the
     * other node; particularly its position in the tree does not influence its similarity.
     * @param o other object to compare to
     * @return true if the content of this node is the same as the other
     */
    public boolean hasSameValue(@Nullable Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        // value equality is defined by comparing the serialized outer HTML of both nodes
        return this.outerHtml().equals(((Node) o).outerHtml());
    }

    /**
     * Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings or
     * parent node. As a stand-alone object, any changes made to the clone or any of its children will not impact the
     * original node.
     * <p>
     * The cloned node may be adopted into another Document or node structure using {@link Element#appendChild(Node)}.
     * @return a stand-alone cloned node, including clones of any children
     * @see #shallowClone()
     */
    @SuppressWarnings("MethodDoesntCallSuperMethod") // because it does call super.clone in doClone - analysis just isn't following
    @Override
    public Node clone() {
        Node thisClone = doClone(null); // splits for orphan

        // Queue up nodes that need their children cloned (BFS).
        final LinkedList<Node> nodesToProcess = new LinkedList<>();
        nodesToProcess.add(thisClone);

        while (!nodesToProcess.isEmpty()) {
            Node currParent = nodesToProcess.remove();

            final int size = currParent.childNodeSize();
            for (int i = 0; i < size; i++) {
                // replace each child reference in the cloned parent with a clone of that child
                final List<Node> childNodes = currParent.ensureChildNodes();
                Node childClone = childNodes.get(i).doClone(currParent);
                childNodes.set(i, childClone);
                nodesToProcess.add(childClone);
            }
        }

        return thisClone;
    }

    /**
     * Create a stand-alone, shallow copy of this node. None of its children (if any) will be cloned, and it will have
     * no parent or sibling nodes.
     * @return a single independent copy of this node
     * @see #clone()
     */
    public Node shallowClone() {
        return doClone(null);
    }

    /*
     * Return a clone of the node using the given parent (which can be null).
     * Not a deep copy of children.
     */
    protected Node doClone(@Nullable Node parent) {
        Node clone;

        try {
            clone = (Node) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }

        clone.parentNode = parent; // can be null, to create an orphan split
        clone.siblingIndex = parent == null ? 0 : siblingIndex;
        // if not keeping the parent, shallowClone the ownerDocument to preserve its settings
        if (parent == null && !(this instanceof Document)) {
            Document doc = ownerDocument();
            if (doc != null) {
                Document docClone = doc.shallowClone();
                clone.parentNode = docClone;
                docClone.ensureChildNodes().add(clone);
            }
        }

        return clone;
    }

    /**
     * Visitor that serializes nodes by calling each visited node's head and tail writers against a shared
     * accumulator. IOExceptions from the accumulator are rethrown as {@link SerializationException}.
     */
    private static class OuterHtmlVisitor implements NodeVisitor {
        private final Appendable accum;
        private final Document.OutputSettings out;

        OuterHtmlVisitor(Appendable accum, Document.OutputSettings out) {
            this.accum = accum;
            this.out = out;
        }

        @Override public void head(Node node, int depth) {
            try {
                node.outerHtmlHead(accum, depth, out);
            } catch (IOException exception) {
                throw new SerializationException(exception);
            }
        }

        @Override public void tail(Node node, int depth) {
            if (!node.nodeName().equals("#text")) { // saves a void hit.
                try {
                    node.outerHtmlTail(accum, depth, out);
                } catch (IOException exception) {
                    throw new SerializationException(exception);
                }
            }
        }
    }
}