001package org.jsoup.nodes; 002 003import org.jsoup.helper.ChangeNotifyingArrayList; 004import org.jsoup.helper.Validate; 005import org.jsoup.internal.StringUtil; 006import org.jsoup.parser.ParseSettings; 007import org.jsoup.parser.Parser; 008import org.jsoup.parser.Tag; 009import org.jsoup.parser.TokenQueue; 010import org.jsoup.select.Collector; 011import org.jsoup.select.Elements; 012import org.jsoup.select.Evaluator; 013import org.jsoup.select.NodeFilter; 014import org.jsoup.select.NodeTraversor; 015import org.jsoup.select.NodeVisitor; 016import org.jsoup.select.QueryParser; 017import org.jsoup.select.Selector; 018import org.jspecify.annotations.Nullable; 019 020import java.io.IOException; 021import java.lang.ref.WeakReference; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collection; 025import java.util.Collections; 026import java.util.LinkedHashSet; 027import java.util.List; 028import java.util.Map; 029import java.util.Set; 030import java.util.concurrent.atomic.AtomicBoolean; 031import java.util.function.Consumer; 032import java.util.regex.Pattern; 033import java.util.regex.PatternSyntaxException; 034import java.util.stream.Collectors; 035import java.util.stream.Stream; 036 037import static org.jsoup.internal.Normalizer.normalize; 038import static org.jsoup.nodes.TextNode.lastCharIsWhitespace; 039import static org.jsoup.parser.Parser.NamespaceHtml; 040import static org.jsoup.parser.TokenQueue.escapeCssIdentifier; 041 042/** 043 An HTML Element consists of a tag name, attributes, and child nodes (including text nodes and other elements). 044 <p> 045 From an Element, you can extract data, traverse the node graph, and manipulate the HTML. 046*/ 047public class Element extends Node { 048 private static final List<Element> EmptyChildren = Collections.emptyList(); 049 private static final Pattern ClassSplit = Pattern.compile("\\s+"); 050 private static final String BaseUriKey = Attributes.internalKey("baseUri"); 051 private Tag tag; 052 private @Nullable WeakReference<List<Element>> shadowChildrenRef; // points to child elements shadowed from node children 053 List<Node> childNodes; 054 @Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null 055 056 /** 057 * Create a new, standalone element, in the specified namespace. 058 * @param tag tag name 059 * @param namespace namespace for this element 060 */ 061 public Element(String tag, String namespace) { 062 this(Tag.valueOf(tag, namespace, ParseSettings.preserveCase), null); 063 } 064 065 /** 066 * Create a new, standalone element, in the HTML namespace. 067 * @param tag tag name 068 * @see #Element(String tag, String namespace) 069 */ 070 public Element(String tag) { 071 this(Tag.valueOf(tag, Parser.NamespaceHtml, ParseSettings.preserveCase), "", null); 072 } 073 074 /** 075 * Create a new, standalone Element. (Standalone in that it has no parent.) 076 * 077 * @param tag tag of this element 078 * @param baseUri the base URI (optional, may be null to inherit from parent, or "" to clear parent's) 079 * @param attributes initial attributes (optional, may be null) 080 * @see #appendChild(Node) 081 * @see #appendElement(String) 082 */ 083 public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes) { 084 Validate.notNull(tag); 085 childNodes = EmptyNodes; 086 this.attributes = attributes; 087 this.tag = tag; 088 if (baseUri != null) 089 this.setBaseUri(baseUri); 090 } 091 092 /** 093 * Create a new Element from a Tag and a base URI. 094 * 095 * @param tag element tag 096 * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any. 097 * @see Tag#valueOf(String, ParseSettings) 098 */ 099 public Element(Tag tag, @Nullable String baseUri) { 100 this(tag, baseUri, null); 101 } 102 103 /** 104 Internal test to check if a nodelist object has been created. 105 */ 106 protected boolean hasChildNodes() { 107 return childNodes != EmptyNodes; 108 } 109 110 @Override protected List<Node> ensureChildNodes() { 111 if (childNodes == EmptyNodes) { 112 childNodes = new NodeList(this, 4); 113 } 114 return childNodes; 115 } 116 117 @Override 118 protected boolean hasAttributes() { 119 return attributes != null; 120 } 121 122 @Override 123 public Attributes attributes() { 124 if (attributes == null) // not using hasAttributes, as doesn't clear warning 125 attributes = new Attributes(); 126 return attributes; 127 } 128 129 @Override 130 public String baseUri() { 131 return searchUpForAttribute(this, BaseUriKey); 132 } 133 134 private static String searchUpForAttribute(final Element start, final String key) { 135 Element el = start; 136 while (el != null) { 137 if (el.attributes != null && el.attributes.hasKey(key)) 138 return el.attributes.get(key); 139 el = el.parent(); 140 } 141 return ""; 142 } 143 144 @Override 145 protected void doSetBaseUri(String baseUri) { 146 attributes().put(BaseUriKey, baseUri); 147 } 148 149 @Override 150 public int childNodeSize() { 151 return childNodes.size(); 152 } 153 154 @Override 155 public String nodeName() { 156 return tag.getName(); 157 } 158 159 /** 160 * Get the name of the tag for this element. E.g. {@code div}. If you are using {@link ParseSettings#preserveCase 161 * case preserving parsing}, this will return the source's original case. 162 * 163 * @return the tag name 164 */ 165 public String tagName() { 166 return tag.getName(); 167 } 168 169 /** 170 * Get the normalized name of this Element's tag. This will always be the lower-cased version of the tag, regardless 171 * of the tag case preserving setting of the parser. For e.g., {@code <DIV>} and {@code <div>} both have a 172 * normal name of {@code div}. 173 * @return normal name 174 */ 175 @Override 176 public String normalName() { 177 return tag.normalName(); 178 } 179 180 /** 181 Test if this Element has the specified normalized name, and is in the specified namespace. 182 * @param normalName a normalized element name (e.g. {@code div}). 183 * @param namespace the namespace 184 * @return true if the element's normal name matches exactly, and is in the specified namespace 185 * @since 1.17.2 186 */ 187 public boolean elementIs(String normalName, String namespace) { 188 return tag.normalName().equals(normalName) && tag.namespace().equals(namespace); 189 } 190 191 /** 192 * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with 193 * {@code el.tagName("div");}. 194 * 195 * @param tagName new tag name for this element 196 * @return this element, for chaining 197 * @see Elements#tagName(String) 198 */ 199 public Element tagName(String tagName) { 200 return tagName(tagName, tag.namespace()); 201 } 202 203 /** 204 * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with 205 * {@code el.tagName("div");}. 206 * 207 * @param tagName new tag name for this element 208 * @param namespace the new namespace for this element 209 * @return this element, for chaining 210 * @see Elements#tagName(String) 211 */ 212 public Element tagName(String tagName, String namespace) { 213 Validate.notEmptyParam(tagName, "tagName"); 214 Validate.notEmptyParam(namespace, "namespace"); 215 tag = Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()); // maintains the case option of the original parse 216 return this; 217 } 218 219 /** 220 * Get the Tag for this element. 221 * 222 * @return the tag object 223 */ 224 public Tag tag() { 225 return tag; 226 } 227 228 /** 229 * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element 230 * {@code <span> == false}). 231 * 232 * @return true if block, false if not (and thus inline) 233 */ 234 public boolean isBlock() { 235 return tag.isBlock(); 236 } 237 238 /** 239 * Get the {@code id} attribute of this element. 240 * 241 * @return The id attribute, if present, or an empty string if not. 242 */ 243 public String id() { 244 return attributes != null ? attributes.getIgnoreCase("id") :""; 245 } 246 247 /** 248 Set the {@code id} attribute of this element. 249 @param id the ID value to use 250 @return this Element, for chaining 251 */ 252 public Element id(String id) { 253 Validate.notNull(id); 254 attr("id", id); 255 return this; 256 } 257 258 /** 259 * Set an attribute value on this element. If this element already has an attribute with the 260 * key, its value is updated; otherwise, a new attribute is added. 261 * 262 * @return this element 263 */ 264 @Override public Element attr(String attributeKey, String attributeValue) { 265 super.attr(attributeKey, attributeValue); 266 return this; 267 } 268 269 /** 270 * Set a boolean attribute value on this element. Setting to <code>true</code> sets the attribute value to "" and 271 * marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the attribute 272 * with the same key if it exists. 273 * 274 * @param attributeKey the attribute key 275 * @param attributeValue the attribute value 276 * 277 * @return this element 278 */ 279 public Element attr(String attributeKey, boolean attributeValue) { 280 attributes().put(attributeKey, attributeValue); 281 return this; 282 } 283 284 /** 285 Get an Attribute by key. Changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc 286 will cascade back to this Element. 287 @param key the (case-sensitive) attribute key 288 @return the Attribute for this key, or null if not present. 289 @since 1.17.2 290 */ 291 @Nullable public Attribute attribute(String key) { 292 return hasAttributes() ? attributes().attribute(key) : null; 293 } 294 295 /** 296 * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key 297 * starting with "data-" is included the dataset. 298 * <p> 299 * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset 300 * {@code package=jsoup, language=java}. 301 * <p> 302 * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected 303 * in the other map. 304 * <p> 305 * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector. 306 * @return a map of {@code key=value} custom data attributes. 307 */ 308 public Map<String, String> dataset() { 309 return attributes().dataset(); 310 } 311 312 @Override @Nullable 313 public final Element parent() { 314 return (Element) parentNode; 315 } 316 317 /** 318 * Get this element's parent and ancestors, up to the document root. 319 * @return this element's stack of parents, starting with the closest first. 320 */ 321 public Elements parents() { 322 Elements parents = new Elements(); 323 Element parent = this.parent(); 324 while (parent != null && !parent.nameIs("#root")) { 325 parents.add(parent); 326 parent = parent.parent(); 327 } 328 return parents; 329 } 330 331 /** 332 * Get a child element of this element, by its 0-based index number. 333 * <p> 334 * Note that an element can have both mixed Nodes and Elements as children. This method inspects 335 * a filtered list of children that are elements, and the index is based on that filtered list. 336 * </p> 337 * 338 * @param index the index number of the element to retrieve 339 * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException} 340 * @see #childNode(int) 341 */ 342 public Element child(int index) { 343 return childElementsList().get(index); 344 } 345 346 /** 347 * Get the number of child nodes of this element that are elements. 348 * <p> 349 * This method works on the same filtered list like {@link #child(int)}. Use {@link #childNodes()} and {@link 350 * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.) 351 * </p> 352 * 353 * @return the number of child nodes that are elements 354 * @see #children() 355 * @see #child(int) 356 */ 357 public int childrenSize() { 358 return childElementsList().size(); 359 } 360 361 /** 362 * Get this element's child elements. 363 * <p> 364 * This is effectively a filter on {@link #childNodes()} to get Element nodes. 365 * </p> 366 * @return child elements. If this element has no children, returns an empty list. 367 * @see #childNodes() 368 */ 369 public Elements children() { 370 return new Elements(childElementsList()); 371 } 372 373 /** 374 * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated. 375 * TODO - think about pulling this out as a helper as there are other shadow lists (like in Attributes) kept around. 376 * @return a list of child elements 377 */ 378 List<Element> childElementsList() { 379 if (childNodeSize() == 0) 380 return EmptyChildren; // short circuit creating empty 381 382 List<Element> children; 383 if (shadowChildrenRef == null || (children = shadowChildrenRef.get()) == null) { 384 final int size = childNodes.size(); 385 children = new ArrayList<>(size); 386 //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here) 387 for (int i = 0; i < size; i++) { 388 final Node node = childNodes.get(i); 389 if (node instanceof Element) 390 children.add((Element) node); 391 } 392 shadowChildrenRef = new WeakReference<>(children); 393 } 394 return children; 395 } 396 397 /** 398 * Clears the cached shadow child elements. 399 */ 400 @Override 401 void nodelistChanged() { 402 super.nodelistChanged(); 403 shadowChildrenRef = null; 404 } 405 406 /** 407 Returns a Stream of this Element and all of its descendant Elements. The stream has document order. 408 @return a stream of this element and its descendants. 409 @see #nodeStream() 410 @since 1.17.1 411 */ 412 public Stream<Element> stream() { 413 return NodeUtils.stream(this, Element.class); 414 } 415 416 private <T> List<T> filterNodes(Class<T> clazz) { 417 return childNodes.stream() 418 .filter(clazz::isInstance) 419 .map(clazz::cast) 420 .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList)); 421 } 422 423 /** 424 * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated. 425 * <p> 426 * This is effectively a filter on {@link #childNodes()} to get Text nodes. 427 * @return child text nodes. If this element has no text nodes, returns an 428 * empty list. 429 * </p> 430 * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected: 431 * <ul> 432 * <li>{@code p.text()} = {@code "One Two Three Four"}</li> 433 * <li>{@code p.ownText()} = {@code "One Three Four"}</li> 434 * <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li> 435 * <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li> 436 * <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li> 437 * </ul> 438 */ 439 public List<TextNode> textNodes() { 440 return filterNodes(TextNode.class); 441 } 442 443 /** 444 * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated. 445 * <p> 446 * This is effectively a filter on {@link #childNodes()} to get Data nodes. 447 * </p> 448 * @return child data nodes. If this element has no data nodes, returns an 449 * empty list. 450 * @see #data() 451 */ 452 public List<DataNode> dataNodes() { 453 return filterNodes(DataNode.class); 454 } 455 456 /** 457 * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements 458 * may include this element, or any of its children. 459 * <p>This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because 460 * multiple filters can be combined, e.g.:</p> 461 * <ul> 462 * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes) 463 * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely) 464 * </ul> 465 * <p>See the query syntax documentation in {@link org.jsoup.select.Selector}.</p> 466 * <p>Also known as {@code querySelectorAll()} in the Web DOM.</p> 467 * 468 * @param cssQuery a {@link Selector} CSS-like query 469 * @return an {@link Elements} list containing elements that match the query (empty if none match) 470 * @see Selector selector query syntax 471 * @see QueryParser#parse(String) 472 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. 473 */ 474 public Elements select(String cssQuery) { 475 return Selector.select(cssQuery, this); 476 } 477 478 /** 479 * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but 480 * may be useful if you are running the same query many times (on many documents) and want to save the overhead of 481 * repeatedly parsing the CSS query. 482 * @param evaluator an element evaluator 483 * @return an {@link Elements} list containing elements that match the query (empty if none match) 484 */ 485 public Elements select(Evaluator evaluator) { 486 return Selector.select(evaluator, this); 487 } 488 489 /** 490 * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context. 491 * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query 492 * execution stops on the first hit.</p> 493 * <p>Also known as {@code querySelector()} in the Web DOM.</p> 494 * @param cssQuery cssQuery a {@link Selector} CSS-like query 495 * @return the first matching element, or <b>{@code null}</b> if there is no match. 496 * @see #expectFirst(String) 497 */ 498 public @Nullable Element selectFirst(String cssQuery) { 499 return Selector.selectFirst(cssQuery, this); 500 } 501 502 /** 503 * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or 504 * {@code null} if none match. 505 * 506 * @param evaluator an element evaluator 507 * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none 508 * match. 509 */ 510 public @Nullable Element selectFirst(Evaluator evaluator) { 511 return Collector.findFirst(evaluator, this); 512 } 513 514 /** 515 Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}. This 516 is useful if you want to simply abort processing on a failed match. 517 @param cssQuery a {@link Selector} CSS-like query 518 @return the first matching element 519 @throws IllegalArgumentException if no match is found 520 @since 1.15.2 521 */ 522 public Element expectFirst(String cssQuery) { 523 return (Element) Validate.ensureNotNull( 524 Selector.selectFirst(cssQuery, this), 525 parent() != null ? 526 "No elements matched the query '%s' on element '%s'.": 527 "No elements matched the query '%s' in the document." 528 , cssQuery, this.tagName() 529 ); 530 } 531 532 /** 533 * Checks if this element matches the given {@link Selector} CSS query. Also knows as {@code matches()} in the Web 534 * DOM. 535 * 536 * @param cssQuery a {@link Selector} CSS query 537 * @return if this element matches the query 538 */ 539 public boolean is(String cssQuery) { 540 return is(QueryParser.parse(cssQuery)); 541 } 542 543 /** 544 * Check if this element matches the given evaluator. 545 * @param evaluator an element evaluator 546 * @return if this element matches 547 */ 548 public boolean is(Evaluator evaluator) { 549 return evaluator.matches(this.root(), this); 550 } 551 552 /** 553 * Find the closest element up the tree of parents that matches the specified CSS query. Will return itself, an 554 * ancestor, or {@code null} if there is no such matching element. 555 * @param cssQuery a {@link Selector} CSS query 556 * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not 557 * found. 558 */ 559 public @Nullable Element closest(String cssQuery) { 560 return closest(QueryParser.parse(cssQuery)); 561 } 562 563 /** 564 * Find the closest element up the tree of parents that matches the specified evaluator. Will return itself, an 565 * ancestor, or {@code null} if there is no such matching element. 566 * @param evaluator a query evaluator 567 * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not 568 * found. 569 */ 570 public @Nullable Element closest(Evaluator evaluator) { 571 Validate.notNull(evaluator); 572 Element el = this; 573 final Element root = root(); 574 do { 575 if (evaluator.matches(root, el)) 576 return el; 577 el = el.parent(); 578 } while (el != null); 579 return null; 580 } 581 582 /** 583 Find Elements that match the supplied {@index XPath} expression. 584 <p>Note that for convenience of writing the Xpath expression, namespaces are disabled, and queries can be 585 expressed using the element's local name only.</p> 586 <p>By default, XPath 1.0 expressions are supported. If you would to use XPath 2.0 or higher, you can provide an 587 alternate XPathFactory implementation:</p> 588 <ol> 589 <li>Add the implementation to your classpath. E.g. to use <a href="https://www.saxonica.com/products/products.xml">Saxon-HE</a>, add <a href="https://mvnrepository.com/artifact/net.sf.saxon/Saxon-HE">net.sf.saxon:Saxon-HE</a> to your build.</li> 590 <li>Set the system property <code>javax.xml.xpath.XPathFactory:jsoup</code> to the implementing classname. E.g.:<br> 591 <code>System.setProperty(W3CDom.XPathFactoryProperty, "net.sf.saxon.xpath.XPathFactoryImpl");</code> 592 </li> 593 </ol> 594 595 @param xpath XPath expression 596 @return matching elements, or an empty list if none match. 597 @see #selectXpath(String, Class) 598 @since 1.14.3 599 */ 600 public Elements selectXpath(String xpath) { 601 return new Elements(NodeUtils.selectXpath(xpath, this, Element.class)); 602 } 603 604 /** 605 Find Nodes that match the supplied XPath expression. 606 <p>For example, to select TextNodes under {@code p} elements: </p> 607 <pre>List<TextNode> textNodes = doc.selectXpath("//body//p//text()", TextNode.class);</pre> 608 <p>Note that in the jsoup DOM, Attribute objects are not Nodes. To directly select attribute values, do something 609 like:</p> 610 <pre>List<String> hrefs = doc.selectXpath("//a").eachAttr("href");</pre> 611 @param xpath XPath expression 612 @param nodeType the jsoup node type to return 613 @see #selectXpath(String) 614 @return a list of matching nodes 615 @since 1.14.3 616 */ 617 public <T extends Node> List<T> selectXpath(String xpath, Class<T> nodeType) { 618 return NodeUtils.selectXpath(xpath, this, nodeType); 619 } 620 621 /** 622 * Insert a node to the end of this Element's children. The incoming node will be re-parented. 623 * 624 * @param child node to add. 625 * @return this Element, for chaining 626 * @see #prependChild(Node) 627 * @see #insertChildren(int, Collection) 628 */ 629 public Element appendChild(Node child) { 630 Validate.notNull(child); 631 632 // was - Node#addChildren(child). short-circuits an array create and a loop. 633 reparentChild(child); 634 ensureChildNodes(); 635 childNodes.add(child); 636 child.setSiblingIndex(childNodes.size() - 1); 637 return this; 638 } 639 640 /** 641 Insert the given nodes to the end of this Element's children. 642 643 @param children nodes to add 644 @return this Element, for chaining 645 @see #insertChildren(int, Collection) 646 */ 647 public Element appendChildren(Collection<? extends Node> children) { 648 insertChildren(-1, children); 649 return this; 650 } 651 652 /** 653 * Add this element to the supplied parent element, as its next child. 654 * 655 * @param parent element to which this element will be appended 656 * @return this element, so that you can continue modifying the element 657 */ 658 public Element appendTo(Element parent) { 659 Validate.notNull(parent); 660 parent.appendChild(this); 661 return this; 662 } 663 664 /** 665 * Add a node to the start of this element's children. 666 * 667 * @param child node to add. 668 * @return this element, so that you can add more child nodes or elements. 669 */ 670 public Element prependChild(Node child) { 671 Validate.notNull(child); 672 673 addChildren(0, child); 674 return this; 675 } 676 677 /** 678 Insert the given nodes to the start of this Element's children. 679 680 @param children nodes to add 681 @return this Element, for chaining 682 @see #insertChildren(int, Collection) 683 */ 684 public Element prependChildren(Collection<? extends Node> children) { 685 insertChildren(0, children); 686 return this; 687 } 688 689 690 /** 691 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 692 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 693 * 694 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the 695 * end 696 * @param children child nodes to insert 697 * @return this element, for chaining. 698 */ 699 public Element insertChildren(int index, Collection<? extends Node> children) { 700 Validate.notNull(children, "Children collection to be inserted must not be null."); 701 int currentSize = childNodeSize(); 702 if (index < 0) index += currentSize +1; // roll around 703 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 704 705 ArrayList<Node> nodes = new ArrayList<>(children); 706 Node[] nodeArray = nodes.toArray(new Node[0]); 707 addChildren(index, nodeArray); 708 return this; 709 } 710 711 /** 712 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 713 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 714 * 715 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the 716 * end 717 * @param children child nodes to insert 718 * @return this element, for chaining. 719 */ 720 public Element insertChildren(int index, Node... children) { 721 Validate.notNull(children, "Children collection to be inserted must not be null."); 722 int currentSize = childNodeSize(); 723 if (index < 0) index += currentSize +1; // roll around 724 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 725 726 addChildren(index, children); 727 return this; 728 } 729 730 /** 731 * Create a new element by tag name, and add it as this Element's last child. 732 * 733 * @param tagName the name of the tag (e.g. {@code div}). 734 * @return the new element, to allow you to add content to it, e.g.: 735 * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");} 736 */ 737 public Element appendElement(String tagName) { 738 return appendElement(tagName, tag.namespace()); 739 } 740 741 /** 742 * Create a new element by tag name and namespace, add it as this Element's last child. 743 * 744 * @param tagName the name of the tag (e.g. {@code div}). 745 * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml}) 746 * @return the new element, in the specified namespace 747 */ 748 public Element appendElement(String tagName, String namespace) { 749 Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri()); 750 appendChild(child); 751 return child; 752 } 753 754 /** 755 * Create a new element by tag name, and add it as this Element's first child. 756 * 757 * @param tagName the name of the tag (e.g. {@code div}). 758 * @return the new element, to allow you to add content to it, e.g.: 759 * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");} 760 */ 761 public Element prependElement(String tagName) { 762 return prependElement(tagName, tag.namespace()); 763 } 764 765 /** 766 * Create a new element by tag name and namespace, and add it as this Element's first child. 767 * 768 * @param tagName the name of the tag (e.g. {@code div}). 769 * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml}) 770 * @return the new element, in the specified namespace 771 */ 772 public Element prependElement(String tagName, String namespace) { 773 Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri()); 774 prependChild(child); 775 return child; 776 } 777 778 /** 779 * Create and append a new TextNode to this element. 780 * 781 * @param text the (un-encoded) text to add 782 * @return this element 783 */ 784 public Element appendText(String text) { 785 Validate.notNull(text); 786 TextNode node = new TextNode(text); 787 appendChild(node); 788 return this; 789 } 790 791 /** 792 * Create and prepend a new TextNode to this element. 793 * 794 * @param text the decoded text to add 795 * @return this element 796 */ 797 public Element prependText(String text) { 798 Validate.notNull(text); 799 TextNode node = new TextNode(text); 800 prependChild(node); 801 return this; 802 } 803 804 /** 805 * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children. 806 * @param html HTML to add inside this element, after the existing HTML 807 * @return this element 808 * @see #html(String) 809 */ 810 public Element append(String html) { 811 Validate.notNull(html); 812 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); 813 addChildren(nodes.toArray(new Node[0])); 814 return this; 815 } 816 817 /** 818 * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children. 819 * @param html HTML to add inside this element, before the existing HTML 820 * @return this element 821 * @see #html(String) 822 */ 823 public Element prepend(String html) { 824 Validate.notNull(html); 825 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); 826 addChildren(0, nodes.toArray(new Node[0])); 827 return this; 828 } 829 830 /** 831 * Insert the specified HTML into the DOM before this element (as a preceding sibling). 832 * 833 * @param html HTML to add before this element 834 * @return this element, for chaining 835 * @see #after(String) 836 */ 837 @Override 838 public Element before(String html) { 839 return (Element) super.before(html); 840 } 841 842 /** 843 * Insert the specified node into the DOM before this node (as a preceding sibling). 844 * @param node to add before this element 845 * @return this Element, for chaining 846 * @see #after(Node) 847 */ 848 @Override 849 public Element before(Node node) { 850 return (Element) super.before(node); 851 } 852 853 /** 854 * Insert the specified HTML into the DOM after this element (as a following sibling). 855 * 856 * @param html HTML to add after this element 857 * @return this element, for chaining 858 * @see #before(String) 859 */ 860 @Override 861 public Element after(String html) { 862 return (Element) super.after(html); 863 } 864 865 /** 866 * Insert the specified node into the DOM after this node (as a following sibling). 867 * @param node to add after this element 868 * @return this element, for chaining 869 * @see #before(Node) 870 */ 871 @Override 872 public Element after(Node node) { 873 return (Element) super.after(node); 874 } 875 876 /** 877 * Remove all the element's child nodes. Any attributes are left as-is. Each child node has its parent set to 878 * {@code null}. 879 * @return this element 880 */ 881 @Override 882 public Element empty() { 883 // Detach each of the children -> parent links: 884 for (Node child : childNodes) { 885 child.parentNode = null; 886 } 887 childNodes.clear(); 888 return this; 889 } 890 891 /** 892 * Wrap the supplied HTML around this element. 893 * 894 * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. 895 * @return this element, for chaining. 896 */ 897 @Override 898 public Element wrap(String html) { 899 return (Element) super.wrap(html); 900 } 901 902 /** 903 * Get a CSS selector that will uniquely select this element. 904 * <p> 905 * If the element has an ID, returns #id; 906 * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'}, 907 * followed by a unique selector for the element (tag.class.class:nth-child(n)). 908 * </p> 909 * 910 * @return the CSS Path that can be used to retrieve the element in a selector. 911 */ 912 public String cssSelector() { 913 if (id().length() > 0) { 914 // prefer to return the ID - but check that it's actually unique first! 915 String idSel = "#" + escapeCssIdentifier(id()); 916 Document doc = ownerDocument(); 917 if (doc != null) { 918 Elements els = doc.select(idSel); 919 if (els.size() == 1 && els.get(0) == this) // otherwise, continue to the nth-child impl 920 return idSel; 921 } else { 922 return idSel; // no ownerdoc, return the ID selector 923 } 924 } 925 926 StringBuilder selector = StringUtil.borrowBuilder(); 927 Element el = this; 928 while (el != null && !(el instanceof Document)) { 929 selector.insert(0, el.cssSelectorComponent()); 930 el = el.parent(); 931 } 932 return StringUtil.releaseBuilder(selector); 933 } 934 935 private String cssSelectorComponent() { 936 // Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag 937 String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|"); 938 StringBuilder selector = StringUtil.borrowBuilder().append(tagName); 939 String classes = classNames().stream().map(TokenQueue::escapeCssIdentifier) 940 .collect(StringUtil.joining(".")); 941 if (!classes.isEmpty()) 942 selector.append('.').append(classes); 943 944 if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node 945 return StringUtil.releaseBuilder(selector); 946 947 selector.insert(0, " > "); 948 if (parent().select(selector.toString()).size() > 1) 949 selector.append(String.format( 950 ":nth-child(%d)", elementSiblingIndex() + 1)); 951 952 return StringUtil.releaseBuilder(selector); 953 } 954 955 /** 956 * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling 957 * of itself, so will not be included in the returned list. 958 * @return sibling elements 959 */ 960 public Elements siblingElements() { 961 if (parentNode == null) 962 return new Elements(0); 963 964 List<Element> elements = parent().childElementsList(); 965 Elements siblings = new Elements(elements.size() - 1); 966 for (Element el: elements) 967 if (el != this) 968 siblings.add(el); 969 return siblings; 970 } 971 972 /** 973 * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s, 974 * the {@code nextElementSibling} of the first {@code p} is the second {@code p}. 975 * <p> 976 * This is similar to {@link #nextSibling()}, but specifically finds only Elements 977 * </p> 978 * @return the next element, or null if there is no next element 979 * @see #previousElementSibling() 980 */ 981 public @Nullable Element nextElementSibling() { 982 Node next = this; 983 while ((next = next.nextSibling()) != null) { 984 if (next instanceof Element) return (Element) next; 985 } 986 return null; 987 } 988 989 /** 990 * Get each of the sibling elements that come after this element. 991 * 992 * @return each of the element siblings after this element, or an empty list if there are no next sibling elements 993 */ 994 public Elements nextElementSiblings() { 995 return nextElementSiblings(true); 996 } 997 998 /** 999 * Gets the previous element sibling of this element. 1000 * @return the previous element, or null if there is no previous element 1001 * @see #nextElementSibling() 1002 */ 1003 public @Nullable Element previousElementSibling() { 1004 Node prev = this; 1005 while ((prev = prev.previousSibling()) != null) { 1006 if (prev instanceof Element) return (Element) prev; 1007 } 1008 return null; 1009 } 1010 1011 /** 1012 * Get each of the element siblings before this element. 1013 * 1014 * @return the previous element siblings, or an empty list if there are none. 1015 */ 1016 public Elements previousElementSiblings() { 1017 return nextElementSiblings(false); 1018 } 1019 1020 private Elements nextElementSiblings(boolean next) { 1021 Elements els = new Elements(); 1022 if (parentNode == null) 1023 return els; 1024 els.add(this); 1025 return next ? els.nextAll() : els.prevAll(); 1026 } 1027 1028 /** 1029 * Gets the first Element sibling of this element. That may be this element. 1030 * @return the first sibling that is an element (aka the parent's first element child) 1031 */ 1032 public Element firstElementSibling() { 1033 if (parent() != null) { 1034 //noinspection DataFlowIssue (not nullable, would be this is no other sibs) 1035 return parent().firstElementChild(); 1036 } else 1037 return this; // orphan is its own first sibling 1038 } 1039 1040 /** 1041 * Get the list index of this element in its element sibling list. I.e. if this is the first element 1042 * sibling, returns 0. 1043 * @return position in element sibling list 1044 */ 1045 public int elementSiblingIndex() { 1046 if (parent() == null) return 0; 1047 return indexInList(this, parent().childElementsList()); 1048 } 1049 1050 /** 1051 * Gets the last element sibling of this element. That may be this element. 1052 * @return the last sibling that is an element (aka the parent's last element child) 1053 */ 1054 public Element lastElementSibling() { 1055 if (parent() != null) { 1056 //noinspection DataFlowIssue (not nullable, would be this if no other sibs) 1057 return parent().lastElementChild(); 1058 } else 1059 return this; 1060 } 1061 1062 private static <E extends Element> int indexInList(Element search, List<E> elements) { 1063 final int size = elements.size(); 1064 for (int i = 0; i < size; i++) { 1065 if (elements.get(i) == search) 1066 return i; 1067 } 1068 return 0; 1069 } 1070 1071 /** 1072 Gets the first child of this Element that is an Element, or {@code null} if there is none. 1073 @return the first Element child node, or null. 1074 @see #firstChild() 1075 @see #lastElementChild() 1076 @since 1.15.2 1077 */ 1078 public @Nullable Element firstElementChild() { 1079 Node child = firstChild(); 1080 while (child != null) { 1081 if (child instanceof Element) return (Element) child; 1082 child = child.nextSibling(); 1083 } 1084 return null; 1085 } 1086 1087 /** 1088 Gets the last child of this Element that is an Element, or @{code null} if there is none. 1089 @return the last Element child node, or null. 1090 @see #lastChild() 1091 @see #firstElementChild() 1092 @since 1.15.2 1093 */ 1094 public @Nullable Element lastElementChild() { 1095 Node child = lastChild(); 1096 while (child != null) { 1097 if (child instanceof Element) return (Element) child; 1098 child = child.previousSibling(); 1099 } 1100 return null; 1101 } 1102 1103 // DOM type methods 1104 1105 /** 1106 * Finds elements, including and recursively under this element, with the specified tag name. 1107 * @param tagName The tag name to search for (case insensitively). 1108 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match. 1109 */ 1110 public Elements getElementsByTag(String tagName) { 1111 Validate.notEmpty(tagName); 1112 tagName = normalize(tagName); 1113 1114 return Collector.collect(new Evaluator.Tag(tagName), this); 1115 } 1116 1117 /** 1118 * Find an element by ID, including or under this element. 1119 * <p> 1120 * Note that this finds the first matching ID, starting with this element. If you search down from a different 1121 * starting point, it is possible to find a different element by ID. For unique element by ID within a Document, 1122 * use {@link Document#getElementById(String)} 1123 * @param id The ID to search for. 1124 * @return The first matching element by ID, starting with this element, or null if none found. 1125 */ 1126 public @Nullable Element getElementById(String id) { 1127 Validate.notEmpty(id); 1128 1129 Elements elements = Collector.collect(new Evaluator.Id(id), this); 1130 if (elements.size() > 0) 1131 return elements.get(0); 1132 else 1133 return null; 1134 } 1135 1136 /** 1137 * Find elements that have this class, including or under this element. Case-insensitive. 1138 * <p> 1139 * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method 1140 * checks each class, so you can find the above with {@code el.getElementsByClass("header");}. 1141 * 1142 * @param className the name of the class to search for. 1143 * @return elements with the supplied class name, empty if none 1144 * @see #hasClass(String) 1145 * @see #classNames() 1146 */ 1147 public Elements getElementsByClass(String className) { 1148 Validate.notEmpty(className); 1149 1150 return Collector.collect(new Evaluator.Class(className), this); 1151 } 1152 1153 /** 1154 * Find elements that have a named attribute set. Case-insensitive. 1155 * 1156 * @param key name of the attribute, e.g. {@code href} 1157 * @return elements that have this attribute, empty if none 1158 */ 1159 public Elements getElementsByAttribute(String key) { 1160 Validate.notEmpty(key); 1161 key = key.trim(); 1162 1163 return Collector.collect(new Evaluator.Attribute(key), this); 1164 } 1165 1166 /** 1167 * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements 1168 * that have HTML5 datasets. 1169 * @param keyPrefix name prefix of the attribute e.g. {@code data-} 1170 * @return elements that have attribute names that start with the prefix, empty if none. 1171 */ 1172 public Elements getElementsByAttributeStarting(String keyPrefix) { 1173 Validate.notEmpty(keyPrefix); 1174 keyPrefix = keyPrefix.trim(); 1175 1176 return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this); 1177 } 1178 1179 /** 1180 * Find elements that have an attribute with the specific value. Case-insensitive. 1181 * 1182 * @param key name of the attribute 1183 * @param value value of the attribute 1184 * @return elements that have this attribute with this value, empty if none 1185 */ 1186 public Elements getElementsByAttributeValue(String key, String value) { 1187 return Collector.collect(new Evaluator.AttributeWithValue(key, value), this); 1188 } 1189 1190 /** 1191 * Find elements that either do not have this attribute, or have it with a different value. Case-insensitive. 1192 * 1193 * @param key name of the attribute 1194 * @param value value of the attribute 1195 * @return elements that do not have a matching attribute 1196 */ 1197 public Elements getElementsByAttributeValueNot(String key, String value) { 1198 return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this); 1199 } 1200 1201 /** 1202 * Find elements that have attributes that start with the value prefix. Case-insensitive. 1203 * 1204 * @param key name of the attribute 1205 * @param valuePrefix start of attribute value 1206 * @return elements that have attributes that start with the value prefix 1207 */ 1208 public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) { 1209 return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this); 1210 } 1211 1212 /** 1213 * Find elements that have attributes that end with the value suffix. Case-insensitive. 1214 * 1215 * @param key name of the attribute 1216 * @param valueSuffix end of the attribute value 1217 * @return elements that have attributes that end with the value suffix 1218 */ 1219 public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) { 1220 return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this); 1221 } 1222 1223 /** 1224 * Find elements that have attributes whose value contains the match string. Case-insensitive. 1225 * 1226 * @param key name of the attribute 1227 * @param match substring of value to search for 1228 * @return elements that have attributes containing this text 1229 */ 1230 public Elements getElementsByAttributeValueContaining(String key, String match) { 1231 return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this); 1232 } 1233 1234 /** 1235 * Find elements that have an attribute whose value matches the supplied regular expression. 1236 * @param key name of the attribute 1237 * @param pattern compiled regular expression to match against attribute values 1238 * @return elements that have attributes matching this regular expression 1239 */ 1240 public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) { 1241 return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this); 1242 1243 } 1244 1245 /** 1246 * Find elements that have attributes whose values match the supplied regular expression. 1247 * @param key name of the attribute 1248 * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1249 * @return elements that have attributes matching this regular expression 1250 */ 1251 public Elements getElementsByAttributeValueMatching(String key, String regex) { 1252 Pattern pattern; 1253 try { 1254 pattern = Pattern.compile(regex); 1255 } catch (PatternSyntaxException e) { 1256 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1257 } 1258 return getElementsByAttributeValueMatching(key, pattern); 1259 } 1260 1261 /** 1262 * Find elements whose sibling index is less than the supplied index. 1263 * @param index 0-based index 1264 * @return elements less than index 1265 */ 1266 public Elements getElementsByIndexLessThan(int index) { 1267 return Collector.collect(new Evaluator.IndexLessThan(index), this); 1268 } 1269 1270 /** 1271 * Find elements whose sibling index is greater than the supplied index. 1272 * @param index 0-based index 1273 * @return elements greater than index 1274 */ 1275 public Elements getElementsByIndexGreaterThan(int index) { 1276 return Collector.collect(new Evaluator.IndexGreaterThan(index), this); 1277 } 1278 1279 /** 1280 * Find elements whose sibling index is equal to the supplied index. 1281 * @param index 0-based index 1282 * @return elements equal to index 1283 */ 1284 public Elements getElementsByIndexEquals(int index) { 1285 return Collector.collect(new Evaluator.IndexEquals(index), this); 1286 } 1287 1288 /** 1289 * Find elements that contain the specified string. The search is case-insensitive. The text may appear directly 1290 * in the element, or in any of its descendants. 1291 * @param searchText to look for in the element's text 1292 * @return elements that contain the string, case-insensitive. 1293 * @see Element#text() 1294 */ 1295 public Elements getElementsContainingText(String searchText) { 1296 return Collector.collect(new Evaluator.ContainsText(searchText), this); 1297 } 1298 1299 /** 1300 * Find elements that directly contain the specified string. The search is case-insensitive. The text must appear directly 1301 * in the element, not in any of its descendants. 1302 * @param searchText to look for in the element's own text 1303 * @return elements that contain the string, case-insensitive. 1304 * @see Element#ownText() 1305 */ 1306 public Elements getElementsContainingOwnText(String searchText) { 1307 return Collector.collect(new Evaluator.ContainsOwnText(searchText), this); 1308 } 1309 1310 /** 1311 * Find elements whose text matches the supplied regular expression. 1312 * @param pattern regular expression to match text against 1313 * @return elements matching the supplied regular expression. 1314 * @see Element#text() 1315 */ 1316 public Elements getElementsMatchingText(Pattern pattern) { 1317 return Collector.collect(new Evaluator.Matches(pattern), this); 1318 } 1319 1320 /** 1321 * Find elements whose text matches the supplied regular expression. 1322 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1323 * @return elements matching the supplied regular expression. 1324 * @see Element#text() 1325 */ 1326 public Elements getElementsMatchingText(String regex) { 1327 Pattern pattern; 1328 try { 1329 pattern = Pattern.compile(regex); 1330 } catch (PatternSyntaxException e) { 1331 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1332 } 1333 return getElementsMatchingText(pattern); 1334 } 1335 1336 /** 1337 * Find elements whose own text matches the supplied regular expression. 1338 * @param pattern regular expression to match text against 1339 * @return elements matching the supplied regular expression. 1340 * @see Element#ownText() 1341 */ 1342 public Elements getElementsMatchingOwnText(Pattern pattern) { 1343 return Collector.collect(new Evaluator.MatchesOwn(pattern), this); 1344 } 1345 1346 /** 1347 * Find elements whose own text matches the supplied regular expression. 1348 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1349 * @return elements matching the supplied regular expression. 1350 * @see Element#ownText() 1351 */ 1352 public Elements getElementsMatchingOwnText(String regex) { 1353 Pattern pattern; 1354 try { 1355 pattern = Pattern.compile(regex); 1356 } catch (PatternSyntaxException e) { 1357 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1358 } 1359 return getElementsMatchingOwnText(pattern); 1360 } 1361 1362 /** 1363 * Find all elements under this element (including self, and children of children). 1364 * 1365 * @return all elements 1366 */ 1367 public Elements getAllElements() { 1368 return Collector.collect(new Evaluator.AllElements(), this); 1369 } 1370 1371 /** 1372 Gets the <b>normalized, combined text</b> of this element and all its children. Whitespace is normalized and 1373 trimmed. 1374 <p>For example, given HTML {@code <p>Hello <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there 1375 now!"} 1376 <p>If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not 1377 children), use {@link #ownText()} 1378 <p>Note that this method returns the textual content that would be presented to a reader. The contents of data 1379 nodes (such as {@code <script>} tags) are not considered text. Use {@link #data()} or {@link #html()} to retrieve 1380 that content. 1381 1382 @return decoded, normalized text, or empty string if none. 1383 @see #wholeText() 1384 @see #ownText() 1385 @see #textNodes() 1386 */ 1387 public String text() { 1388 final StringBuilder accum = StringUtil.borrowBuilder(); 1389 NodeTraversor.traverse(new TextAccumulator(accum), this); 1390 return StringUtil.releaseBuilder(accum).trim(); 1391 } 1392 1393 private static class TextAccumulator implements NodeVisitor { 1394 private final StringBuilder accum; 1395 1396 public TextAccumulator(StringBuilder accum) { 1397 this.accum = accum; 1398 } 1399 1400 @Override public void head(Node node, int depth) { 1401 if (node instanceof TextNode) { 1402 TextNode textNode = (TextNode) node; 1403 appendNormalisedText(accum, textNode); 1404 } else if (node instanceof Element) { 1405 Element element = (Element) node; 1406 if (accum.length() > 0 && 1407 (element.isBlock() || element.nameIs("br")) && 1408 !lastCharIsWhitespace(accum)) 1409 accum.append(' '); 1410 } 1411 } 1412 1413 @Override public void tail(Node node, int depth) { 1414 // make sure there is a space between block tags and immediately following text nodes or inline elements <div>One</div>Two should be "One Two". 1415 if (node instanceof Element) { 1416 Element element = (Element) node; 1417 Node next = node.nextSibling(); 1418 if (element.isBlock() && (next instanceof TextNode || next instanceof Element && !((Element) next).tag.formatAsBlock()) && !lastCharIsWhitespace(accum)) 1419 accum.append(' '); 1420 } 1421 1422 } 1423 } 1424 1425 /** 1426 Get the non-normalized, decoded text of this element and its children, including only any newlines and spaces 1427 present in the original source. 1428 @return decoded, non-normalized text 1429 @see #text() 1430 @see #wholeOwnText() 1431 */ 1432 public String wholeText() { 1433 return wholeTextOf(nodeStream()); 1434 } 1435 1436 private static String wholeTextOf(Stream<Node> stream) { 1437 return stream.map(node -> { 1438 if (node instanceof TextNode) return ((TextNode) node).getWholeText(); 1439 if (node.nameIs("br")) return "\n"; 1440 return ""; 1441 }).collect(StringUtil.joining("")); 1442 } 1443 1444 /** 1445 Get the non-normalized, decoded text of this element, <b>not including</b> any child elements, including any 1446 newlines and spaces present in the original source. 1447 @return decoded, non-normalized text that is a direct child of this Element 1448 @see #text() 1449 @see #wholeText() 1450 @see #ownText() 1451 @since 1.15.1 1452 */ 1453 public String wholeOwnText() { 1454 return wholeTextOf(childNodes.stream()); 1455 } 1456 1457 /** 1458 * Gets the (normalized) text owned by this element only; does not get the combined text of all children. 1459 * <p> 1460 * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"}, 1461 * whereas {@code p.text()} returns {@code "Hello there now!"}. 1462 * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element. 1463 * 1464 * @return decoded text, or empty string if none. 1465 * @see #text() 1466 * @see #textNodes() 1467 */ 1468 public String ownText() { 1469 StringBuilder sb = StringUtil.borrowBuilder(); 1470 ownText(sb); 1471 return StringUtil.releaseBuilder(sb).trim(); 1472 } 1473 1474 private void ownText(StringBuilder accum) { 1475 for (int i = 0; i < childNodeSize(); i++) { 1476 Node child = childNodes.get(i); 1477 if (child instanceof TextNode) { 1478 TextNode textNode = (TextNode) child; 1479 appendNormalisedText(accum, textNode); 1480 } else if (child.nameIs("br") && !lastCharIsWhitespace(accum)) { 1481 accum.append(" "); 1482 } 1483 } 1484 } 1485 1486 private static void appendNormalisedText(StringBuilder accum, TextNode textNode) { 1487 String text = textNode.getWholeText(); 1488 if (preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode) 1489 accum.append(text); 1490 else 1491 StringUtil.appendNormalisedWhitespace(accum, text, lastCharIsWhitespace(accum)); 1492 } 1493 1494 static boolean preserveWhitespace(@Nullable Node node) { 1495 // looks only at this element and five levels up, to prevent recursion & needless stack searches 1496 if (node instanceof Element) { 1497 Element el = (Element) node; 1498 int i = 0; 1499 do { 1500 if (el.tag.preserveWhitespace()) 1501 return true; 1502 el = el.parent(); 1503 i++; 1504 } while (i < 6 && el != null); 1505 } 1506 return false; 1507 } 1508 1509 /** 1510 * Set the text of this element. Any existing contents (text or elements) will be cleared. 1511 * <p>As a special case, for {@code <script>} and {@code <style>} tags, the input text will be treated as data, 1512 * not visible text.</p> 1513 * @param text decoded text 1514 * @return this element 1515 */ 1516 public Element text(String text) { 1517 Validate.notNull(text); 1518 empty(); 1519 // special case for script/style in HTML: should be data node 1520 Document owner = ownerDocument(); 1521 // an alternate impl would be to run through the parser 1522 if (owner != null && owner.parser().isContentForTagData(normalName())) 1523 appendChild(new DataNode(text)); 1524 else 1525 appendChild(new TextNode(text)); 1526 1527 return this; 1528 } 1529 1530 /** 1531 Checks if the current element or any of its child elements contain non-whitespace text. 1532 @return {@code true} if the element has non-blank text content, {@code false} otherwise. 1533 */ 1534 public boolean hasText() { 1535 AtomicBoolean hasText = new AtomicBoolean(false); 1536 filter((node, depth) -> { 1537 if (node instanceof TextNode) { 1538 TextNode textNode = (TextNode) node; 1539 if (!textNode.isBlank()) { 1540 hasText.set(true); 1541 return NodeFilter.FilterResult.STOP; 1542 } 1543 } 1544 return NodeFilter.FilterResult.CONTINUE; 1545 }); 1546 return hasText.get(); 1547 } 1548 1549 /** 1550 * Get the combined data of this element. Data is e.g. the inside of a {@code <script>} tag. Note that data is NOT the 1551 * text of the element. Use {@link #text()} to get the text that would be visible to a user, and {@code data()} 1552 * for the contents of scripts, comments, CSS styles, etc. 1553 * 1554 * @return the data, or empty string if none 1555 * 1556 * @see #dataNodes() 1557 */ 1558 public String data() { 1559 StringBuilder sb = StringUtil.borrowBuilder(); 1560 traverse((childNode, depth) -> { 1561 if (childNode instanceof DataNode) { 1562 DataNode data = (DataNode) childNode; 1563 sb.append(data.getWholeData()); 1564 } else if (childNode instanceof Comment) { 1565 Comment comment = (Comment) childNode; 1566 sb.append(comment.getData()); 1567 } else if (childNode instanceof CDataNode) { 1568 // this shouldn't really happen because the html parser won't see the cdata as anything special when parsing script. 1569 // but in case another type gets through. 1570 CDataNode cDataNode = (CDataNode) childNode; 1571 sb.append(cDataNode.getWholeText()); 1572 } 1573 }); 1574 return StringUtil.releaseBuilder(sb); 1575 } 1576 1577 /** 1578 * Gets the literal value of this element's "class" attribute, which may include multiple class names, space 1579 * separated. (E.g. on <code><div class="header gray"></code> returns, "<code>header gray</code>") 1580 * @return The literal class attribute, or <b>empty string</b> if no class attribute set. 1581 */ 1582 public String className() { 1583 return attr("class").trim(); 1584 } 1585 1586 /** 1587 * Get each of the element's class names. E.g. on element {@code <div class="header gray">}, 1588 * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to 1589 * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them. 1590 * @return set of classnames, empty if no class attribute 1591 */ 1592 public Set<String> classNames() { 1593 String[] names = ClassSplit.split(className()); 1594 Set<String> classNames = new LinkedHashSet<>(Arrays.asList(names)); 1595 classNames.remove(""); // if classNames() was empty, would include an empty class 1596 1597 return classNames; 1598 } 1599 1600 /** 1601 Set the element's {@code class} attribute to the supplied class names. 1602 @param classNames set of classes 1603 @return this element, for chaining 1604 */ 1605 public Element classNames(Set<String> classNames) { 1606 Validate.notNull(classNames); 1607 if (classNames.isEmpty()) { 1608 attributes().remove("class"); 1609 } else { 1610 attributes().put("class", StringUtil.join(classNames, " ")); 1611 } 1612 return this; 1613 } 1614 1615 /** 1616 * Tests if this element has a class. Case-insensitive. 1617 * @param className name of class to check for 1618 * @return true if it does, false if not 1619 */ 1620 // performance sensitive 1621 public boolean hasClass(String className) { 1622 if (attributes == null) 1623 return false; 1624 1625 final String classAttr = attributes.getIgnoreCase("class"); 1626 final int len = classAttr.length(); 1627 final int wantLen = className.length(); 1628 1629 if (len == 0 || len < wantLen) { 1630 return false; 1631 } 1632 1633 // if both lengths are equal, only need compare the className with the attribute 1634 if (len == wantLen) { 1635 return className.equalsIgnoreCase(classAttr); 1636 } 1637 1638 // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations) 1639 boolean inClass = false; 1640 int start = 0; 1641 for (int i = 0; i < len; i++) { 1642 if (Character.isWhitespace(classAttr.charAt(i))) { 1643 if (inClass) { 1644 // white space ends a class name, compare it with the requested one, ignore case 1645 if (i - start == wantLen && classAttr.regionMatches(true, start, className, 0, wantLen)) { 1646 return true; 1647 } 1648 inClass = false; 1649 } 1650 } else { 1651 if (!inClass) { 1652 // we're in a class name : keep the start of the substring 1653 inClass = true; 1654 start = i; 1655 } 1656 } 1657 } 1658 1659 // check the last entry 1660 if (inClass && len - start == wantLen) { 1661 return classAttr.regionMatches(true, start, className, 0, wantLen); 1662 } 1663 1664 return false; 1665 } 1666 1667 /** 1668 Add a class name to this element's {@code class} attribute. 1669 @param className class name to add 1670 @return this element 1671 */ 1672 public Element addClass(String className) { 1673 Validate.notNull(className); 1674 1675 Set<String> classes = classNames(); 1676 classes.add(className); 1677 classNames(classes); 1678 1679 return this; 1680 } 1681 1682 /** 1683 Remove a class name from this element's {@code class} attribute. 1684 @param className class name to remove 1685 @return this element 1686 */ 1687 public Element removeClass(String className) { 1688 Validate.notNull(className); 1689 1690 Set<String> classes = classNames(); 1691 classes.remove(className); 1692 classNames(classes); 1693 1694 return this; 1695 } 1696 1697 /** 1698 Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it. 1699 @param className class name to toggle 1700 @return this element 1701 */ 1702 public Element toggleClass(String className) { 1703 Validate.notNull(className); 1704 1705 Set<String> classes = classNames(); 1706 if (classes.contains(className)) 1707 classes.remove(className); 1708 else 1709 classes.add(className); 1710 classNames(classes); 1711 1712 return this; 1713 } 1714 1715 /** 1716 * Get the value of a form element (input, textarea, etc). 1717 * @return the value of the form element, or empty string if not set. 1718 */ 1719 public String val() { 1720 if (elementIs("textarea", NamespaceHtml)) 1721 return text(); 1722 else 1723 return attr("value"); 1724 } 1725 1726 /** 1727 * Set the value of a form element (input, textarea, etc). 1728 * @param value value to set 1729 * @return this element (for chaining) 1730 */ 1731 public Element val(String value) { 1732 if (elementIs("textarea", NamespaceHtml)) 1733 text(value); 1734 else 1735 attr("value", value); 1736 return this; 1737 } 1738 1739 /** 1740 Get the source range (start and end positions) of the end (closing) tag for this Element. Position tracking must be 1741 enabled prior to parsing the content. 1742 @return the range of the closing tag for this element, or {@code untracked} if its range was not tracked. 1743 @see org.jsoup.parser.Parser#setTrackPosition(boolean) 1744 @see Node#sourceRange() 1745 @see Range#isImplicit() 1746 @since 1.15.2 1747 */ 1748 public Range endSourceRange() { 1749 return Range.of(this, false); 1750 } 1751 1752 boolean shouldIndent(final Document.OutputSettings out) { 1753 return out.prettyPrint() && isFormatAsBlock(out) && !isInlineable(out) && !preserveWhitespace(parentNode); 1754 } 1755 1756 @Override 1757 void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException { 1758 if (shouldIndent(out)) { 1759 if (accum instanceof StringBuilder) { 1760 if (((StringBuilder) accum).length() > 0) 1761 indent(accum, depth, out); 1762 } else { 1763 indent(accum, depth, out); 1764 } 1765 } 1766 accum.append('<').append(tagName()); 1767 if (attributes != null) attributes.html(accum, out); 1768 1769 // selfclosing includes unknown tags, isEmpty defines tags that are always empty 1770 if (childNodes.isEmpty() && tag.isSelfClosing()) { 1771 if (out.syntax() == Document.OutputSettings.Syntax.html && tag.isEmpty()) 1772 accum.append('>'); 1773 else 1774 accum.append(" />"); // <img> in html, <img /> in xml 1775 } 1776 else 1777 accum.append('>'); 1778 } 1779 1780 @Override 1781 void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException { 1782 if (!(childNodes.isEmpty() && tag.isSelfClosing())) { 1783 if (out.prettyPrint() && (!childNodes.isEmpty() && ( 1784 (tag.formatAsBlock() && !preserveWhitespace(parentNode)) || 1785 (out.outline() && (childNodes.size()>1 || (childNodes.size()==1 && (childNodes.get(0) instanceof Element)))) 1786 ))) 1787 indent(accum, depth, out); 1788 accum.append("</").append(tagName()).append('>'); 1789 } 1790 } 1791 1792 /** 1793 * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return 1794 * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.) 1795 * 1796 * @return String of HTML. 1797 * @see #outerHtml() 1798 */ 1799 public String html() { 1800 StringBuilder accum = StringUtil.borrowBuilder(); 1801 html(accum); 1802 String html = StringUtil.releaseBuilder(accum); 1803 return NodeUtils.outputSettings(this).prettyPrint() ? html.trim() : html; 1804 } 1805 1806 @Override 1807 public <T extends Appendable> T html(T appendable) { 1808 final int size = childNodes.size(); 1809 for (int i = 0; i < size; i++) 1810 childNodes.get(i).outerHtml(appendable); 1811 1812 return appendable; 1813 } 1814 1815 /** 1816 * Set this element's inner HTML. Clears the existing HTML first. 1817 * @param html HTML to parse and set into this element 1818 * @return this element 1819 * @see #append(String) 1820 */ 1821 public Element html(String html) { 1822 empty(); 1823 append(html); 1824 return this; 1825 } 1826 1827 @Override 1828 public Element clone() { 1829 return (Element) super.clone(); 1830 } 1831 1832 @Override 1833 public Element shallowClone() { 1834 // simpler than implementing a clone version with no child copy 1835 String baseUri = baseUri(); 1836 if (baseUri.isEmpty()) baseUri = null; // saves setting a blank internal attribute 1837 return new Element(tag, baseUri, attributes == null ? null : attributes.clone()); 1838 } 1839 1840 @Override 1841 protected Element doClone(@Nullable Node parent) { 1842 Element clone = (Element) super.doClone(parent); 1843 clone.attributes = attributes != null ? attributes.clone() : null; 1844 clone.childNodes = new NodeList(clone, childNodes.size()); 1845 clone.childNodes.addAll(childNodes); // the children then get iterated and cloned in Node.clone 1846 1847 return clone; 1848 } 1849 1850 // overrides of Node for call chaining 1851 @Override 1852 public Element clearAttributes() { 1853 if (attributes != null) { 1854 super.clearAttributes(); // keeps internal attributes via iterator 1855 if (attributes.size() == 0) 1856 attributes = null; // only remove entirely if no internal attributes 1857 } 1858 1859 return this; 1860 } 1861 1862 @Override 1863 public Element removeAttr(String attributeKey) { 1864 return (Element) super.removeAttr(attributeKey); 1865 } 1866 1867 @Override 1868 public Element root() { 1869 return (Element) super.root(); // probably a document, but always at least an element 1870 } 1871 1872 @Override 1873 public Element traverse(NodeVisitor nodeVisitor) { 1874 return (Element) super.traverse(nodeVisitor); 1875 } 1876 1877 @Override 1878 public Element forEachNode(Consumer<? super Node> action) { 1879 return (Element) super.forEachNode(action); 1880 } 1881 1882 /** 1883 Perform the supplied action on this Element and each of its descendant Elements, during a depth-first traversal. 1884 Elements may be inspected, changed, added, replaced, or removed. 1885 @param action the function to perform on the element 1886 @return this Element, for chaining 1887 @see Node#forEachNode(Consumer) 1888 @deprecated use {@link #stream()}.{@link Stream#forEach(Consumer) forEach(Consumer)} instead. (Removing this method 1889 so Element can implement Iterable, which this signature conflicts with due to the non-void return.) 1890 */ 1891 @Deprecated 1892 public Element forEach(Consumer<? super Element> action) { 1893 stream().forEach(action); 1894 return this; 1895 } 1896 1897 @Override 1898 public Element filter(NodeFilter nodeFilter) { 1899 return (Element) super.filter(nodeFilter); 1900 } 1901 1902 private static final class NodeList extends ChangeNotifyingArrayList<Node> { 1903 private final Element owner; 1904 1905 NodeList(Element owner, int initialCapacity) { 1906 super(initialCapacity); 1907 this.owner = owner; 1908 } 1909 1910 @Override public void onContentsChanged() { 1911 owner.nodelistChanged(); 1912 } 1913 } 1914 1915 private boolean isFormatAsBlock(Document.OutputSettings out) { 1916 return tag.isBlock() || (parent() != null && parent().tag().formatAsBlock()) || out.outline(); 1917 } 1918 1919 private boolean isInlineable(Document.OutputSettings out) { 1920 if (!tag.isInline()) 1921 return false; 1922 return (parent() == null || parent().isBlock()) 1923 && !isEffectivelyFirst() 1924 && !out.outline() 1925 && !nameIs("br"); 1926 } 1927}