001package org.jsoup.nodes; 002 003import org.jsoup.helper.ChangeNotifyingArrayList; 004import org.jsoup.helper.Validate; 005import org.jsoup.internal.Normalizer; 006import org.jsoup.internal.StringUtil; 007import org.jsoup.parser.ParseSettings; 008import org.jsoup.parser.Parser; 009import org.jsoup.parser.Tag; 010import org.jsoup.parser.TokenQueue; 011import org.jsoup.select.Collector; 012import org.jsoup.select.Elements; 013import org.jsoup.select.Evaluator; 014import org.jsoup.select.NodeFilter; 015import org.jsoup.select.NodeTraversor; 016import org.jsoup.select.NodeVisitor; 017import org.jsoup.select.QueryParser; 018import org.jsoup.select.Selector; 019import org.jspecify.annotations.Nullable; 020 021import java.io.IOException; 022import java.lang.ref.WeakReference; 023import java.util.ArrayList; 024import java.util.Arrays; 025import java.util.Collection; 026import java.util.Collections; 027import java.util.Iterator; 028import java.util.LinkedHashSet; 029import java.util.List; 030import java.util.Map; 031import java.util.Set; 032import java.util.concurrent.atomic.AtomicBoolean; 033import java.util.function.Consumer; 034import java.util.regex.Pattern; 035import java.util.regex.PatternSyntaxException; 036import java.util.stream.Collectors; 037import java.util.stream.Stream; 038 039import static org.jsoup.internal.Normalizer.normalize; 040import static org.jsoup.nodes.Document.OutputSettings.Syntax.html; 041import static org.jsoup.nodes.Document.OutputSettings.Syntax.xml; 042import static org.jsoup.nodes.TextNode.lastCharIsWhitespace; 043import static org.jsoup.parser.Parser.NamespaceHtml; 044import static org.jsoup.parser.TokenQueue.escapeCssIdentifier; 045 046/** 047 An HTML Element consists of a tag name, attributes, and child nodes (including text nodes and other elements). 048 <p> 049 From an Element, you can extract data, traverse the node graph, and manipulate the HTML. 050*/ 051public class Element extends Node implements Iterable<Element> { 052 private static final List<Element> EmptyChildren = Collections.emptyList(); 053 private static final Pattern ClassSplit = Pattern.compile("\\s+"); 054 private static final String BaseUriKey = Attributes.internalKey("baseUri"); 055 private Tag tag; 056 private @Nullable WeakReference<List<Element>> shadowChildrenRef; // points to child elements shadowed from node children 057 List<Node> childNodes; 058 @Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null 059 060 /** 061 * Create a new, standalone element, in the specified namespace. 062 * @param tag tag name 063 * @param namespace namespace for this element 064 */ 065 public Element(String tag, String namespace) { 066 this(Tag.valueOf(tag, namespace, ParseSettings.preserveCase), null); 067 } 068 069 /** 070 * Create a new, standalone element, in the HTML namespace. 071 * @param tag tag name 072 * @see #Element(String tag, String namespace) 073 */ 074 public Element(String tag) { 075 this(Tag.valueOf(tag, Parser.NamespaceHtml, ParseSettings.preserveCase), "", null); 076 } 077 078 /** 079 * Create a new, standalone Element. (Standalone in that it has no parent.) 080 * 081 * @param tag tag of this element 082 * @param baseUri the base URI (optional, may be null to inherit from parent, or "" to clear parent's) 083 * @param attributes initial attributes (optional, may be null) 084 * @see #appendChild(Node) 085 * @see #appendElement(String) 086 */ 087 public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes) { 088 Validate.notNull(tag); 089 childNodes = EmptyNodes; 090 this.attributes = attributes; 091 this.tag = tag; 092 if (baseUri != null) 093 this.setBaseUri(baseUri); 094 } 095 096 /** 097 * Create a new Element from a Tag and a base URI. 098 * 099 * @param tag element tag 100 * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any. 101 * @see Tag#valueOf(String, ParseSettings) 102 */ 103 public Element(Tag tag, @Nullable String baseUri) { 104 this(tag, baseUri, null); 105 } 106 107 /** 108 Internal test to check if a nodelist object has been created. 109 */ 110 protected boolean hasChildNodes() { 111 return childNodes != EmptyNodes; 112 } 113 114 @Override protected List<Node> ensureChildNodes() { 115 if (childNodes == EmptyNodes) { 116 childNodes = new NodeList(this, 4); 117 } 118 return childNodes; 119 } 120 121 @Override 122 protected boolean hasAttributes() { 123 return attributes != null; 124 } 125 126 @Override 127 public Attributes attributes() { 128 if (attributes == null) // not using hasAttributes, as doesn't clear warning 129 attributes = new Attributes(); 130 return attributes; 131 } 132 133 @Override 134 public String baseUri() { 135 return searchUpForAttribute(this, BaseUriKey); 136 } 137 138 private static String searchUpForAttribute(final Element start, final String key) { 139 Element el = start; 140 while (el != null) { 141 if (el.attributes != null && el.attributes.hasKey(key)) 142 return el.attributes.get(key); 143 el = el.parent(); 144 } 145 return ""; 146 } 147 148 @Override 149 protected void doSetBaseUri(String baseUri) { 150 attributes().put(BaseUriKey, baseUri); 151 } 152 153 @Override 154 public int childNodeSize() { 155 return childNodes.size(); 156 } 157 158 @Override 159 public String nodeName() { 160 return tag.getName(); 161 } 162 163 /** 164 * Get the name of the tag for this element. E.g. {@code div}. If you are using {@link ParseSettings#preserveCase 165 * case preserving parsing}, this will return the source's original case. 166 * 167 * @return the tag name 168 */ 169 public String tagName() { 170 return tag.getName(); 171 } 172 173 /** 174 * Get the normalized name of this Element's tag. This will always be the lower-cased version of the tag, regardless 175 * of the tag case preserving setting of the parser. For e.g., {@code <DIV>} and {@code <div>} both have a 176 * normal name of {@code div}. 177 * @return normal name 178 */ 179 @Override 180 public String normalName() { 181 return tag.normalName(); 182 } 183 184 /** 185 Test if this Element has the specified normalized name, and is in the specified namespace. 186 * @param normalName a normalized element name (e.g. {@code div}). 187 * @param namespace the namespace 188 * @return true if the element's normal name matches exactly, and is in the specified namespace 189 * @since 1.17.2 190 */ 191 public boolean elementIs(String normalName, String namespace) { 192 return tag.normalName().equals(normalName) && tag.namespace().equals(namespace); 193 } 194 195 /** 196 * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with 197 * {@code el.tagName("div");}. 198 * 199 * @param tagName new tag name for this element 200 * @return this element, for chaining 201 * @see Elements#tagName(String) 202 */ 203 public Element tagName(String tagName) { 204 return tagName(tagName, tag.namespace()); 205 } 206 207 /** 208 * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with 209 * {@code el.tagName("div");}. 210 * 211 * @param tagName new tag name for this element 212 * @param namespace the new namespace for this element 213 * @return this element, for chaining 214 * @see Elements#tagName(String) 215 */ 216 public Element tagName(String tagName, String namespace) { 217 Validate.notEmptyParam(tagName, "tagName"); 218 Validate.notEmptyParam(namespace, "namespace"); 219 tag = Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()); // maintains the case option of the original parse 220 return this; 221 } 222 223 /** 224 * Get the Tag for this element. 225 * 226 * @return the tag object 227 */ 228 public Tag tag() { 229 return tag; 230 } 231 232 /** 233 * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element 234 * {@code <span> == false}). 235 * 236 * @return true if block, false if not (and thus inline) 237 */ 238 public boolean isBlock() { 239 return tag.isBlock(); 240 } 241 242 /** 243 * Get the {@code id} attribute of this element. 244 * 245 * @return The id attribute, if present, or an empty string if not. 246 */ 247 public String id() { 248 return attributes != null ? attributes.getIgnoreCase("id") :""; 249 } 250 251 /** 252 Set the {@code id} attribute of this element. 253 @param id the ID value to use 254 @return this Element, for chaining 255 */ 256 public Element id(String id) { 257 Validate.notNull(id); 258 attr("id", id); 259 return this; 260 } 261 262 /** 263 * Set an attribute value on this element. If this element already has an attribute with the 264 * key, its value is updated; otherwise, a new attribute is added. 265 * 266 * @return this element 267 */ 268 @Override public Element attr(String attributeKey, String attributeValue) { 269 super.attr(attributeKey, attributeValue); 270 return this; 271 } 272 273 /** 274 * Set a boolean attribute value on this element. Setting to <code>true</code> sets the attribute value to "" and 275 * marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the attribute 276 * with the same key if it exists. 277 * 278 * @param attributeKey the attribute key 279 * @param attributeValue the attribute value 280 * 281 * @return this element 282 */ 283 public Element attr(String attributeKey, boolean attributeValue) { 284 attributes().put(attributeKey, attributeValue); 285 return this; 286 } 287 288 /** 289 Get an Attribute by key. Changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc 290 will cascade back to this Element. 291 @param key the (case-sensitive) attribute key 292 @return the Attribute for this key, or null if not present. 293 @since 1.17.2 294 */ 295 @Nullable public Attribute attribute(String key) { 296 return hasAttributes() ? attributes().attribute(key) : null; 297 } 298 299 /** 300 * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key 301 * starting with "data-" is included the dataset. 302 * <p> 303 * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset 304 * {@code package=jsoup, language=java}. 305 * <p> 306 * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected 307 * in the other map. 308 * <p> 309 * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector. 310 * @return a map of {@code key=value} custom data attributes. 311 */ 312 public Map<String, String> dataset() { 313 return attributes().dataset(); 314 } 315 316 @Override @Nullable 317 public final Element parent() { 318 return (Element) parentNode; 319 } 320 321 /** 322 * Get this element's parent and ancestors, up to the document root. 323 * @return this element's stack of parents, starting with the closest first. 324 */ 325 public Elements parents() { 326 Elements parents = new Elements(); 327 Element parent = this.parent(); 328 while (parent != null && !parent.nameIs("#root")) { 329 parents.add(parent); 330 parent = parent.parent(); 331 } 332 return parents; 333 } 334 335 /** 336 * Get a child element of this element, by its 0-based index number. 337 * <p> 338 * Note that an element can have both mixed Nodes and Elements as children. This method inspects 339 * a filtered list of children that are elements, and the index is based on that filtered list. 340 * </p> 341 * 342 * @param index the index number of the element to retrieve 343 * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException} 344 * @see #childNode(int) 345 */ 346 public Element child(int index) { 347 return childElementsList().get(index); 348 } 349 350 /** 351 * Get the number of child nodes of this element that are elements. 352 * <p> 353 * This method works on the same filtered list like {@link #child(int)}. Use {@link #childNodes()} and {@link 354 * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.) 355 * </p> 356 * 357 * @return the number of child nodes that are elements 358 * @see #children() 359 * @see #child(int) 360 */ 361 public int childrenSize() { 362 return childElementsList().size(); 363 } 364 365 /** 366 * Get this element's child elements. 367 * <p> 368 * This is effectively a filter on {@link #childNodes()} to get Element nodes. 369 * </p> 370 * @return child elements. If this element has no children, returns an empty list. 371 * @see #childNodes() 372 */ 373 public Elements children() { 374 return new Elements(childElementsList()); 375 } 376 377 /** 378 * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated. 379 * TODO - think about pulling this out as a helper as there are other shadow lists (like in Attributes) kept around. 380 * @return a list of child elements 381 */ 382 List<Element> childElementsList() { 383 if (childNodeSize() == 0) 384 return EmptyChildren; // short circuit creating empty 385 386 List<Element> children; 387 if (shadowChildrenRef == null || (children = shadowChildrenRef.get()) == null) { 388 final int size = childNodes.size(); 389 children = new ArrayList<>(size); 390 //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here) 391 for (int i = 0; i < size; i++) { 392 final Node node = childNodes.get(i); 393 if (node instanceof Element) 394 children.add((Element) node); 395 } 396 shadowChildrenRef = new WeakReference<>(children); 397 } 398 return children; 399 } 400 401 /** 402 * Clears the cached shadow child elements. 403 */ 404 @Override 405 void nodelistChanged() { 406 super.nodelistChanged(); 407 shadowChildrenRef = null; 408 } 409 410 /** 411 Returns a Stream of this Element and all of its descendant Elements. The stream has document order. 412 @return a stream of this element and its descendants. 413 @see #nodeStream() 414 @since 1.17.1 415 */ 416 public Stream<Element> stream() { 417 return NodeUtils.stream(this, Element.class); 418 } 419 420 private <T> List<T> filterNodes(Class<T> clazz) { 421 return childNodes.stream() 422 .filter(clazz::isInstance) 423 .map(clazz::cast) 424 .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList)); 425 } 426 427 /** 428 * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated. 429 * <p> 430 * This is effectively a filter on {@link #childNodes()} to get Text nodes. 431 * @return child text nodes. If this element has no text nodes, returns an 432 * empty list. 433 * </p> 434 * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected: 435 * <ul> 436 * <li>{@code p.text()} = {@code "One Two Three Four"}</li> 437 * <li>{@code p.ownText()} = {@code "One Three Four"}</li> 438 * <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li> 439 * <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li> 440 * <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li> 441 * </ul> 442 */ 443 public List<TextNode> textNodes() { 444 return filterNodes(TextNode.class); 445 } 446 447 /** 448 * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated. 449 * <p> 450 * This is effectively a filter on {@link #childNodes()} to get Data nodes. 451 * </p> 452 * @return child data nodes. If this element has no data nodes, returns an 453 * empty list. 454 * @see #data() 455 */ 456 public List<DataNode> dataNodes() { 457 return filterNodes(DataNode.class); 458 } 459 460 /** 461 * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements 462 * may include this element, or any of its children. 463 * <p>This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because 464 * multiple filters can be combined, e.g.:</p> 465 * <ul> 466 * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes) 467 * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely) 468 * </ul> 469 * <p>See the query syntax documentation in {@link org.jsoup.select.Selector}.</p> 470 * <p>Also known as {@code querySelectorAll()} in the Web DOM.</p> 471 * 472 * @param cssQuery a {@link Selector} CSS-like query 473 * @return an {@link Elements} list containing elements that match the query (empty if none match) 474 * @see Selector selector query syntax 475 * @see #select(Evaluator) 476 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. 477 */ 478 public Elements select(String cssQuery) { 479 return Selector.select(cssQuery, this); 480 } 481 482 /** 483 * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but 484 * may be useful if you are running the same query many times (on many documents) and want to save the overhead of 485 * repeatedly parsing the CSS query. 486 * @param evaluator an element evaluator 487 * @return an {@link Elements} list containing elements that match the query (empty if none match) 488 * @see QueryParser#parse(String) 489 */ 490 public Elements select(Evaluator evaluator) { 491 return Selector.select(evaluator, this); 492 } 493 494 /** 495 Selects elements from the given root that match the specified {@link Selector} CSS query, with this element as the 496 starting context, and returns them as a lazy Stream. Matched elements may include this element, or any of its 497 children. 498 <p> 499 Unlike {@link #select(String query)}, which returns a complete list of all matching elements, this method returns a 500 {@link Stream} that processes elements lazily as they are needed. The stream operates in a "pull" model — elements 501 are fetched from the root as the stream is traversed. You can use standard {@code Stream} operations such as 502 {@code filter}, {@code map}, or {@code findFirst} to process elements on demand. 503 </p> 504 505 @param cssQuery a {@link Selector} CSS-like query 506 @return a {@link Stream} containing elements that match the query (empty if none match) 507 @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. 508 @see Selector selector query syntax 509 @see QueryParser#parse(String) 510 @since 1.19.1 511 */ 512 public Stream<Element> selectStream(String cssQuery) { 513 return Selector.selectStream(cssQuery, this); 514 } 515 516 /** 517 Find a Stream of elements that match the supplied Evaluator. 518 519 @param evaluator an element Evaluator 520 @return a {@link Stream} containing elements that match the query (empty if none match) 521 @since 1.19.1 522 */ 523 public Stream<Element> selectStream(Evaluator evaluator) { 524 return Selector.selectStream(evaluator, this); 525 } 526 527 /** 528 * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context. 529 * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query 530 * execution stops on the first hit.</p> 531 * <p>Also known as {@code querySelector()} in the Web DOM.</p> 532 * @param cssQuery cssQuery a {@link Selector} CSS-like query 533 * @return the first matching element, or <b>{@code null}</b> if there is no match. 534 * @see #expectFirst(String) 535 */ 536 public @Nullable Element selectFirst(String cssQuery) { 537 return Selector.selectFirst(cssQuery, this); 538 } 539 540 /** 541 * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or 542 * {@code null} if none match. 543 * 544 * @param evaluator an element evaluator 545 * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none 546 * match. 547 */ 548 public @Nullable Element selectFirst(Evaluator evaluator) { 549 return Collector.findFirst(evaluator, this); 550 } 551 552 /** 553 Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}. This 554 is useful if you want to simply abort processing on a failed match. 555 @param cssQuery a {@link Selector} CSS-like query 556 @return the first matching element 557 @throws IllegalArgumentException if no match is found 558 @since 1.15.2 559 */ 560 public Element expectFirst(String cssQuery) { 561 return (Element) Validate.ensureNotNull( 562 Selector.selectFirst(cssQuery, this), 563 parent() != null ? 564 "No elements matched the query '%s' on element '%s'.": 565 "No elements matched the query '%s' in the document." 566 , cssQuery, this.tagName() 567 ); 568 } 569 570 /** 571 * Checks if this element matches the given {@link Selector} CSS query. Also knows as {@code matches()} in the Web 572 * DOM. 573 * 574 * @param cssQuery a {@link Selector} CSS query 575 * @return if this element matches the query 576 */ 577 public boolean is(String cssQuery) { 578 return is(QueryParser.parse(cssQuery)); 579 } 580 581 /** 582 * Check if this element matches the given evaluator. 583 * @param evaluator an element evaluator 584 * @return if this element matches 585 */ 586 public boolean is(Evaluator evaluator) { 587 return evaluator.matches(this.root(), this); 588 } 589 590 /** 591 * Find the closest element up the tree of parents that matches the specified CSS query. Will return itself, an 592 * ancestor, or {@code null} if there is no such matching element. 593 * @param cssQuery a {@link Selector} CSS query 594 * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not 595 * found. 596 */ 597 public @Nullable Element closest(String cssQuery) { 598 return closest(QueryParser.parse(cssQuery)); 599 } 600 601 /** 602 * Find the closest element up the tree of parents that matches the specified evaluator. Will return itself, an 603 * ancestor, or {@code null} if there is no such matching element. 604 * @param evaluator a query evaluator 605 * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not 606 * found. 607 */ 608 public @Nullable Element closest(Evaluator evaluator) { 609 Validate.notNull(evaluator); 610 Element el = this; 611 final Element root = root(); 612 do { 613 if (evaluator.matches(root, el)) 614 return el; 615 el = el.parent(); 616 } while (el != null); 617 return null; 618 } 619 620 /** 621 Find Elements that match the supplied {@index XPath} expression. 622 <p>Note that for convenience of writing the Xpath expression, namespaces are disabled, and queries can be 623 expressed using the element's local name only.</p> 624 <p>By default, XPath 1.0 expressions are supported. If you would to use XPath 2.0 or higher, you can provide an 625 alternate XPathFactory implementation:</p> 626 <ol> 627 <li>Add the implementation to your classpath. E.g. to use <a href="https://www.saxonica.com/products/products.xml">Saxon-HE</a>, add <a href="https://mvnrepository.com/artifact/net.sf.saxon/Saxon-HE">net.sf.saxon:Saxon-HE</a> to your build.</li> 628 <li>Set the system property <code>javax.xml.xpath.XPathFactory:jsoup</code> to the implementing classname. E.g.:<br> 629 <code>System.setProperty(W3CDom.XPathFactoryProperty, "net.sf.saxon.xpath.XPathFactoryImpl");</code> 630 </li> 631 </ol> 632 633 @param xpath XPath expression 634 @return matching elements, or an empty list if none match. 635 @see #selectXpath(String, Class) 636 @since 1.14.3 637 */ 638 public Elements selectXpath(String xpath) { 639 return new Elements(NodeUtils.selectXpath(xpath, this, Element.class)); 640 } 641 642 /** 643 Find Nodes that match the supplied XPath expression. 644 <p>For example, to select TextNodes under {@code p} elements: </p> 645 <pre>List<TextNode> textNodes = doc.selectXpath("//body//p//text()", TextNode.class);</pre> 646 <p>Note that in the jsoup DOM, Attribute objects are not Nodes. To directly select attribute values, do something 647 like:</p> 648 <pre>List<String> hrefs = doc.selectXpath("//a").eachAttr("href");</pre> 649 @param xpath XPath expression 650 @param nodeType the jsoup node type to return 651 @see #selectXpath(String) 652 @return a list of matching nodes 653 @since 1.14.3 654 */ 655 public <T extends Node> List<T> selectXpath(String xpath, Class<T> nodeType) { 656 return NodeUtils.selectXpath(xpath, this, nodeType); 657 } 658 659 /** 660 * Insert a node to the end of this Element's children. The incoming node will be re-parented. 661 * 662 * @param child node to add. 663 * @return this Element, for chaining 664 * @see #prependChild(Node) 665 * @see #insertChildren(int, Collection) 666 */ 667 public Element appendChild(Node child) { 668 Validate.notNull(child); 669 670 // was - Node#addChildren(child). short-circuits an array create and a loop. 671 reparentChild(child); 672 ensureChildNodes(); 673 childNodes.add(child); 674 child.setSiblingIndex(childNodes.size() - 1); 675 return this; 676 } 677 678 /** 679 Insert the given nodes to the end of this Element's children. 680 681 @param children nodes to add 682 @return this Element, for chaining 683 @see #insertChildren(int, Collection) 684 */ 685 public Element appendChildren(Collection<? extends Node> children) { 686 insertChildren(-1, children); 687 return this; 688 } 689 690 /** 691 * Add this element to the supplied parent element, as its next child. 692 * 693 * @param parent element to which this element will be appended 694 * @return this element, so that you can continue modifying the element 695 */ 696 public Element appendTo(Element parent) { 697 Validate.notNull(parent); 698 parent.appendChild(this); 699 return this; 700 } 701 702 /** 703 * Add a node to the start of this element's children. 704 * 705 * @param child node to add. 706 * @return this element, so that you can add more child nodes or elements. 707 */ 708 public Element prependChild(Node child) { 709 Validate.notNull(child); 710 711 addChildren(0, child); 712 return this; 713 } 714 715 /** 716 Insert the given nodes to the start of this Element's children. 717 718 @param children nodes to add 719 @return this Element, for chaining 720 @see #insertChildren(int, Collection) 721 */ 722 public Element prependChildren(Collection<? extends Node> children) { 723 insertChildren(0, children); 724 return this; 725 } 726 727 728 /** 729 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 730 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 731 * 732 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the 733 * end 734 * @param children child nodes to insert 735 * @return this element, for chaining. 736 */ 737 public Element insertChildren(int index, Collection<? extends Node> children) { 738 Validate.notNull(children, "Children collection to be inserted must not be null."); 739 int currentSize = childNodeSize(); 740 if (index < 0) index += currentSize +1; // roll around 741 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 742 743 ArrayList<Node> nodes = new ArrayList<>(children); 744 Node[] nodeArray = nodes.toArray(new Node[0]); 745 addChildren(index, nodeArray); 746 return this; 747 } 748 749 /** 750 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 751 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 752 * 753 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the 754 * end 755 * @param children child nodes to insert 756 * @return this element, for chaining. 757 */ 758 public Element insertChildren(int index, Node... children) { 759 Validate.notNull(children, "Children collection to be inserted must not be null."); 760 int currentSize = childNodeSize(); 761 if (index < 0) index += currentSize +1; // roll around 762 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 763 764 addChildren(index, children); 765 return this; 766 } 767 768 /** 769 * Create a new element by tag name, and add it as this Element's last child. 770 * 771 * @param tagName the name of the tag (e.g. {@code div}). 772 * @return the new element, to allow you to add content to it, e.g.: 773 * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");} 774 */ 775 public Element appendElement(String tagName) { 776 return appendElement(tagName, tag.namespace()); 777 } 778 779 /** 780 * Create a new element by tag name and namespace, add it as this Element's last child. 781 * 782 * @param tagName the name of the tag (e.g. {@code div}). 783 * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml}) 784 * @return the new element, in the specified namespace 785 */ 786 public Element appendElement(String tagName, String namespace) { 787 Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri()); 788 appendChild(child); 789 return child; 790 } 791 792 /** 793 * Create a new element by tag name, and add it as this Element's first child. 794 * 795 * @param tagName the name of the tag (e.g. {@code div}). 796 * @return the new element, to allow you to add content to it, e.g.: 797 * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");} 798 */ 799 public Element prependElement(String tagName) { 800 return prependElement(tagName, tag.namespace()); 801 } 802 803 /** 804 * Create a new element by tag name and namespace, and add it as this Element's first child. 805 * 806 * @param tagName the name of the tag (e.g. {@code div}). 807 * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml}) 808 * @return the new element, in the specified namespace 809 */ 810 public Element prependElement(String tagName, String namespace) { 811 Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri()); 812 prependChild(child); 813 return child; 814 } 815 816 /** 817 * Create and append a new TextNode to this element. 818 * 819 * @param text the (un-encoded) text to add 820 * @return this element 821 */ 822 public Element appendText(String text) { 823 Validate.notNull(text); 824 TextNode node = new TextNode(text); 825 appendChild(node); 826 return this; 827 } 828 829 /** 830 * Create and prepend a new TextNode to this element. 831 * 832 * @param text the decoded text to add 833 * @return this element 834 */ 835 public Element prependText(String text) { 836 Validate.notNull(text); 837 TextNode node = new TextNode(text); 838 prependChild(node); 839 return this; 840 } 841 842 /** 843 * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children. 844 * @param html HTML to add inside this element, after the existing HTML 845 * @return this element 846 * @see #html(String) 847 */ 848 public Element append(String html) { 849 Validate.notNull(html); 850 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); 851 addChildren(nodes.toArray(new Node[0])); 852 return this; 853 } 854 855 /** 856 * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children. 857 * @param html HTML to add inside this element, before the existing HTML 858 * @return this element 859 * @see #html(String) 860 */ 861 public Element prepend(String html) { 862 Validate.notNull(html); 863 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); 864 addChildren(0, nodes.toArray(new Node[0])); 865 return this; 866 } 867 868 /** 869 * Insert the specified HTML into the DOM before this element (as a preceding sibling). 870 * 871 * @param html HTML to add before this element 872 * @return this element, for chaining 873 * @see #after(String) 874 */ 875 @Override 876 public Element before(String html) { 877 return (Element) super.before(html); 878 } 879 880 /** 881 * Insert the specified node into the DOM before this node (as a preceding sibling). 882 * @param node to add before this element 883 * @return this Element, for chaining 884 * @see #after(Node) 885 */ 886 @Override 887 public Element before(Node node) { 888 return (Element) super.before(node); 889 } 890 891 /** 892 * Insert the specified HTML into the DOM after this element (as a following sibling). 893 * 894 * @param html HTML to add after this element 895 * @return this element, for chaining 896 * @see #before(String) 897 */ 898 @Override 899 public Element after(String html) { 900 return (Element) super.after(html); 901 } 902 903 /** 904 * Insert the specified node into the DOM after this node (as a following sibling). 905 * @param node to add after this element 906 * @return this element, for chaining 907 * @see #before(Node) 908 */ 909 @Override 910 public Element after(Node node) { 911 return (Element) super.after(node); 912 } 913 914 /** 915 * Remove all the element's child nodes. Any attributes are left as-is. Each child node has its parent set to 916 * {@code null}. 917 * @return this element 918 */ 919 @Override 920 public Element empty() { 921 // Detach each of the children -> parent links: 922 for (Node child : childNodes) { 923 child.parentNode = null; 924 } 925 childNodes.clear(); 926 return this; 927 } 928 929 /** 930 * Wrap the supplied HTML around this element. 931 * 932 * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. 933 * @return this element, for chaining. 934 */ 935 @Override 936 public Element wrap(String html) { 937 return (Element) super.wrap(html); 938 } 939 940 /** 941 * Get a CSS selector that will uniquely select this element. 942 * <p> 943 * If the element has an ID, returns #id; 944 * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'}, 945 * followed by a unique selector for the element (tag.class.class:nth-child(n)). 946 * </p> 947 * 948 * @return the CSS Path that can be used to retrieve the element in a selector. 949 */ 950 public String cssSelector() { 951 if (id().length() > 0) { 952 // prefer to return the ID - but check that it's actually unique first! 953 String idSel = "#" + escapeCssIdentifier(id()); 954 Document doc = ownerDocument(); 955 if (doc != null) { 956 Elements els = doc.select(idSel); 957 if (els.size() == 1 && els.get(0) == this) // otherwise, continue to the nth-child impl 958 return idSel; 959 } else { 960 return idSel; // no ownerdoc, return the ID selector 961 } 962 } 963 964 StringBuilder selector = StringUtil.borrowBuilder(); 965 Element el = this; 966 while (el != null && !(el instanceof Document)) { 967 selector.insert(0, el.cssSelectorComponent()); 968 el = el.parent(); 969 } 970 return StringUtil.releaseBuilder(selector); 971 } 972 973 private String cssSelectorComponent() { 974 // Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag 975 String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|"); 976 StringBuilder selector = StringUtil.borrowBuilder().append(tagName); 977 String classes = classNames().stream().map(TokenQueue::escapeCssIdentifier) 978 .collect(StringUtil.joining(".")); 979 if (!classes.isEmpty()) 980 selector.append('.').append(classes); 981 982 if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node 983 return StringUtil.releaseBuilder(selector); 984 985 selector.insert(0, " > "); 986 if (parent().select(selector.toString()).size() > 1) 987 selector.append(String.format( 988 ":nth-child(%d)", elementSiblingIndex() + 1)); 989 990 return StringUtil.releaseBuilder(selector); 991 } 992 993 /** 994 * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling 995 * of itself, so will not be included in the returned list. 996 * @return sibling elements 997 */ 998 public Elements siblingElements() { 999 if (parentNode == null) 1000 return new Elements(0); 1001 1002 List<Element> elements = parent().childElementsList(); 1003 Elements siblings = new Elements(elements.size() - 1); 1004 for (Element el: elements) 1005 if (el != this) 1006 siblings.add(el); 1007 return siblings; 1008 } 1009 1010 /** 1011 * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s, 1012 * the {@code nextElementSibling} of the first {@code p} is the second {@code p}. 1013 * <p> 1014 * This is similar to {@link #nextSibling()}, but specifically finds only Elements 1015 * </p> 1016 * @return the next element, or null if there is no next element 1017 * @see #previousElementSibling() 1018 */ 1019 public @Nullable Element nextElementSibling() { 1020 Node next = this; 1021 while ((next = next.nextSibling()) != null) { 1022 if (next instanceof Element) return (Element) next; 1023 } 1024 return null; 1025 } 1026 1027 /** 1028 * Get each of the sibling elements that come after this element. 1029 * 1030 * @return each of the element siblings after this element, or an empty list if there are no next sibling elements 1031 */ 1032 public Elements nextElementSiblings() { 1033 return nextElementSiblings(true); 1034 } 1035 1036 /** 1037 * Gets the previous element sibling of this element. 1038 * @return the previous element, or null if there is no previous element 1039 * @see #nextElementSibling() 1040 */ 1041 public @Nullable Element previousElementSibling() { 1042 Node prev = this; 1043 while ((prev = prev.previousSibling()) != null) { 1044 if (prev instanceof Element) return (Element) prev; 1045 } 1046 return null; 1047 } 1048 1049 /** 1050 * Get each of the element siblings before this element. 1051 * 1052 * @return the previous element siblings, or an empty list if there are none. 1053 */ 1054 public Elements previousElementSiblings() { 1055 return nextElementSiblings(false); 1056 } 1057 1058 private Elements nextElementSiblings(boolean next) { 1059 Elements els = new Elements(); 1060 if (parentNode == null) 1061 return els; 1062 els.add(this); 1063 return next ? els.nextAll() : els.prevAll(); 1064 } 1065 1066 /** 1067 * Gets the first Element sibling of this element. That may be this element. 1068 * @return the first sibling that is an element (aka the parent's first element child) 1069 */ 1070 public Element firstElementSibling() { 1071 if (parent() != null) { 1072 //noinspection DataFlowIssue (not nullable, would be this is no other sibs) 1073 return parent().firstElementChild(); 1074 } else 1075 return this; // orphan is its own first sibling 1076 } 1077 1078 /** 1079 * Get the list index of this element in its element sibling list. I.e. if this is the first element 1080 * sibling, returns 0. 1081 * @return position in element sibling list 1082 */ 1083 public int elementSiblingIndex() { 1084 if (parent() == null) return 0; 1085 return indexInList(this, parent().childElementsList()); 1086 } 1087 1088 /** 1089 * Gets the last element sibling of this element. That may be this element. 1090 * @return the last sibling that is an element (aka the parent's last element child) 1091 */ 1092 public Element lastElementSibling() { 1093 if (parent() != null) { 1094 //noinspection DataFlowIssue (not nullable, would be this if no other sibs) 1095 return parent().lastElementChild(); 1096 } else 1097 return this; 1098 } 1099 1100 private static <E extends Element> int indexInList(Element search, List<E> elements) { 1101 final int size = elements.size(); 1102 for (int i = 0; i < size; i++) { 1103 if (elements.get(i) == search) 1104 return i; 1105 } 1106 return 0; 1107 } 1108 1109 /** 1110 Gets the first child of this Element that is an Element, or {@code null} if there is none. 1111 @return the first Element child node, or null. 1112 @see #firstChild() 1113 @see #lastElementChild() 1114 @since 1.15.2 1115 */ 1116 public @Nullable Element firstElementChild() { 1117 Node child = firstChild(); 1118 while (child != null) { 1119 if (child instanceof Element) return (Element) child; 1120 child = child.nextSibling(); 1121 } 1122 return null; 1123 } 1124 1125 /** 1126 Gets the last child of this Element that is an Element, or @{code null} if there is none. 1127 @return the last Element child node, or null. 1128 @see #lastChild() 1129 @see #firstElementChild() 1130 @since 1.15.2 1131 */ 1132 public @Nullable Element lastElementChild() { 1133 Node child = lastChild(); 1134 while (child != null) { 1135 if (child instanceof Element) return (Element) child; 1136 child = child.previousSibling(); 1137 } 1138 return null; 1139 } 1140 1141 // DOM type methods 1142 1143 /** 1144 * Finds elements, including and recursively under this element, with the specified tag name. 1145 * @param tagName The tag name to search for (case insensitively). 1146 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match. 1147 */ 1148 public Elements getElementsByTag(String tagName) { 1149 Validate.notEmpty(tagName); 1150 tagName = normalize(tagName); 1151 1152 return Collector.collect(new Evaluator.Tag(tagName), this); 1153 } 1154 1155 /** 1156 * Find an element by ID, including or under this element. 1157 * <p> 1158 * Note that this finds the first matching ID, starting with this element. If you search down from a different 1159 * starting point, it is possible to find a different element by ID. For unique element by ID within a Document, 1160 * use {@link Document#getElementById(String)} 1161 * @param id The ID to search for. 1162 * @return The first matching element by ID, starting with this element, or null if none found. 1163 */ 1164 public @Nullable Element getElementById(String id) { 1165 Validate.notEmpty(id); 1166 return Collector.findFirst(new Evaluator.Id(id), this); 1167 } 1168 1169 /** 1170 * Find elements that have this class, including or under this element. Case-insensitive. 1171 * <p> 1172 * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method 1173 * checks each class, so you can find the above with {@code el.getElementsByClass("header");}. 1174 * 1175 * @param className the name of the class to search for. 1176 * @return elements with the supplied class name, empty if none 1177 * @see #hasClass(String) 1178 * @see #classNames() 1179 */ 1180 public Elements getElementsByClass(String className) { 1181 Validate.notEmpty(className); 1182 1183 return Collector.collect(new Evaluator.Class(className), this); 1184 } 1185 1186 /** 1187 * Find elements that have a named attribute set. Case-insensitive. 1188 * 1189 * @param key name of the attribute, e.g. {@code href} 1190 * @return elements that have this attribute, empty if none 1191 */ 1192 public Elements getElementsByAttribute(String key) { 1193 Validate.notEmpty(key); 1194 key = key.trim(); 1195 1196 return Collector.collect(new Evaluator.Attribute(key), this); 1197 } 1198 1199 /** 1200 * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements 1201 * that have HTML5 datasets. 1202 * @param keyPrefix name prefix of the attribute e.g. {@code data-} 1203 * @return elements that have attribute names that start with the prefix, empty if none. 1204 */ 1205 public Elements getElementsByAttributeStarting(String keyPrefix) { 1206 Validate.notEmpty(keyPrefix); 1207 keyPrefix = keyPrefix.trim(); 1208 1209 return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this); 1210 } 1211 1212 /** 1213 * Find elements that have an attribute with the specific value. Case-insensitive. 1214 * 1215 * @param key name of the attribute 1216 * @param value value of the attribute 1217 * @return elements that have this attribute with this value, empty if none 1218 */ 1219 public Elements getElementsByAttributeValue(String key, String value) { 1220 return Collector.collect(new Evaluator.AttributeWithValue(key, value), this); 1221 } 1222 1223 /** 1224 * Find elements that either do not have this attribute, or have it with a different value. Case-insensitive. 1225 * 1226 * @param key name of the attribute 1227 * @param value value of the attribute 1228 * @return elements that do not have a matching attribute 1229 */ 1230 public Elements getElementsByAttributeValueNot(String key, String value) { 1231 return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this); 1232 } 1233 1234 /** 1235 * Find elements that have attributes that start with the value prefix. Case-insensitive. 1236 * 1237 * @param key name of the attribute 1238 * @param valuePrefix start of attribute value 1239 * @return elements that have attributes that start with the value prefix 1240 */ 1241 public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) { 1242 return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this); 1243 } 1244 1245 /** 1246 * Find elements that have attributes that end with the value suffix. Case-insensitive. 1247 * 1248 * @param key name of the attribute 1249 * @param valueSuffix end of the attribute value 1250 * @return elements that have attributes that end with the value suffix 1251 */ 1252 public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) { 1253 return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this); 1254 } 1255 1256 /** 1257 * Find elements that have attributes whose value contains the match string. Case-insensitive. 1258 * 1259 * @param key name of the attribute 1260 * @param match substring of value to search for 1261 * @return elements that have attributes containing this text 1262 */ 1263 public Elements getElementsByAttributeValueContaining(String key, String match) { 1264 return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this); 1265 } 1266 1267 /** 1268 * Find elements that have an attribute whose value matches the supplied regular expression. 1269 * @param key name of the attribute 1270 * @param pattern compiled regular expression to match against attribute values 1271 * @return elements that have attributes matching this regular expression 1272 */ 1273 public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) { 1274 return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this); 1275 1276 } 1277 1278 /** 1279 * Find elements that have attributes whose values match the supplied regular expression. 1280 * @param key name of the attribute 1281 * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1282 * @return elements that have attributes matching this regular expression 1283 */ 1284 public Elements getElementsByAttributeValueMatching(String key, String regex) { 1285 Pattern pattern; 1286 try { 1287 pattern = Pattern.compile(regex); 1288 } catch (PatternSyntaxException e) { 1289 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1290 } 1291 return getElementsByAttributeValueMatching(key, pattern); 1292 } 1293 1294 /** 1295 * Find elements whose sibling index is less than the supplied index. 1296 * @param index 0-based index 1297 * @return elements less than index 1298 */ 1299 public Elements getElementsByIndexLessThan(int index) { 1300 return Collector.collect(new Evaluator.IndexLessThan(index), this); 1301 } 1302 1303 /** 1304 * Find elements whose sibling index is greater than the supplied index. 1305 * @param index 0-based index 1306 * @return elements greater than index 1307 */ 1308 public Elements getElementsByIndexGreaterThan(int index) { 1309 return Collector.collect(new Evaluator.IndexGreaterThan(index), this); 1310 } 1311 1312 /** 1313 * Find elements whose sibling index is equal to the supplied index. 1314 * @param index 0-based index 1315 * @return elements equal to index 1316 */ 1317 public Elements getElementsByIndexEquals(int index) { 1318 return Collector.collect(new Evaluator.IndexEquals(index), this); 1319 } 1320 1321 /** 1322 * Find elements that contain the specified string. The search is case-insensitive. The text may appear directly 1323 * in the element, or in any of its descendants. 1324 * @param searchText to look for in the element's text 1325 * @return elements that contain the string, case-insensitive. 1326 * @see Element#text() 1327 */ 1328 public Elements getElementsContainingText(String searchText) { 1329 return Collector.collect(new Evaluator.ContainsText(searchText), this); 1330 } 1331 1332 /** 1333 * Find elements that directly contain the specified string. The search is case-insensitive. The text must appear directly 1334 * in the element, not in any of its descendants. 1335 * @param searchText to look for in the element's own text 1336 * @return elements that contain the string, case-insensitive. 1337 * @see Element#ownText() 1338 */ 1339 public Elements getElementsContainingOwnText(String searchText) { 1340 return Collector.collect(new Evaluator.ContainsOwnText(searchText), this); 1341 } 1342 1343 /** 1344 * Find elements whose text matches the supplied regular expression. 1345 * @param pattern regular expression to match text against 1346 * @return elements matching the supplied regular expression. 1347 * @see Element#text() 1348 */ 1349 public Elements getElementsMatchingText(Pattern pattern) { 1350 return Collector.collect(new Evaluator.Matches(pattern), this); 1351 } 1352 1353 /** 1354 * Find elements whose text matches the supplied regular expression. 1355 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1356 * @return elements matching the supplied regular expression. 1357 * @see Element#text() 1358 */ 1359 public Elements getElementsMatchingText(String regex) { 1360 Pattern pattern; 1361 try { 1362 pattern = Pattern.compile(regex); 1363 } catch (PatternSyntaxException e) { 1364 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1365 } 1366 return getElementsMatchingText(pattern); 1367 } 1368 1369 /** 1370 * Find elements whose own text matches the supplied regular expression. 1371 * @param pattern regular expression to match text against 1372 * @return elements matching the supplied regular expression. 1373 * @see Element#ownText() 1374 */ 1375 public Elements getElementsMatchingOwnText(Pattern pattern) { 1376 return Collector.collect(new Evaluator.MatchesOwn(pattern), this); 1377 } 1378 1379 /** 1380 * Find elements whose own text matches the supplied regular expression. 1381 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1382 * @return elements matching the supplied regular expression. 1383 * @see Element#ownText() 1384 */ 1385 public Elements getElementsMatchingOwnText(String regex) { 1386 Pattern pattern; 1387 try { 1388 pattern = Pattern.compile(regex); 1389 } catch (PatternSyntaxException e) { 1390 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1391 } 1392 return getElementsMatchingOwnText(pattern); 1393 } 1394 1395 /** 1396 * Find all elements under this element (including self, and children of children). 1397 * 1398 * @return all elements 1399 */ 1400 public Elements getAllElements() { 1401 return Collector.collect(new Evaluator.AllElements(), this); 1402 } 1403 1404 /** 1405 Gets the <b>normalized, combined text</b> of this element and all its children. Whitespace is normalized and 1406 trimmed. 1407 <p>For example, given HTML {@code <p>Hello <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there 1408 now!"} 1409 <p>If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not 1410 children), use {@link #ownText()} 1411 <p>Note that this method returns the textual content that would be presented to a reader. The contents of data 1412 nodes (such as {@code <script>} tags) are not considered text. Use {@link #data()} or {@link #html()} to retrieve 1413 that content. 1414 1415 @return decoded, normalized text, or empty string if none. 1416 @see #wholeText() 1417 @see #ownText() 1418 @see #textNodes() 1419 */ 1420 public String text() { 1421 final StringBuilder accum = StringUtil.borrowBuilder(); 1422 NodeTraversor.traverse(new TextAccumulator(accum), this); 1423 return StringUtil.releaseBuilder(accum).trim(); 1424 } 1425 1426 private static class TextAccumulator implements NodeVisitor { 1427 private final StringBuilder accum; 1428 1429 public TextAccumulator(StringBuilder accum) { 1430 this.accum = accum; 1431 } 1432 1433 @Override public void head(Node node, int depth) { 1434 if (node instanceof TextNode) { 1435 TextNode textNode = (TextNode) node; 1436 appendNormalisedText(accum, textNode); 1437 } else if (node instanceof Element) { 1438 Element element = (Element) node; 1439 if (accum.length() > 0 && 1440 (element.isBlock() || element.nameIs("br")) && 1441 !lastCharIsWhitespace(accum)) 1442 accum.append(' '); 1443 } 1444 } 1445 1446 @Override public void tail(Node node, int depth) { 1447 // make sure there is a space between block tags and immediately following text nodes or inline elements <div>One</div>Two should be "One Two". 1448 if (node instanceof Element) { 1449 Element element = (Element) node; 1450 Node next = node.nextSibling(); 1451 if (element.isBlock() && (next instanceof TextNode || next instanceof Element && !((Element) next).tag.formatAsBlock()) && !lastCharIsWhitespace(accum)) 1452 accum.append(' '); 1453 } 1454 1455 } 1456 } 1457 1458 /** 1459 Get the non-normalized, decoded text of this element and its children, including only any newlines and spaces 1460 present in the original source. 1461 @return decoded, non-normalized text 1462 @see #text() 1463 @see #wholeOwnText() 1464 */ 1465 public String wholeText() { 1466 return wholeTextOf(nodeStream()); 1467 } 1468 1469 private static String wholeTextOf(Stream<Node> stream) { 1470 return stream.map(node -> { 1471 if (node instanceof TextNode) return ((TextNode) node).getWholeText(); 1472 if (node.nameIs("br")) return "\n"; 1473 return ""; 1474 }).collect(StringUtil.joining("")); 1475 } 1476 1477 /** 1478 Get the non-normalized, decoded text of this element, <b>not including</b> any child elements, including any 1479 newlines and spaces present in the original source. 1480 @return decoded, non-normalized text that is a direct child of this Element 1481 @see #text() 1482 @see #wholeText() 1483 @see #ownText() 1484 @since 1.15.1 1485 */ 1486 public String wholeOwnText() { 1487 return wholeTextOf(childNodes.stream()); 1488 } 1489 1490 /** 1491 * Gets the (normalized) text owned by this element only; does not get the combined text of all children. 1492 * <p> 1493 * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"}, 1494 * whereas {@code p.text()} returns {@code "Hello there now!"}. 1495 * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element. 1496 * 1497 * @return decoded text, or empty string if none. 1498 * @see #text() 1499 * @see #textNodes() 1500 */ 1501 public String ownText() { 1502 StringBuilder sb = StringUtil.borrowBuilder(); 1503 ownText(sb); 1504 return StringUtil.releaseBuilder(sb).trim(); 1505 } 1506 1507 private void ownText(StringBuilder accum) { 1508 for (int i = 0; i < childNodeSize(); i++) { 1509 Node child = childNodes.get(i); 1510 if (child instanceof TextNode) { 1511 TextNode textNode = (TextNode) child; 1512 appendNormalisedText(accum, textNode); 1513 } else if (child.nameIs("br") && !lastCharIsWhitespace(accum)) { 1514 accum.append(" "); 1515 } 1516 } 1517 } 1518 1519 private static void appendNormalisedText(StringBuilder accum, TextNode textNode) { 1520 String text = textNode.getWholeText(); 1521 if (preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode) 1522 accum.append(text); 1523 else 1524 StringUtil.appendNormalisedWhitespace(accum, text, lastCharIsWhitespace(accum)); 1525 } 1526 1527 static boolean preserveWhitespace(@Nullable Node node) { 1528 // looks only at this element and five levels up, to prevent recursion & needless stack searches 1529 if (node instanceof Element) { 1530 Element el = (Element) node; 1531 int i = 0; 1532 do { 1533 if (el.tag.preserveWhitespace()) 1534 return true; 1535 el = el.parent(); 1536 i++; 1537 } while (i < 6 && el != null); 1538 } 1539 return false; 1540 } 1541 1542 /** 1543 * Set the text of this element. Any existing contents (text or elements) will be cleared. 1544 * <p>As a special case, for {@code <script>} and {@code <style>} tags, the input text will be treated as data, 1545 * not visible text.</p> 1546 * @param text decoded text 1547 * @return this element 1548 */ 1549 public Element text(String text) { 1550 Validate.notNull(text); 1551 empty(); 1552 // special case for script/style in HTML: should be data node 1553 Document owner = ownerDocument(); 1554 // an alternate impl would be to run through the parser 1555 if (owner != null && owner.parser().isContentForTagData(normalName())) 1556 appendChild(new DataNode(text)); 1557 else 1558 appendChild(new TextNode(text)); 1559 1560 return this; 1561 } 1562 1563 /** 1564 Checks if the current element or any of its child elements contain non-whitespace text. 1565 @return {@code true} if the element has non-blank text content, {@code false} otherwise. 1566 */ 1567 public boolean hasText() { 1568 AtomicBoolean hasText = new AtomicBoolean(false); 1569 filter((node, depth) -> { 1570 if (node instanceof TextNode) { 1571 TextNode textNode = (TextNode) node; 1572 if (!textNode.isBlank()) { 1573 hasText.set(true); 1574 return NodeFilter.FilterResult.STOP; 1575 } 1576 } 1577 return NodeFilter.FilterResult.CONTINUE; 1578 }); 1579 return hasText.get(); 1580 } 1581 1582 /** 1583 * Get the combined data of this element. Data is e.g. the inside of a {@code <script>} tag. Note that data is NOT the 1584 * text of the element. Use {@link #text()} to get the text that would be visible to a user, and {@code data()} 1585 * for the contents of scripts, comments, CSS styles, etc. 1586 * 1587 * @return the data, or empty string if none 1588 * 1589 * @see #dataNodes() 1590 */ 1591 public String data() { 1592 StringBuilder sb = StringUtil.borrowBuilder(); 1593 traverse((childNode, depth) -> { 1594 if (childNode instanceof DataNode) { 1595 DataNode data = (DataNode) childNode; 1596 sb.append(data.getWholeData()); 1597 } else if (childNode instanceof Comment) { 1598 Comment comment = (Comment) childNode; 1599 sb.append(comment.getData()); 1600 } else if (childNode instanceof CDataNode) { 1601 // this shouldn't really happen because the html parser won't see the cdata as anything special when parsing script. 1602 // but in case another type gets through. 1603 CDataNode cDataNode = (CDataNode) childNode; 1604 sb.append(cDataNode.getWholeText()); 1605 } 1606 }); 1607 return StringUtil.releaseBuilder(sb); 1608 } 1609 1610 /** 1611 * Gets the literal value of this element's "class" attribute, which may include multiple class names, space 1612 * separated. (E.g. on <code><div class="header gray"></code> returns, "<code>header gray</code>") 1613 * @return The literal class attribute, or <b>empty string</b> if no class attribute set. 1614 */ 1615 public String className() { 1616 return attr("class").trim(); 1617 } 1618 1619 /** 1620 * Get each of the element's class names. E.g. on element {@code <div class="header gray">}, 1621 * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to 1622 * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them. 1623 * @return set of classnames, empty if no class attribute 1624 */ 1625 public Set<String> classNames() { 1626 String[] names = ClassSplit.split(className()); 1627 Set<String> classNames = new LinkedHashSet<>(Arrays.asList(names)); 1628 classNames.remove(""); // if classNames() was empty, would include an empty class 1629 1630 return classNames; 1631 } 1632 1633 /** 1634 Set the element's {@code class} attribute to the supplied class names. 1635 @param classNames set of classes 1636 @return this element, for chaining 1637 */ 1638 public Element classNames(Set<String> classNames) { 1639 Validate.notNull(classNames); 1640 if (classNames.isEmpty()) { 1641 attributes().remove("class"); 1642 } else { 1643 attributes().put("class", StringUtil.join(classNames, " ")); 1644 } 1645 return this; 1646 } 1647 1648 /** 1649 * Tests if this element has a class. Case-insensitive. 1650 * @param className name of class to check for 1651 * @return true if it does, false if not 1652 */ 1653 // performance sensitive 1654 public boolean hasClass(String className) { 1655 if (attributes == null) 1656 return false; 1657 1658 final String classAttr = attributes.getIgnoreCase("class"); 1659 final int len = classAttr.length(); 1660 final int wantLen = className.length(); 1661 1662 if (len == 0 || len < wantLen) { 1663 return false; 1664 } 1665 1666 // if both lengths are equal, only need compare the className with the attribute 1667 if (len == wantLen) { 1668 return className.equalsIgnoreCase(classAttr); 1669 } 1670 1671 // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations) 1672 boolean inClass = false; 1673 int start = 0; 1674 for (int i = 0; i < len; i++) { 1675 if (Character.isWhitespace(classAttr.charAt(i))) { 1676 if (inClass) { 1677 // white space ends a class name, compare it with the requested one, ignore case 1678 if (i - start == wantLen && classAttr.regionMatches(true, start, className, 0, wantLen)) { 1679 return true; 1680 } 1681 inClass = false; 1682 } 1683 } else { 1684 if (!inClass) { 1685 // we're in a class name : keep the start of the substring 1686 inClass = true; 1687 start = i; 1688 } 1689 } 1690 } 1691 1692 // check the last entry 1693 if (inClass && len - start == wantLen) { 1694 return classAttr.regionMatches(true, start, className, 0, wantLen); 1695 } 1696 1697 return false; 1698 } 1699 1700 /** 1701 Add a class name to this element's {@code class} attribute. 1702 @param className class name to add 1703 @return this element 1704 */ 1705 public Element addClass(String className) { 1706 Validate.notNull(className); 1707 1708 Set<String> classes = classNames(); 1709 classes.add(className); 1710 classNames(classes); 1711 1712 return this; 1713 } 1714 1715 /** 1716 Remove a class name from this element's {@code class} attribute. 1717 @param className class name to remove 1718 @return this element 1719 */ 1720 public Element removeClass(String className) { 1721 Validate.notNull(className); 1722 1723 Set<String> classes = classNames(); 1724 classes.remove(className); 1725 classNames(classes); 1726 1727 return this; 1728 } 1729 1730 /** 1731 Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it. 1732 @param className class name to toggle 1733 @return this element 1734 */ 1735 public Element toggleClass(String className) { 1736 Validate.notNull(className); 1737 1738 Set<String> classes = classNames(); 1739 if (classes.contains(className)) 1740 classes.remove(className); 1741 else 1742 classes.add(className); 1743 classNames(classes); 1744 1745 return this; 1746 } 1747 1748 /** 1749 * Get the value of a form element (input, textarea, etc). 1750 * @return the value of the form element, or empty string if not set. 1751 */ 1752 public String val() { 1753 if (elementIs("textarea", NamespaceHtml)) 1754 return text(); 1755 else 1756 return attr("value"); 1757 } 1758 1759 /** 1760 * Set the value of a form element (input, textarea, etc). 1761 * @param value value to set 1762 * @return this element (for chaining) 1763 */ 1764 public Element val(String value) { 1765 if (elementIs("textarea", NamespaceHtml)) 1766 text(value); 1767 else 1768 attr("value", value); 1769 return this; 1770 } 1771 1772 /** 1773 Get the source range (start and end positions) of the end (closing) tag for this Element. Position tracking must be 1774 enabled prior to parsing the content. 1775 @return the range of the closing tag for this element, or {@code untracked} if its range was not tracked. 1776 @see org.jsoup.parser.Parser#setTrackPosition(boolean) 1777 @see Node#sourceRange() 1778 @see Range#isImplicit() 1779 @since 1.15.2 1780 */ 1781 public Range endSourceRange() { 1782 return Range.of(this, false); 1783 } 1784 1785 boolean shouldIndent(final Document.OutputSettings out) { 1786 return out.prettyPrint() && isFormatAsBlock(out) && !isInlineable(out) && !preserveWhitespace(parentNode); 1787 } 1788 1789 @Override 1790 void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException { 1791 if (shouldIndent(out)) { 1792 if (accum instanceof StringBuilder) { 1793 if (((StringBuilder) accum).length() > 0) 1794 indent(accum, depth, out); 1795 } else { 1796 indent(accum, depth, out); 1797 } 1798 } 1799 accum.append('<').append(safeTagName(out.syntax())); 1800 if (attributes != null) attributes.html(accum, out); 1801 1802 // selfclosing includes unknown tags, isEmpty defines tags that are always empty 1803 if (childNodes.isEmpty() && tag.isSelfClosing()) { 1804 if (out.syntax() == html && tag.isEmpty()) 1805 accum.append('>'); 1806 else 1807 accum.append(" />"); // <img> in html, <img /> in xml 1808 } 1809 else 1810 accum.append('>'); 1811 } 1812 1813 @Override 1814 void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException { 1815 if (!(childNodes.isEmpty() && tag.isSelfClosing())) { 1816 if (out.prettyPrint() && (!childNodes.isEmpty() && ( 1817 (tag.formatAsBlock() && !preserveWhitespace(parentNode)) || 1818 (out.outline() && (childNodes.size()>1 || (childNodes.size()==1 && (childNodes.get(0) instanceof Element)))) 1819 ))) 1820 indent(accum, depth, out); 1821 accum.append("</").append(safeTagName(out.syntax())).append('>'); 1822 } 1823 } 1824 1825 /* If XML syntax, normalizes < to _ in tag name. */ 1826 private String safeTagName(Document.OutputSettings.Syntax syntax) { 1827 return syntax == xml ? Normalizer.xmlSafeTagName(tagName()) : tagName(); 1828 } 1829 1830 /** 1831 * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return 1832 * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.) 1833 * 1834 * @return String of HTML. 1835 * @see #outerHtml() 1836 */ 1837 public String html() { 1838 StringBuilder accum = StringUtil.borrowBuilder(); 1839 html(accum); 1840 String html = StringUtil.releaseBuilder(accum); 1841 return NodeUtils.outputSettings(this).prettyPrint() ? html.trim() : html; 1842 } 1843 1844 @Override 1845 public <T extends Appendable> T html(T appendable) { 1846 final int size = childNodes.size(); 1847 for (int i = 0; i < size; i++) 1848 childNodes.get(i).outerHtml(appendable); 1849 1850 return appendable; 1851 } 1852 1853 /** 1854 * Set this element's inner HTML. Clears the existing HTML first. 1855 * @param html HTML to parse and set into this element 1856 * @return this element 1857 * @see #append(String) 1858 */ 1859 public Element html(String html) { 1860 empty(); 1861 append(html); 1862 return this; 1863 } 1864 1865 @Override 1866 public Element clone() { 1867 return (Element) super.clone(); 1868 } 1869 1870 @Override 1871 public Element shallowClone() { 1872 // simpler than implementing a clone version with no child copy 1873 String baseUri = baseUri(); 1874 if (baseUri.isEmpty()) baseUri = null; // saves setting a blank internal attribute 1875 return new Element(tag, baseUri, attributes == null ? null : attributes.clone()); 1876 } 1877 1878 @Override 1879 protected Element doClone(@Nullable Node parent) { 1880 Element clone = (Element) super.doClone(parent); 1881 clone.attributes = attributes != null ? attributes.clone() : null; 1882 clone.childNodes = new NodeList(clone, childNodes.size()); 1883 clone.childNodes.addAll(childNodes); // the children then get iterated and cloned in Node.clone 1884 1885 return clone; 1886 } 1887 1888 // overrides of Node for call chaining 1889 @Override 1890 public Element clearAttributes() { 1891 if (attributes != null) { 1892 super.clearAttributes(); // keeps internal attributes via iterator 1893 if (attributes.size() == 0) 1894 attributes = null; // only remove entirely if no internal attributes 1895 } 1896 1897 return this; 1898 } 1899 1900 @Override 1901 public Element removeAttr(String attributeKey) { 1902 return (Element) super.removeAttr(attributeKey); 1903 } 1904 1905 @Override 1906 public Element root() { 1907 return (Element) super.root(); // probably a document, but always at least an element 1908 } 1909 1910 @Override 1911 public Element traverse(NodeVisitor nodeVisitor) { 1912 return (Element) super.traverse(nodeVisitor); 1913 } 1914 1915 @Override 1916 public Element forEachNode(Consumer<? super Node> action) { 1917 return (Element) super.forEachNode(action); 1918 } 1919 1920 /** 1921 Perform the supplied action on this Element and each of its descendant Elements, during a depth-first traversal. 1922 Elements may be inspected, changed, added, replaced, or removed. 1923 @param action the function to perform on the element 1924 @see Node#forEachNode(Consumer) 1925 */ 1926 @Override 1927 public void forEach(Consumer<? super Element> action) { 1928 stream().forEach(action); 1929 } 1930 1931 /** 1932 Returns an Iterator that iterates this Element and each of its descendant Elements, in document order. 1933 @return an Iterator 1934 */ 1935 @Override 1936 public Iterator<Element> iterator() { 1937 return new NodeIterator<>(this, Element.class); 1938 } 1939 1940 @Override 1941 public Element filter(NodeFilter nodeFilter) { 1942 return (Element) super.filter(nodeFilter); 1943 } 1944 1945 private static final class NodeList extends ChangeNotifyingArrayList<Node> { 1946 private final Element owner; 1947 1948 NodeList(Element owner, int initialCapacity) { 1949 super(initialCapacity); 1950 this.owner = owner; 1951 } 1952 1953 @Override public void onContentsChanged() { 1954 owner.nodelistChanged(); 1955 } 1956 } 1957 1958 private boolean isFormatAsBlock(Document.OutputSettings out) { 1959 return tag.isBlock() || (parent() != null && parent().tag().formatAsBlock()) || out.outline(); 1960 } 1961 1962 private boolean isInlineable(Document.OutputSettings out) { 1963 if (!tag.isInline()) 1964 return false; 1965 return (parent() == null || parent().isBlock()) 1966 && !isEffectivelyFirst() 1967 && !out.outline() 1968 && !nameIs("br"); 1969 } 1970}