001package org.jsoup.nodes; 002 003import org.jsoup.helper.Validate; 004import org.jsoup.internal.Normalizer; 005import org.jsoup.internal.StringUtil; 006import org.jsoup.parser.ParseSettings; 007import org.jsoup.parser.Parser; 008import org.jsoup.parser.Tag; 009import org.jsoup.parser.TokenQueue; 010import org.jsoup.select.Collector; 011import org.jsoup.select.Elements; 012import org.jsoup.select.Evaluator; 013import org.jsoup.select.NodeFilter; 014import org.jsoup.select.NodeTraversor; 015import org.jsoup.select.NodeVisitor; 016import org.jsoup.select.QueryParser; 017import org.jsoup.select.Selector; 018import org.jspecify.annotations.Nullable; 019 020import java.io.IOException; 021import java.lang.ref.WeakReference; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collection; 025import java.util.Collections; 026import java.util.Iterator; 027import java.util.LinkedHashSet; 028import java.util.List; 029import java.util.Map; 030import java.util.Set; 031import java.util.concurrent.atomic.AtomicBoolean; 032import java.util.function.Consumer; 033import java.util.regex.Pattern; 034import java.util.regex.PatternSyntaxException; 035import java.util.stream.Collectors; 036import java.util.stream.Stream; 037 038import static org.jsoup.internal.Normalizer.normalize; 039import static org.jsoup.nodes.Document.OutputSettings.Syntax.xml; 040import static org.jsoup.nodes.TextNode.lastCharIsWhitespace; 041import static org.jsoup.parser.Parser.NamespaceHtml; 042import static org.jsoup.parser.TokenQueue.escapeCssIdentifier; 043 044/** 045 An HTML Element consists of a tag name, attributes, and child nodes (including text nodes and other elements). 046 <p> 047 From an Element, you can extract data, traverse the node graph, and manipulate the HTML. 
048*/ 049public class Element extends Node implements Iterable<Element> { 050 private static final List<Element> EmptyChildren = Collections.emptyList(); 051 private static final NodeList EmptyNodeList = new NodeList(0); 052 private static final Pattern ClassSplit = Pattern.compile("\\s+"); 053 private static final String BaseUriKey = Attributes.internalKey("baseUri"); 054 Tag tag; 055 NodeList childNodes; 056 @Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null 057 058 /** 059 * Create a new, standalone element, in the specified namespace. 060 * @param tag tag name 061 * @param namespace namespace for this element 062 */ 063 public Element(String tag, String namespace) { 064 this(Tag.valueOf(tag, namespace, ParseSettings.preserveCase), null); 065 } 066 067 /** 068 * Create a new, standalone element, in the HTML namespace. 069 * @param tag tag name 070 * @see #Element(String tag, String namespace) 071 */ 072 public Element(String tag) { 073 this(tag, Parser.NamespaceHtml); 074 } 075 076 /** 077 * Create a new, standalone Element. (Standalone in that it has no parent.) 078 * 079 * @param tag tag of this element 080 * @param baseUri the base URI (optional, may be null to inherit from parent, or "" to clear parent's) 081 * @param attributes initial attributes (optional, may be null) 082 * @see #appendChild(Node) 083 * @see #appendElement(String) 084 */ 085 public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes) { 086 Validate.notNull(tag); 087 childNodes = EmptyNodeList; 088 this.attributes = attributes; 089 this.tag = tag; 090 if (baseUri != null) 091 this.setBaseUri(baseUri); 092 } 093 094 /** 095 * Create a new Element from a Tag and a base URI. 096 * 097 * @param tag element tag 098 * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any. 
099 * @see Tag#valueOf(String, ParseSettings) 100 */ 101 public Element(Tag tag, @Nullable String baseUri) { 102 this(tag, baseUri, null); 103 } 104 105 /** 106 Internal test to check if a nodelist object has been created. 107 */ 108 protected boolean hasChildNodes() { 109 return childNodes != EmptyNodeList; 110 } 111 112 @Override protected List<Node> ensureChildNodes() { 113 if (childNodes == EmptyNodeList) { 114 childNodes = new NodeList(4); 115 } 116 return childNodes; 117 } 118 119 @Override 120 protected boolean hasAttributes() { 121 return attributes != null; 122 } 123 124 @Override 125 public Attributes attributes() { 126 if (attributes == null) // not using hasAttributes, as doesn't clear warning 127 attributes = new Attributes(); 128 return attributes; 129 } 130 131 @Override 132 public String baseUri() { 133 return searchUpForAttribute(this, BaseUriKey); 134 } 135 136 private static String searchUpForAttribute(final Element start, final String key) { 137 Element el = start; 138 while (el != null) { 139 if (el.attributes != null && el.attributes.hasKey(key)) 140 return el.attributes.get(key); 141 el = el.parent(); 142 } 143 return ""; 144 } 145 146 @Override 147 protected void doSetBaseUri(String baseUri) { 148 attributes().put(BaseUriKey, baseUri); 149 } 150 151 @Override 152 public int childNodeSize() { 153 return childNodes.size(); 154 } 155 156 @Override 157 public String nodeName() { 158 return tag.getName(); 159 } 160 161 /** 162 * Get the name of the tag for this element. E.g. {@code div}. If you are using {@link ParseSettings#preserveCase 163 * case preserving parsing}, this will return the source's original case. 164 * 165 * @return the tag name 166 */ 167 public String tagName() { 168 return tag.getName(); 169 } 170 171 /** 172 * Get the normalized name of this Element's tag. This will always be the lower-cased version of the tag, regardless 173 * of the tag case preserving setting of the parser. 
For e.g., {@code <DIV>} and {@code <div>} both have a 174 * normal name of {@code div}. 175 * @return normal name 176 */ 177 @Override 178 public String normalName() { 179 return tag.normalName(); 180 } 181 182 /** 183 Test if this Element has the specified normalized name, and is in the specified namespace. 184 * @param normalName a normalized element name (e.g. {@code div}). 185 * @param namespace the namespace 186 * @return true if the element's normal name matches exactly, and is in the specified namespace 187 * @since 1.17.2 188 */ 189 public boolean elementIs(String normalName, String namespace) { 190 return tag.normalName().equals(normalName) && tag.namespace().equals(namespace); 191 } 192 193 /** 194 * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with 195 * {@code el.tagName("div");}. 196 * 197 * @param tagName new tag name for this element 198 * @return this element, for chaining 199 * @see Elements#tagName(String) 200 */ 201 public Element tagName(String tagName) { 202 return tagName(tagName, tag.namespace()); 203 } 204 205 /** 206 * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with 207 * {@code el.tagName("div");}. 208 * 209 * @param tagName new tag name for this element 210 * @param namespace the new namespace for this element 211 * @return this element, for chaining 212 * @see Elements#tagName(String) 213 */ 214 public Element tagName(String tagName, String namespace) { 215 Validate.notEmptyParam(tagName, "tagName"); 216 Validate.notEmptyParam(namespace, "namespace"); 217 Parser parser = NodeUtils.parser(this); 218 tag = parser.tagSet().valueOf(tagName, namespace, parser.settings()); // maintains the case option of the original parse 219 return this; 220 } 221 222 /** 223 * Get the Tag for this element. 224 * 225 * @return the tag object 226 */ 227 public Tag tag() { 228 return tag; 229 } 230 231 /** 232 Change the Tag of this element. 
233 @param tag the new tag 234 @return this element, for chaining 235 @since 1.20.1 236 */ 237 public Element tag(Tag tag) { 238 Validate.notNull(tag); 239 this.tag = tag; 240 return this; 241 } 242 243 /** 244 * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element 245 * {@code <span> == false}). 246 * 247 * @return true if block, false if not (and thus inline) 248 */ 249 public boolean isBlock() { 250 return tag.isBlock(); 251 } 252 253 /** 254 * Get the {@code id} attribute of this element. 255 * 256 * @return The id attribute, if present, or an empty string if not. 257 */ 258 public String id() { 259 return attributes != null ? attributes.getIgnoreCase("id") :""; 260 } 261 262 /** 263 Set the {@code id} attribute of this element. 264 @param id the ID value to use 265 @return this Element, for chaining 266 */ 267 public Element id(String id) { 268 Validate.notNull(id); 269 attr("id", id); 270 return this; 271 } 272 273 /** 274 * Set an attribute value on this element. If this element already has an attribute with the 275 * key, its value is updated; otherwise, a new attribute is added. 276 * 277 * @return this element 278 */ 279 @Override public Element attr(String attributeKey, String attributeValue) { 280 super.attr(attributeKey, attributeValue); 281 return this; 282 } 283 284 /** 285 * Set a boolean attribute value on this element. Setting to <code>true</code> sets the attribute value to "" and 286 * marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the attribute 287 * with the same key if it exists. 288 * 289 * @param attributeKey the attribute key 290 * @param attributeValue the attribute value 291 * 292 * @return this element 293 */ 294 public Element attr(String attributeKey, boolean attributeValue) { 295 attributes().put(attributeKey, attributeValue); 296 return this; 297 } 298 299 /** 300 Get an Attribute by key. 
Changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc 301 will cascade back to this Element. 302 @param key the (case-sensitive) attribute key 303 @return the Attribute for this key, or null if not present. 304 @since 1.17.2 305 */ 306 @Nullable public Attribute attribute(String key) { 307 return hasAttributes() ? attributes().attribute(key) : null; 308 } 309 310 /** 311 * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key 312 * starting with "data-" is included the dataset. 313 * <p> 314 * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset 315 * {@code package=jsoup, language=java}. 316 * <p> 317 * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected 318 * in the other map. 319 * <p> 320 * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector. 321 * @return a map of {@code key=value} custom data attributes. 322 */ 323 public Map<String, String> dataset() { 324 return attributes().dataset(); 325 } 326 327 @Override @Nullable 328 public final Element parent() { 329 return (Element) parentNode; 330 } 331 332 /** 333 * Get this element's parent and ancestors, up to the document root. 334 * @return this element's stack of parents, starting with the closest first. 335 */ 336 public Elements parents() { 337 Elements parents = new Elements(); 338 Element parent = this.parent(); 339 while (parent != null && !parent.nameIs("#root")) { 340 parents.add(parent); 341 parent = parent.parent(); 342 } 343 return parents; 344 } 345 346 /** 347 * Get a child element of this element, by its 0-based index number. 348 * <p> 349 * Note that an element can have both mixed Nodes and Elements as children. This method inspects 350 * a filtered list of children that are elements, and the index is based on that filtered list. 
351 * </p> 352 * 353 * @param index the index number of the element to retrieve 354 * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException} 355 * @see #childNode(int) 356 */ 357 public Element child(int index) { 358 return childElementsList().get(index); 359 } 360 361 /** 362 * Get the number of child nodes of this element that are elements. 363 * <p> 364 * This method works on the same filtered list like {@link #child(int)}. Use {@link #childNodes()} and {@link 365 * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.) 366 * </p> 367 * 368 * @return the number of child nodes that are elements 369 * @see #children() 370 * @see #child(int) 371 */ 372 public int childrenSize() { 373 return childElementsList().size(); 374 } 375 376 /** 377 * Get this element's child elements. 378 * <p> 379 * This is effectively a filter on {@link #childNodes()} to get Element nodes. 380 * </p> 381 * @return child elements. If this element has no children, returns an empty list. 382 * @see #childNodes() 383 */ 384 public Elements children() { 385 return new Elements(childElementsList()); 386 } 387 388 /** 389 * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated. 390 * TODO - think about pulling this out as a helper as there are other shadow lists (like in Attributes) kept around. 
391 * @return a list of child elements 392 */ 393 List<Element> childElementsList() { 394 if (childNodeSize() == 0) return EmptyChildren; // short circuit creating empty 395 List<Element> children = cachedChildren(); 396 if (children == null) { 397 children = filterNodes(Element.class); 398 stashChildren(children); 399 } 400 return children; 401 } 402 403 private static final String childElsKey = "jsoup.childEls"; 404 private static final String childElsMod = "jsoup.childElsMod"; 405 406 /** returns the cached child els, if they exist, and the modcount of our childnodes matches the stashed modcount */ 407 private @Nullable List<Element> cachedChildren() { 408 Map<String, Object> userData = attributes().userData(); 409 //noinspection unchecked 410 WeakReference<List<Element>> ref = (WeakReference<List<Element>>) userData.get(childElsKey); 411 if (ref != null) { 412 List<Element> els = ref.get(); 413 if (els != null) { 414 Integer modCount = (Integer) userData.get(childElsMod); 415 if (modCount != null && modCount == childNodes.modCount()) 416 return els; 417 } 418 } 419 return null; 420 } 421 422 /** caches the child els into the Attribute user data. */ 423 private void stashChildren(List<Element> els) { 424 Map<String, Object> userData = attributes().userData(); 425 WeakReference<List<Element>> ref = new WeakReference<>(els); 426 userData.put(childElsKey, ref); 427 userData.put(childElsMod, childNodes.modCount()); 428 } 429 430 /** 431 Returns a Stream of this Element and all of its descendant Elements. The stream has document order. 432 @return a stream of this element and its descendants. 
433 @see #nodeStream() 434 @since 1.17.1 435 */ 436 public Stream<Element> stream() { 437 return NodeUtils.stream(this, Element.class); 438 } 439 440 private <T> List<T> filterNodes(Class<T> clazz) { 441 return childNodes.stream() 442 .filter(clazz::isInstance) 443 .map(clazz::cast) 444 .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList)); 445 } 446 447 /** 448 * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated. 449 * <p> 450 * This is effectively a filter on {@link #childNodes()} to get Text nodes. 451 * @return child text nodes. If this element has no text nodes, returns an 452 * empty list. 453 * </p> 454 * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected: 455 * <ul> 456 * <li>{@code p.text()} = {@code "One Two Three Four"}</li> 457 * <li>{@code p.ownText()} = {@code "One Three Four"}</li> 458 * <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li> 459 * <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li> 460 * <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li> 461 * </ul> 462 */ 463 public List<TextNode> textNodes() { 464 return filterNodes(TextNode.class); 465 } 466 467 /** 468 * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated. 469 * <p> 470 * This is effectively a filter on {@link #childNodes()} to get Data nodes. 471 * </p> 472 * @return child data nodes. If this element has no data nodes, returns an 473 * empty list. 474 * @see #data() 475 */ 476 public List<DataNode> dataNodes() { 477 return filterNodes(DataNode.class); 478 } 479 480 /** 481 * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements 482 * may include this element, or any of its children. 
483 * <p>This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because 484 * multiple filters can be combined, e.g.:</p> 485 * <ul> 486 * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes) 487 * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely) 488 * </ul> 489 * <p>See the query syntax documentation in {@link org.jsoup.select.Selector}.</p> 490 * <p>Also known as {@code querySelectorAll()} in the Web DOM.</p> 491 * 492 * @param cssQuery a {@link Selector} CSS-like query 493 * @return an {@link Elements} list containing elements that match the query (empty if none match) 494 * @see Selector selector query syntax 495 * @see #select(Evaluator) 496 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. 497 */ 498 public Elements select(String cssQuery) { 499 return Selector.select(cssQuery, this); 500 } 501 502 /** 503 * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but 504 * may be useful if you are running the same query many times (on many documents) and want to save the overhead of 505 * repeatedly parsing the CSS query. 506 * @param evaluator an element evaluator 507 * @return an {@link Elements} list containing elements that match the query (empty if none match) 508 * @see QueryParser#parse(String) 509 */ 510 public Elements select(Evaluator evaluator) { 511 return Selector.select(evaluator, this); 512 } 513 514 /** 515 Selects elements from the given root that match the specified {@link Selector} CSS query, with this element as the 516 starting context, and returns them as a lazy Stream. Matched elements may include this element, or any of its 517 children. 518 <p> 519 Unlike {@link #select(String query)}, which returns a complete list of all matching elements, this method returns a 520 {@link Stream} that processes elements lazily as they are needed. 
The stream operates in a "pull" model — elements 521 are fetched from the root as the stream is traversed. You can use standard {@code Stream} operations such as 522 {@code filter}, {@code map}, or {@code findFirst} to process elements on demand. 523 </p> 524 525 @param cssQuery a {@link Selector} CSS-like query 526 @return a {@link Stream} containing elements that match the query (empty if none match) 527 @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. 528 @see Selector selector query syntax 529 @see QueryParser#parse(String) 530 @since 1.19.1 531 */ 532 public Stream<Element> selectStream(String cssQuery) { 533 return Selector.selectStream(cssQuery, this); 534 } 535 536 /** 537 Find a Stream of elements that match the supplied Evaluator. 538 539 @param evaluator an element Evaluator 540 @return a {@link Stream} containing elements that match the query (empty if none match) 541 @since 1.19.1 542 */ 543 public Stream<Element> selectStream(Evaluator evaluator) { 544 return Selector.selectStream(evaluator, this); 545 } 546 547 /** 548 * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context. 549 * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query 550 * execution stops on the first hit.</p> 551 * <p>Also known as {@code querySelector()} in the Web DOM.</p> 552 * @param cssQuery cssQuery a {@link Selector} CSS-like query 553 * @return the first matching element, or <b>{@code null}</b> if there is no match. 554 * @see #expectFirst(String) 555 */ 556 public @Nullable Element selectFirst(String cssQuery) { 557 return Selector.selectFirst(cssQuery, this); 558 } 559 560 /** 561 * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or 562 * {@code null} if none match. 
563 * 564 * @param evaluator an element evaluator 565 * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none 566 * match. 567 */ 568 public @Nullable Element selectFirst(Evaluator evaluator) { 569 return Collector.findFirst(evaluator, this); 570 } 571 572 /** 573 Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}. This 574 is useful if you want to simply abort processing on a failed match. 575 @param cssQuery a {@link Selector} CSS-like query 576 @return the first matching element 577 @throws IllegalArgumentException if no match is found 578 @since 1.15.2 579 */ 580 public Element expectFirst(String cssQuery) { 581 return (Element) Validate.ensureNotNull( 582 Selector.selectFirst(cssQuery, this), 583 parent() != null ? 584 "No elements matched the query '%s' on element '%s'.": 585 "No elements matched the query '%s' in the document." 586 , cssQuery, this.tagName() 587 ); 588 } 589 590 /** 591 * Checks if this element matches the given {@link Selector} CSS query. Also knows as {@code matches()} in the Web 592 * DOM. 593 * 594 * @param cssQuery a {@link Selector} CSS query 595 * @return if this element matches the query 596 */ 597 public boolean is(String cssQuery) { 598 return is(QueryParser.parse(cssQuery)); 599 } 600 601 /** 602 * Check if this element matches the given evaluator. 603 * @param evaluator an element evaluator 604 * @return if this element matches 605 */ 606 public boolean is(Evaluator evaluator) { 607 return evaluator.matches(this.root(), this); 608 } 609 610 /** 611 * Find the closest element up the tree of parents that matches the specified CSS query. Will return itself, an 612 * ancestor, or {@code null} if there is no such matching element. 613 * @param cssQuery a {@link Selector} CSS query 614 * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not 615 * found. 
616 */ 617 public @Nullable Element closest(String cssQuery) { 618 return closest(QueryParser.parse(cssQuery)); 619 } 620 621 /** 622 * Find the closest element up the tree of parents that matches the specified evaluator. Will return itself, an 623 * ancestor, or {@code null} if there is no such matching element. 624 * @param evaluator a query evaluator 625 * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not 626 * found. 627 */ 628 public @Nullable Element closest(Evaluator evaluator) { 629 Validate.notNull(evaluator); 630 Element el = this; 631 final Element root = root(); 632 do { 633 if (evaluator.matches(root, el)) 634 return el; 635 el = el.parent(); 636 } while (el != null); 637 return null; 638 } 639 640 /** 641 Find Elements that match the supplied {@index XPath} expression. 642 <p>Note that for convenience of writing the Xpath expression, namespaces are disabled, and queries can be 643 expressed using the element's local name only.</p> 644 <p>By default, XPath 1.0 expressions are supported. If you would to use XPath 2.0 or higher, you can provide an 645 alternate XPathFactory implementation:</p> 646 <ol> 647 <li>Add the implementation to your classpath. E.g. to use <a href="https://www.saxonica.com/products/products.xml">Saxon-HE</a>, add <a href="https://mvnrepository.com/artifact/net.sf.saxon/Saxon-HE">net.sf.saxon:Saxon-HE</a> to your build.</li> 648 <li>Set the system property <code>javax.xml.xpath.XPathFactory:jsoup</code> to the implementing classname. E.g.:<br> 649 <code>System.setProperty(W3CDom.XPathFactoryProperty, "net.sf.saxon.xpath.XPathFactoryImpl");</code> 650 </li> 651 </ol> 652 653 @param xpath XPath expression 654 @return matching elements, or an empty list if none match. 
655 @see #selectXpath(String, Class) 656 @since 1.14.3 657 */ 658 public Elements selectXpath(String xpath) { 659 return new Elements(NodeUtils.selectXpath(xpath, this, Element.class)); 660 } 661 662 /** 663 Find Nodes that match the supplied XPath expression. 664 <p>For example, to select TextNodes under {@code p} elements: </p> 665 <pre>List<TextNode> textNodes = doc.selectXpath("//body//p//text()", TextNode.class);</pre> 666 <p>Note that in the jsoup DOM, Attribute objects are not Nodes. To directly select attribute values, do something 667 like:</p> 668 <pre>List<String> hrefs = doc.selectXpath("//a").eachAttr("href");</pre> 669 @param xpath XPath expression 670 @param nodeType the jsoup node type to return 671 @see #selectXpath(String) 672 @return a list of matching nodes 673 @since 1.14.3 674 */ 675 public <T extends Node> List<T> selectXpath(String xpath, Class<T> nodeType) { 676 return NodeUtils.selectXpath(xpath, this, nodeType); 677 } 678 679 /** 680 * Insert a node to the end of this Element's children. The incoming node will be re-parented. 681 * 682 * @param child node to add. 683 * @return this Element, for chaining 684 * @see #prependChild(Node) 685 * @see #insertChildren(int, Collection) 686 */ 687 public Element appendChild(Node child) { 688 Validate.notNull(child); 689 690 // was - Node#addChildren(child). short-circuits an array create and a loop. 691 reparentChild(child); 692 ensureChildNodes(); 693 childNodes.add(child); 694 child.setSiblingIndex(childNodes.size() - 1); 695 return this; 696 } 697 698 /** 699 Insert the given nodes to the end of this Element's children. 700 701 @param children nodes to add 702 @return this Element, for chaining 703 @see #insertChildren(int, Collection) 704 */ 705 public Element appendChildren(Collection<? extends Node> children) { 706 insertChildren(-1, children); 707 return this; 708 } 709 710 /** 711 * Add this element to the supplied parent element, as its next child. 
712 * 713 * @param parent element to which this element will be appended 714 * @return this element, so that you can continue modifying the element 715 */ 716 public Element appendTo(Element parent) { 717 Validate.notNull(parent); 718 parent.appendChild(this); 719 return this; 720 } 721 722 /** 723 * Add a node to the start of this element's children. 724 * 725 * @param child node to add. 726 * @return this element, so that you can add more child nodes or elements. 727 */ 728 public Element prependChild(Node child) { 729 Validate.notNull(child); 730 731 addChildren(0, child); 732 return this; 733 } 734 735 /** 736 Insert the given nodes to the start of this Element's children. 737 738 @param children nodes to add 739 @return this Element, for chaining 740 @see #insertChildren(int, Collection) 741 */ 742 public Element prependChildren(Collection<? extends Node> children) { 743 insertChildren(0, children); 744 return this; 745 } 746 747 748 /** 749 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 750 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 751 * 752 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the 753 * end 754 * @param children child nodes to insert 755 * @return this element, for chaining. 756 */ 757 public Element insertChildren(int index, Collection<? 
extends Node> children) { 758 Validate.notNull(children, "Children collection to be inserted must not be null."); 759 int currentSize = childNodeSize(); 760 if (index < 0) index += currentSize +1; // roll around 761 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 762 763 ArrayList<Node> nodes = new ArrayList<>(children); 764 Node[] nodeArray = nodes.toArray(new Node[0]); 765 addChildren(index, nodeArray); 766 return this; 767 } 768 769 /** 770 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 771 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 772 * 773 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the 774 * end 775 * @param children child nodes to insert 776 * @return this element, for chaining. 777 */ 778 public Element insertChildren(int index, Node... children) { 779 Validate.notNull(children, "Children collection to be inserted must not be null."); 780 int currentSize = childNodeSize(); 781 if (index < 0) index += currentSize +1; // roll around 782 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 783 784 addChildren(index, children); 785 return this; 786 } 787 788 /** 789 * Create a new element by tag name, and add it as this Element's last child. 790 * 791 * @param tagName the name of the tag (e.g. {@code div}). 792 * @return the new element, to allow you to add content to it, e.g.: 793 * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");} 794 */ 795 public Element appendElement(String tagName) { 796 return appendElement(tagName, tag.namespace()); 797 } 798 799 /** 800 * Create a new element by tag name and namespace, add it as this Element's last child. 801 * 802 * @param tagName the name of the tag (e.g. {@code div}). 803 * @param namespace the namespace of the tag (e.g. 
{@link Parser#NamespaceHtml}) 804 * @return the new element, in the specified namespace 805 */ 806 public Element appendElement(String tagName, String namespace) { 807 Parser parser = NodeUtils.parser(this); 808 Element child = new Element(parser.tagSet().valueOf(tagName, namespace, parser.settings()), baseUri()); 809 appendChild(child); 810 return child; 811 } 812 813 /** 814 * Create a new element by tag name, and add it as this Element's first child. 815 * 816 * @param tagName the name of the tag (e.g. {@code div}). 817 * @return the new element, to allow you to add content to it, e.g.: 818 * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");} 819 */ 820 public Element prependElement(String tagName) { 821 return prependElement(tagName, tag.namespace()); 822 } 823 824 /** 825 * Create a new element by tag name and namespace, and add it as this Element's first child. 826 * 827 * @param tagName the name of the tag (e.g. {@code div}). 828 * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml}) 829 * @return the new element, in the specified namespace 830 */ 831 public Element prependElement(String tagName, String namespace) { 832 Parser parser = NodeUtils.parser(this); 833 Element child = new Element(parser.tagSet().valueOf(tagName, namespace, parser.settings()), baseUri()); 834 prependChild(child); 835 return child; 836 } 837 838 /** 839 * Create and append a new TextNode to this element. 840 * 841 * @param text the (un-encoded) text to add 842 * @return this element 843 */ 844 public Element appendText(String text) { 845 Validate.notNull(text); 846 TextNode node = new TextNode(text); 847 appendChild(node); 848 return this; 849 } 850 851 /** 852 * Create and prepend a new TextNode to this element. 
853 * 854 * @param text the decoded text to add 855 * @return this element 856 */ 857 public Element prependText(String text) { 858 Validate.notNull(text); 859 TextNode node = new TextNode(text); 860 prependChild(node); 861 return this; 862 } 863 864 /** 865 * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children. 866 * @param html HTML to add inside this element, after the existing HTML 867 * @return this element 868 * @see #html(String) 869 */ 870 public Element append(String html) { 871 Validate.notNull(html); 872 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); 873 addChildren(nodes.toArray(new Node[0])); 874 return this; 875 } 876 877 /** 878 * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children. 879 * @param html HTML to add inside this element, before the existing HTML 880 * @return this element 881 * @see #html(String) 882 */ 883 public Element prepend(String html) { 884 Validate.notNull(html); 885 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); 886 addChildren(0, nodes.toArray(new Node[0])); 887 return this; 888 } 889 890 /** 891 * Insert the specified HTML into the DOM before this element (as a preceding sibling). 892 * 893 * @param html HTML to add before this element 894 * @return this element, for chaining 895 * @see #after(String) 896 */ 897 @Override 898 public Element before(String html) { 899 return (Element) super.before(html); 900 } 901 902 /** 903 * Insert the specified node into the DOM before this node (as a preceding sibling). 
904 * @param node to add before this element 905 * @return this Element, for chaining 906 * @see #after(Node) 907 */ 908 @Override 909 public Element before(Node node) { 910 return (Element) super.before(node); 911 } 912 913 /** 914 * Insert the specified HTML into the DOM after this element (as a following sibling). 915 * 916 * @param html HTML to add after this element 917 * @return this element, for chaining 918 * @see #before(String) 919 */ 920 @Override 921 public Element after(String html) { 922 return (Element) super.after(html); 923 } 924 925 /** 926 * Insert the specified node into the DOM after this node (as a following sibling). 927 * @param node to add after this element 928 * @return this element, for chaining 929 * @see #before(Node) 930 */ 931 @Override 932 public Element after(Node node) { 933 return (Element) super.after(node); 934 } 935 936 /** 937 * Remove all the element's child nodes. Any attributes are left as-is. Each child node has its parent set to 938 * {@code null}. 939 * @return this element 940 */ 941 @Override 942 public Element empty() { 943 // Detach each of the children -> parent links: 944 for (Node child : childNodes) { 945 child.parentNode = null; 946 } 947 childNodes.clear(); 948 return this; 949 } 950 951 /** 952 * Wrap the supplied HTML around this element. 953 * 954 * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. 955 * @return this element, for chaining. 956 */ 957 @Override 958 public Element wrap(String html) { 959 return (Element) super.wrap(html); 960 } 961 962 /** 963 Gets an #id selector for this element, if it has a unique ID. Otherwise, returns an empty string. 
964 965 @param ownerDoc the document that owns this element, if there is one 966 */ 967 private String uniqueIdSelector(@Nullable Document ownerDoc) { 968 String id = id(); 969 if (!id.isEmpty()) { // check if the ID is unique and matches this 970 String idSel = "#" + escapeCssIdentifier(id); 971 if (ownerDoc != null) { 972 Elements els = ownerDoc.select(idSel); 973 if (els.size() == 1 && els.get(0) == this) return idSel; 974 } else { 975 return idSel; 976 } 977 } 978 return EmptyString; 979 } 980 981 /** 982 Get a CSS selector that will uniquely select this element. 983 <p> 984 If the element has an ID, returns #id; otherwise returns the parent (if any) CSS selector, followed by 985 {@literal '>'}, followed by a unique selector for the element (tag.class.class:nth-child(n)). 986 </p> 987 988 @return the CSS Path that can be used to retrieve the element in a selector. 989 */ 990 public String cssSelector() { 991 Document ownerDoc = ownerDocument(); 992 String idSel = uniqueIdSelector(ownerDoc); 993 if (!idSel.isEmpty()) return idSel; 994 995 // No unique ID, work up the parent stack and find either a unique ID to hang from, or just a GP > Parent > Child chain 996 StringBuilder selector = StringUtil.borrowBuilder(); 997 Element el = this; 998 while (el != null && !(el instanceof Document)) { 999 idSel = el.uniqueIdSelector(ownerDoc); 1000 if (!idSel.isEmpty()) { 1001 selector.insert(0, idSel); 1002 break; // found a unique ID to use as ancestor; stop 1003 } 1004 selector.insert(0, el.cssSelectorComponent()); 1005 el = el.parent(); 1006 } 1007 return StringUtil.releaseBuilder(selector); 1008 } 1009 1010 private String cssSelectorComponent() { 1011 // Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag 1012 String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|"); 1013 StringBuilder selector = StringUtil.borrowBuilder().append(tagName); 1014 String classes = classNames().stream().map(TokenQueue::escapeCssIdentifier) 1015 
.collect(StringUtil.joining(".")); 1016 if (!classes.isEmpty()) 1017 selector.append('.').append(classes); 1018 1019 if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node 1020 return StringUtil.releaseBuilder(selector); 1021 1022 selector.insert(0, " > "); 1023 if (parent().select(selector.toString()).size() > 1) 1024 selector.append(String.format( 1025 ":nth-child(%d)", elementSiblingIndex() + 1)); 1026 1027 return StringUtil.releaseBuilder(selector); 1028 } 1029 1030 /** 1031 * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling 1032 * of itself, so will not be included in the returned list. 1033 * @return sibling elements 1034 */ 1035 public Elements siblingElements() { 1036 if (parentNode == null) 1037 return new Elements(0); 1038 1039 List<Element> elements = parent().childElementsList(); 1040 Elements siblings = new Elements(elements.size() - 1); 1041 for (Element el: elements) 1042 if (el != this) 1043 siblings.add(el); 1044 return siblings; 1045 } 1046 1047 /** 1048 * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s, 1049 * the {@code nextElementSibling} of the first {@code p} is the second {@code p}. 1050 * <p> 1051 * This is similar to {@link #nextSibling()}, but specifically finds only Elements 1052 * </p> 1053 * @return the next element, or null if there is no next element 1054 * @see #previousElementSibling() 1055 */ 1056 public @Nullable Element nextElementSibling() { 1057 Node next = this; 1058 while ((next = next.nextSibling()) != null) { 1059 if (next instanceof Element) return (Element) next; 1060 } 1061 return null; 1062 } 1063 1064 /** 1065 * Get each of the sibling elements that come after this element. 
1066 * 1067 * @return each of the element siblings after this element, or an empty list if there are no next sibling elements 1068 */ 1069 public Elements nextElementSiblings() { 1070 return nextElementSiblings(true); 1071 } 1072 1073 /** 1074 * Gets the previous element sibling of this element. 1075 * @return the previous element, or null if there is no previous element 1076 * @see #nextElementSibling() 1077 */ 1078 public @Nullable Element previousElementSibling() { 1079 Node prev = this; 1080 while ((prev = prev.previousSibling()) != null) { 1081 if (prev instanceof Element) return (Element) prev; 1082 } 1083 return null; 1084 } 1085 1086 /** 1087 * Get each of the element siblings before this element. 1088 * 1089 * @return the previous element siblings, or an empty list if there are none. 1090 */ 1091 public Elements previousElementSiblings() { 1092 return nextElementSiblings(false); 1093 } 1094 1095 private Elements nextElementSiblings(boolean next) { 1096 Elements els = new Elements(); 1097 if (parentNode == null) 1098 return els; 1099 els.add(this); 1100 return next ? els.nextAll() : els.prevAll(); 1101 } 1102 1103 /** 1104 * Gets the first Element sibling of this element. That may be this element. 1105 * @return the first sibling that is an element (aka the parent's first element child) 1106 */ 1107 public Element firstElementSibling() { 1108 if (parent() != null) { 1109 //noinspection DataFlowIssue (not nullable, would be this is no other sibs) 1110 return parent().firstElementChild(); 1111 } else 1112 return this; // orphan is its own first sibling 1113 } 1114 1115 /** 1116 * Get the list index of this element in its element sibling list. I.e. if this is the first element 1117 * sibling, returns 0. 
1118 * @return position in element sibling list 1119 */ 1120 public int elementSiblingIndex() { 1121 if (parent() == null) return 0; 1122 return indexInList(this, parent().childElementsList()); 1123 } 1124 1125 /** 1126 * Gets the last element sibling of this element. That may be this element. 1127 * @return the last sibling that is an element (aka the parent's last element child) 1128 */ 1129 public Element lastElementSibling() { 1130 if (parent() != null) { 1131 //noinspection DataFlowIssue (not nullable, would be this if no other sibs) 1132 return parent().lastElementChild(); 1133 } else 1134 return this; 1135 } 1136 1137 private static <E extends Element> int indexInList(Element search, List<E> elements) { 1138 final int size = elements.size(); 1139 for (int i = 0; i < size; i++) { 1140 if (elements.get(i) == search) 1141 return i; 1142 } 1143 return 0; 1144 } 1145 1146 /** 1147 Gets the first child of this Element that is an Element, or {@code null} if there is none. 1148 @return the first Element child node, or null. 1149 @see #firstChild() 1150 @see #lastElementChild() 1151 @since 1.15.2 1152 */ 1153 public @Nullable Element firstElementChild() { 1154 Node child = firstChild(); 1155 while (child != null) { 1156 if (child instanceof Element) return (Element) child; 1157 child = child.nextSibling(); 1158 } 1159 return null; 1160 } 1161 1162 /** 1163 Gets the last child of this Element that is an Element, or @{code null} if there is none. 1164 @return the last Element child node, or null. 1165 @see #lastChild() 1166 @see #firstElementChild() 1167 @since 1.15.2 1168 */ 1169 public @Nullable Element lastElementChild() { 1170 Node child = lastChild(); 1171 while (child != null) { 1172 if (child instanceof Element) return (Element) child; 1173 child = child.previousSibling(); 1174 } 1175 return null; 1176 } 1177 1178 // DOM type methods 1179 1180 /** 1181 * Finds elements, including and recursively under this element, with the specified tag name. 
1182 * @param tagName The tag name to search for (case insensitively). 1183 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match. 1184 */ 1185 public Elements getElementsByTag(String tagName) { 1186 Validate.notEmpty(tagName); 1187 tagName = normalize(tagName); 1188 1189 return Collector.collect(new Evaluator.Tag(tagName), this); 1190 } 1191 1192 /** 1193 * Find an element by ID, including or under this element. 1194 * <p> 1195 * Note that this finds the first matching ID, starting with this element. If you search down from a different 1196 * starting point, it is possible to find a different element by ID. For unique element by ID within a Document, 1197 * use {@link Document#getElementById(String)} 1198 * @param id The ID to search for. 1199 * @return The first matching element by ID, starting with this element, or null if none found. 1200 */ 1201 public @Nullable Element getElementById(String id) { 1202 Validate.notEmpty(id); 1203 return Collector.findFirst(new Evaluator.Id(id), this); 1204 } 1205 1206 /** 1207 * Find elements that have this class, including or under this element. Case-insensitive. 1208 * <p> 1209 * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method 1210 * checks each class, so you can find the above with {@code el.getElementsByClass("header");}. 1211 * 1212 * @param className the name of the class to search for. 1213 * @return elements with the supplied class name, empty if none 1214 * @see #hasClass(String) 1215 * @see #classNames() 1216 */ 1217 public Elements getElementsByClass(String className) { 1218 Validate.notEmpty(className); 1219 1220 return Collector.collect(new Evaluator.Class(className), this); 1221 } 1222 1223 /** 1224 * Find elements that have a named attribute set. Case-insensitive. 1225 * 1226 * @param key name of the attribute, e.g. 
{@code href} 1227 * @return elements that have this attribute, empty if none 1228 */ 1229 public Elements getElementsByAttribute(String key) { 1230 Validate.notEmpty(key); 1231 key = key.trim(); 1232 1233 return Collector.collect(new Evaluator.Attribute(key), this); 1234 } 1235 1236 /** 1237 * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements 1238 * that have HTML5 datasets. 1239 * @param keyPrefix name prefix of the attribute e.g. {@code data-} 1240 * @return elements that have attribute names that start with the prefix, empty if none. 1241 */ 1242 public Elements getElementsByAttributeStarting(String keyPrefix) { 1243 Validate.notEmpty(keyPrefix); 1244 keyPrefix = keyPrefix.trim(); 1245 1246 return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this); 1247 } 1248 1249 /** 1250 * Find elements that have an attribute with the specific value. Case-insensitive. 1251 * 1252 * @param key name of the attribute 1253 * @param value value of the attribute 1254 * @return elements that have this attribute with this value, empty if none 1255 */ 1256 public Elements getElementsByAttributeValue(String key, String value) { 1257 return Collector.collect(new Evaluator.AttributeWithValue(key, value), this); 1258 } 1259 1260 /** 1261 * Find elements that either do not have this attribute, or have it with a different value. Case-insensitive. 1262 * 1263 * @param key name of the attribute 1264 * @param value value of the attribute 1265 * @return elements that do not have a matching attribute 1266 */ 1267 public Elements getElementsByAttributeValueNot(String key, String value) { 1268 return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this); 1269 } 1270 1271 /** 1272 * Find elements that have attributes that start with the value prefix. Case-insensitive. 
1273 * 1274 * @param key name of the attribute 1275 * @param valuePrefix start of attribute value 1276 * @return elements that have attributes that start with the value prefix 1277 */ 1278 public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) { 1279 return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this); 1280 } 1281 1282 /** 1283 * Find elements that have attributes that end with the value suffix. Case-insensitive. 1284 * 1285 * @param key name of the attribute 1286 * @param valueSuffix end of the attribute value 1287 * @return elements that have attributes that end with the value suffix 1288 */ 1289 public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) { 1290 return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this); 1291 } 1292 1293 /** 1294 * Find elements that have attributes whose value contains the match string. Case-insensitive. 1295 * 1296 * @param key name of the attribute 1297 * @param match substring of value to search for 1298 * @return elements that have attributes containing this text 1299 */ 1300 public Elements getElementsByAttributeValueContaining(String key, String match) { 1301 return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this); 1302 } 1303 1304 /** 1305 * Find elements that have an attribute whose value matches the supplied regular expression. 1306 * @param key name of the attribute 1307 * @param pattern compiled regular expression to match against attribute values 1308 * @return elements that have attributes matching this regular expression 1309 */ 1310 public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) { 1311 return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this); 1312 1313 } 1314 1315 /** 1316 * Find elements that have attributes whose values match the supplied regular expression. 
1317 * @param key name of the attribute 1318 * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1319 * @return elements that have attributes matching this regular expression 1320 */ 1321 public Elements getElementsByAttributeValueMatching(String key, String regex) { 1322 Pattern pattern; 1323 try { 1324 pattern = Pattern.compile(regex); 1325 } catch (PatternSyntaxException e) { 1326 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1327 } 1328 return getElementsByAttributeValueMatching(key, pattern); 1329 } 1330 1331 /** 1332 * Find elements whose sibling index is less than the supplied index. 1333 * @param index 0-based index 1334 * @return elements less than index 1335 */ 1336 public Elements getElementsByIndexLessThan(int index) { 1337 return Collector.collect(new Evaluator.IndexLessThan(index), this); 1338 } 1339 1340 /** 1341 * Find elements whose sibling index is greater than the supplied index. 1342 * @param index 0-based index 1343 * @return elements greater than index 1344 */ 1345 public Elements getElementsByIndexGreaterThan(int index) { 1346 return Collector.collect(new Evaluator.IndexGreaterThan(index), this); 1347 } 1348 1349 /** 1350 * Find elements whose sibling index is equal to the supplied index. 1351 * @param index 0-based index 1352 * @return elements equal to index 1353 */ 1354 public Elements getElementsByIndexEquals(int index) { 1355 return Collector.collect(new Evaluator.IndexEquals(index), this); 1356 } 1357 1358 /** 1359 * Find elements that contain the specified string. The search is case-insensitive. The text may appear directly 1360 * in the element, or in any of its descendants. 1361 * @param searchText to look for in the element's text 1362 * @return elements that contain the string, case-insensitive. 
1363 * @see Element#text() 1364 */ 1365 public Elements getElementsContainingText(String searchText) { 1366 return Collector.collect(new Evaluator.ContainsText(searchText), this); 1367 } 1368 1369 /** 1370 * Find elements that directly contain the specified string. The search is case-insensitive. The text must appear directly 1371 * in the element, not in any of its descendants. 1372 * @param searchText to look for in the element's own text 1373 * @return elements that contain the string, case-insensitive. 1374 * @see Element#ownText() 1375 */ 1376 public Elements getElementsContainingOwnText(String searchText) { 1377 return Collector.collect(new Evaluator.ContainsOwnText(searchText), this); 1378 } 1379 1380 /** 1381 * Find elements whose text matches the supplied regular expression. 1382 * @param pattern regular expression to match text against 1383 * @return elements matching the supplied regular expression. 1384 * @see Element#text() 1385 */ 1386 public Elements getElementsMatchingText(Pattern pattern) { 1387 return Collector.collect(new Evaluator.Matches(pattern), this); 1388 } 1389 1390 /** 1391 * Find elements whose text matches the supplied regular expression. 1392 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1393 * @return elements matching the supplied regular expression. 1394 * @see Element#text() 1395 */ 1396 public Elements getElementsMatchingText(String regex) { 1397 Pattern pattern; 1398 try { 1399 pattern = Pattern.compile(regex); 1400 } catch (PatternSyntaxException e) { 1401 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1402 } 1403 return getElementsMatchingText(pattern); 1404 } 1405 1406 /** 1407 * Find elements whose own text matches the supplied regular expression. 
1408 * @param pattern regular expression to match text against 1409 * @return elements matching the supplied regular expression. 1410 * @see Element#ownText() 1411 */ 1412 public Elements getElementsMatchingOwnText(Pattern pattern) { 1413 return Collector.collect(new Evaluator.MatchesOwn(pattern), this); 1414 } 1415 1416 /** 1417 * Find elements whose own text matches the supplied regular expression. 1418 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1419 * @return elements matching the supplied regular expression. 1420 * @see Element#ownText() 1421 */ 1422 public Elements getElementsMatchingOwnText(String regex) { 1423 Pattern pattern; 1424 try { 1425 pattern = Pattern.compile(regex); 1426 } catch (PatternSyntaxException e) { 1427 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1428 } 1429 return getElementsMatchingOwnText(pattern); 1430 } 1431 1432 /** 1433 * Find all elements under this element (including self, and children of children). 1434 * 1435 * @return all elements 1436 */ 1437 public Elements getAllElements() { 1438 return Collector.collect(new Evaluator.AllElements(), this); 1439 } 1440 1441 /** 1442 Gets the <b>normalized, combined text</b> of this element and all its children. Whitespace is normalized and 1443 trimmed. 1444 <p>For example, given HTML {@code <p>Hello <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there 1445 now!"} 1446 <p>If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not 1447 children), use {@link #ownText()} 1448 <p>Note that this method returns the textual content that would be presented to a reader. The contents of data 1449 nodes (such as {@code <script>} tags) are not considered text. 
Use {@link #data()} or {@link #html()} to retrieve 1450 that content. 1451 1452 @return decoded, normalized text, or empty string if none. 1453 @see #wholeText() 1454 @see #ownText() 1455 @see #textNodes() 1456 */ 1457 public String text() { 1458 final StringBuilder accum = StringUtil.borrowBuilder(); 1459 NodeTraversor.traverse(new TextAccumulator(accum), this); 1460 return StringUtil.releaseBuilder(accum).trim(); 1461 } 1462 1463 private static class TextAccumulator implements NodeVisitor { 1464 private final StringBuilder accum; 1465 1466 public TextAccumulator(StringBuilder accum) { 1467 this.accum = accum; 1468 } 1469 1470 @Override public void head(Node node, int depth) { 1471 if (node instanceof TextNode) { 1472 TextNode textNode = (TextNode) node; 1473 appendNormalisedText(accum, textNode); 1474 } else if (node instanceof Element) { 1475 Element element = (Element) node; 1476 if (accum.length() > 0 && 1477 (element.isBlock() || element.nameIs("br")) && 1478 !lastCharIsWhitespace(accum)) 1479 accum.append(' '); 1480 } 1481 } 1482 1483 @Override public void tail(Node node, int depth) { 1484 // make sure there is a space between block tags and immediately following text nodes or inline elements <div>One</div>Two should be "One Two". 1485 if (node instanceof Element) { 1486 Element element = (Element) node; 1487 Node next = node.nextSibling(); 1488 if (!element.tag.isInline() && (next instanceof TextNode || next instanceof Element && ((Element) next).tag.isInline()) && !lastCharIsWhitespace(accum)) 1489 accum.append(' '); 1490 } 1491 1492 } 1493 } 1494 1495 /** 1496 Get the non-normalized, decoded text of this element and its children, including only any newlines and spaces 1497 present in the original source. 
1498 @return decoded, non-normalized text 1499 @see #text() 1500 @see #wholeOwnText() 1501 */ 1502 public String wholeText() { 1503 return wholeTextOf(nodeStream()); 1504 } 1505 1506 private static String wholeTextOf(Stream<Node> stream) { 1507 return stream.map(node -> { 1508 if (node instanceof TextNode) return ((TextNode) node).getWholeText(); 1509 if (node.nameIs("br")) return "\n"; 1510 return ""; 1511 }).collect(StringUtil.joining("")); 1512 } 1513 1514 /** 1515 Get the non-normalized, decoded text of this element, <b>not including</b> any child elements, including any 1516 newlines and spaces present in the original source. 1517 @return decoded, non-normalized text that is a direct child of this Element 1518 @see #text() 1519 @see #wholeText() 1520 @see #ownText() 1521 @since 1.15.1 1522 */ 1523 public String wholeOwnText() { 1524 return wholeTextOf(childNodes.stream()); 1525 } 1526 1527 /** 1528 * Gets the (normalized) text owned by this element only; does not get the combined text of all children. 1529 * <p> 1530 * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"}, 1531 * whereas {@code p.text()} returns {@code "Hello there now!"}. 1532 * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element. 1533 * 1534 * @return decoded text, or empty string if none. 
1535 * @see #text() 1536 * @see #textNodes() 1537 */ 1538 public String ownText() { 1539 StringBuilder sb = StringUtil.borrowBuilder(); 1540 ownText(sb); 1541 return StringUtil.releaseBuilder(sb).trim(); 1542 } 1543 1544 private void ownText(StringBuilder accum) { 1545 for (int i = 0; i < childNodeSize(); i++) { 1546 Node child = childNodes.get(i); 1547 if (child instanceof TextNode) { 1548 TextNode textNode = (TextNode) child; 1549 appendNormalisedText(accum, textNode); 1550 } else if (child.nameIs("br") && !lastCharIsWhitespace(accum)) { 1551 accum.append(" "); 1552 } 1553 } 1554 } 1555 1556 private static void appendNormalisedText(StringBuilder accum, TextNode textNode) { 1557 String text = textNode.getWholeText(); 1558 if (preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode) 1559 accum.append(text); 1560 else 1561 StringUtil.appendNormalisedWhitespace(accum, text, lastCharIsWhitespace(accum)); 1562 } 1563 1564 static boolean preserveWhitespace(@Nullable Node node) { 1565 // looks only at this element and five levels up, to prevent recursion & needless stack searches 1566 if (node instanceof Element) { 1567 Element el = (Element) node; 1568 int i = 0; 1569 do { 1570 if (el.tag.preserveWhitespace()) 1571 return true; 1572 el = el.parent(); 1573 i++; 1574 } while (i < 6 && el != null); 1575 } 1576 return false; 1577 } 1578 1579 /** 1580 * Set the text of this element. Any existing contents (text or elements) will be cleared. 
1581 * <p>As a special case, for {@code <script>} and {@code <style>} tags, the input text will be treated as data, 1582 * not visible text.</p> 1583 * @param text decoded text 1584 * @return this element 1585 */ 1586 public Element text(String text) { 1587 Validate.notNull(text); 1588 empty(); 1589 // special case for script/style in HTML (or customs): should be data node 1590 if (tag().is(Tag.Data)) 1591 appendChild(new DataNode(text)); 1592 else 1593 appendChild(new TextNode(text)); 1594 1595 return this; 1596 } 1597 1598 /** 1599 Checks if the current element or any of its child elements contain non-whitespace text. 1600 @return {@code true} if the element has non-blank text content, {@code false} otherwise. 1601 */ 1602 public boolean hasText() { 1603 AtomicBoolean hasText = new AtomicBoolean(false); 1604 filter((node, depth) -> { 1605 if (node instanceof TextNode) { 1606 TextNode textNode = (TextNode) node; 1607 if (!textNode.isBlank()) { 1608 hasText.set(true); 1609 return NodeFilter.FilterResult.STOP; 1610 } 1611 } 1612 return NodeFilter.FilterResult.CONTINUE; 1613 }); 1614 return hasText.get(); 1615 } 1616 1617 /** 1618 * Get the combined data of this element. Data is e.g. the inside of a {@code <script>} tag. Note that data is NOT the 1619 * text of the element. Use {@link #text()} to get the text that would be visible to a user, and {@code data()} 1620 * for the contents of scripts, comments, CSS styles, etc. 
1621 * 1622 * @return the data, or empty string if none 1623 * 1624 * @see #dataNodes() 1625 */ 1626 public String data() { 1627 StringBuilder sb = StringUtil.borrowBuilder(); 1628 traverse((childNode, depth) -> { 1629 if (childNode instanceof DataNode) { 1630 DataNode data = (DataNode) childNode; 1631 sb.append(data.getWholeData()); 1632 } else if (childNode instanceof Comment) { 1633 Comment comment = (Comment) childNode; 1634 sb.append(comment.getData()); 1635 } else if (childNode instanceof CDataNode) { 1636 // this shouldn't really happen because the html parser won't see the cdata as anything special when parsing script. 1637 // but in case another type gets through. 1638 CDataNode cDataNode = (CDataNode) childNode; 1639 sb.append(cDataNode.getWholeText()); 1640 } 1641 }); 1642 return StringUtil.releaseBuilder(sb); 1643 } 1644 1645 /** 1646 * Gets the literal value of this element's "class" attribute, which may include multiple class names, space 1647 * separated. (E.g. on <code><div class="header gray"></code> returns, "<code>header gray</code>") 1648 * @return The literal class attribute, or <b>empty string</b> if no class attribute set. 1649 */ 1650 public String className() { 1651 return attr("class").trim(); 1652 } 1653 1654 /** 1655 * Get each of the element's class names. E.g. on element {@code <div class="header gray">}, 1656 * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to 1657 * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them. 
1658 * @return set of classnames, empty if no class attribute 1659 */ 1660 public Set<String> classNames() { 1661 String[] names = ClassSplit.split(className()); 1662 Set<String> classNames = new LinkedHashSet<>(Arrays.asList(names)); 1663 classNames.remove(""); // if classNames() was empty, would include an empty class 1664 1665 return classNames; 1666 } 1667 1668 /** 1669 Set the element's {@code class} attribute to the supplied class names. 1670 @param classNames set of classes 1671 @return this element, for chaining 1672 */ 1673 public Element classNames(Set<String> classNames) { 1674 Validate.notNull(classNames); 1675 if (classNames.isEmpty()) { 1676 attributes().remove("class"); 1677 } else { 1678 attributes().put("class", StringUtil.join(classNames, " ")); 1679 } 1680 return this; 1681 } 1682 1683 /** 1684 * Tests if this element has a class. Case-insensitive. 1685 * @param className name of class to check for 1686 * @return true if it does, false if not 1687 */ 1688 // performance sensitive 1689 public boolean hasClass(String className) { 1690 if (attributes == null) 1691 return false; 1692 1693 final String classAttr = attributes.getIgnoreCase("class"); 1694 final int len = classAttr.length(); 1695 final int wantLen = className.length(); 1696 1697 if (len == 0 || len < wantLen) { 1698 return false; 1699 } 1700 1701 // if both lengths are equal, only need compare the className with the attribute 1702 if (len == wantLen) { 1703 return className.equalsIgnoreCase(classAttr); 1704 } 1705 1706 // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations) 1707 boolean inClass = false; 1708 int start = 0; 1709 for (int i = 0; i < len; i++) { 1710 if (Character.isWhitespace(classAttr.charAt(i))) { 1711 if (inClass) { 1712 // white space ends a class name, compare it with the requested one, ignore case 1713 if (i - start == wantLen && classAttr.regionMatches(true, start, className, 0, wantLen)) { 1714 return true; 1715 } 1716 
inClass = false; 1717 } 1718 } else { 1719 if (!inClass) { 1720 // we're in a class name : keep the start of the substring 1721 inClass = true; 1722 start = i; 1723 } 1724 } 1725 } 1726 1727 // check the last entry 1728 if (inClass && len - start == wantLen) { 1729 return classAttr.regionMatches(true, start, className, 0, wantLen); 1730 } 1731 1732 return false; 1733 } 1734 1735 /** 1736 Add a class name to this element's {@code class} attribute. 1737 @param className class name to add 1738 @return this element 1739 */ 1740 public Element addClass(String className) { 1741 Validate.notNull(className); 1742 1743 Set<String> classes = classNames(); 1744 classes.add(className); 1745 classNames(classes); 1746 1747 return this; 1748 } 1749 1750 /** 1751 Remove a class name from this element's {@code class} attribute. 1752 @param className class name to remove 1753 @return this element 1754 */ 1755 public Element removeClass(String className) { 1756 Validate.notNull(className); 1757 1758 Set<String> classes = classNames(); 1759 classes.remove(className); 1760 classNames(classes); 1761 1762 return this; 1763 } 1764 1765 /** 1766 Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it. 1767 @param className class name to toggle 1768 @return this element 1769 */ 1770 public Element toggleClass(String className) { 1771 Validate.notNull(className); 1772 1773 Set<String> classes = classNames(); 1774 if (classes.contains(className)) 1775 classes.remove(className); 1776 else 1777 classes.add(className); 1778 classNames(classes); 1779 1780 return this; 1781 } 1782 1783 /** 1784 * Get the value of a form element (input, textarea, etc). 1785 * @return the value of the form element, or empty string if not set. 1786 */ 1787 public String val() { 1788 if (elementIs("textarea", NamespaceHtml)) 1789 return text(); 1790 else 1791 return attr("value"); 1792 } 1793 1794 /** 1795 * Set the value of a form element (input, textarea, etc). 
1796 * @param value value to set 1797 * @return this element (for chaining) 1798 */ 1799 public Element val(String value) { 1800 if (elementIs("textarea", NamespaceHtml)) 1801 text(value); 1802 else 1803 attr("value", value); 1804 return this; 1805 } 1806 1807 /** 1808 Get the source range (start and end positions) of the end (closing) tag for this Element. Position tracking must be 1809 enabled prior to parsing the content. 1810 @return the range of the closing tag for this element, or {@code untracked} if its range was not tracked. 1811 @see org.jsoup.parser.Parser#setTrackPosition(boolean) 1812 @see Node#sourceRange() 1813 @see Range#isImplicit() 1814 @since 1.15.2 1815 */ 1816 public Range endSourceRange() { 1817 return Range.of(this, false); 1818 } 1819 1820 @Override 1821 void outerHtmlHead(final Appendable accum, Document.OutputSettings out) throws IOException { 1822 String tagName = safeTagName(out.syntax()); 1823 accum.append('<').append(tagName); 1824 if (attributes != null) attributes.html(accum, out); 1825 1826 if (childNodes.isEmpty()) { 1827 boolean xmlMode = out.syntax() == xml || !tag.namespace().equals(NamespaceHtml); 1828 if (xmlMode && (tag.is(Tag.SeenSelfClose) || (tag.isKnownTag() && (tag.isEmpty() || tag.isSelfClosing())))) { 1829 accum.append(" />"); 1830 } else if (!xmlMode && tag.isEmpty()) { // html void element 1831 accum.append('>'); 1832 } else { 1833 accum.append("></").append(tagName).append('>'); 1834 } 1835 } else { 1836 accum.append('>'); 1837 } 1838 } 1839 1840 @Override 1841 void outerHtmlTail(Appendable accum, Document.OutputSettings out) throws IOException { 1842 if (!childNodes.isEmpty()) 1843 accum.append("</").append(safeTagName(out.syntax())).append('>'); 1844 // if empty, we have already closed in htmlHead 1845 } 1846 1847 /* If XML syntax, normalizes < to _ in tag name. */ 1848 @Nullable private String safeTagName(Document.OutputSettings.Syntax syntax) { 1849 return syntax == xml ? 
Normalizer.xmlSafeTagName(tagName()) : tagName(); 1850 } 1851 1852 /** 1853 * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return 1854 * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.) 1855 * 1856 * @return String of HTML. 1857 * @see #outerHtml() 1858 */ 1859 public String html() { 1860 StringBuilder accum = StringUtil.borrowBuilder(); 1861 html(accum); 1862 String html = StringUtil.releaseBuilder(accum); 1863 return NodeUtils.outputSettings(this).prettyPrint() ? html.trim() : html; 1864 } 1865 1866 @Override 1867 public <T extends Appendable> T html(T accum) { 1868 Node child = firstChild(); 1869 if (child != null) { 1870 Printer printer = Printer.printerFor(child, accum); 1871 while (child != null) { 1872 NodeTraversor.traverse(printer, child); 1873 child = child.nextSibling(); 1874 } 1875 } 1876 return accum; 1877 } 1878 1879 /** 1880 * Set this element's inner HTML. Clears the existing HTML first. 1881 * @param html HTML to parse and set into this element 1882 * @return this element 1883 * @see #append(String) 1884 */ 1885 public Element html(String html) { 1886 empty(); 1887 append(html); 1888 return this; 1889 } 1890 1891 @Override 1892 public Element clone() { 1893 return (Element) super.clone(); 1894 } 1895 1896 @Override 1897 public Element shallowClone() { 1898 // simpler than implementing a clone version with no child copy 1899 String baseUri = baseUri(); 1900 if (baseUri.isEmpty()) baseUri = null; // saves setting a blank internal attribute 1901 return new Element(tag, baseUri, attributes == null ? null : attributes.clone()); 1902 } 1903 1904 @Override 1905 protected Element doClone(@Nullable Node parent) { 1906 Element clone = (Element) super.doClone(parent); 1907 clone.attributes = attributes != null ? 
attributes.clone() : null; 1908 clone.childNodes = new NodeList(childNodes.size()); 1909 clone.childNodes.addAll(childNodes); // the children then get iterated and cloned in Node.clone 1910 1911 return clone; 1912 } 1913 1914 // overrides of Node for call chaining 1915 @Override 1916 public Element clearAttributes() { 1917 if (attributes != null) { 1918 super.clearAttributes(); // keeps internal attributes via iterator 1919 if (attributes.size() == 0) 1920 attributes = null; // only remove entirely if no internal attributes 1921 } 1922 1923 return this; 1924 } 1925 1926 @Override 1927 public Element removeAttr(String attributeKey) { 1928 return (Element) super.removeAttr(attributeKey); 1929 } 1930 1931 @Override 1932 public Element root() { 1933 return (Element) super.root(); // probably a document, but always at least an element 1934 } 1935 1936 @Override 1937 public Element traverse(NodeVisitor nodeVisitor) { 1938 return (Element) super.traverse(nodeVisitor); 1939 } 1940 1941 @Override 1942 public Element forEachNode(Consumer<? super Node> action) { 1943 return (Element) super.forEachNode(action); 1944 } 1945 1946 /** 1947 Perform the supplied action on this Element and each of its descendant Elements, during a depth-first traversal. 1948 Elements may be inspected, changed, added, replaced, or removed. 1949 @param action the function to perform on the element 1950 @see Node#forEachNode(Consumer) 1951 */ 1952 @Override 1953 public void forEach(Consumer<? super Element> action) { 1954 stream().forEach(action); 1955 } 1956 1957 /** 1958 Returns an Iterator that iterates this Element and each of its descendant Elements, in document order. 
@return an Iterator
     */
    @Override
    public Iterator<Element> iterator() {
        return new NodeIterator<>(this, Element.class);
    }

    @Override
    public Element filter(NodeFilter nodeFilter) {
        final Node filtered = super.filter(nodeFilter);
        return (Element) filtered;
    }

    /** An {@code ArrayList} of Nodes that additionally exposes the list's modification count. */
    static final class NodeList extends ArrayList<Node> {
        /**
         Create an empty list with the given initial capacity.
         @param size initial capacity passed to {@code ArrayList}
         */
        public NodeList(int size) {
            super(size);
        }

        /**
         Get the inherited {@code modCount} field of this list.
         @return the current modification count
         */
        int modCount() {
            return modCount;
        }
    }
}