001package org.jsoup.nodes; 002 003import org.jsoup.helper.Validate; 004import org.jsoup.internal.Normalizer; 005import org.jsoup.internal.QuietAppendable; 006import org.jsoup.internal.StringUtil; 007import org.jsoup.parser.ParseSettings; 008import org.jsoup.parser.Parser; 009import org.jsoup.parser.Tag; 010import org.jsoup.parser.TokenQueue; 011import org.jsoup.select.Collector; 012import org.jsoup.select.Elements; 013import org.jsoup.select.Evaluator; 014import org.jsoup.select.NodeFilter; 015import org.jsoup.select.NodeVisitor; 016import org.jsoup.select.Nodes; 017import org.jsoup.select.Selector; 018import org.jspecify.annotations.Nullable; 019 020import java.lang.ref.WeakReference; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Collection; 024import java.util.Collections; 025import java.util.Iterator; 026import java.util.LinkedHashSet; 027import java.util.List; 028import java.util.Map; 029import java.util.Set; 030import java.util.concurrent.atomic.AtomicBoolean; 031import java.util.function.Consumer; 032import java.util.regex.Pattern; 033import java.util.regex.PatternSyntaxException; 034import java.util.stream.Collectors; 035import java.util.stream.Stream; 036 037import static org.jsoup.internal.Normalizer.normalize; 038import static org.jsoup.nodes.Document.OutputSettings.Syntax.xml; 039import static org.jsoup.nodes.TextNode.lastCharIsWhitespace; 040import static org.jsoup.parser.Parser.NamespaceHtml; 041import static org.jsoup.parser.TokenQueue.escapeCssIdentifier; 042import static org.jsoup.select.Selector.evaluatorOf; 043 044/** 045 An HTML Element consists of a tag name, attributes, and child nodes (including text nodes and other elements). 046 <p> 047 From an Element, you can extract data, traverse the node graph, and manipulate the HTML. 
048*/ 049public class Element extends Node implements Iterable<Element> { 050 private static final List<Element> EmptyChildren = Collections.emptyList(); 051 private static final NodeList EmptyNodeList = new NodeList(0); 052 private static final Pattern ClassSplit = Pattern.compile("\\s+"); 053 private static final String BaseUriKey = Attributes.internalKey("baseUri"); 054 Tag tag; 055 NodeList childNodes; 056 @Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null 057 058 /** 059 * Create a new, standalone element, in the specified namespace. 060 * @param tag tag name 061 * @param namespace namespace for this element 062 */ 063 public Element(String tag, String namespace) { 064 this(Tag.valueOf(tag, namespace, ParseSettings.preserveCase), null); 065 } 066 067 /** 068 * Create a new, standalone element, in the HTML namespace. 069 * @param tag tag name 070 * @see #Element(String tag, String namespace) 071 */ 072 public Element(String tag) { 073 this(tag, Parser.NamespaceHtml); 074 } 075 076 /** 077 * Create a new, standalone Element. (Standalone in that it has no parent.) 078 * 079 * @param tag tag of this element 080 * @param baseUri the base URI (optional, may be null to inherit from parent, or "" to clear parent's) 081 * @param attributes initial attributes (optional, may be null) 082 * @see #appendChild(Node) 083 * @see #appendElement(String) 084 */ 085 public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes) { 086 Validate.notNull(tag); 087 childNodes = EmptyNodeList; 088 this.attributes = attributes; 089 this.tag = tag; 090 if (baseUri != null) 091 this.setBaseUri(baseUri); 092 } 093 094 /** 095 * Create a new Element from a Tag and a base URI. 096 * 097 * @param tag element tag 098 * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any. 
099 * @see Tag#valueOf(String, ParseSettings) 100 */ 101 public Element(Tag tag, @Nullable String baseUri) { 102 this(tag, baseUri, null); 103 } 104 105 /** 106 Internal test to check if a nodelist object has been created. 107 */ 108 protected boolean hasChildNodes() { 109 return childNodes != EmptyNodeList; 110 } 111 112 @Override protected List<Node> ensureChildNodes() { 113 if (childNodes == EmptyNodeList) { 114 childNodes = new NodeList(4); 115 } 116 return childNodes; 117 } 118 119 @Override 120 protected boolean hasAttributes() { 121 return attributes != null; 122 } 123 124 @Override 125 public Attributes attributes() { 126 if (attributes == null) // not using hasAttributes, as doesn't clear warning 127 attributes = new Attributes(); 128 return attributes; 129 } 130 131 @Override 132 public String baseUri() { 133 return searchUpForAttribute(this, BaseUriKey); 134 } 135 136 private static String searchUpForAttribute(final Element start, final String key) { 137 Element el = start; 138 while (el != null) { 139 if (el.attributes != null && el.attributes.hasKey(key)) 140 return el.attributes.get(key); 141 el = el.parent(); 142 } 143 return ""; 144 } 145 146 @Override 147 protected void doSetBaseUri(String baseUri) { 148 attributes().put(BaseUriKey, baseUri); 149 } 150 151 @Override 152 public int childNodeSize() { 153 return childNodes.size(); 154 } 155 156 @Override 157 public String nodeName() { 158 return tag.getName(); 159 } 160 161 /** 162 * Get the name of the tag for this element. E.g. {@code div}. If you are using {@link ParseSettings#preserveCase 163 * case preserving parsing}, this will return the source's original case. 164 * 165 * @return the tag name 166 */ 167 public String tagName() { 168 return tag.getName(); 169 } 170 171 /** 172 * Get the normalized name of this Element's tag. This will always be the lower-cased version of the tag, regardless 173 * of the tag case preserving setting of the parser. 
For e.g., {@code <DIV>} and {@code <div>} both have a 174 * normal name of {@code div}. 175 * @return normal name 176 */ 177 @Override 178 public String normalName() { 179 return tag.normalName(); 180 } 181 182 /** 183 Test if this Element has the specified normalized name, and is in the specified namespace. 184 * @param normalName a normalized element name (e.g. {@code div}). 185 * @param namespace the namespace 186 * @return true if the element's normal name matches exactly, and is in the specified namespace 187 * @since 1.17.2 188 */ 189 public boolean elementIs(String normalName, String namespace) { 190 return tag.normalName().equals(normalName) && tag.namespace().equals(namespace); 191 } 192 193 /** 194 * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with 195 * {@code el.tagName("div");}. 196 * 197 * @param tagName new tag name for this element 198 * @return this element, for chaining 199 * @see Elements#tagName(String) 200 */ 201 public Element tagName(String tagName) { 202 return tagName(tagName, tag.namespace()); 203 } 204 205 /** 206 * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with 207 * {@code el.tagName("div");}. 208 * 209 * @param tagName new tag name for this element 210 * @param namespace the new namespace for this element 211 * @return this element, for chaining 212 * @see Elements#tagName(String) 213 */ 214 public Element tagName(String tagName, String namespace) { 215 Validate.notEmptyParam(tagName, "tagName"); 216 Validate.notEmptyParam(namespace, "namespace"); 217 Parser parser = NodeUtils.parser(this); 218 tag = parser.tagSet().valueOf(tagName, namespace, parser.settings()); // maintains the case option of the original parse 219 return this; 220 } 221 222 /** 223 * Get the Tag for this element. 224 * 225 * @return the tag object 226 */ 227 public Tag tag() { 228 return tag; 229 } 230 231 /** 232 Change the Tag of this element. 
233 @param tag the new tag 234 @return this element, for chaining 235 @since 1.20.1 236 */ 237 public Element tag(Tag tag) { 238 Validate.notNull(tag); 239 this.tag = tag; 240 return this; 241 } 242 243 /** 244 * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element 245 * {@code <span> == false}). 246 * 247 * @return true if block, false if not (and thus inline) 248 */ 249 public boolean isBlock() { 250 return tag.isBlock(); 251 } 252 253 /** 254 * Get the {@code id} attribute of this element. 255 * 256 * @return The id attribute, if present, or an empty string if not. 257 */ 258 public String id() { 259 return attributes != null ? attributes.getIgnoreCase("id") :""; 260 } 261 262 /** 263 Set the {@code id} attribute of this element. 264 @param id the ID value to use 265 @return this Element, for chaining 266 */ 267 public Element id(String id) { 268 Validate.notNull(id); 269 attr("id", id); 270 return this; 271 } 272 273 /** 274 * Set an attribute value on this element. If this element already has an attribute with the 275 * key, its value is updated; otherwise, a new attribute is added. 276 * 277 * @return this element 278 */ 279 @Override public Element attr(String attributeKey, String attributeValue) { 280 super.attr(attributeKey, attributeValue); 281 return this; 282 } 283 284 /** 285 * Set a boolean attribute value on this element. Setting to <code>true</code> sets the attribute value to "" and 286 * marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the attribute 287 * with the same key if it exists. 288 * 289 * @param attributeKey the attribute key 290 * @param attributeValue the attribute value 291 * 292 * @return this element 293 */ 294 public Element attr(String attributeKey, boolean attributeValue) { 295 attributes().put(attributeKey, attributeValue); 296 return this; 297 } 298 299 /** 300 Get an Attribute by key. 
Changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc 301 will cascade back to this Element. 302 @param key the (case-sensitive) attribute key 303 @return the Attribute for this key, or null if not present. 304 @since 1.17.2 305 */ 306 @Nullable public Attribute attribute(String key) { 307 return hasAttributes() ? attributes().attribute(key) : null; 308 } 309 310 /** 311 * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key 312 * starting with "data-" is included the dataset. 313 * <p> 314 * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset 315 * {@code package=jsoup, language=java}. 316 * <p> 317 * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected 318 * in the other map. 319 * <p> 320 * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector. 321 * @return a map of {@code key=value} custom data attributes. 322 */ 323 public Map<String, String> dataset() { 324 return attributes().dataset(); 325 } 326 327 @Override @Nullable 328 public final Element parent() { 329 return (Element) parentNode; 330 } 331 332 /** 333 * Get this element's parent and ancestors, up to the document root. 334 * @return this element's stack of parents, starting with the closest first. 335 */ 336 public Elements parents() { 337 Elements parents = new Elements(); 338 Element parent = this.parent(); 339 while (parent != null && !parent.nameIs("#root")) { 340 parents.add(parent); 341 parent = parent.parent(); 342 } 343 return parents; 344 } 345 346 /** 347 * Get a child element of this element, by its 0-based index number. 348 * <p> 349 * Note that an element can have both mixed Nodes and Elements as children. This method inspects 350 * a filtered list of children that are elements, and the index is based on that filtered list. 
351 * </p> 352 * 353 * @param index the index number of the element to retrieve 354 * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException} 355 * @see #childNode(int) 356 */ 357 public Element child(int index) { 358 return childElementsList().get(index); 359 } 360 361 /** 362 * Get the number of child nodes of this element that are elements. 363 * <p> 364 * This method works on the same filtered list like {@link #child(int)}. Use {@link #childNodes()} and {@link 365 * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.) 366 * </p> 367 * 368 * @return the number of child nodes that are elements 369 * @see #children() 370 * @see #child(int) 371 */ 372 public int childrenSize() { 373 return childElementsList().size(); 374 } 375 376 /** 377 * Get this element's child elements. 378 * <p> 379 * This is effectively a filter on {@link #childNodes()} to get Element nodes. 380 * </p> 381 * @return child elements. If this element has no children, returns an empty list. 382 * @see #childNodes() 383 */ 384 public Elements children() { 385 return new Elements(childElementsList()); 386 } 387 388 /** 389 * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated. 
390 * @return a list of child elements 391 */ 392 List<Element> childElementsList() { 393 if (childNodeSize() == 0) return EmptyChildren; // short circuit creating empty 394 List<Element> children = cachedChildren(); 395 if (children == null) { 396 children = filterNodes(Element.class); 397 stashChildren(children); 398 } 399 return children; 400 } 401 402 private static final String childElsKey = "jsoup.childEls"; 403 private static final String childElsMod = "jsoup.childElsMod"; 404 405 /** returns the cached child els, if they exist, and the modcount of our childnodes matches the stashed modcount */ 406 private @Nullable List<Element> cachedChildren() { 407 Map<String, Object> userData = attributes().userData(); 408 //noinspection unchecked 409 WeakReference<List<Element>> ref = (WeakReference<List<Element>>) userData.get(childElsKey); 410 if (ref != null) { 411 List<Element> els = ref.get(); 412 if (els != null) { 413 Integer modCount = (Integer) userData.get(childElsMod); 414 if (modCount != null && modCount == childNodes.modCount()) 415 return els; 416 } 417 } 418 return null; 419 } 420 421 /** caches the child els into the Attribute user data. */ 422 private void stashChildren(List<Element> els) { 423 Map<String, Object> userData = attributes().userData(); 424 WeakReference<List<Element>> ref = new WeakReference<>(els); 425 userData.put(childElsKey, ref); 426 userData.put(childElsMod, childNodes.modCount()); 427 } 428 429 /** 430 Returns a Stream of this Element and all of its descendant Elements. The stream has document order. 431 @return a stream of this element and its descendants. 
432 @see #nodeStream() 433 @since 1.17.1 434 */ 435 public Stream<Element> stream() { 436 return NodeUtils.stream(this, Element.class); 437 } 438 439 private <T> List<T> filterNodes(Class<T> clazz) { 440 return childNodes.stream() 441 .filter(clazz::isInstance) 442 .map(clazz::cast) 443 .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList)); 444 } 445 446 /** 447 * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated. 448 * <p> 449 * This is effectively a filter on {@link #childNodes()} to get Text nodes. 450 * @return child text nodes. If this element has no text nodes, returns an 451 * empty list. 452 * </p> 453 * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected: 454 * <ul> 455 * <li>{@code p.text()} = {@code "One Two Three Four"}</li> 456 * <li>{@code p.ownText()} = {@code "One Three Four"}</li> 457 * <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li> 458 * <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li> 459 * <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li> 460 * </ul> 461 */ 462 public List<TextNode> textNodes() { 463 return filterNodes(TextNode.class); 464 } 465 466 /** 467 * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated. 468 * <p> 469 * This is effectively a filter on {@link #childNodes()} to get Data nodes. 470 * </p> 471 * @return child data nodes. If this element has no data nodes, returns an 472 * empty list. 473 * @see #data() 474 */ 475 public List<DataNode> dataNodes() { 476 return filterNodes(DataNode.class); 477 } 478 479 /** 480 * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements 481 * may include this element, or any of its descendents. 
482 * <p>If the query starts with a combinator (e.g. {@code *} or {@code >}), that will combine to this element.</p> 483 * <p>This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because 484 * multiple filters can be combined, e.g.:</p> 485 * <ul> 486 * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)</li> 487 * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)</li> 488 * <li>{@code el.select("* div")} - finds all divs that descend from this element (and excludes this element)</li> 489 * <li>{@code el.select("> div")} - finds all divs that are direct children of this element (and excludes this element)</li> 490 * </ul> 491 * <p>See the query syntax documentation in {@link org.jsoup.select.Selector}.</p> 492 * <p>Also known as {@code querySelectorAll()} in the Web DOM.</p> 493 * 494 * @param cssQuery a {@link Selector} CSS-like query 495 * @return an {@link Elements} list containing elements that match the query (empty if none match) 496 * @see Selector selector query syntax 497 * @see #select(Evaluator) 498 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. 499 */ 500 public Elements select(String cssQuery) { 501 return Selector.select(cssQuery, this); 502 } 503 504 /** 505 * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but 506 * may be useful if you are running the same query many times (on many documents) and want to save the overhead of 507 * repeatedly parsing the CSS query. 
508 * @param evaluator an element evaluator 509 * @return an {@link Elements} list containing elements that match the query (empty if none match) 510 * @see Selector#evaluatorOf(String css) 511 */ 512 public Elements select(Evaluator evaluator) { 513 return Selector.select(evaluator, this); 514 } 515 516 /** 517 Selects elements from the given root that match the specified {@link Selector} CSS query, with this element as the 518 starting context, and returns them as a lazy Stream. Matched elements may include this element, or any of its 519 children. 520 <p> 521 Unlike {@link #select(String query)}, which returns a complete list of all matching elements, this method returns a 522 {@link Stream} that processes elements lazily as they are needed. The stream operates in a "pull" model — elements 523 are fetched from the root as the stream is traversed. You can use standard {@code Stream} operations such as 524 {@code filter}, {@code map}, or {@code findFirst} to process elements on demand. 525 </p> 526 527 @param cssQuery a {@link Selector} CSS-like query 528 @return a {@link Stream} containing elements that match the query (empty if none match) 529 @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. 530 @see Selector selector query syntax 531 @see #selectStream(Evaluator eval) 532 @since 1.19.1 533 */ 534 public Stream<Element> selectStream(String cssQuery) { 535 return Selector.selectStream(cssQuery, this); 536 } 537 538 /** 539 Find a Stream of elements that match the supplied Evaluator. 540 541 @param evaluator an element Evaluator 542 @return a {@link Stream} containing elements that match the query (empty if none match) 543 @see Selector#evaluatorOf(String css) 544 @since 1.19.1 545 */ 546 public Stream<Element> selectStream(Evaluator evaluator) { 547 return Selector.selectStream(evaluator, this); 548 } 549 550 /** 551 * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context. 
552 * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query 553 * execution stops on the first hit.</p> 554 * <p>Also known as {@code querySelector()} in the Web DOM.</p> 555 * @param cssQuery cssQuery a {@link Selector} CSS-like query 556 * @return the first matching element, or <b>{@code null}</b> if there is no match. 557 * @see #expectFirst(String) 558 */ 559 public @Nullable Element selectFirst(String cssQuery) { 560 return Selector.selectFirst(cssQuery, this); 561 } 562 563 /** 564 * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or 565 * {@code null} if none match. 566 * 567 * @param evaluator an element evaluator 568 * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none 569 * match. 570 */ 571 public @Nullable Element selectFirst(Evaluator evaluator) { 572 return Collector.findFirst(evaluator, this); 573 } 574 575 /** 576 Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}. This 577 is useful if you want to simply abort processing on a failed match. 578 @param cssQuery a {@link Selector} CSS-like query 579 @return the first matching element 580 @throws IllegalArgumentException if no match is found 581 @since 1.15.2 582 */ 583 public Element expectFirst(String cssQuery) { 584 return Validate.expectNotNull( 585 Selector.selectFirst(cssQuery, this), 586 parent() != null ? 587 "No elements matched the query '%s' on element '%s'." : 588 "No elements matched the query '%s' in the document." 589 , cssQuery, this.tagName() 590 ); 591 } 592 593 /** 594 Find nodes that match the supplied {@link Evaluator}, with this element as the starting context. Matched 595 nodes may include this element, or any of its descendents. 
596 597 @param evaluator an evaluator 598 @return a list of nodes that match the query (empty if none match) 599 @since 1.21.1 600 */ 601 public Nodes<Node> selectNodes(Evaluator evaluator) { 602 return selectNodes(evaluator, Node.class); 603 } 604 605 /** 606 Find nodes that match the supplied {@link Selector} CSS query, with this element as the starting context. Matched 607 nodes may include this element, or any of its descendents. 608 <p>To select leaf nodes, the query should specify the node type, e.g. {@code ::text}, 609 {@code ::comment}, {@code ::data}, {@code ::leafnode}.</p> 610 611 @param cssQuery a {@link Selector} CSS query 612 @return a list of nodes that match the query (empty if none match) 613 @since 1.21.1 614 */ 615 public Nodes<Node> selectNodes(String cssQuery) { 616 return selectNodes(cssQuery, Node.class); 617 } 618 619 /** 620 Find nodes that match the supplied Evaluator, with this element as the starting context. Matched 621 nodes may include this element, or any of its descendents. 622 623 @param evaluator an evaluator 624 @param type the type of node to collect (e.g. {@link Element}, {@link LeafNode}, {@link TextNode} etc) 625 @param <T> the type of node to collect 626 @return a list of nodes that match the query (empty if none match) 627 @since 1.21.1 628 */ 629 public <T extends Node> Nodes<T> selectNodes(Evaluator evaluator, Class<T> type) { 630 Validate.notNull(evaluator); 631 return Collector.collectNodes(evaluator, this, type); 632 } 633 634 /** 635 Find nodes that match the supplied {@link Selector} CSS query, with this element as the starting context. Matched 636 nodes may include this element, or any of its descendents. 637 <p>To select specific node types, use {@code ::text}, {@code ::comment}, {@code ::leafnode}, etc. 
For example, to 638 select all text nodes under {@code p} elements: </p> 639 <pre> Nodes<TextNode> textNodes = doc.selectNodes("p ::text", TextNode.class);</pre> 640 641 @param cssQuery a {@link Selector} CSS query 642 @param type the type of node to collect (e.g. {@link Element}, {@link LeafNode}, {@link TextNode} etc) 643 @param <T> the type of node to collect 644 @return a list of nodes that match the query (empty if none match) 645 @since 1.21.1 646 */ 647 public <T extends Node> Nodes<T> selectNodes(String cssQuery, Class<T> type) { 648 Validate.notEmpty(cssQuery); 649 return selectNodes(evaluatorOf(cssQuery), type); 650 } 651 652 /** 653 Find the first Node that matches the {@link Selector} CSS query, with this element as the starting context. 654 <p>This is effectively the same as calling {@code element.selectNodes(query).first()}, but is more efficient as 655 query 656 execution stops on the first hit.</p> 657 <p>Also known as {@code querySelector()} in the Web DOM.</p> 658 659 @param cssQuery cssQuery a {@link Selector} CSS-like query 660 @return the first matching node, or <b>{@code null}</b> if there is no match. 661 @since 1.21.1 662 @see #expectFirst(String) 663 */ 664 public @Nullable <T extends Node> T selectFirstNode(String cssQuery, Class<T> type) { 665 return selectFirstNode(evaluatorOf(cssQuery), type); 666 } 667 668 /** 669 Finds the first Node that matches the supplied Evaluator, with this element as the starting context, or 670 {@code null} if none match. 671 672 @param evaluator an element evaluator 673 @return the first matching node (walking down the tree, starting from this element), or {@code null} if none 674 match. 675 @since 1.21.1 676 */ 677 public @Nullable <T extends Node> T selectFirstNode(Evaluator evaluator, Class<T> type) { 678 return Collector.findFirstNode(evaluator, this, type); 679 } 680 681 /** 682 Just like {@link #selectFirstNode(String, Class)}, but if there is no match, throws an 683 {@link IllegalArgumentException}. 
This is useful if you want to simply abort processing on a failed match. 684 685 @param cssQuery a {@link Selector} CSS-like query 686 @return the first matching node 687 @throws IllegalArgumentException if no match is found 688 @since 1.21.1 689 */ 690 public <T extends Node> T expectFirstNode(String cssQuery, Class<T> type) { 691 return Validate.expectNotNull( 692 selectFirstNode(cssQuery, type), 693 parent() != null ? 694 "No nodes matched the query '%s' on element '%s'.": 695 "No nodes matched the query '%s' in the document." 696 , cssQuery, this.tagName() 697 ); 698 } 699 700 /** 701 * Checks if this element matches the given {@link Selector} CSS query. Also knows as {@code matches()} in the Web 702 * DOM. 703 * 704 * @param cssQuery a {@link Selector} CSS query 705 * @return if this element matches the query 706 */ 707 public boolean is(String cssQuery) { 708 return is(evaluatorOf(cssQuery)); 709 } 710 711 /** 712 * Check if this element matches the given evaluator. 713 * @param evaluator an element evaluator 714 * @return if this element matches 715 */ 716 public boolean is(Evaluator evaluator) { 717 return evaluator.matches(this.root(), this); 718 } 719 720 /** 721 * Find the closest element up the tree of parents that matches the specified CSS query. Will return itself, an 722 * ancestor, or {@code null} if there is no such matching element. 723 * @param cssQuery a {@link Selector} CSS query 724 * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not 725 * found. 726 */ 727 public @Nullable Element closest(String cssQuery) { 728 return closest(evaluatorOf(cssQuery)); 729 } 730 731 /** 732 * Find the closest element up the tree of parents that matches the specified evaluator. Will return itself, an 733 * ancestor, or {@code null} if there is no such matching element. 
734 * @param evaluator a query evaluator 735 * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not 736 * found. 737 */ 738 public @Nullable Element closest(Evaluator evaluator) { 739 Validate.notNull(evaluator); 740 Element el = this; 741 final Element root = root(); 742 do { 743 if (evaluator.matches(root, el)) 744 return el; 745 el = el.parent(); 746 } while (el != null); 747 return null; 748 } 749 750 /** 751 Find Elements that match the supplied {@index XPath} expression. 752 <p>Note that for convenience of writing the Xpath expression, namespaces are disabled, and queries can be 753 expressed using the element's local name only.</p> 754 <p>By default, XPath 1.0 expressions are supported. If you would to use XPath 2.0 or higher, you can provide an 755 alternate XPathFactory implementation:</p> 756 <ol> 757 <li>Add the implementation to your classpath. E.g. to use <a href="https://www.saxonica.com/products/products.xml">Saxon-HE</a>, add <a href="https://mvnrepository.com/artifact/net.sf.saxon/Saxon-HE">net.sf.saxon:Saxon-HE</a> to your build.</li> 758 <li>Set the system property <code>javax.xml.xpath.XPathFactory:jsoup</code> to the implementing classname. E.g.:<br> 759 <code>System.setProperty(W3CDom.XPathFactoryProperty, "net.sf.saxon.xpath.XPathFactoryImpl");</code> 760 </li> 761 </ol> 762 763 @param xpath XPath expression 764 @return matching elements, or an empty list if none match. 765 @see #selectXpath(String, Class) 766 @since 1.14.3 767 */ 768 public Elements selectXpath(String xpath) { 769 return new Elements(NodeUtils.selectXpath(xpath, this, Element.class)); 770 } 771 772 /** 773 Find Nodes that match the supplied XPath expression. 774 <p>For example, to select TextNodes under {@code p} elements: </p> 775 <pre>List<TextNode> textNodes = doc.selectXpath("//body//p//text()", TextNode.class);</pre> 776 <p>Note that in the jsoup DOM, Attribute objects are not Nodes. 
To directly select attribute values, do something 777 like:</p> 778 <pre>List<String> hrefs = doc.selectXpath("//a").eachAttr("href");</pre> 779 @param xpath XPath expression 780 @param nodeType the jsoup node type to return 781 @see #selectXpath(String) 782 @return a list of matching nodes 783 @since 1.14.3 784 */ 785 public <T extends Node> List<T> selectXpath(String xpath, Class<T> nodeType) { 786 return NodeUtils.selectXpath(xpath, this, nodeType); 787 } 788 789 /** 790 * Insert a node to the end of this Element's children. The incoming node will be re-parented. 791 * 792 * @param child node to add. 793 * @return this Element, for chaining 794 * @see #prependChild(Node) 795 * @see #insertChildren(int, Collection) 796 */ 797 public Element appendChild(Node child) { 798 Validate.notNull(child); 799 800 // was - Node#addChildren(child). short-circuits an array create and a loop. 801 reparentChild(child); 802 ensureChildNodes(); 803 childNodes.add(child); 804 child.setSiblingIndex(childNodes.size() - 1); 805 return this; 806 } 807 808 /** 809 Insert the given nodes to the end of this Element's children. 810 811 @param children nodes to add 812 @return this Element, for chaining 813 @see #insertChildren(int, Collection) 814 */ 815 public Element appendChildren(Collection<? extends Node> children) { 816 insertChildren(-1, children); 817 return this; 818 } 819 820 /** 821 * Add this element to the supplied parent element, as its next child. 822 * 823 * @param parent element to which this element will be appended 824 * @return this element, so that you can continue modifying the element 825 */ 826 public Element appendTo(Element parent) { 827 Validate.notNull(parent); 828 parent.appendChild(this); 829 return this; 830 } 831 832 /** 833 * Add a node to the start of this element's children. 834 * 835 * @param child node to add. 836 * @return this element, so that you can add more child nodes or elements. 
837 */ 838 public Element prependChild(Node child) { 839 Validate.notNull(child); 840 841 addChildren(0, child); 842 return this; 843 } 844 845 /** 846 Insert the given nodes to the start of this Element's children. 847 848 @param children nodes to add 849 @return this Element, for chaining 850 @see #insertChildren(int, Collection) 851 */ 852 public Element prependChildren(Collection<? extends Node> children) { 853 insertChildren(0, children); 854 return this; 855 } 856 857 858 /** 859 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 860 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 861 * 862 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the 863 * end 864 * @param children child nodes to insert 865 * @return this element, for chaining. 866 */ 867 public Element insertChildren(int index, Collection<? extends Node> children) { 868 Validate.notNull(children, "Children collection to be inserted must not be null."); 869 int currentSize = childNodeSize(); 870 if (index < 0) index += currentSize +1; // roll around 871 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 872 873 ArrayList<Node> nodes = new ArrayList<>(children); 874 Node[] nodeArray = nodes.toArray(new Node[0]); 875 addChildren(index, nodeArray); 876 return this; 877 } 878 879 /** 880 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 881 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 882 * 883 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the 884 * end 885 * @param children child nodes to insert 886 * @return this element, for chaining. 887 */ 888 public Element insertChildren(int index, Node... 
children) { 889 Validate.notNull(children, "Children collection to be inserted must not be null."); 890 int currentSize = childNodeSize(); 891 if (index < 0) index += currentSize +1; // roll around 892 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 893 894 addChildren(index, children); 895 return this; 896 } 897 898 /** 899 * Create a new element by tag name, and add it as this Element's last child. 900 * 901 * @param tagName the name of the tag (e.g. {@code div}). 902 * @return the new element, to allow you to add content to it, e.g.: 903 * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");} 904 */ 905 public Element appendElement(String tagName) { 906 return appendElement(tagName, tag.namespace()); 907 } 908 909 /** 910 * Create a new element by tag name and namespace, add it as this Element's last child. 911 * 912 * @param tagName the name of the tag (e.g. {@code div}). 913 * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml}) 914 * @return the new element, in the specified namespace 915 */ 916 public Element appendElement(String tagName, String namespace) { 917 Parser parser = NodeUtils.parser(this); 918 Element child = new Element(parser.tagSet().valueOf(tagName, namespace, parser.settings()), baseUri()); 919 appendChild(child); 920 return child; 921 } 922 923 /** 924 * Create a new element by tag name, and add it as this Element's first child. 925 * 926 * @param tagName the name of the tag (e.g. {@code div}). 927 * @return the new element, to allow you to add content to it, e.g.: 928 * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");} 929 */ 930 public Element prependElement(String tagName) { 931 return prependElement(tagName, tag.namespace()); 932 } 933 934 /** 935 * Create a new element by tag name and namespace, and add it as this Element's first child. 936 * 937 * @param tagName the name of the tag (e.g. {@code div}). 
938 * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml}) 939 * @return the new element, in the specified namespace 940 */ 941 public Element prependElement(String tagName, String namespace) { 942 Parser parser = NodeUtils.parser(this); 943 Element child = new Element(parser.tagSet().valueOf(tagName, namespace, parser.settings()), baseUri()); 944 prependChild(child); 945 return child; 946 } 947 948 /** 949 * Create and append a new TextNode to this element. 950 * 951 * @param text the (un-encoded) text to add 952 * @return this element 953 */ 954 public Element appendText(String text) { 955 Validate.notNull(text); 956 TextNode node = new TextNode(text); 957 appendChild(node); 958 return this; 959 } 960 961 /** 962 * Create and prepend a new TextNode to this element. 963 * 964 * @param text the decoded text to add 965 * @return this element 966 */ 967 public Element prependText(String text) { 968 Validate.notNull(text); 969 TextNode node = new TextNode(text); 970 prependChild(node); 971 return this; 972 } 973 974 /** 975 * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children. 976 * @param html HTML to add inside this element, after the existing HTML 977 * @return this element 978 * @see #html(String) 979 */ 980 public Element append(String html) { 981 Validate.notNull(html); 982 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); 983 addChildren(nodes.toArray(new Node[0])); 984 return this; 985 } 986 987 /** 988 * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children. 
989 * @param html HTML to add inside this element, before the existing HTML 990 * @return this element 991 * @see #html(String) 992 */ 993 public Element prepend(String html) { 994 Validate.notNull(html); 995 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); 996 addChildren(0, nodes.toArray(new Node[0])); 997 return this; 998 } 999 1000 /** 1001 * Insert the specified HTML into the DOM before this element (as a preceding sibling). 1002 * 1003 * @param html HTML to add before this element 1004 * @return this element, for chaining 1005 * @see #after(String) 1006 */ 1007 @Override 1008 public Element before(String html) { 1009 return (Element) super.before(html); 1010 } 1011 1012 /** 1013 * Insert the specified node into the DOM before this node (as a preceding sibling). 1014 * @param node to add before this element 1015 * @return this Element, for chaining 1016 * @see #after(Node) 1017 */ 1018 @Override 1019 public Element before(Node node) { 1020 return (Element) super.before(node); 1021 } 1022 1023 /** 1024 * Insert the specified HTML into the DOM after this element (as a following sibling). 1025 * 1026 * @param html HTML to add after this element 1027 * @return this element, for chaining 1028 * @see #before(String) 1029 */ 1030 @Override 1031 public Element after(String html) { 1032 return (Element) super.after(html); 1033 } 1034 1035 /** 1036 * Insert the specified node into the DOM after this node (as a following sibling). 1037 * @param node to add after this element 1038 * @return this element, for chaining 1039 * @see #before(Node) 1040 */ 1041 @Override 1042 public Element after(Node node) { 1043 return (Element) super.after(node); 1044 } 1045 1046 /** 1047 * Remove all the element's child nodes. Any attributes are left as-is. Each child node has its parent set to 1048 * {@code null}. 
1049 * @return this element 1050 */ 1051 @Override 1052 public Element empty() { 1053 // Detach each of the children -> parent links: 1054 for (Node child : childNodes) { 1055 child.parentNode = null; 1056 } 1057 childNodes.clear(); 1058 return this; 1059 } 1060 1061 /** 1062 * Wrap the supplied HTML around this element. 1063 * 1064 * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. 1065 * @return this element, for chaining. 1066 */ 1067 @Override 1068 public Element wrap(String html) { 1069 return (Element) super.wrap(html); 1070 } 1071 1072 /** 1073 Gets an #id selector for this element, if it has a unique ID. Otherwise, returns an empty string. 1074 1075 @param ownerDoc the document that owns this element, if there is one 1076 */ 1077 private String uniqueIdSelector(@Nullable Document ownerDoc) { 1078 String id = id(); 1079 if (!id.isEmpty()) { // check if the ID is unique and matches this 1080 String idSel = "#" + escapeCssIdentifier(id); 1081 if (ownerDoc != null) { 1082 Elements els = ownerDoc.select(idSel); 1083 if (els.size() == 1 && els.get(0) == this) return idSel; 1084 } else { 1085 return idSel; 1086 } 1087 } 1088 return EmptyString; 1089 } 1090 1091 /** 1092 Get a CSS selector that will uniquely select this element. 1093 <p> 1094 If the element has an ID, returns #id; otherwise returns the parent (if any) CSS selector, followed by 1095 {@literal '>'}, followed by a unique selector for the element (tag.class.class:nth-child(n)). 1096 </p> 1097 1098 @return the CSS Path that can be used to retrieve the element in a selector. 
1099 */ 1100 public String cssSelector() { 1101 Document ownerDoc = ownerDocument(); 1102 String idSel = uniqueIdSelector(ownerDoc); 1103 if (!idSel.isEmpty()) return idSel; 1104 1105 // No unique ID, work up the parent stack and find either a unique ID to hang from, or just a GP > Parent > Child chain 1106 StringBuilder selector = StringUtil.borrowBuilder(); 1107 Element el = this; 1108 while (el != null && !(el instanceof Document)) { 1109 idSel = el.uniqueIdSelector(ownerDoc); 1110 if (!idSel.isEmpty()) { 1111 selector.insert(0, idSel); 1112 break; // found a unique ID to use as ancestor; stop 1113 } 1114 selector.insert(0, el.cssSelectorComponent()); 1115 el = el.parent(); 1116 } 1117 return StringUtil.releaseBuilder(selector); 1118 } 1119 1120 private String cssSelectorComponent() { 1121 // Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag 1122 String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|"); 1123 StringBuilder selector = StringUtil.borrowBuilder().append(tagName); 1124 String classes = classNames().stream().map(TokenQueue::escapeCssIdentifier) 1125 .collect(StringUtil.joining(".")); 1126 if (!classes.isEmpty()) 1127 selector.append('.').append(classes); 1128 1129 if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node 1130 return StringUtil.releaseBuilder(selector); 1131 1132 selector.insert(0, " > "); 1133 if (parent().select(selector.toString()).size() > 1) 1134 selector.append(String.format( 1135 ":nth-child(%d)", elementSiblingIndex() + 1)); 1136 1137 return StringUtil.releaseBuilder(selector); 1138 } 1139 1140 /** 1141 * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling 1142 * of itself, so will not be included in the returned list. 
1143 * @return sibling elements 1144 */ 1145 public Elements siblingElements() { 1146 if (parentNode == null) 1147 return new Elements(0); 1148 1149 List<Element> elements = parent().childElementsList(); 1150 Elements siblings = new Elements(elements.size() - 1); 1151 for (Element el: elements) 1152 if (el != this) 1153 siblings.add(el); 1154 return siblings; 1155 } 1156 1157 1158 1159 /** 1160 * Get each of the sibling elements that come after this element. 1161 * 1162 * @return each of the element siblings after this element, or an empty list if there are no next sibling elements 1163 */ 1164 public Elements nextElementSiblings() { 1165 return nextElementSiblings(true); 1166 } 1167 1168 /** 1169 * Get each of the element siblings before this element. 1170 * 1171 * @return the previous element siblings, or an empty list if there are none. 1172 */ 1173 public Elements previousElementSiblings() { 1174 return nextElementSiblings(false); 1175 } 1176 1177 private Elements nextElementSiblings(boolean next) { 1178 Elements els = new Elements(); 1179 if (parentNode == null) 1180 return els; 1181 els.add(this); 1182 return next ? els.nextAll() : els.prevAll(); 1183 } 1184 1185 /** 1186 * Gets the first Element sibling of this element. That may be this element. 1187 * @return the first sibling that is an element (aka the parent's first element child) 1188 */ 1189 public Element firstElementSibling() { 1190 if (parent() != null) { 1191 //noinspection DataFlowIssue (not nullable, would be this is no other sibs) 1192 return parent().firstElementChild(); 1193 } else 1194 return this; // orphan is its own first sibling 1195 } 1196 1197 /** 1198 * Get the list index of this element in its element sibling list. I.e. if this is the first element 1199 * sibling, returns 0. 
1200 * @return position in element sibling list 1201 */ 1202 public int elementSiblingIndex() { 1203 if (parent() == null) return 0; 1204 return indexInList(this, parent().childElementsList()); 1205 } 1206 1207 /** 1208 * Gets the last element sibling of this element. That may be this element. 1209 * @return the last sibling that is an element (aka the parent's last element child) 1210 */ 1211 public Element lastElementSibling() { 1212 if (parent() != null) { 1213 //noinspection DataFlowIssue (not nullable, would be this if no other sibs) 1214 return parent().lastElementChild(); 1215 } else 1216 return this; 1217 } 1218 1219 private static <E extends Element> int indexInList(Element search, List<E> elements) { 1220 final int size = elements.size(); 1221 for (int i = 0; i < size; i++) { 1222 if (elements.get(i) == search) 1223 return i; 1224 } 1225 return 0; 1226 } 1227 1228 /** 1229 Gets the first child of this Element that is an Element, or {@code null} if there is none. 1230 @return the first Element child node, or null. 1231 @see #firstChild() 1232 @see #lastElementChild() 1233 @since 1.15.2 1234 */ 1235 public @Nullable Element firstElementChild() { 1236 Node child = firstChild(); 1237 while (child != null) { 1238 if (child instanceof Element) return (Element) child; 1239 child = child.nextSibling(); 1240 } 1241 return null; 1242 } 1243 1244 /** 1245 Gets the last child of this Element that is an Element, or @{code null} if there is none. 1246 @return the last Element child node, or null. 1247 @see #lastChild() 1248 @see #firstElementChild() 1249 @since 1.15.2 1250 */ 1251 public @Nullable Element lastElementChild() { 1252 Node child = lastChild(); 1253 while (child != null) { 1254 if (child instanceof Element) return (Element) child; 1255 child = child.previousSibling(); 1256 } 1257 return null; 1258 } 1259 1260 // DOM type methods 1261 1262 /** 1263 * Finds elements, including and recursively under this element, with the specified tag name. 
1264 * @param tagName The tag name to search for (case insensitively). 1265 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match. 1266 */ 1267 public Elements getElementsByTag(String tagName) { 1268 Validate.notEmpty(tagName); 1269 tagName = normalize(tagName); 1270 1271 return Collector.collect(new Evaluator.Tag(tagName), this); 1272 } 1273 1274 /** 1275 * Find an element by ID, including or under this element. 1276 * <p> 1277 * Note that this finds the first matching ID, starting with this element. If you search down from a different 1278 * starting point, it is possible to find a different element by ID. For unique element by ID within a Document, 1279 * use {@link Document#getElementById(String)} 1280 * @param id The ID to search for. 1281 * @return The first matching element by ID, starting with this element, or null if none found. 1282 */ 1283 public @Nullable Element getElementById(String id) { 1284 Validate.notEmpty(id); 1285 return Collector.findFirst(new Evaluator.Id(id), this); 1286 } 1287 1288 /** 1289 * Find elements that have this class, including or under this element. Case-insensitive. 1290 * <p> 1291 * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method 1292 * checks each class, so you can find the above with {@code el.getElementsByClass("header");}. 1293 * 1294 * @param className the name of the class to search for. 1295 * @return elements with the supplied class name, empty if none 1296 * @see #hasClass(String) 1297 * @see #classNames() 1298 */ 1299 public Elements getElementsByClass(String className) { 1300 Validate.notEmpty(className); 1301 1302 return Collector.collect(new Evaluator.Class(className), this); 1303 } 1304 1305 /** 1306 * Find elements that have a named attribute set. Case-insensitive. 1307 * 1308 * @param key name of the attribute, e.g. 
{@code href} 1309 * @return elements that have this attribute, empty if none 1310 */ 1311 public Elements getElementsByAttribute(String key) { 1312 Validate.notEmpty(key); 1313 key = key.trim(); 1314 1315 return Collector.collect(new Evaluator.Attribute(key), this); 1316 } 1317 1318 /** 1319 * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements 1320 * that have HTML5 datasets. 1321 * @param keyPrefix name prefix of the attribute e.g. {@code data-} 1322 * @return elements that have attribute names that start with the prefix, empty if none. 1323 */ 1324 public Elements getElementsByAttributeStarting(String keyPrefix) { 1325 Validate.notEmpty(keyPrefix); 1326 keyPrefix = keyPrefix.trim(); 1327 1328 return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this); 1329 } 1330 1331 /** 1332 * Find elements that have an attribute with the specific value. Case-insensitive. 1333 * 1334 * @param key name of the attribute 1335 * @param value value of the attribute 1336 * @return elements that have this attribute with this value, empty if none 1337 */ 1338 public Elements getElementsByAttributeValue(String key, String value) { 1339 return Collector.collect(new Evaluator.AttributeWithValue(key, value), this); 1340 } 1341 1342 /** 1343 * Find elements that either do not have this attribute, or have it with a different value. Case-insensitive. 1344 * 1345 * @param key name of the attribute 1346 * @param value value of the attribute 1347 * @return elements that do not have a matching attribute 1348 */ 1349 public Elements getElementsByAttributeValueNot(String key, String value) { 1350 return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this); 1351 } 1352 1353 /** 1354 * Find elements that have attributes that start with the value prefix. Case-insensitive. 
1355 * 1356 * @param key name of the attribute 1357 * @param valuePrefix start of attribute value 1358 * @return elements that have attributes that start with the value prefix 1359 */ 1360 public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) { 1361 return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this); 1362 } 1363 1364 /** 1365 * Find elements that have attributes that end with the value suffix. Case-insensitive. 1366 * 1367 * @param key name of the attribute 1368 * @param valueSuffix end of the attribute value 1369 * @return elements that have attributes that end with the value suffix 1370 */ 1371 public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) { 1372 return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this); 1373 } 1374 1375 /** 1376 * Find elements that have attributes whose value contains the match string. Case-insensitive. 1377 * 1378 * @param key name of the attribute 1379 * @param match substring of value to search for 1380 * @return elements that have attributes containing this text 1381 */ 1382 public Elements getElementsByAttributeValueContaining(String key, String match) { 1383 return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this); 1384 } 1385 1386 /** 1387 * Find elements that have an attribute whose value matches the supplied regular expression. 1388 * @param key name of the attribute 1389 * @param pattern compiled regular expression to match against attribute values 1390 * @return elements that have attributes matching this regular expression 1391 */ 1392 public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) { 1393 return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this); 1394 1395 } 1396 1397 /** 1398 * Find elements that have attributes whose values match the supplied regular expression. 
1399 * @param key name of the attribute 1400 * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1401 * @return elements that have attributes matching this regular expression 1402 */ 1403 public Elements getElementsByAttributeValueMatching(String key, String regex) { 1404 Pattern pattern; 1405 try { 1406 pattern = Pattern.compile(regex); 1407 } catch (PatternSyntaxException e) { 1408 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1409 } 1410 return getElementsByAttributeValueMatching(key, pattern); 1411 } 1412 1413 /** 1414 * Find elements whose sibling index is less than the supplied index. 1415 * @param index 0-based index 1416 * @return elements less than index 1417 */ 1418 public Elements getElementsByIndexLessThan(int index) { 1419 return Collector.collect(new Evaluator.IndexLessThan(index), this); 1420 } 1421 1422 /** 1423 * Find elements whose sibling index is greater than the supplied index. 1424 * @param index 0-based index 1425 * @return elements greater than index 1426 */ 1427 public Elements getElementsByIndexGreaterThan(int index) { 1428 return Collector.collect(new Evaluator.IndexGreaterThan(index), this); 1429 } 1430 1431 /** 1432 * Find elements whose sibling index is equal to the supplied index. 1433 * @param index 0-based index 1434 * @return elements equal to index 1435 */ 1436 public Elements getElementsByIndexEquals(int index) { 1437 return Collector.collect(new Evaluator.IndexEquals(index), this); 1438 } 1439 1440 /** 1441 * Find elements that contain the specified string. The search is case-insensitive. The text may appear directly 1442 * in the element, or in any of its descendants. 1443 * @param searchText to look for in the element's text 1444 * @return elements that contain the string, case-insensitive. 
1445 * @see Element#text() 1446 */ 1447 public Elements getElementsContainingText(String searchText) { 1448 return Collector.collect(new Evaluator.ContainsText(searchText), this); 1449 } 1450 1451 /** 1452 * Find elements that directly contain the specified string. The search is case-insensitive. The text must appear directly 1453 * in the element, not in any of its descendants. 1454 * @param searchText to look for in the element's own text 1455 * @return elements that contain the string, case-insensitive. 1456 * @see Element#ownText() 1457 */ 1458 public Elements getElementsContainingOwnText(String searchText) { 1459 return Collector.collect(new Evaluator.ContainsOwnText(searchText), this); 1460 } 1461 1462 /** 1463 * Find elements whose text matches the supplied regular expression. 1464 * @param pattern regular expression to match text against 1465 * @return elements matching the supplied regular expression. 1466 * @see Element#text() 1467 */ 1468 public Elements getElementsMatchingText(Pattern pattern) { 1469 return Collector.collect(new Evaluator.Matches(pattern), this); 1470 } 1471 1472 /** 1473 * Find elements whose text matches the supplied regular expression. 1474 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1475 * @return elements matching the supplied regular expression. 1476 * @see Element#text() 1477 */ 1478 public Elements getElementsMatchingText(String regex) { 1479 Pattern pattern; 1480 try { 1481 pattern = Pattern.compile(regex); 1482 } catch (PatternSyntaxException e) { 1483 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1484 } 1485 return getElementsMatchingText(pattern); 1486 } 1487 1488 /** 1489 * Find elements whose own text matches the supplied regular expression. 
1490 * @param pattern regular expression to match text against 1491 * @return elements matching the supplied regular expression. 1492 * @see Element#ownText() 1493 */ 1494 public Elements getElementsMatchingOwnText(Pattern pattern) { 1495 return Collector.collect(new Evaluator.MatchesOwn(pattern), this); 1496 } 1497 1498 /** 1499 * Find elements whose own text matches the supplied regular expression. 1500 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1501 * @return elements matching the supplied regular expression. 1502 * @see Element#ownText() 1503 */ 1504 public Elements getElementsMatchingOwnText(String regex) { 1505 Pattern pattern; 1506 try { 1507 pattern = Pattern.compile(regex); 1508 } catch (PatternSyntaxException e) { 1509 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1510 } 1511 return getElementsMatchingOwnText(pattern); 1512 } 1513 1514 /** 1515 * Find all elements under this element (including self, and children of children). 1516 * 1517 * @return all elements 1518 */ 1519 public Elements getAllElements() { 1520 return Collector.collect(new Evaluator.AllElements(), this); 1521 } 1522 1523 /** 1524 Gets the <b>normalized, combined text</b> of this element and all its children. Whitespace is normalized and 1525 trimmed. 1526 <p>For example, given HTML {@code <p>Hello <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there 1527 now!"} 1528 <p>If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not 1529 children), use {@link #ownText()} 1530 <p>Note that this method returns the textual content that would be presented to a reader. The contents of data 1531 nodes (such as {@code <script>} tags) are not considered text. 
Use {@link #data()} or {@link #html()} to retrieve 1532 that content. 1533 1534 @return decoded, normalized text, or empty string if none. 1535 @see #wholeText() 1536 @see #ownText() 1537 @see #textNodes() 1538 */ 1539 public String text() { 1540 final StringBuilder accum = StringUtil.borrowBuilder(); 1541 new TextAccumulator(accum).traverse(this); 1542 return StringUtil.releaseBuilder(accum).trim(); 1543 } 1544 1545 private static class TextAccumulator implements NodeVisitor { 1546 private final StringBuilder accum; 1547 1548 public TextAccumulator(StringBuilder accum) { 1549 this.accum = accum; 1550 } 1551 1552 @Override public void head(Node node, int depth) { 1553 if (node instanceof TextNode) { 1554 TextNode textNode = (TextNode) node; 1555 appendNormalisedText(accum, textNode); 1556 } else if (node instanceof Element) { 1557 Element element = (Element) node; 1558 if (accum.length() > 0 && 1559 (element.isBlock() || element.nameIs("br")) && 1560 !lastCharIsWhitespace(accum)) 1561 accum.append(' '); 1562 } 1563 } 1564 1565 @Override public void tail(Node node, int depth) { 1566 // make sure there is a space between block tags and immediately following text nodes or inline elements <div>One</div>Two should be "One Two". 1567 if (node instanceof Element) { 1568 Element element = (Element) node; 1569 Node next = node.nextSibling(); 1570 if (!element.tag.isInline() && (next instanceof TextNode || next instanceof Element && ((Element) next).tag.isInline()) && !lastCharIsWhitespace(accum)) 1571 accum.append(' '); 1572 } 1573 1574 } 1575 } 1576 1577 /** 1578 Get the non-normalized, decoded text of this element and its children, including only any newlines and spaces 1579 present in the original source. 1580 @return decoded, non-normalized text 1581 @see #text() 1582 @see #wholeOwnText() 1583 */ 1584 public String wholeText() { 1585 return wholeTextOf(nodeStream()); 1586 } 1587 1588 /** 1589 An Element's nodeValue is its whole own text. 
1590 */ 1591 @Override 1592 public String nodeValue() { 1593 return wholeOwnText(); 1594 } 1595 1596 private static String wholeTextOf(Stream<Node> stream) { 1597 return stream.map(node -> { 1598 if (node instanceof TextNode) return ((TextNode) node).getWholeText(); 1599 if (node.nameIs("br")) return "\n"; 1600 return ""; 1601 }).collect(StringUtil.joining("")); 1602 } 1603 1604 /** 1605 Get the non-normalized, decoded text of this element, <b>not including</b> any child elements, including any 1606 newlines and spaces present in the original source. 1607 @return decoded, non-normalized text that is a direct child of this Element 1608 @see #text() 1609 @see #wholeText() 1610 @see #ownText() 1611 @since 1.15.1 1612 */ 1613 public String wholeOwnText() { 1614 return wholeTextOf(childNodes.stream()); 1615 } 1616 1617 /** 1618 * Gets the (normalized) text owned by this element only; does not get the combined text of all children. 1619 * <p> 1620 * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"}, 1621 * whereas {@code p.text()} returns {@code "Hello there now!"}. 1622 * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element. 1623 * 1624 * @return decoded text, or empty string if none. 
1625 * @see #text() 1626 * @see #textNodes() 1627 */ 1628 public String ownText() { 1629 StringBuilder sb = StringUtil.borrowBuilder(); 1630 ownText(sb); 1631 return StringUtil.releaseBuilder(sb).trim(); 1632 } 1633 1634 private void ownText(StringBuilder accum) { 1635 for (int i = 0; i < childNodeSize(); i++) { 1636 Node child = childNodes.get(i); 1637 if (child instanceof TextNode) { 1638 TextNode textNode = (TextNode) child; 1639 appendNormalisedText(accum, textNode); 1640 } else if (child.nameIs("br") && !lastCharIsWhitespace(accum)) { 1641 accum.append(" "); 1642 } 1643 } 1644 } 1645 1646 private static void appendNormalisedText(StringBuilder accum, TextNode textNode) { 1647 String text = textNode.getWholeText(); 1648 if (preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode) 1649 accum.append(text); 1650 else 1651 StringUtil.appendNormalisedWhitespace(accum, text, lastCharIsWhitespace(accum)); 1652 } 1653 1654 static boolean preserveWhitespace(@Nullable Node node) { 1655 // looks only at this element and five levels up, to prevent recursion & needless stack searches 1656 if (node instanceof Element) { 1657 Element el = (Element) node; 1658 int i = 0; 1659 do { 1660 if (el.tag.preserveWhitespace()) 1661 return true; 1662 el = el.parent(); 1663 i++; 1664 } while (i < 6 && el != null); 1665 } 1666 return false; 1667 } 1668 1669 /** 1670 * Set the text of this element. Any existing contents (text or elements) will be cleared. 
1671 * <p>As a special case, for {@code <script>} and {@code <style>} tags, the input text will be treated as data, 1672 * not visible text.</p> 1673 * @param text decoded text 1674 * @return this element 1675 */ 1676 public Element text(String text) { 1677 Validate.notNull(text); 1678 empty(); 1679 // special case for script/style in HTML (or customs): should be data node 1680 if (tag().is(Tag.Data)) 1681 appendChild(new DataNode(text)); 1682 else 1683 appendChild(new TextNode(text)); 1684 1685 return this; 1686 } 1687 1688 /** 1689 Checks if the current element or any of its child elements contain non-whitespace text. 1690 @return {@code true} if the element has non-blank text content, {@code false} otherwise. 1691 */ 1692 public boolean hasText() { 1693 AtomicBoolean hasText = new AtomicBoolean(false); 1694 filter((node, depth) -> { 1695 if (node instanceof TextNode) { 1696 TextNode textNode = (TextNode) node; 1697 if (!textNode.isBlank()) { 1698 hasText.set(true); 1699 return NodeFilter.FilterResult.STOP; 1700 } 1701 } 1702 return NodeFilter.FilterResult.CONTINUE; 1703 }); 1704 return hasText.get(); 1705 } 1706 1707 /** 1708 * Get the combined data of this element. Data is e.g. the inside of a {@code <script>} tag. Note that data is NOT the 1709 * text of the element. Use {@link #text()} to get the text that would be visible to a user, and {@code data()} 1710 * for the contents of scripts, comments, CSS styles, etc. 
1711 * 1712 * @return the data, or empty string if none 1713 * 1714 * @see #dataNodes() 1715 */ 1716 public String data() { 1717 StringBuilder sb = StringUtil.borrowBuilder(); 1718 traverse((childNode, depth) -> { 1719 if (childNode instanceof DataNode) { 1720 DataNode data = (DataNode) childNode; 1721 sb.append(data.getWholeData()); 1722 } else if (childNode instanceof Comment) { 1723 Comment comment = (Comment) childNode; 1724 sb.append(comment.getData()); 1725 } else if (childNode instanceof CDataNode) { 1726 // this shouldn't really happen because the html parser won't see the cdata as anything special when parsing script. 1727 // but in case another type gets through. 1728 CDataNode cDataNode = (CDataNode) childNode; 1729 sb.append(cDataNode.getWholeText()); 1730 } 1731 }); 1732 return StringUtil.releaseBuilder(sb); 1733 } 1734 1735 /** 1736 * Gets the literal value of this element's "class" attribute, which may include multiple class names, space 1737 * separated. (E.g. on <code><div class="header gray"></code> returns, "<code>header gray</code>") 1738 * @return The literal class attribute, or <b>empty string</b> if no class attribute set. 1739 */ 1740 public String className() { 1741 return attr("class").trim(); 1742 } 1743 1744 /** 1745 * Get each of the element's class names. E.g. on element {@code <div class="header gray">}, 1746 * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to 1747 * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them. 
1748 * @return set of classnames, empty if no class attribute 1749 */ 1750 public Set<String> classNames() { 1751 String[] names = ClassSplit.split(className()); 1752 Set<String> classNames = new LinkedHashSet<>(Arrays.asList(names)); 1753 classNames.remove(""); // if classNames() was empty, would include an empty class 1754 1755 return classNames; 1756 } 1757 1758 /** 1759 Set the element's {@code class} attribute to the supplied class names. 1760 @param classNames set of classes 1761 @return this element, for chaining 1762 */ 1763 public Element classNames(Set<String> classNames) { 1764 Validate.notNull(classNames); 1765 if (classNames.isEmpty()) { 1766 attributes().remove("class"); 1767 } else { 1768 attributes().put("class", StringUtil.join(classNames, " ")); 1769 } 1770 return this; 1771 } 1772 1773 /** 1774 * Tests if this element has a class. Case-insensitive. 1775 * @param className name of class to check for 1776 * @return true if it does, false if not 1777 */ 1778 // performance sensitive 1779 public boolean hasClass(String className) { 1780 if (attributes == null) 1781 return false; 1782 1783 final String classAttr = attributes.getIgnoreCase("class"); 1784 final int len = classAttr.length(); 1785 final int wantLen = className.length(); 1786 1787 if (len == 0 || len < wantLen) { 1788 return false; 1789 } 1790 1791 // if both lengths are equal, only need compare the className with the attribute 1792 if (len == wantLen) { 1793 return className.equalsIgnoreCase(classAttr); 1794 } 1795 1796 // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations) 1797 boolean inClass = false; 1798 int start = 0; 1799 for (int i = 0; i < len; i++) { 1800 if (Character.isWhitespace(classAttr.charAt(i))) { 1801 if (inClass) { 1802 // white space ends a class name, compare it with the requested one, ignore case 1803 if (i - start == wantLen && classAttr.regionMatches(true, start, className, 0, wantLen)) { 1804 return true; 1805 } 1806 
inClass = false; 1807 } 1808 } else { 1809 if (!inClass) { 1810 // we're in a class name : keep the start of the substring 1811 inClass = true; 1812 start = i; 1813 } 1814 } 1815 } 1816 1817 // check the last entry 1818 if (inClass && len - start == wantLen) { 1819 return classAttr.regionMatches(true, start, className, 0, wantLen); 1820 } 1821 1822 return false; 1823 } 1824 1825 /** 1826 Add a class name to this element's {@code class} attribute. 1827 @param className class name to add 1828 @return this element 1829 */ 1830 public Element addClass(String className) { 1831 Validate.notNull(className); 1832 1833 Set<String> classes = classNames(); 1834 classes.add(className); 1835 classNames(classes); 1836 1837 return this; 1838 } 1839 1840 /** 1841 Remove a class name from this element's {@code class} attribute. 1842 @param className class name to remove 1843 @return this element 1844 */ 1845 public Element removeClass(String className) { 1846 Validate.notNull(className); 1847 1848 Set<String> classes = classNames(); 1849 classes.remove(className); 1850 classNames(classes); 1851 1852 return this; 1853 } 1854 1855 /** 1856 Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it. 1857 @param className class name to toggle 1858 @return this element 1859 */ 1860 public Element toggleClass(String className) { 1861 Validate.notNull(className); 1862 1863 Set<String> classes = classNames(); 1864 if (classes.contains(className)) 1865 classes.remove(className); 1866 else 1867 classes.add(className); 1868 classNames(classes); 1869 1870 return this; 1871 } 1872 1873 /** 1874 * Get the value of a form element (input, textarea, etc). 1875 * @return the value of the form element, or empty string if not set. 1876 */ 1877 public String val() { 1878 if (elementIs("textarea", NamespaceHtml)) 1879 return text(); 1880 else 1881 return attr("value"); 1882 } 1883 1884 /** 1885 * Set the value of a form element (input, textarea, etc). 
1886 * @param value value to set 1887 * @return this element (for chaining) 1888 */ 1889 public Element val(String value) { 1890 if (elementIs("textarea", NamespaceHtml)) 1891 text(value); 1892 else 1893 attr("value", value); 1894 return this; 1895 } 1896 1897 /** 1898 Get the source range (start and end positions) of the end (closing) tag for this Element. Position tracking must be 1899 enabled prior to parsing the content. 1900 @return the range of the closing tag for this element, or {@code untracked} if its range was not tracked. 1901 @see org.jsoup.parser.Parser#setTrackPosition(boolean) 1902 @see Node#sourceRange() 1903 @see Range#isImplicit() 1904 @since 1.15.2 1905 */ 1906 public Range endSourceRange() { 1907 return Range.of(this, false); 1908 } 1909 1910 @Override 1911 void outerHtmlHead(final QuietAppendable accum, Document.OutputSettings out) { 1912 String tagName = safeTagName(out.syntax()); 1913 accum.append('<').append(tagName); 1914 if (attributes != null) attributes.html(accum, out); 1915 1916 if (childNodes.isEmpty()) { 1917 boolean xmlMode = out.syntax() == xml || !tag.namespace().equals(NamespaceHtml); 1918 if (xmlMode && (tag.is(Tag.SeenSelfClose) || (tag.isKnownTag() && (tag.isEmpty() || tag.isSelfClosing())))) { 1919 accum.append(" />"); 1920 } else if (!xmlMode && tag.isEmpty()) { // html void element 1921 accum.append('>'); 1922 } else { 1923 accum.append("></").append(tagName).append('>'); 1924 } 1925 } else { 1926 accum.append('>'); 1927 } 1928 } 1929 1930 @Override 1931 void outerHtmlTail(QuietAppendable accum, Document.OutputSettings out) { 1932 if (!childNodes.isEmpty()) 1933 accum.append("</").append(safeTagName(out.syntax())).append('>'); 1934 // if empty, we have already closed in htmlHead 1935 } 1936 1937 /* If XML syntax, normalizes < to _ in tag name. */ 1938 @Nullable private String safeTagName(Document.OutputSettings.Syntax syntax) { 1939 return syntax == xml ? 
Normalizer.xmlSafeTagName(tagName()) : tagName(); 1940 } 1941 1942 /** 1943 * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return 1944 * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.) 1945 * 1946 * @return String of HTML. 1947 * @see #outerHtml() 1948 */ 1949 public String html() { 1950 StringBuilder sb = StringUtil.borrowBuilder(); 1951 html(sb); 1952 String html = StringUtil.releaseBuilder(sb); 1953 return NodeUtils.outputSettings(this).prettyPrint() ? html.trim() : html; 1954 } 1955 1956 @Override 1957 public <T extends Appendable> T html(T accum) { 1958 Node child = firstChild(); 1959 if (child != null) { 1960 Printer printer = Printer.printerFor(child, QuietAppendable.wrap(accum)); 1961 while (child != null) { 1962 printer.traverse(child); 1963 child = child.nextSibling(); 1964 } 1965 } 1966 return accum; 1967 } 1968 1969 /** 1970 * Set this element's inner HTML. Clears the existing HTML first. 1971 * @param html HTML to parse and set into this element 1972 * @return this element 1973 * @see #append(String) 1974 */ 1975 public Element html(String html) { 1976 empty(); 1977 append(html); 1978 return this; 1979 } 1980 1981 @Override 1982 public Element clone() { 1983 return (Element) super.clone(); 1984 } 1985 1986 @Override 1987 public Element shallowClone() { 1988 // simpler than implementing a clone version with no child copy 1989 String baseUri = baseUri(); 1990 if (baseUri.isEmpty()) baseUri = null; // saves setting a blank internal attribute 1991 return new Element(tag, baseUri, attributes == null ? 
null : attributes.clone()); 1992 } 1993 1994 @Override 1995 protected Element doClone(@Nullable Node parent) { 1996 Element clone = (Element) super.doClone(parent); 1997 clone.childNodes = new NodeList(childNodes.size()); 1998 clone.childNodes.addAll(childNodes); // the children then get iterated and cloned in Node.clone 1999 if (attributes != null) { 2000 clone.attributes = attributes.clone(); 2001 // clear any cached children 2002 clone.attributes.userData(childElsKey, null); 2003 } 2004 2005 return clone; 2006 } 2007 2008 // overrides of Node for call chaining 2009 @Override 2010 public Element clearAttributes() { 2011 if (attributes != null) { 2012 super.clearAttributes(); // keeps internal attributes via iterator 2013 if (attributes.size() == 0) 2014 attributes = null; // only remove entirely if no internal attributes 2015 } 2016 2017 return this; 2018 } 2019 2020 @Override 2021 public Element removeAttr(String attributeKey) { 2022 return (Element) super.removeAttr(attributeKey); 2023 } 2024 2025 @Override 2026 public Element root() { 2027 return (Element) super.root(); // probably a document, but always at least an element 2028 } 2029 2030 @Override 2031 public Element traverse(NodeVisitor nodeVisitor) { 2032 return (Element) super.traverse(nodeVisitor); 2033 } 2034 2035 @Override 2036 public Element forEachNode(Consumer<? super Node> action) { 2037 return (Element) super.forEachNode(action); 2038 } 2039 2040 /** 2041 Perform the supplied action on this Element and each of its descendant Elements, during a depth-first traversal. 2042 Elements may be inspected, changed, added, replaced, or removed. 2043 @param action the function to perform on the element 2044 @see Node#forEachNode(Consumer) 2045 */ 2046 @Override 2047 public void forEach(Consumer<? super Element> action) { 2048 stream().forEach(action); 2049 } 2050 2051 /** 2052 Returns an Iterator that iterates this Element and each of its descendant Elements, in document order. 
@return an Iterator
     */
    @Override
    public Iterator<Element> iterator() {
        return new NodeIterator<Element>(this, Element.class);
    }

    @Override
    public Element filter(NodeFilter nodeFilter) {
        return (Element) super.filter(nodeFilter);
    }

    /** An {@code ArrayList} of nodes that makes the list's inherited modification counter readable. */
    static final class NodeList extends ArrayList<Node> {
        /** Creates an empty list; {@code size} is handed to the {@code ArrayList} constructor as its initial capacity. */
        public NodeList(int size) {
            super(size);
        }

        /** Returns the current value of the inherited {@code modCount} field. */
        int modCount() {
            return modCount;
        }
    }
}