001package org.jsoup.nodes;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.internal.Normalizer;
005import org.jsoup.internal.QuietAppendable;
006import org.jsoup.internal.StringUtil;
007import org.jsoup.parser.ParseSettings;
008import org.jsoup.parser.Parser;
009import org.jsoup.parser.Tag;
010import org.jsoup.parser.TokenQueue;
011import org.jsoup.select.Collector;
012import org.jsoup.select.Elements;
013import org.jsoup.select.Evaluator;
014import org.jsoup.select.NodeFilter;
015import org.jsoup.select.NodeVisitor;
016import org.jsoup.select.Nodes;
017import org.jsoup.select.Selector;
018import org.jspecify.annotations.Nullable;
019
020import java.lang.ref.WeakReference;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Collection;
024import java.util.Collections;
025import java.util.Iterator;
026import java.util.LinkedHashSet;
027import java.util.List;
028import java.util.Map;
029import java.util.Set;
030import java.util.concurrent.atomic.AtomicBoolean;
031import java.util.function.Consumer;
032import java.util.regex.Pattern;
033import java.util.regex.PatternSyntaxException;
034import java.util.stream.Collectors;
035import java.util.stream.Stream;
036
037import static org.jsoup.internal.Normalizer.normalize;
038import static org.jsoup.nodes.Document.OutputSettings.Syntax.xml;
039import static org.jsoup.nodes.TextNode.lastCharIsWhitespace;
040import static org.jsoup.parser.Parser.NamespaceHtml;
041import static org.jsoup.parser.TokenQueue.escapeCssIdentifier;
042import static org.jsoup.select.Selector.evaluatorOf;
043
044/**
045 An HTML Element consists of a tag name, attributes, and child nodes (including text nodes and other elements).
046 <p>
047 From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
048*/
049public class Element extends Node implements Iterable<Element> {
    // Immutable list handed out by childElementsList() when this element has no child nodes.
    private static final List<Element> EmptyChildren = Collections.emptyList();
    // Shared placeholder assigned by the constructor; compared by identity to tell whether a real child list exists yet.
    private static final NodeList EmptyNodeList = new NodeList(0);
    // Matches one or more whitespace characters. NOTE(review): presumably used to split class names - usage is outside this view.
    private static final Pattern ClassSplit = Pattern.compile("\\s+");
    // Internal attribute key under which doSetBaseUri() stores, and baseUri() looks up, the base URI.
    private static final String BaseUriKey = Attributes.internalKey("baseUri");
    // The tag of this element; validated as non-null by the constructor and by tag(Tag).
    Tag tag;
    // Child nodes; starts out as EmptyNodeList and is swapped for a real list by ensureChildNodes().
    NodeList childNodes;
    // Created lazily by attributes(), so the field may be null even though attributes() never returns null.
    @Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null
057
058    /**
059     * Create a new, standalone element, in the specified namespace.
060     * @param tag tag name
061     * @param namespace namespace for this element
062     */
063    public Element(String tag, String namespace) {
064        this(Tag.valueOf(tag, namespace, ParseSettings.preserveCase), null);
065    }
066
067    /**
068     * Create a new, standalone element, in the HTML namespace.
069     * @param tag tag name
070     * @see #Element(String tag, String namespace)
071     */
072    public Element(String tag) {
073        this(tag, Parser.NamespaceHtml);
074    }
075
076    /**
077     * Create a new, standalone Element. (Standalone in that it has no parent.)
078     *
079     * @param tag tag of this element
080     * @param baseUri the base URI (optional, may be null to inherit from parent, or "" to clear parent's)
081     * @param attributes initial attributes (optional, may be null)
082     * @see #appendChild(Node)
083     * @see #appendElement(String)
084     */
085    public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes) {
086        Validate.notNull(tag);
087        childNodes = EmptyNodeList;
088        this.attributes = attributes;
089        this.tag = tag;
090        if (baseUri != null)
091            this.setBaseUri(baseUri);
092    }
093
094    /**
095     * Create a new Element from a Tag and a base URI.
096     *
097     * @param tag element tag
098     * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any.
099     * @see Tag#valueOf(String, ParseSettings)
100     */
101    public Element(Tag tag, @Nullable String baseUri) {
102        this(tag, baseUri, null);
103    }
104
105    /**
106     Internal test to check if a nodelist object has been created.
107     */
108    protected boolean hasChildNodes() {
109        return childNodes != EmptyNodeList;
110    }
111
112    @Override protected List<Node> ensureChildNodes() {
113        if (childNodes == EmptyNodeList) {
114            childNodes = new NodeList(4);
115        }
116        return childNodes;
117    }
118
119    @Override
120    protected boolean hasAttributes() {
121        return attributes != null;
122    }
123
124    @Override
125    public Attributes attributes() {
126        if (attributes == null) // not using hasAttributes, as doesn't clear warning
127            attributes = new Attributes();
128        return attributes;
129    }
130
131    @Override
132    public String baseUri() {
133        return searchUpForAttribute(this, BaseUriKey);
134    }
135
136    private static String searchUpForAttribute(final Element start, final String key) {
137        Element el = start;
138        while (el != null) {
139            if (el.attributes != null && el.attributes.hasKey(key))
140                return el.attributes.get(key);
141            el = el.parent();
142        }
143        return "";
144    }
145
146    @Override
147    protected void doSetBaseUri(String baseUri) {
148        attributes().put(BaseUriKey, baseUri);
149    }
150
151    @Override
152    public int childNodeSize() {
153        return childNodes.size();
154    }
155
156    @Override
157    public String nodeName() {
158        return tag.getName();
159    }
160
161    /**
162     * Get the name of the tag for this element. E.g. {@code div}. If you are using {@link ParseSettings#preserveCase
163     * case preserving parsing}, this will return the source's original case.
164     *
165     * @return the tag name
166     */
167    public String tagName() {
168        return tag.getName();
169    }
170
171    /**
172     * Get the normalized name of this Element's tag. This will always be the lower-cased version of the tag, regardless
173     * of the tag case preserving setting of the parser. For e.g., {@code <DIV>} and {@code <div>} both have a
174     * normal name of {@code div}.
175     * @return normal name
176     */
177    @Override
178    public String normalName() {
179        return tag.normalName();
180    }
181
182    /**
183     Test if this Element has the specified normalized name, and is in the specified namespace.
184     * @param normalName a normalized element name (e.g. {@code div}).
185     * @param namespace the namespace
186     * @return true if the element's normal name matches exactly, and is in the specified namespace
187     * @since 1.17.2
188     */
189    public boolean elementIs(String normalName, String namespace) {
190        return tag.normalName().equals(normalName) && tag.namespace().equals(namespace);
191    }
192
193    /**
194     * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
195     * {@code el.tagName("div");}.
196     *
197     * @param tagName new tag name for this element
198     * @return this element, for chaining
199     * @see Elements#tagName(String)
200     */
201    public Element tagName(String tagName) {
202        return tagName(tagName, tag.namespace());
203    }
204
205    /**
206     * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
207     * {@code el.tagName("div");}.
208     *
209     * @param tagName new tag name for this element
210     * @param namespace the new namespace for this element
211     * @return this element, for chaining
212     * @see Elements#tagName(String)
213     */
214    public Element tagName(String tagName, String namespace) {
215        Validate.notEmptyParam(tagName, "tagName");
216        Validate.notEmptyParam(namespace, "namespace");
217        Parser parser = NodeUtils.parser(this);
218        tag = parser.tagSet().valueOf(tagName, namespace, parser.settings()); // maintains the case option of the original parse
219        return this;
220    }
221
222    /**
223     * Get the Tag for this element.
224     *
225     * @return the tag object
226     */
227    public Tag tag() {
228        return tag;
229    }
230
231    /**
232     Change the Tag of this element.
233     @param tag the new tag
234     @return this element, for chaining
235     @since 1.20.1
236     */
237    public Element tag(Tag tag) {
238        Validate.notNull(tag);
239        this.tag = tag;
240        return this;
241    }
242
243    /**
244     * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element
245     * {@code <span> == false}).
246     *
247     * @return true if block, false if not (and thus inline)
248     */
249    public boolean isBlock() {
250        return tag.isBlock();
251    }
252
253    /**
254     * Get the {@code id} attribute of this element.
255     *
256     * @return The id attribute, if present, or an empty string if not.
257     */
258    public String id() {
259        return attributes != null ? attributes.getIgnoreCase("id") :"";
260    }
261
262    /**
263     Set the {@code id} attribute of this element.
264     @param id the ID value to use
265     @return this Element, for chaining
266     */
267    public Element id(String id) {
268        Validate.notNull(id);
269        attr("id", id);
270        return this;
271    }
272
273    /**
274     * Set an attribute value on this element. If this element already has an attribute with the
275     * key, its value is updated; otherwise, a new attribute is added.
276     *
277     * @return this element
278     */
279    @Override public Element attr(String attributeKey, String attributeValue) {
280        super.attr(attributeKey, attributeValue);
281        return this;
282    }
283
284    /**
285     * Set a boolean attribute value on this element. Setting to <code>true</code> sets the attribute value to "" and
286     * marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the attribute
287     * with the same key if it exists.
288     *
289     * @param attributeKey the attribute key
290     * @param attributeValue the attribute value
291     *
292     * @return this element
293     */
294    public Element attr(String attributeKey, boolean attributeValue) {
295        attributes().put(attributeKey, attributeValue);
296        return this;
297    }
298
299    /**
300     Get an Attribute by key. Changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc
301     will cascade back to this Element.
302     @param key the (case-sensitive) attribute key
303     @return the Attribute for this key, or null if not present.
304     @since 1.17.2
305     */
306    @Nullable public Attribute attribute(String key) {
307        return hasAttributes() ? attributes().attribute(key) : null;
308    }
309
310    /**
311     * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
312     * starting with "data-" is included the dataset.
313     * <p>
314     * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
315     * {@code package=jsoup, language=java}.
316     * <p>
317     * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected
318     * in the other map.
319     * <p>
320     * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
321     * @return a map of {@code key=value} custom data attributes.
322     */
323    public Map<String, String> dataset() {
324        return attributes().dataset();
325    }
326
327    @Override @Nullable
328    public final Element parent() {
329        return (Element) parentNode;
330    }
331
332    /**
333     * Get this element's parent and ancestors, up to the document root.
334     * @return this element's stack of parents, starting with the closest first.
335     */
336    public Elements parents() {
337        Elements parents = new Elements();
338        Element parent = this.parent();
339        while (parent != null && !parent.nameIs("#root")) {
340            parents.add(parent);
341            parent = parent.parent();
342        }
343        return parents;
344    }
345
346    /**
347     * Get a child element of this element, by its 0-based index number.
348     * <p>
349     * Note that an element can have both mixed Nodes and Elements as children. This method inspects
350     * a filtered list of children that are elements, and the index is based on that filtered list.
351     * </p>
352     *
353     * @param index the index number of the element to retrieve
354     * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException}
355     * @see #childNode(int)
356     */
357    public Element child(int index) {
358        return childElementsList().get(index);
359    }
360
361    /**
362     * Get the number of child nodes of this element that are elements.
363     * <p>
364     * This method works on the same filtered list like {@link #child(int)}. Use {@link #childNodes()} and {@link
365     * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.)
366     * </p>
367     *
368     * @return the number of child nodes that are elements
369     * @see #children()
370     * @see #child(int)
371     */
372    public int childrenSize() {
373        return childElementsList().size();
374    }
375
376    /**
377     * Get this element's child elements.
378     * <p>
379     * This is effectively a filter on {@link #childNodes()} to get Element nodes.
380     * </p>
381     * @return child elements. If this element has no children, returns an empty list.
382     * @see #childNodes()
383     */
384    public Elements children() {
385        return new Elements(childElementsList());
386    }
387
388    /**
389     * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated.
390     * @return a list of child elements
391     */
392    List<Element> childElementsList() {
393        if (childNodeSize() == 0) return EmptyChildren; // short circuit creating empty
394        List<Element> children = cachedChildren();
395        if (children == null) {
396            children = filterNodes(Element.class);
397            stashChildren(children);
398        }
399        return children;
400    }
401
    // User-data key under which the weakly referenced list of cached child elements is stored.
    private static final String childElsKey = "jsoup.childEls";
    // User-data key holding the child node list's modCount as it was when the cache was stashed.
    private static final String childElsMod = "jsoup.childElsMod";
404
405    /** returns the cached child els, if they exist, and the modcount of our childnodes matches the stashed modcount */
406    private @Nullable List<Element> cachedChildren() {
407        Map<String, Object> userData = attributes().userData();
408        //noinspection unchecked
409        WeakReference<List<Element>> ref = (WeakReference<List<Element>>) userData.get(childElsKey);
410        if (ref != null) {
411            List<Element> els = ref.get();
412            if (els != null) {
413                Integer modCount = (Integer) userData.get(childElsMod);
414                if (modCount != null && modCount == childNodes.modCount())
415                    return els;
416            }
417        }
418        return null;
419    }
420
421    /** caches the child els into the Attribute user data. */
422    private void stashChildren(List<Element> els) {
423        Map<String, Object> userData = attributes().userData();
424        WeakReference<List<Element>> ref = new WeakReference<>(els);
425        userData.put(childElsKey, ref);
426        userData.put(childElsMod, childNodes.modCount());
427    }
428
429    /**
430     Returns a Stream of this Element and all of its descendant Elements. The stream has document order.
431     @return a stream of this element and its descendants.
432     @see #nodeStream()
433     @since 1.17.1
434     */
435    public Stream<Element> stream() {
436        return NodeUtils.stream(this, Element.class);
437    }
438
439    private <T> List<T> filterNodes(Class<T> clazz) {
440        return childNodes.stream()
441                .filter(clazz::isInstance)
442                .map(clazz::cast)
443                .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
444    }
445
446    /**
447     * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated.
448     * <p>
449     * This is effectively a filter on {@link #childNodes()} to get Text nodes.
450     * @return child text nodes. If this element has no text nodes, returns an
451     * empty list.
452     * </p>
453     * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected:
454     * <ul>
455     *     <li>{@code p.text()} = {@code "One Two Three Four"}</li>
456     *     <li>{@code p.ownText()} = {@code "One Three Four"}</li>
457     *     <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
458     *     <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
459     *     <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
460     * </ul>
461     */
462    public List<TextNode> textNodes() {
463        return filterNodes(TextNode.class);
464    }
465
466    /**
467     * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
468     * <p>
469     * This is effectively a filter on {@link #childNodes()} to get Data nodes.
470     * </p>
471     * @return child data nodes. If this element has no data nodes, returns an
472     * empty list.
473     * @see #data()
474     */
475    public List<DataNode> dataNodes() {
476        return filterNodes(DataNode.class);
477    }
478
479    /**
480     * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
481     * may include this element, or any of its descendents.
482     * <p>If the query starts with a combinator (e.g. {@code *} or {@code >}), that will combine to this element.</p>
483     * <p>This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
484     * multiple filters can be combined, e.g.:</p>
485     * <ul>
486     * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)</li>
487     * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)</li>
488     * <li>{@code el.select("* div")} - finds all divs that descend from this element (and excludes this element)</li>
489     * <li>{@code el.select("> div")} - finds all divs that are direct children of this element (and excludes this element)</li>
490     * </ul>
491     * <p>See the query syntax documentation in {@link org.jsoup.select.Selector}.</p>
492     * <p>Also known as {@code querySelectorAll()} in the Web DOM.</p>
493     *
494     * @param cssQuery a {@link Selector} CSS-like query
495     * @return an {@link Elements} list containing elements that match the query (empty if none match)
496     * @see Selector selector query syntax
497     * @see #select(Evaluator)
498     * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
499     */
500    public Elements select(String cssQuery) {
501        return Selector.select(cssQuery, this);
502    }
503
504    /**
505     * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but
506     * may be useful if you are running the same query many times (on many documents) and want to save the overhead of
507     * repeatedly parsing the CSS query.
508     * @param evaluator an element evaluator
509     * @return an {@link Elements} list containing elements that match the query (empty if none match)
510     * @see Selector#evaluatorOf(String css)
511     */
512    public Elements select(Evaluator evaluator) {
513        return Selector.select(evaluator, this);
514    }
515
516    /**
517     Selects elements from the given root that match the specified {@link Selector} CSS query, with this element as the
518     starting context, and returns them as a lazy Stream. Matched elements may include this element, or any of its
519     children.
520     <p>
521     Unlike {@link #select(String query)}, which returns a complete list of all matching elements, this method returns a
522     {@link Stream} that processes elements lazily as they are needed. The stream operates in a "pull" model — elements
523     are fetched from the root as the stream is traversed. You can use standard {@code Stream} operations such as
524     {@code filter}, {@code map}, or {@code findFirst} to process elements on demand.
525     </p>
526
527     @param cssQuery a {@link Selector} CSS-like query
528     @return a {@link Stream} containing elements that match the query (empty if none match)
529     @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
530     @see Selector selector query syntax
531     @see #selectStream(Evaluator eval)
532     @since 1.19.1
533     */
534    public Stream<Element> selectStream(String cssQuery) {
535        return Selector.selectStream(cssQuery, this);
536    }
537
538    /**
539     Find a Stream of elements that match the supplied Evaluator.
540
541     @param evaluator an element Evaluator
542     @return a {@link Stream} containing elements that match the query (empty if none match)
543     @see Selector#evaluatorOf(String css)
544     @since 1.19.1
545     */
546    public Stream<Element> selectStream(Evaluator evaluator) {
547        return Selector.selectStream(evaluator, this);
548    }
549
550    /**
551     * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
552     * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
553     * execution stops on the first hit.</p>
554     * <p>Also known as {@code querySelector()} in the Web DOM.</p>
555     * @param cssQuery cssQuery a {@link Selector} CSS-like query
556     * @return the first matching element, or <b>{@code null}</b> if there is no match.
557     * @see #expectFirst(String)
558     */
559    public @Nullable Element selectFirst(String cssQuery) {
560        return Selector.selectFirst(cssQuery, this);
561    }
562
563    /**
564     * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or
565     * {@code null} if none match.
566     *
567     * @param evaluator an element evaluator
568     * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none
569     * match.
570     */
571    public @Nullable Element selectFirst(Evaluator evaluator) {
572        return Collector.findFirst(evaluator, this);
573    }
574
575    /**
576     Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}. This
577     is useful if you want to simply abort processing on a failed match.
578     @param cssQuery a {@link Selector} CSS-like query
579     @return the first matching element
580     @throws IllegalArgumentException if no match is found
581     @since 1.15.2
582     */
583    public Element expectFirst(String cssQuery) {
584        return Validate.expectNotNull(
585            Selector.selectFirst(cssQuery, this),
586            parent() != null ?
587                "No elements matched the query '%s' on element '%s'." :
588                "No elements matched the query '%s' in the document."
589            , cssQuery, this.tagName()
590        );
591    }
592
593    /**
594     Find nodes that match the supplied {@link Evaluator}, with this element as the starting context. Matched
595     nodes may include this element, or any of its descendents.
596
597     @param evaluator an evaluator
598     @return a list of nodes that match the query (empty if none match)
599     @since 1.21.1
600     */
601    public Nodes<Node> selectNodes(Evaluator evaluator) {
602        return selectNodes(evaluator, Node.class);
603    }
604
605    /**
606     Find nodes that match the supplied {@link Selector} CSS query, with this element as the starting context. Matched
607     nodes may include this element, or any of its descendents.
608     <p>To select leaf nodes, the query should specify the node type, e.g. {@code ::text},
609     {@code ::comment}, {@code ::data}, {@code ::leafnode}.</p>
610
611     @param cssQuery a {@link Selector} CSS query
612     @return a list of nodes that match the query (empty if none match)
613     @since 1.21.1
614     */
615    public Nodes<Node> selectNodes(String cssQuery) {
616        return selectNodes(cssQuery, Node.class);
617    }
618
619    /**
620     Find nodes that match the supplied Evaluator, with this element as the starting context. Matched
621     nodes may include this element, or any of its descendents.
622
623     @param evaluator an evaluator
624     @param type the type of node to collect (e.g. {@link Element}, {@link LeafNode}, {@link TextNode} etc)
625     @param <T> the type of node to collect
626     @return a list of nodes that match the query (empty if none match)
627     @since 1.21.1
628     */
629    public <T extends Node> Nodes<T> selectNodes(Evaluator evaluator, Class<T> type) {
630        Validate.notNull(evaluator);
631        return Collector.collectNodes(evaluator, this, type);
632    }
633
634    /**
635     Find nodes that match the supplied {@link Selector} CSS query, with this element as the starting context. Matched
636     nodes may include this element, or any of its descendents.
637     <p>To select specific node types, use {@code ::text}, {@code ::comment}, {@code ::leafnode}, etc. For example, to
638     select all text nodes under {@code p} elements: </p>
639     <pre>    Nodes&lt;TextNode&gt; textNodes = doc.selectNodes("p ::text", TextNode.class);</pre>
640
641     @param cssQuery a {@link Selector} CSS query
642     @param type the type of node to collect (e.g. {@link Element}, {@link LeafNode}, {@link TextNode} etc)
643     @param <T> the type of node to collect
644     @return a list of nodes that match the query (empty if none match)
645     @since 1.21.1
646     */
647    public <T extends Node> Nodes<T> selectNodes(String cssQuery, Class<T> type) {
648        Validate.notEmpty(cssQuery);
649        return selectNodes(evaluatorOf(cssQuery), type);
650    }
651
652    /**
653     Find the first Node that matches the {@link Selector} CSS query, with this element as the starting context.
654     <p>This is effectively the same as calling {@code element.selectNodes(query).first()}, but is more efficient as
655     query
656     execution stops on the first hit.</p>
657     <p>Also known as {@code querySelector()} in the Web DOM.</p>
658
659     @param cssQuery cssQuery a {@link Selector} CSS-like query
660     @return the first matching node, or <b>{@code null}</b> if there is no match.
661     @since 1.21.1
662     @see #expectFirst(String)
663     */
664    public @Nullable <T extends Node> T selectFirstNode(String cssQuery, Class<T> type) {
665        return selectFirstNode(evaluatorOf(cssQuery), type);
666    }
667
668    /**
669     Finds the first Node that matches the supplied Evaluator, with this element as the starting context, or
670     {@code null} if none match.
671
672     @param evaluator an element evaluator
673     @return the first matching node (walking down the tree, starting from this element), or {@code null} if none
674     match.
675     @since 1.21.1
676     */
677    public @Nullable <T extends Node> T selectFirstNode(Evaluator evaluator, Class<T> type) {
678        return Collector.findFirstNode(evaluator, this, type);
679    }
680
681    /**
682     Just like {@link #selectFirstNode(String, Class)}, but if there is no match, throws an
683     {@link IllegalArgumentException}. This is useful if you want to simply abort processing on a failed match.
684
685     @param cssQuery a {@link Selector} CSS-like query
686     @return the first matching node
687     @throws IllegalArgumentException if no match is found
688     @since 1.21.1
689     */
690    public <T extends Node> T expectFirstNode(String cssQuery, Class<T> type) {
691        return Validate.expectNotNull(
692            selectFirstNode(cssQuery, type),
693            parent() != null ?
694                "No nodes matched the query '%s' on element '%s'.":
695                "No nodes matched the query '%s' in the document."
696            , cssQuery, this.tagName()
697        );
698    }
699
700    /**
701     * Checks if this element matches the given {@link Selector} CSS query. Also knows as {@code matches()} in the Web
702     * DOM.
703     *
704     * @param cssQuery a {@link Selector} CSS query
705     * @return if this element matches the query
706     */
707    public boolean is(String cssQuery) {
708        return is(evaluatorOf(cssQuery));
709    }
710
711    /**
712     * Check if this element matches the given evaluator.
713     * @param evaluator an element evaluator
714     * @return if this element matches
715     */
716    public boolean is(Evaluator evaluator) {
717        return evaluator.matches(this.root(), this);
718    }
719
720    /**
721     * Find the closest element up the tree of parents that matches the specified CSS query. Will return itself, an
722     * ancestor, or {@code null} if there is no such matching element.
723     * @param cssQuery a {@link Selector} CSS query
724     * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not
725     * found.
726     */
727    public @Nullable Element closest(String cssQuery) {
728        return closest(evaluatorOf(cssQuery));
729    }
730
731    /**
732     * Find the closest element up the tree of parents that matches the specified evaluator. Will return itself, an
733     * ancestor, or {@code null} if there is no such matching element.
734     * @param evaluator a query evaluator
735     * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not
736     * found.
737     */
738    public @Nullable Element closest(Evaluator evaluator) {
739        Validate.notNull(evaluator);
740        Element el = this;
741        final Element root = root();
742        do {
743            if (evaluator.matches(root, el))
744                return el;
745            el = el.parent();
746        } while (el != null);
747        return null;
748    }
749
750    /**
751     Find Elements that match the supplied {@index XPath} expression.
752     <p>Note that for convenience of writing the Xpath expression, namespaces are disabled, and queries can be
753     expressed using the element's local name only.</p>
754     <p>By default, XPath 1.0 expressions are supported. If you would to use XPath 2.0 or higher, you can provide an
755     alternate XPathFactory implementation:</p>
756     <ol>
757     <li>Add the implementation to your classpath. E.g. to use <a href="https://www.saxonica.com/products/products.xml">Saxon-HE</a>, add <a href="https://mvnrepository.com/artifact/net.sf.saxon/Saxon-HE">net.sf.saxon:Saxon-HE</a> to your build.</li>
758     <li>Set the system property <code>javax.xml.xpath.XPathFactory:jsoup</code> to the implementing classname. E.g.:<br>
759     <code>System.setProperty(W3CDom.XPathFactoryProperty, "net.sf.saxon.xpath.XPathFactoryImpl");</code>
760     </li>
761     </ol>
762
763     @param xpath XPath expression
764     @return matching elements, or an empty list if none match.
765     @see #selectXpath(String, Class)
766     @since 1.14.3
767     */
768    public Elements selectXpath(String xpath) {
769        return new Elements(NodeUtils.selectXpath(xpath, this, Element.class));
770    }
771
772    /**
773     Find Nodes that match the supplied XPath expression.
774     <p>For example, to select TextNodes under {@code p} elements: </p>
775     <pre>List&lt;TextNode&gt; textNodes = doc.selectXpath("//body//p//text()", TextNode.class);</pre>
776     <p>Note that in the jsoup DOM, Attribute objects are not Nodes. To directly select attribute values, do something
777     like:</p>
778     <pre>List&lt;String&gt; hrefs = doc.selectXpath("//a").eachAttr("href");</pre>
779     @param xpath XPath expression
780     @param nodeType the jsoup node type to return
781     @see #selectXpath(String)
782     @return a list of matching nodes
783     @since 1.14.3
784     */
785    public <T extends Node> List<T> selectXpath(String xpath, Class<T> nodeType) {
786        return NodeUtils.selectXpath(xpath, this, nodeType);
787    }
788
789    /**
790     * Insert a node to the end of this Element's children. The incoming node will be re-parented.
791     *
792     * @param child node to add.
793     * @return this Element, for chaining
794     * @see #prependChild(Node)
795     * @see #insertChildren(int, Collection)
796     */
797    public Element appendChild(Node child) {
798        Validate.notNull(child);
799
800        // was - Node#addChildren(child). short-circuits an array create and a loop.
801        reparentChild(child);
802        ensureChildNodes();
803        childNodes.add(child);
804        child.setSiblingIndex(childNodes.size() - 1);
805        return this;
806    }
807
808    /**
809     Insert the given nodes to the end of this Element's children.
810
811     @param children nodes to add
812     @return this Element, for chaining
813     @see #insertChildren(int, Collection)
814     */
815    public Element appendChildren(Collection<? extends Node> children) {
816        insertChildren(-1, children);
817        return this;
818    }
819
820    /**
821     * Add this element to the supplied parent element, as its next child.
822     *
823     * @param parent element to which this element will be appended
824     * @return this element, so that you can continue modifying the element
825     */
826    public Element appendTo(Element parent) {
827        Validate.notNull(parent);
828        parent.appendChild(this);
829        return this;
830    }
831
832    /**
833     * Add a node to the start of this element's children.
834     *
835     * @param child node to add.
836     * @return this element, so that you can add more child nodes or elements.
837     */
838    public Element prependChild(Node child) {
839        Validate.notNull(child);
840
841        addChildren(0, child);
842        return this;
843    }
844
845    /**
846     Insert the given nodes to the start of this Element's children.
847
848     @param children nodes to add
849     @return this Element, for chaining
850     @see #insertChildren(int, Collection)
851     */
852    public Element prependChildren(Collection<? extends Node> children) {
853        insertChildren(0, children);
854        return this;
855    }
856
857
858    /**
859     * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the
860     * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first.
861     *
862     * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
863     * end
864     * @param children child nodes to insert
865     * @return this element, for chaining.
866     */
867    public Element insertChildren(int index, Collection<? extends Node> children) {
868        Validate.notNull(children, "Children collection to be inserted must not be null.");
869        int currentSize = childNodeSize();
870        if (index < 0) index += currentSize +1; // roll around
871        Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds.");
872
873        ArrayList<Node> nodes = new ArrayList<>(children);
874        Node[] nodeArray = nodes.toArray(new Node[0]);
875        addChildren(index, nodeArray);
876        return this;
877    }
878
879    /**
880     * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the
881     * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first.
882     *
883     * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
884     * end
885     * @param children child nodes to insert
886     * @return this element, for chaining.
887     */
888    public Element insertChildren(int index, Node... children) {
889        Validate.notNull(children, "Children collection to be inserted must not be null.");
890        int currentSize = childNodeSize();
891        if (index < 0) index += currentSize +1; // roll around
892        Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds.");
893
894        addChildren(index, children);
895        return this;
896    }
897
898    /**
899     * Create a new element by tag name, and add it as this Element's last child.
900     *
901     * @param tagName the name of the tag (e.g. {@code div}).
902     * @return the new element, to allow you to add content to it, e.g.:
903     *  {@code parent.appendElement("h1").attr("id", "header").text("Welcome");}
904     */
905    public Element appendElement(String tagName) {
906        return appendElement(tagName, tag.namespace());
907    }
908
909    /**
910     * Create a new element by tag name and namespace, add it as this Element's last child.
911     *
912     * @param tagName the name of the tag (e.g. {@code div}).
913     * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml})
914     * @return the new element, in the specified namespace
915     */
916    public Element appendElement(String tagName, String namespace) {
917        Parser parser = NodeUtils.parser(this);
918        Element child = new Element(parser.tagSet().valueOf(tagName, namespace, parser.settings()), baseUri());
919        appendChild(child);
920        return child;
921    }
922
923    /**
924     * Create a new element by tag name, and add it as this Element's first child.
925     *
926     * @param tagName the name of the tag (e.g. {@code div}).
927     * @return the new element, to allow you to add content to it, e.g.:
928     *  {@code parent.prependElement("h1").attr("id", "header").text("Welcome");}
929     */
930    public Element prependElement(String tagName) {
931        return prependElement(tagName, tag.namespace());
932    }
933
934    /**
935     * Create a new element by tag name and namespace, and add it as this Element's first child.
936     *
937     * @param tagName the name of the tag (e.g. {@code div}).
938     * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml})
939     * @return the new element, in the specified namespace
940     */
941    public Element prependElement(String tagName, String namespace) {
942        Parser parser = NodeUtils.parser(this);
943        Element child = new Element(parser.tagSet().valueOf(tagName, namespace, parser.settings()), baseUri());
944        prependChild(child);
945        return child;
946    }
947
948    /**
949     * Create and append a new TextNode to this element.
950     *
951     * @param text the (un-encoded) text to add
952     * @return this element
953     */
954    public Element appendText(String text) {
955        Validate.notNull(text);
956        TextNode node = new TextNode(text);
957        appendChild(node);
958        return this;
959    }
960
961    /**
962     * Create and prepend a new TextNode to this element.
963     *
964     * @param text the decoded text to add
965     * @return this element
966     */
967    public Element prependText(String text) {
968        Validate.notNull(text);
969        TextNode node = new TextNode(text);
970        prependChild(node);
971        return this;
972    }
973
974    /**
975     * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children.
976     * @param html HTML to add inside this element, after the existing HTML
977     * @return this element
978     * @see #html(String)
979     */
980    public Element append(String html) {
981        Validate.notNull(html);
982        List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri());
983        addChildren(nodes.toArray(new Node[0]));
984        return this;
985    }
986
987    /**
988     * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children.
989     * @param html HTML to add inside this element, before the existing HTML
990     * @return this element
991     * @see #html(String)
992     */
993    public Element prepend(String html) {
994        Validate.notNull(html);
995        List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri());
996        addChildren(0, nodes.toArray(new Node[0]));
997        return this;
998    }
999
1000    /**
1001     * Insert the specified HTML into the DOM before this element (as a preceding sibling).
1002     *
1003     * @param html HTML to add before this element
1004     * @return this element, for chaining
1005     * @see #after(String)
1006     */
1007    @Override
1008    public Element before(String html) {
1009        return (Element) super.before(html);
1010    }
1011
1012    /**
1013     * Insert the specified node into the DOM before this node (as a preceding sibling).
1014     * @param node to add before this element
1015     * @return this Element, for chaining
1016     * @see #after(Node)
1017     */
1018    @Override
1019    public Element before(Node node) {
1020        return (Element) super.before(node);
1021    }
1022
1023    /**
1024     * Insert the specified HTML into the DOM after this element (as a following sibling).
1025     *
1026     * @param html HTML to add after this element
1027     * @return this element, for chaining
1028     * @see #before(String)
1029     */
1030    @Override
1031    public Element after(String html) {
1032        return (Element) super.after(html);
1033    }
1034
1035    /**
1036     * Insert the specified node into the DOM after this node (as a following sibling).
1037     * @param node to add after this element
1038     * @return this element, for chaining
1039     * @see #before(Node)
1040     */
1041    @Override
1042    public Element after(Node node) {
1043        return (Element) super.after(node);
1044    }
1045
1046    /**
1047     * Remove all the element's child nodes. Any attributes are left as-is. Each child node has its parent set to
1048     * {@code null}.
1049     * @return this element
1050     */
1051    @Override
1052    public Element empty() {
1053        // Detach each of the children -> parent links:
1054        for (Node child : childNodes) {
1055            child.parentNode = null;
1056        }
1057        childNodes.clear();
1058        return this;
1059    }
1060
1061    /**
1062     * Wrap the supplied HTML around this element.
1063     *
1064     * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
1065     * @return this element, for chaining.
1066     */
1067    @Override
1068    public Element wrap(String html) {
1069        return (Element) super.wrap(html);
1070    }
1071
1072    /**
1073     Gets an #id selector for this element, if it has a unique ID. Otherwise, returns an empty string.
1074
1075     @param ownerDoc the document that owns this element, if there is one
1076     */
1077    private String uniqueIdSelector(@Nullable Document ownerDoc) {
1078        String id = id();
1079        if (!id.isEmpty()) { // check if the ID is unique and matches this
1080            String idSel = "#" + escapeCssIdentifier(id);
1081            if (ownerDoc != null) {
1082                Elements els = ownerDoc.select(idSel);
1083                if (els.size() == 1 && els.get(0) == this) return idSel;
1084            } else {
1085                return idSel;
1086            }
1087        }
1088        return EmptyString;
1089    }
1090
1091    /**
1092     Get a CSS selector that will uniquely select this element.
1093     <p>
1094     If the element has an ID, returns #id; otherwise returns the parent (if any) CSS selector, followed by
1095     {@literal '>'}, followed by a unique selector for the element (tag.class.class:nth-child(n)).
1096     </p>
1097
1098     @return the CSS Path that can be used to retrieve the element in a selector.
1099     */
1100    public String cssSelector() {
1101        Document ownerDoc = ownerDocument();
1102        String idSel = uniqueIdSelector(ownerDoc);
1103        if (!idSel.isEmpty()) return idSel;
1104
1105        // No unique ID, work up the parent stack and find either a unique ID to hang from, or just a GP > Parent > Child chain
1106        StringBuilder selector = StringUtil.borrowBuilder();
1107        Element el = this;
1108        while (el != null && !(el instanceof Document)) {
1109            idSel = el.uniqueIdSelector(ownerDoc);
1110            if (!idSel.isEmpty()) {
1111                selector.insert(0, idSel);
1112                break; // found a unique ID to use as ancestor; stop
1113            }
1114            selector.insert(0, el.cssSelectorComponent());
1115            el = el.parent();
1116        }
1117        return StringUtil.releaseBuilder(selector);
1118    }
1119
1120    private String cssSelectorComponent() {
1121        // Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag
1122        String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|");
1123        StringBuilder selector = StringUtil.borrowBuilder().append(tagName);
1124        String classes = classNames().stream().map(TokenQueue::escapeCssIdentifier)
1125                .collect(StringUtil.joining("."));
1126        if (!classes.isEmpty())
1127            selector.append('.').append(classes);
1128
1129        if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node
1130            return StringUtil.releaseBuilder(selector);
1131
1132        selector.insert(0, " > ");
1133        if (parent().select(selector.toString()).size() > 1)
1134            selector.append(String.format(
1135                ":nth-child(%d)", elementSiblingIndex() + 1));
1136
1137        return StringUtil.releaseBuilder(selector);
1138    }
1139
1140    /**
1141     * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling
1142     * of itself, so will not be included in the returned list.
1143     * @return sibling elements
1144     */
1145    public Elements siblingElements() {
1146        if (parentNode == null)
1147            return new Elements(0);
1148
1149        List<Element> elements = parent().childElementsList();
1150        Elements siblings = new Elements(elements.size() - 1);
1151        for (Element el: elements)
1152            if (el != this)
1153                siblings.add(el);
1154        return siblings;
1155    }
1156
1157
1158
1159    /**
1160     * Get each of the sibling elements that come after this element.
1161     *
1162     * @return each of the element siblings after this element, or an empty list if there are no next sibling elements
1163     */
1164    public Elements nextElementSiblings() {
1165        return nextElementSiblings(true);
1166    }
1167
1168    /**
1169     * Get each of the element siblings before this element.
1170     *
1171     * @return the previous element siblings, or an empty list if there are none.
1172     */
1173    public Elements previousElementSiblings() {
1174        return nextElementSiblings(false);
1175    }
1176
1177    private Elements nextElementSiblings(boolean next) {
1178        Elements els = new Elements();
1179        if (parentNode == null)
1180            return  els;
1181        els.add(this);
1182        return next ?  els.nextAll() : els.prevAll();
1183    }
1184
1185    /**
1186     * Gets the first Element sibling of this element. That may be this element.
1187     * @return the first sibling that is an element (aka the parent's first element child)
1188     */
1189    public Element firstElementSibling() {
1190        if (parent() != null) {
1191            //noinspection DataFlowIssue (not nullable, would be this is no other sibs)
1192            return parent().firstElementChild();
1193        } else
1194            return this; // orphan is its own first sibling
1195    }
1196
1197    /**
1198     * Get the list index of this element in its element sibling list. I.e. if this is the first element
1199     * sibling, returns 0.
1200     * @return position in element sibling list
1201     */
1202    public int elementSiblingIndex() {
1203       if (parent() == null) return 0;
1204       return indexInList(this, parent().childElementsList());
1205    }
1206
1207    /**
1208     * Gets the last element sibling of this element. That may be this element.
1209     * @return the last sibling that is an element (aka the parent's last element child)
1210     */
1211    public Element lastElementSibling() {
1212        if (parent() != null) {
1213            //noinspection DataFlowIssue (not nullable, would be this if no other sibs)
1214            return parent().lastElementChild();
1215        } else
1216            return this;
1217    }
1218
1219    private static <E extends Element> int indexInList(Element search, List<E> elements) {
1220        final int size = elements.size();
1221        for (int i = 0; i < size; i++) {
1222            if (elements.get(i) == search)
1223                return i;
1224        }
1225        return 0;
1226    }
1227
1228    /**
1229     Gets the first child of this Element that is an Element, or {@code null} if there is none.
1230     @return the first Element child node, or null.
1231     @see #firstChild()
1232     @see #lastElementChild()
1233     @since 1.15.2
1234     */
1235    public @Nullable Element firstElementChild() {
1236        Node child = firstChild();
1237        while (child != null) {
1238            if (child instanceof Element) return (Element) child;
1239            child = child.nextSibling();
1240        }
1241        return null;
1242    }
1243
1244    /**
1245     Gets the last child of this Element that is an Element, or @{code null} if there is none.
1246     @return the last Element child node, or null.
1247     @see #lastChild()
1248     @see #firstElementChild()
1249     @since 1.15.2
1250     */
1251    public @Nullable Element lastElementChild() {
1252        Node child = lastChild();
1253        while (child != null) {
1254            if (child instanceof Element) return (Element) child;
1255            child = child.previousSibling();
1256        }
1257        return null;
1258    }
1259
1260    // DOM type methods
1261
1262    /**
1263     * Finds elements, including and recursively under this element, with the specified tag name.
1264     * @param tagName The tag name to search for (case insensitively).
1265     * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
1266     */
1267    public Elements getElementsByTag(String tagName) {
1268        Validate.notEmpty(tagName);
1269        tagName = normalize(tagName);
1270
1271        return Collector.collect(new Evaluator.Tag(tagName), this);
1272    }
1273
1274    /**
1275     * Find an element by ID, including or under this element.
1276     * <p>
1277     * Note that this finds the first matching ID, starting with this element. If you search down from a different
1278     * starting point, it is possible to find a different element by ID. For unique element by ID within a Document,
1279     * use {@link Document#getElementById(String)}
1280     * @param id The ID to search for.
1281     * @return The first matching element by ID, starting with this element, or null if none found.
1282     */
1283    public @Nullable Element getElementById(String id) {
1284        Validate.notEmpty(id);
1285        return Collector.findFirst(new Evaluator.Id(id), this);
1286    }
1287
1288    /**
1289     * Find elements that have this class, including or under this element. Case-insensitive.
1290     * <p>
1291     * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method
1292     * checks each class, so you can find the above with {@code el.getElementsByClass("header");}.
1293     *
1294     * @param className the name of the class to search for.
1295     * @return elements with the supplied class name, empty if none
1296     * @see #hasClass(String)
1297     * @see #classNames()
1298     */
1299    public Elements getElementsByClass(String className) {
1300        Validate.notEmpty(className);
1301
1302        return Collector.collect(new Evaluator.Class(className), this);
1303    }
1304
1305    /**
1306     * Find elements that have a named attribute set. Case-insensitive.
1307     *
1308     * @param key name of the attribute, e.g. {@code href}
1309     * @return elements that have this attribute, empty if none
1310     */
1311    public Elements getElementsByAttribute(String key) {
1312        Validate.notEmpty(key);
1313        key = key.trim();
1314
1315        return Collector.collect(new Evaluator.Attribute(key), this);
1316    }
1317
1318    /**
1319     * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements
1320     * that have HTML5 datasets.
1321     * @param keyPrefix name prefix of the attribute e.g. {@code data-}
1322     * @return elements that have attribute names that start with the prefix, empty if none.
1323     */
1324    public Elements getElementsByAttributeStarting(String keyPrefix) {
1325        Validate.notEmpty(keyPrefix);
1326        keyPrefix = keyPrefix.trim();
1327
1328        return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this);
1329    }
1330
1331    /**
1332     * Find elements that have an attribute with the specific value. Case-insensitive.
1333     *
1334     * @param key name of the attribute
1335     * @param value value of the attribute
1336     * @return elements that have this attribute with this value, empty if none
1337     */
1338    public Elements getElementsByAttributeValue(String key, String value) {
1339        return Collector.collect(new Evaluator.AttributeWithValue(key, value), this);
1340    }
1341
1342    /**
1343     * Find elements that either do not have this attribute, or have it with a different value. Case-insensitive.
1344     *
1345     * @param key name of the attribute
1346     * @param value value of the attribute
1347     * @return elements that do not have a matching attribute
1348     */
1349    public Elements getElementsByAttributeValueNot(String key, String value) {
1350        return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this);
1351    }
1352
1353    /**
1354     * Find elements that have attributes that start with the value prefix. Case-insensitive.
1355     *
1356     * @param key name of the attribute
1357     * @param valuePrefix start of attribute value
1358     * @return elements that have attributes that start with the value prefix
1359     */
1360    public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) {
1361        return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this);
1362    }
1363
1364    /**
1365     * Find elements that have attributes that end with the value suffix. Case-insensitive.
1366     *
1367     * @param key name of the attribute
1368     * @param valueSuffix end of the attribute value
1369     * @return elements that have attributes that end with the value suffix
1370     */
1371    public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) {
1372        return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this);
1373    }
1374
1375    /**
1376     * Find elements that have attributes whose value contains the match string. Case-insensitive.
1377     *
1378     * @param key name of the attribute
1379     * @param match substring of value to search for
1380     * @return elements that have attributes containing this text
1381     */
1382    public Elements getElementsByAttributeValueContaining(String key, String match) {
1383        return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this);
1384    }
1385
1386    /**
1387     * Find elements that have an attribute whose value matches the supplied regular expression.
1388     * @param key name of the attribute
1389     * @param pattern compiled regular expression to match against attribute values
1390     * @return elements that have attributes matching this regular expression
1391     */
1392    public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) {
1393        return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this);
1394
1395    }
1396
1397    /**
1398     * Find elements that have attributes whose values match the supplied regular expression.
1399     * @param key name of the attribute
1400     * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options.
1401     * @return elements that have attributes matching this regular expression
1402     */
1403    public Elements getElementsByAttributeValueMatching(String key, String regex) {
1404        Pattern pattern;
1405        try {
1406            pattern = Pattern.compile(regex);
1407        } catch (PatternSyntaxException e) {
1408            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1409        }
1410        return getElementsByAttributeValueMatching(key, pattern);
1411    }
1412
1413    /**
1414     * Find elements whose sibling index is less than the supplied index.
1415     * @param index 0-based index
1416     * @return elements less than index
1417     */
1418    public Elements getElementsByIndexLessThan(int index) {
1419        return Collector.collect(new Evaluator.IndexLessThan(index), this);
1420    }
1421
1422    /**
1423     * Find elements whose sibling index is greater than the supplied index.
1424     * @param index 0-based index
1425     * @return elements greater than index
1426     */
1427    public Elements getElementsByIndexGreaterThan(int index) {
1428        return Collector.collect(new Evaluator.IndexGreaterThan(index), this);
1429    }
1430
1431    /**
1432     * Find elements whose sibling index is equal to the supplied index.
1433     * @param index 0-based index
1434     * @return elements equal to index
1435     */
1436    public Elements getElementsByIndexEquals(int index) {
1437        return Collector.collect(new Evaluator.IndexEquals(index), this);
1438    }
1439
1440    /**
1441     * Find elements that contain the specified string. The search is case-insensitive. The text may appear directly
1442     * in the element, or in any of its descendants.
1443     * @param searchText to look for in the element's text
1444     * @return elements that contain the string, case-insensitive.
1445     * @see Element#text()
1446     */
1447    public Elements getElementsContainingText(String searchText) {
1448        return Collector.collect(new Evaluator.ContainsText(searchText), this);
1449    }
1450
1451    /**
1452     * Find elements that directly contain the specified string. The search is case-insensitive. The text must appear directly
1453     * in the element, not in any of its descendants.
1454     * @param searchText to look for in the element's own text
1455     * @return elements that contain the string, case-insensitive.
1456     * @see Element#ownText()
1457     */
1458    public Elements getElementsContainingOwnText(String searchText) {
1459        return Collector.collect(new Evaluator.ContainsOwnText(searchText), this);
1460    }
1461
1462    /**
1463     * Find elements whose text matches the supplied regular expression.
1464     * @param pattern regular expression to match text against
1465     * @return elements matching the supplied regular expression.
1466     * @see Element#text()
1467     */
1468    public Elements getElementsMatchingText(Pattern pattern) {
1469        return Collector.collect(new Evaluator.Matches(pattern), this);
1470    }
1471
1472    /**
1473     * Find elements whose text matches the supplied regular expression.
1474     * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options.
1475     * @return elements matching the supplied regular expression.
1476     * @see Element#text()
1477     */
1478    public Elements getElementsMatchingText(String regex) {
1479        Pattern pattern;
1480        try {
1481            pattern = Pattern.compile(regex);
1482        } catch (PatternSyntaxException e) {
1483            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1484        }
1485        return getElementsMatchingText(pattern);
1486    }
1487
1488    /**
1489     * Find elements whose own text matches the supplied regular expression.
1490     * @param pattern regular expression to match text against
1491     * @return elements matching the supplied regular expression.
1492     * @see Element#ownText()
1493     */
1494    public Elements getElementsMatchingOwnText(Pattern pattern) {
1495        return Collector.collect(new Evaluator.MatchesOwn(pattern), this);
1496    }
1497
1498    /**
1499     * Find elements whose own text matches the supplied regular expression.
1500     * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options.
1501     * @return elements matching the supplied regular expression.
1502     * @see Element#ownText()
1503     */
1504    public Elements getElementsMatchingOwnText(String regex) {
1505        Pattern pattern;
1506        try {
1507            pattern = Pattern.compile(regex);
1508        } catch (PatternSyntaxException e) {
1509            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1510        }
1511        return getElementsMatchingOwnText(pattern);
1512    }
1513
1514    /**
1515     * Find all elements under this element (including self, and children of children).
1516     *
1517     * @return all elements
1518     */
1519    public Elements getAllElements() {
1520        return Collector.collect(new Evaluator.AllElements(), this);
1521    }
1522
1523    /**
1524     Gets the <b>normalized, combined text</b> of this element and all its children. Whitespace is normalized and
1525     trimmed.
1526     <p>For example, given HTML {@code <p>Hello  <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there
1527    now!"}
1528     <p>If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not
1529     children), use {@link #ownText()}
1530     <p>Note that this method returns the textual content that would be presented to a reader. The contents of data
1531     nodes (such as {@code <script>} tags) are not considered text. Use {@link #data()} or {@link #html()} to retrieve
1532     that content.
1533
1534     @return decoded, normalized text, or empty string if none.
1535     @see #wholeText()
1536     @see #ownText()
1537     @see #textNodes()
1538     */
1539    public String text() {
1540        final StringBuilder accum = StringUtil.borrowBuilder();
1541        new TextAccumulator(accum).traverse(this);
1542        return StringUtil.releaseBuilder(accum).trim();
1543    }
1544
1545    private static class TextAccumulator implements NodeVisitor {
1546        private final StringBuilder accum;
1547
1548        public TextAccumulator(StringBuilder accum) {
1549            this.accum = accum;
1550        }
1551
1552        @Override public void head(Node node, int depth) {
1553            if (node instanceof TextNode) {
1554                TextNode textNode = (TextNode) node;
1555                appendNormalisedText(accum, textNode);
1556            } else if (node instanceof Element) {
1557                Element element = (Element) node;
1558                if (accum.length() > 0 &&
1559                    (element.isBlock() || element.nameIs("br")) &&
1560                    !lastCharIsWhitespace(accum))
1561                    accum.append(' ');
1562            }
1563        }
1564
1565        @Override public void tail(Node node, int depth) {
1566            // make sure there is a space between block tags and immediately following text nodes or inline elements <div>One</div>Two should be "One Two".
1567            if (node instanceof Element) {
1568                Element element = (Element) node;
1569                Node next = node.nextSibling();
1570                if (!element.tag.isInline() && (next instanceof TextNode || next instanceof Element && ((Element) next).tag.isInline()) && !lastCharIsWhitespace(accum))
1571                    accum.append(' ');
1572            }
1573
1574        }
1575    }
1576
1577    /**
1578     Get the non-normalized, decoded text of this element and its children, including only any newlines and spaces
1579     present in the original source.
1580     @return decoded, non-normalized text
1581     @see #text()
1582     @see #wholeOwnText()
1583     */
1584    public String wholeText() {
1585        return wholeTextOf(nodeStream());
1586    }
1587
1588    /**
1589     An Element's nodeValue is its whole own text.
1590     */
1591    @Override
1592    public String nodeValue() {
1593        return wholeOwnText();
1594    }
1595
1596    private static String wholeTextOf(Stream<Node> stream) {
1597        return stream.map(node -> {
1598            if (node instanceof TextNode) return ((TextNode) node).getWholeText();
1599            if (node.nameIs("br")) return "\n";
1600            return "";
1601        }).collect(StringUtil.joining(""));
1602    }
1603
1604    /**
1605     Get the non-normalized, decoded text of this element, <b>not including</b> any child elements, including any
1606     newlines and spaces present in the original source.
1607     @return decoded, non-normalized text that is a direct child of this Element
1608     @see #text()
1609     @see #wholeText()
1610     @see #ownText()
1611     @since 1.15.1
1612     */
1613    public String wholeOwnText() {
1614        return wholeTextOf(childNodes.stream());
1615    }
1616
1617    /**
1618     * Gets the (normalized) text owned by this element only; does not get the combined text of all children.
1619     * <p>
1620     * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"},
1621     * whereas {@code p.text()} returns {@code "Hello there now!"}.
1622     * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element.
1623     *
1624     * @return decoded text, or empty string if none.
1625     * @see #text()
1626     * @see #textNodes()
1627     */
1628    public String ownText() {
1629        StringBuilder sb = StringUtil.borrowBuilder();
1630        ownText(sb);
1631        return StringUtil.releaseBuilder(sb).trim();
1632    }
1633
1634    private void ownText(StringBuilder accum) {
1635        for (int i = 0; i < childNodeSize(); i++) {
1636            Node child = childNodes.get(i);
1637            if (child instanceof TextNode) {
1638                TextNode textNode = (TextNode) child;
1639                appendNormalisedText(accum, textNode);
1640            } else if (child.nameIs("br") && !lastCharIsWhitespace(accum)) {
1641                accum.append(" ");
1642            }
1643        }
1644    }
1645
1646    private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
1647        String text = textNode.getWholeText();
1648        if (preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode)
1649            accum.append(text);
1650        else
1651            StringUtil.appendNormalisedWhitespace(accum, text, lastCharIsWhitespace(accum));
1652    }
1653
1654    static boolean preserveWhitespace(@Nullable Node node) {
1655        // looks only at this element and five levels up, to prevent recursion & needless stack searches
1656        if (node instanceof Element) {
1657            Element el = (Element) node;
1658            int i = 0;
1659            do {
1660                if (el.tag.preserveWhitespace())
1661                    return true;
1662                el = el.parent();
1663                i++;
1664            } while (i < 6 && el != null);
1665        }
1666        return false;
1667    }
1668
1669    /**
1670     * Set the text of this element. Any existing contents (text or elements) will be cleared.
1671     * <p>As a special case, for {@code <script>} and {@code <style>} tags, the input text will be treated as data,
1672     * not visible text.</p>
1673     * @param text decoded text
1674     * @return this element
1675     */
1676    public Element text(String text) {
1677        Validate.notNull(text);
1678        empty();
1679        // special case for script/style in HTML (or customs): should be data node
1680        if (tag().is(Tag.Data))
1681            appendChild(new DataNode(text));
1682        else
1683            appendChild(new TextNode(text));
1684
1685        return this;
1686    }
1687
1688    /**
1689     Checks if the current element or any of its child elements contain non-whitespace text.
1690     @return {@code true} if the element has non-blank text content, {@code false} otherwise.
1691     */
1692    public boolean hasText() {
1693        AtomicBoolean hasText = new AtomicBoolean(false);
1694        filter((node, depth) -> {
1695            if (node instanceof TextNode) {
1696                TextNode textNode = (TextNode) node;
1697                if (!textNode.isBlank()) {
1698                    hasText.set(true);
1699                    return NodeFilter.FilterResult.STOP;
1700                }
1701            }
1702            return NodeFilter.FilterResult.CONTINUE;
1703        });
1704        return hasText.get();
1705    }
1706
1707    /**
1708     * Get the combined data of this element. Data is e.g. the inside of a {@code <script>} tag. Note that data is NOT the
1709     * text of the element. Use {@link #text()} to get the text that would be visible to a user, and {@code data()}
1710     * for the contents of scripts, comments, CSS styles, etc.
1711     *
1712     * @return the data, or empty string if none
1713     *
1714     * @see #dataNodes()
1715     */
1716    public String data() {
1717        StringBuilder sb = StringUtil.borrowBuilder();
1718        traverse((childNode, depth) -> {
1719            if (childNode instanceof DataNode) {
1720                DataNode data = (DataNode) childNode;
1721                sb.append(data.getWholeData());
1722            } else if (childNode instanceof Comment) {
1723                Comment comment = (Comment) childNode;
1724                sb.append(comment.getData());
1725            } else if (childNode instanceof CDataNode) {
1726                // this shouldn't really happen because the html parser won't see the cdata as anything special when parsing script.
1727                // but in case another type gets through.
1728                CDataNode cDataNode = (CDataNode) childNode;
1729                sb.append(cDataNode.getWholeText());
1730            }
1731        });
1732        return StringUtil.releaseBuilder(sb);
1733    }
1734
1735    /**
1736     * Gets the literal value of this element's "class" attribute, which may include multiple class names, space
1737     * separated. (E.g. on <code>&lt;div class="header gray"&gt;</code> returns, "<code>header gray</code>")
1738     * @return The literal class attribute, or <b>empty string</b> if no class attribute set.
1739     */
1740    public String className() {
1741        return attr("class").trim();
1742    }
1743
1744    /**
1745     * Get each of the element's class names. E.g. on element {@code <div class="header gray">},
1746     * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to
1747     * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them.
1748     * @return set of classnames, empty if no class attribute
1749     */
1750    public Set<String> classNames() {
1751        String[] names = ClassSplit.split(className());
1752        Set<String> classNames = new LinkedHashSet<>(Arrays.asList(names));
1753        classNames.remove(""); // if classNames() was empty, would include an empty class
1754
1755        return classNames;
1756    }
1757
1758    /**
1759     Set the element's {@code class} attribute to the supplied class names.
1760     @param classNames set of classes
1761     @return this element, for chaining
1762     */
1763    public Element classNames(Set<String> classNames) {
1764        Validate.notNull(classNames);
1765        if (classNames.isEmpty()) {
1766            attributes().remove("class");
1767        } else {
1768            attributes().put("class", StringUtil.join(classNames, " "));
1769        }
1770        return this;
1771    }
1772
1773    /**
1774     * Tests if this element has a class. Case-insensitive.
1775     * @param className name of class to check for
1776     * @return true if it does, false if not
1777     */
1778    // performance sensitive
1779    public boolean hasClass(String className) {
1780        if (attributes == null)
1781            return false;
1782
1783        final String classAttr = attributes.getIgnoreCase("class");
1784        final int len = classAttr.length();
1785        final int wantLen = className.length();
1786
1787        if (len == 0 || len < wantLen) {
1788            return false;
1789        }
1790
1791        // if both lengths are equal, only need compare the className with the attribute
1792        if (len == wantLen) {
1793            return className.equalsIgnoreCase(classAttr);
1794        }
1795
1796        // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations)
1797        boolean inClass = false;
1798        int start = 0;
1799        for (int i = 0; i < len; i++) {
1800            if (Character.isWhitespace(classAttr.charAt(i))) {
1801                if (inClass) {
1802                    // white space ends a class name, compare it with the requested one, ignore case
1803                    if (i - start == wantLen && classAttr.regionMatches(true, start, className, 0, wantLen)) {
1804                        return true;
1805                    }
1806                    inClass = false;
1807                }
1808            } else {
1809                if (!inClass) {
1810                    // we're in a class name : keep the start of the substring
1811                    inClass = true;
1812                    start = i;
1813                }
1814            }
1815        }
1816
1817        // check the last entry
1818        if (inClass && len - start == wantLen) {
1819            return classAttr.regionMatches(true, start, className, 0, wantLen);
1820        }
1821
1822        return false;
1823    }
1824
1825    /**
1826     Add a class name to this element's {@code class} attribute.
1827     @param className class name to add
1828     @return this element
1829     */
1830    public Element addClass(String className) {
1831        Validate.notNull(className);
1832
1833        Set<String> classes = classNames();
1834        classes.add(className);
1835        classNames(classes);
1836
1837        return this;
1838    }
1839
1840    /**
1841     Remove a class name from this element's {@code class} attribute.
1842     @param className class name to remove
1843     @return this element
1844     */
1845    public Element removeClass(String className) {
1846        Validate.notNull(className);
1847
1848        Set<String> classes = classNames();
1849        classes.remove(className);
1850        classNames(classes);
1851
1852        return this;
1853    }
1854
1855    /**
1856     Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it.
1857     @param className class name to toggle
1858     @return this element
1859     */
1860    public Element toggleClass(String className) {
1861        Validate.notNull(className);
1862
1863        Set<String> classes = classNames();
1864        if (classes.contains(className))
1865            classes.remove(className);
1866        else
1867            classes.add(className);
1868        classNames(classes);
1869
1870        return this;
1871    }
1872
1873    /**
1874     * Get the value of a form element (input, textarea, etc).
1875     * @return the value of the form element, or empty string if not set.
1876     */
1877    public String val() {
1878        if (elementIs("textarea", NamespaceHtml))
1879            return text();
1880        else
1881            return attr("value");
1882    }
1883
1884    /**
1885     * Set the value of a form element (input, textarea, etc).
1886     * @param value value to set
1887     * @return this element (for chaining)
1888     */
1889    public Element val(String value) {
1890        if (elementIs("textarea", NamespaceHtml))
1891            text(value);
1892        else
1893            attr("value", value);
1894        return this;
1895    }
1896
1897    /**
1898     Get the source range (start and end positions) of the end (closing) tag for this Element. Position tracking must be
1899     enabled prior to parsing the content.
1900     @return the range of the closing tag for this element, or {@code untracked} if its range was not tracked.
1901     @see org.jsoup.parser.Parser#setTrackPosition(boolean)
1902     @see Node#sourceRange()
1903     @see Range#isImplicit()
1904     @since 1.15.2
1905     */
1906    public Range endSourceRange() {
1907        return Range.of(this, false);
1908    }
1909
1910    @Override
1911    void outerHtmlHead(final QuietAppendable accum, Document.OutputSettings out) {
1912        String tagName = safeTagName(out.syntax());
1913        accum.append('<').append(tagName);
1914        if (attributes != null) attributes.html(accum, out);
1915
1916        if (childNodes.isEmpty()) {
1917            boolean xmlMode = out.syntax() == xml || !tag.namespace().equals(NamespaceHtml);
1918            if (xmlMode && (tag.is(Tag.SeenSelfClose) || (tag.isKnownTag() && (tag.isEmpty() || tag.isSelfClosing())))) {
1919                accum.append(" />");
1920            } else if (!xmlMode && tag.isEmpty()) { // html void element
1921                accum.append('>');
1922            } else {
1923                accum.append("></").append(tagName).append('>');
1924            }
1925        } else {
1926            accum.append('>');
1927        }
1928    }
1929
1930    @Override
1931    void outerHtmlTail(QuietAppendable accum, Document.OutputSettings out) {
1932        if (!childNodes.isEmpty())
1933            accum.append("</").append(safeTagName(out.syntax())).append('>');
1934        // if empty, we have already closed in htmlHead
1935    }
1936
1937    /* If XML syntax, normalizes < to _ in tag name. */
1938    @Nullable private String safeTagName(Document.OutputSettings.Syntax syntax) {
1939        return syntax == xml ? Normalizer.xmlSafeTagName(tagName()) : tagName();
1940    }
1941
1942    /**
1943     * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return
1944     * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.)
1945     *
1946     * @return String of HTML.
1947     * @see #outerHtml()
1948     */
1949    public String html() {
1950        StringBuilder sb = StringUtil.borrowBuilder();
1951        html(sb);
1952        String html = StringUtil.releaseBuilder(sb);
1953        return NodeUtils.outputSettings(this).prettyPrint() ? html.trim() : html;
1954    }
1955
1956    @Override
1957    public <T extends Appendable> T html(T accum) {
1958        Node child = firstChild();
1959        if (child != null) {
1960            Printer printer = Printer.printerFor(child, QuietAppendable.wrap(accum));
1961            while (child != null) {
1962                printer.traverse(child);
1963                child = child.nextSibling();
1964            }
1965        }
1966        return accum;
1967    }
1968
1969    /**
1970     * Set this element's inner HTML. Clears the existing HTML first.
1971     * @param html HTML to parse and set into this element
1972     * @return this element
1973     * @see #append(String)
1974     */
1975    public Element html(String html) {
1976        empty();
1977        append(html);
1978        return this;
1979    }
1980
1981    @Override
1982    public Element clone() {
1983        return (Element) super.clone();
1984    }
1985
1986    @Override
1987    public Element shallowClone() {
1988        // simpler than implementing a clone version with no child copy
1989        String baseUri = baseUri();
1990        if (baseUri.isEmpty()) baseUri = null; // saves setting a blank internal attribute
1991        return new Element(tag, baseUri, attributes == null ? null : attributes.clone());
1992    }
1993
1994    @Override
1995    protected Element doClone(@Nullable Node parent) {
1996        Element clone = (Element) super.doClone(parent);
1997        clone.childNodes = new NodeList(childNodes.size());
1998        clone.childNodes.addAll(childNodes); // the children then get iterated and cloned in Node.clone
1999        if (attributes != null) {
2000            clone.attributes = attributes.clone();
2001            // clear any cached children
2002            clone.attributes.userData(childElsKey, null);
2003        }
2004
2005        return clone;
2006    }
2007
2008    // overrides of Node for call chaining
2009    @Override
2010    public Element clearAttributes() {
2011        if (attributes != null) {
2012            super.clearAttributes(); // keeps internal attributes via iterator
2013            if (attributes.size() == 0)
2014                attributes = null; // only remove entirely if no internal attributes
2015        }
2016
2017        return this;
2018    }
2019
2020    @Override
2021    public Element removeAttr(String attributeKey) {
2022        return (Element) super.removeAttr(attributeKey);
2023    }
2024
2025    @Override
2026    public Element root() {
2027        return (Element) super.root(); // probably a document, but always at least an element
2028    }
2029
2030    @Override
2031    public Element traverse(NodeVisitor nodeVisitor) {
2032        return (Element) super.traverse(nodeVisitor);
2033    }
2034
2035    @Override
2036    public Element forEachNode(Consumer<? super Node> action) {
2037        return (Element) super.forEachNode(action);
2038    }
2039
2040    /**
2041     Perform the supplied action on this Element and each of its descendant Elements, during a depth-first traversal.
2042     Elements may be inspected, changed, added, replaced, or removed.
2043     @param action the function to perform on the element
2044     @see Node#forEachNode(Consumer)
2045     */
2046    @Override
2047    public void forEach(Consumer<? super Element> action) {
2048        stream().forEach(action);
2049    }
2050
2051    /**
2052     Returns an Iterator that iterates this Element and each of its descendant Elements, in document order.
2053     @return an Iterator
2054     */
2055    @Override
2056    public Iterator<Element> iterator() {
2057        return new NodeIterator<>(this, Element.class);
2058    }
2059
2060    @Override
2061    public Element filter(NodeFilter nodeFilter) {
2062        return  (Element) super.filter(nodeFilter);
2063    }
2064
2065    static final class NodeList extends ArrayList<Node> {
2066        public NodeList(int size) {
2067            super(size);
2068        }
2069
2070        int modCount() {
2071            return this.modCount;
2072        }
2073    }
2074}