001package org.jsoup.nodes;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.internal.Normalizer;
005import org.jsoup.internal.StringUtil;
006import org.jsoup.parser.ParseSettings;
007import org.jsoup.parser.Parser;
008import org.jsoup.parser.Tag;
009import org.jsoup.parser.TokenQueue;
010import org.jsoup.select.Collector;
011import org.jsoup.select.Elements;
012import org.jsoup.select.Evaluator;
013import org.jsoup.select.NodeFilter;
014import org.jsoup.select.NodeTraversor;
015import org.jsoup.select.NodeVisitor;
016import org.jsoup.select.QueryParser;
017import org.jsoup.select.Selector;
018import org.jspecify.annotations.Nullable;
019
020import java.io.IOException;
021import java.lang.ref.WeakReference;
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.Collection;
025import java.util.Collections;
026import java.util.Iterator;
027import java.util.LinkedHashSet;
028import java.util.List;
029import java.util.Map;
030import java.util.Set;
031import java.util.concurrent.atomic.AtomicBoolean;
032import java.util.function.Consumer;
033import java.util.regex.Pattern;
034import java.util.regex.PatternSyntaxException;
035import java.util.stream.Collectors;
036import java.util.stream.Stream;
037
038import static org.jsoup.internal.Normalizer.normalize;
039import static org.jsoup.nodes.Document.OutputSettings.Syntax.xml;
040import static org.jsoup.nodes.TextNode.lastCharIsWhitespace;
041import static org.jsoup.parser.Parser.NamespaceHtml;
042import static org.jsoup.parser.TokenQueue.escapeCssIdentifier;
043
044/**
045 An HTML Element consists of a tag name, attributes, and child nodes (including text nodes and other elements).
046 <p>
047 From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
048*/
049public class Element extends Node implements Iterable<Element> {
    // Shared immutable empty list, returned by childElementsList() when this element has no child nodes.
    private static final List<Element> EmptyChildren = Collections.emptyList();
    // Shared placeholder child list assigned in the constructor; ensureChildNodes() swaps it for a real NodeList.
    private static final NodeList EmptyNodeList = new NodeList(0);
    // Matches runs of whitespace. Not used in the visible part of this file - presumably for splitting class names.
    private static final Pattern ClassSplit = Pattern.compile("\\s+");
    // Internal attribute key under which doSetBaseUri() stores this element's base URI.
    private static final String BaseUriKey = Attributes.internalKey("baseUri");
    Tag tag; // the tag of this element; the constructor and tag(Tag) reject null
    NodeList childNodes; // identical to EmptyNodeList until ensureChildNodes() creates a real list
    @Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null
057
058    /**
059     * Create a new, standalone element, in the specified namespace.
060     * @param tag tag name
061     * @param namespace namespace for this element
062     */
063    public Element(String tag, String namespace) {
064        this(Tag.valueOf(tag, namespace, ParseSettings.preserveCase), null);
065    }
066
067    /**
068     * Create a new, standalone element, in the HTML namespace.
069     * @param tag tag name
070     * @see #Element(String tag, String namespace)
071     */
072    public Element(String tag) {
073        this(tag, Parser.NamespaceHtml);
074    }
075
076    /**
077     * Create a new, standalone Element. (Standalone in that it has no parent.)
078     *
079     * @param tag tag of this element
080     * @param baseUri the base URI (optional, may be null to inherit from parent, or "" to clear parent's)
081     * @param attributes initial attributes (optional, may be null)
082     * @see #appendChild(Node)
083     * @see #appendElement(String)
084     */
085    public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes) {
086        Validate.notNull(tag);
087        childNodes = EmptyNodeList;
088        this.attributes = attributes;
089        this.tag = tag;
090        if (baseUri != null)
091            this.setBaseUri(baseUri);
092    }
093
094    /**
095     * Create a new Element from a Tag and a base URI.
096     *
097     * @param tag element tag
098     * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any.
099     * @see Tag#valueOf(String, ParseSettings)
100     */
101    public Element(Tag tag, @Nullable String baseUri) {
102        this(tag, baseUri, null);
103    }
104
105    /**
106     Internal test to check if a nodelist object has been created.
107     */
108    protected boolean hasChildNodes() {
109        return childNodes != EmptyNodeList;
110    }
111
112    @Override protected List<Node> ensureChildNodes() {
113        if (childNodes == EmptyNodeList) {
114            childNodes = new NodeList(4);
115        }
116        return childNodes;
117    }
118
119    @Override
120    protected boolean hasAttributes() {
121        return attributes != null;
122    }
123
124    @Override
125    public Attributes attributes() {
126        if (attributes == null) // not using hasAttributes, as doesn't clear warning
127            attributes = new Attributes();
128        return attributes;
129    }
130
131    @Override
132    public String baseUri() {
133        return searchUpForAttribute(this, BaseUriKey);
134    }
135
136    private static String searchUpForAttribute(final Element start, final String key) {
137        Element el = start;
138        while (el != null) {
139            if (el.attributes != null && el.attributes.hasKey(key))
140                return el.attributes.get(key);
141            el = el.parent();
142        }
143        return "";
144    }
145
146    @Override
147    protected void doSetBaseUri(String baseUri) {
148        attributes().put(BaseUriKey, baseUri);
149    }
150
151    @Override
152    public int childNodeSize() {
153        return childNodes.size();
154    }
155
156    @Override
157    public String nodeName() {
158        return tag.getName();
159    }
160
161    /**
162     * Get the name of the tag for this element. E.g. {@code div}. If you are using {@link ParseSettings#preserveCase
163     * case preserving parsing}, this will return the source's original case.
164     *
165     * @return the tag name
166     */
167    public String tagName() {
168        return tag.getName();
169    }
170
171    /**
172     * Get the normalized name of this Element's tag. This will always be the lower-cased version of the tag, regardless
173     * of the tag case preserving setting of the parser. For e.g., {@code <DIV>} and {@code <div>} both have a
174     * normal name of {@code div}.
175     * @return normal name
176     */
177    @Override
178    public String normalName() {
179        return tag.normalName();
180    }
181
182    /**
183     Test if this Element has the specified normalized name, and is in the specified namespace.
184     * @param normalName a normalized element name (e.g. {@code div}).
185     * @param namespace the namespace
186     * @return true if the element's normal name matches exactly, and is in the specified namespace
187     * @since 1.17.2
188     */
189    public boolean elementIs(String normalName, String namespace) {
190        return tag.normalName().equals(normalName) && tag.namespace().equals(namespace);
191    }
192
193    /**
194     * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
195     * {@code el.tagName("div");}.
196     *
197     * @param tagName new tag name for this element
198     * @return this element, for chaining
199     * @see Elements#tagName(String)
200     */
201    public Element tagName(String tagName) {
202        return tagName(tagName, tag.namespace());
203    }
204
205    /**
206     * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
207     * {@code el.tagName("div");}.
208     *
209     * @param tagName new tag name for this element
210     * @param namespace the new namespace for this element
211     * @return this element, for chaining
212     * @see Elements#tagName(String)
213     */
214    public Element tagName(String tagName, String namespace) {
215        Validate.notEmptyParam(tagName, "tagName");
216        Validate.notEmptyParam(namespace, "namespace");
217        Parser parser = NodeUtils.parser(this);
218        tag = parser.tagSet().valueOf(tagName, namespace, parser.settings()); // maintains the case option of the original parse
219        return this;
220    }
221
222    /**
223     * Get the Tag for this element.
224     *
225     * @return the tag object
226     */
227    public Tag tag() {
228        return tag;
229    }
230
231    /**
232     Change the Tag of this element.
233     @param tag the new tag
234     @return this element, for chaining
235     @since 1.20.1
236     */
237    public Element tag(Tag tag) {
238        Validate.notNull(tag);
239        this.tag = tag;
240        return this;
241    }
242
243    /**
244     * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element
245     * {@code <span> == false}).
246     *
247     * @return true if block, false if not (and thus inline)
248     */
249    public boolean isBlock() {
250        return tag.isBlock();
251    }
252
253    /**
254     * Get the {@code id} attribute of this element.
255     *
256     * @return The id attribute, if present, or an empty string if not.
257     */
258    public String id() {
259        return attributes != null ? attributes.getIgnoreCase("id") :"";
260    }
261
262    /**
263     Set the {@code id} attribute of this element.
264     @param id the ID value to use
265     @return this Element, for chaining
266     */
267    public Element id(String id) {
268        Validate.notNull(id);
269        attr("id", id);
270        return this;
271    }
272
273    /**
274     * Set an attribute value on this element. If this element already has an attribute with the
275     * key, its value is updated; otherwise, a new attribute is added.
276     *
277     * @return this element
278     */
279    @Override public Element attr(String attributeKey, String attributeValue) {
280        super.attr(attributeKey, attributeValue);
281        return this;
282    }
283
284    /**
285     * Set a boolean attribute value on this element. Setting to <code>true</code> sets the attribute value to "" and
286     * marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the attribute
287     * with the same key if it exists.
288     *
289     * @param attributeKey the attribute key
290     * @param attributeValue the attribute value
291     *
292     * @return this element
293     */
294    public Element attr(String attributeKey, boolean attributeValue) {
295        attributes().put(attributeKey, attributeValue);
296        return this;
297    }
298
299    /**
300     Get an Attribute by key. Changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc
301     will cascade back to this Element.
302     @param key the (case-sensitive) attribute key
303     @return the Attribute for this key, or null if not present.
304     @since 1.17.2
305     */
306    @Nullable public Attribute attribute(String key) {
307        return hasAttributes() ? attributes().attribute(key) : null;
308    }
309
310    /**
     * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
     * starting with "data-" is included in the dataset.
     * <p>
     * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
     * {@code package=jsoup, language=Java}.
316     * <p>
317     * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected
318     * in the other map.
319     * <p>
320     * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
321     * @return a map of {@code key=value} custom data attributes.
322     */
323    public Map<String, String> dataset() {
324        return attributes().dataset();
325    }
326
327    @Override @Nullable
328    public final Element parent() {
329        return (Element) parentNode;
330    }
331
332    /**
333     * Get this element's parent and ancestors, up to the document root.
334     * @return this element's stack of parents, starting with the closest first.
335     */
336    public Elements parents() {
337        Elements parents = new Elements();
338        Element parent = this.parent();
339        while (parent != null && !parent.nameIs("#root")) {
340            parents.add(parent);
341            parent = parent.parent();
342        }
343        return parents;
344    }
345
346    /**
347     * Get a child element of this element, by its 0-based index number.
348     * <p>
349     * Note that an element can have both mixed Nodes and Elements as children. This method inspects
350     * a filtered list of children that are elements, and the index is based on that filtered list.
351     * </p>
352     *
353     * @param index the index number of the element to retrieve
354     * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException}
355     * @see #childNode(int)
356     */
357    public Element child(int index) {
358        return childElementsList().get(index);
359    }
360
361    /**
362     * Get the number of child nodes of this element that are elements.
363     * <p>
     * This method works on the same filtered list as {@link #child(int)}. Use {@link #childNodes()} and {@link
     * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.)
366     * </p>
367     *
368     * @return the number of child nodes that are elements
369     * @see #children()
370     * @see #child(int)
371     */
372    public int childrenSize() {
373        return childElementsList().size();
374    }
375
376    /**
377     * Get this element's child elements.
378     * <p>
379     * This is effectively a filter on {@link #childNodes()} to get Element nodes.
380     * </p>
381     * @return child elements. If this element has no children, returns an empty list.
382     * @see #childNodes()
383     */
384    public Elements children() {
385        return new Elements(childElementsList());
386    }
387
388    /**
389     * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated.
390     * TODO - think about pulling this out as a helper as there are other shadow lists (like in Attributes) kept around.
391     * @return a list of child elements
392     */
393    List<Element> childElementsList() {
394        if (childNodeSize() == 0) return EmptyChildren; // short circuit creating empty
395        List<Element> children = cachedChildren();
396        if (children == null) {
397            children = filterNodes(Element.class);
398            stashChildren(children);
399        }
400        return children;
401    }
402
    // userData key under which stashChildren() stores a WeakReference to the cached list of child elements.
    private static final String childElsKey = "jsoup.childEls";
    // userData key under which stashChildren() stores the childNodes modCount taken when the cache was written.
    private static final String childElsMod = "jsoup.childElsMod";
405
406    /** returns the cached child els, if they exist, and the modcount of our childnodes matches the stashed modcount */
407    private @Nullable List<Element> cachedChildren() {
408        Map<String, Object> userData = attributes().userData();
409        //noinspection unchecked
410        WeakReference<List<Element>> ref = (WeakReference<List<Element>>) userData.get(childElsKey);
411        if (ref != null) {
412            List<Element> els = ref.get();
413            if (els != null) {
414                Integer modCount = (Integer) userData.get(childElsMod);
415                if (modCount != null && modCount == childNodes.modCount())
416                    return els;
417            }
418        }
419        return null;
420    }
421
422    /** caches the child els into the Attribute user data. */
423    private void stashChildren(List<Element> els) {
424        Map<String, Object> userData = attributes().userData();
425        WeakReference<List<Element>> ref = new WeakReference<>(els);
426        userData.put(childElsKey, ref);
427        userData.put(childElsMod, childNodes.modCount());
428    }
429
430    /**
431     Returns a Stream of this Element and all of its descendant Elements. The stream has document order.
432     @return a stream of this element and its descendants.
433     @see #nodeStream()
434     @since 1.17.1
435     */
436    public Stream<Element> stream() {
437        return NodeUtils.stream(this, Element.class);
438    }
439
440    private <T> List<T> filterNodes(Class<T> clazz) {
441        return childNodes.stream()
442                .filter(clazz::isInstance)
443                .map(clazz::cast)
444                .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
445    }
446
447    /**
448     * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated.
449     * <p>
     * This is effectively a filter on {@link #childNodes()} to get Text nodes.
     * </p>
     * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected:
     * <ul>
     *     <li>{@code p.text()} = {@code "One Two Three Four"}</li>
     *     <li>{@code p.ownText()} = {@code "One Three Four"}</li>
     *     <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
     *     <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
     *     <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
     * </ul>
     * @return child text nodes. If this element has no text nodes, returns an
     * empty list.
462     */
463    public List<TextNode> textNodes() {
464        return filterNodes(TextNode.class);
465    }
466
467    /**
468     * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
469     * <p>
470     * This is effectively a filter on {@link #childNodes()} to get Data nodes.
471     * </p>
472     * @return child data nodes. If this element has no data nodes, returns an
473     * empty list.
474     * @see #data()
475     */
476    public List<DataNode> dataNodes() {
477        return filterNodes(DataNode.class);
478    }
479
480    /**
481     * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
482     * may include this element, or any of its children.
483     * <p>This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
484     * multiple filters can be combined, e.g.:</p>
485     * <ul>
486     * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)
487     * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)
488     * </ul>
489     * <p>See the query syntax documentation in {@link org.jsoup.select.Selector}.</p>
490     * <p>Also known as {@code querySelectorAll()} in the Web DOM.</p>
491     *
492     * @param cssQuery a {@link Selector} CSS-like query
493     * @return an {@link Elements} list containing elements that match the query (empty if none match)
494     * @see Selector selector query syntax
495     * @see #select(Evaluator)
496     * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
497     */
498    public Elements select(String cssQuery) {
499        return Selector.select(cssQuery, this);
500    }
501
502    /**
503     * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but
504     * may be useful if you are running the same query many times (on many documents) and want to save the overhead of
505     * repeatedly parsing the CSS query.
506     * @param evaluator an element evaluator
507     * @return an {@link Elements} list containing elements that match the query (empty if none match)
508     * @see QueryParser#parse(String)
509     */
510    public Elements select(Evaluator evaluator) {
511        return Selector.select(evaluator, this);
512    }
513
514    /**
515     Selects elements from the given root that match the specified {@link Selector} CSS query, with this element as the
516     starting context, and returns them as a lazy Stream. Matched elements may include this element, or any of its
517     children.
518     <p>
519     Unlike {@link #select(String query)}, which returns a complete list of all matching elements, this method returns a
520     {@link Stream} that processes elements lazily as they are needed. The stream operates in a "pull" model — elements
521     are fetched from the root as the stream is traversed. You can use standard {@code Stream} operations such as
522     {@code filter}, {@code map}, or {@code findFirst} to process elements on demand.
523     </p>
524
525     @param cssQuery a {@link Selector} CSS-like query
526     @return a {@link Stream} containing elements that match the query (empty if none match)
527     @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
528     @see Selector selector query syntax
529     @see QueryParser#parse(String)
530     @since 1.19.1
531     */
532    public Stream<Element> selectStream(String cssQuery) {
533        return Selector.selectStream(cssQuery, this);
534    }
535
536    /**
537     Find a Stream of elements that match the supplied Evaluator.
538
539     @param evaluator an element Evaluator
540     @return a {@link Stream} containing elements that match the query (empty if none match)
541     @since 1.19.1
542     */
543    public Stream<Element> selectStream(Evaluator evaluator) {
544        return Selector.selectStream(evaluator, this);
545    }
546
547    /**
548     * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
549     * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
550     * execution stops on the first hit.</p>
551     * <p>Also known as {@code querySelector()} in the Web DOM.</p>
     * @param cssQuery a {@link Selector} CSS-like query
553     * @return the first matching element, or <b>{@code null}</b> if there is no match.
554     * @see #expectFirst(String)
555     */
556    public @Nullable Element selectFirst(String cssQuery) {
557        return Selector.selectFirst(cssQuery, this);
558    }
559
560    /**
561     * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or
562     * {@code null} if none match.
563     *
564     * @param evaluator an element evaluator
565     * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none
566     * match.
567     */
568    public @Nullable Element selectFirst(Evaluator evaluator) {
569        return Collector.findFirst(evaluator, this);
570    }
571
572    /**
573     Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}. This
574     is useful if you want to simply abort processing on a failed match.
575     @param cssQuery a {@link Selector} CSS-like query
576     @return the first matching element
577     @throws IllegalArgumentException if no match is found
578     @since 1.15.2
579     */
580    public Element expectFirst(String cssQuery) {
581        return (Element) Validate.ensureNotNull(
582            Selector.selectFirst(cssQuery, this),
583            parent() != null ?
584                "No elements matched the query '%s' on element '%s'.":
585                "No elements matched the query '%s' in the document."
586            , cssQuery, this.tagName()
587        );
588    }
589
590    /**
     * Checks if this element matches the given {@link Selector} CSS query. Also known as {@code matches()} in the Web
     * DOM.
593     *
594     * @param cssQuery a {@link Selector} CSS query
595     * @return if this element matches the query
596     */
597    public boolean is(String cssQuery) {
598        return is(QueryParser.parse(cssQuery));
599    }
600
601    /**
602     * Check if this element matches the given evaluator.
603     * @param evaluator an element evaluator
604     * @return if this element matches
605     */
606    public boolean is(Evaluator evaluator) {
607        return evaluator.matches(this.root(), this);
608    }
609
610    /**
611     * Find the closest element up the tree of parents that matches the specified CSS query. Will return itself, an
612     * ancestor, or {@code null} if there is no such matching element.
613     * @param cssQuery a {@link Selector} CSS query
     * @return the closest ancestor element (possibly itself) that matches the provided CSS query. {@code null} if not
     * found.
616     */
617    public @Nullable Element closest(String cssQuery) {
618        return closest(QueryParser.parse(cssQuery));
619    }
620
621    /**
622     * Find the closest element up the tree of parents that matches the specified evaluator. Will return itself, an
623     * ancestor, or {@code null} if there is no such matching element.
624     * @param evaluator a query evaluator
625     * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not
626     * found.
627     */
628    public @Nullable Element closest(Evaluator evaluator) {
629        Validate.notNull(evaluator);
630        Element el = this;
631        final Element root = root();
632        do {
633            if (evaluator.matches(root, el))
634                return el;
635            el = el.parent();
636        } while (el != null);
637        return null;
638    }
639
640    /**
641     Find Elements that match the supplied {@index XPath} expression.
642     <p>Note that for convenience of writing the Xpath expression, namespaces are disabled, and queries can be
643     expressed using the element's local name only.</p>
     <p>By default, XPath 1.0 expressions are supported. If you would like to use XPath 2.0 or higher, you can provide an
645     alternate XPathFactory implementation:</p>
646     <ol>
647     <li>Add the implementation to your classpath. E.g. to use <a href="https://www.saxonica.com/products/products.xml">Saxon-HE</a>, add <a href="https://mvnrepository.com/artifact/net.sf.saxon/Saxon-HE">net.sf.saxon:Saxon-HE</a> to your build.</li>
648     <li>Set the system property <code>javax.xml.xpath.XPathFactory:jsoup</code> to the implementing classname. E.g.:<br>
649     <code>System.setProperty(W3CDom.XPathFactoryProperty, "net.sf.saxon.xpath.XPathFactoryImpl");</code>
650     </li>
651     </ol>
652
653     @param xpath XPath expression
654     @return matching elements, or an empty list if none match.
655     @see #selectXpath(String, Class)
656     @since 1.14.3
657     */
658    public Elements selectXpath(String xpath) {
659        return new Elements(NodeUtils.selectXpath(xpath, this, Element.class));
660    }
661
662    /**
663     Find Nodes that match the supplied XPath expression.
664     <p>For example, to select TextNodes under {@code p} elements: </p>
665     <pre>List&lt;TextNode&gt; textNodes = doc.selectXpath("//body//p//text()", TextNode.class);</pre>
666     <p>Note that in the jsoup DOM, Attribute objects are not Nodes. To directly select attribute values, do something
667     like:</p>
668     <pre>List&lt;String&gt; hrefs = doc.selectXpath("//a").eachAttr("href");</pre>
669     @param xpath XPath expression
670     @param nodeType the jsoup node type to return
671     @see #selectXpath(String)
672     @return a list of matching nodes
673     @since 1.14.3
674     */
675    public <T extends Node> List<T> selectXpath(String xpath, Class<T> nodeType) {
676        return NodeUtils.selectXpath(xpath, this, nodeType);
677    }
678
679    /**
680     * Insert a node to the end of this Element's children. The incoming node will be re-parented.
681     *
682     * @param child node to add.
683     * @return this Element, for chaining
684     * @see #prependChild(Node)
685     * @see #insertChildren(int, Collection)
686     */
687    public Element appendChild(Node child) {
688        Validate.notNull(child);
689
690        // was - Node#addChildren(child). short-circuits an array create and a loop.
691        reparentChild(child);
692        ensureChildNodes();
693        childNodes.add(child);
694        child.setSiblingIndex(childNodes.size() - 1);
695        return this;
696    }
697
698    /**
699     Insert the given nodes to the end of this Element's children.
700
701     @param children nodes to add
702     @return this Element, for chaining
703     @see #insertChildren(int, Collection)
704     */
705    public Element appendChildren(Collection<? extends Node> children) {
706        insertChildren(-1, children);
707        return this;
708    }
709
710    /**
711     * Add this element to the supplied parent element, as its next child.
712     *
713     * @param parent element to which this element will be appended
714     * @return this element, so that you can continue modifying the element
715     */
716    public Element appendTo(Element parent) {
717        Validate.notNull(parent);
718        parent.appendChild(this);
719        return this;
720    }
721
722    /**
723     * Add a node to the start of this element's children.
724     *
725     * @param child node to add.
726     * @return this element, so that you can add more child nodes or elements.
727     */
728    public Element prependChild(Node child) {
729        Validate.notNull(child);
730
731        addChildren(0, child);
732        return this;
733    }
734
735    /**
736     Insert the given nodes to the start of this Element's children.
737
738     @param children nodes to add
739     @return this Element, for chaining
740     @see #insertChildren(int, Collection)
741     */
742    public Element prependChildren(Collection<? extends Node> children) {
743        insertChildren(0, children);
744        return this;
745    }
746
747
748    /**
749     * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the
750     * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first.
751     *
752     * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
753     * end
754     * @param children child nodes to insert
755     * @return this element, for chaining.
756     */
757    public Element insertChildren(int index, Collection<? extends Node> children) {
758        Validate.notNull(children, "Children collection to be inserted must not be null.");
759        int currentSize = childNodeSize();
760        if (index < 0) index += currentSize +1; // roll around
761        Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds.");
762
763        ArrayList<Node> nodes = new ArrayList<>(children);
764        Node[] nodeArray = nodes.toArray(new Node[0]);
765        addChildren(index, nodeArray);
766        return this;
767    }
768
769    /**
770     * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the
771     * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first.
772     *
773     * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
774     * end
775     * @param children child nodes to insert
776     * @return this element, for chaining.
777     */
778    public Element insertChildren(int index, Node... children) {
779        Validate.notNull(children, "Children collection to be inserted must not be null.");
780        int currentSize = childNodeSize();
781        if (index < 0) index += currentSize +1; // roll around
782        Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds.");
783
784        addChildren(index, children);
785        return this;
786    }
787
788    /**
789     * Create a new element by tag name, and add it as this Element's last child.
790     *
791     * @param tagName the name of the tag (e.g. {@code div}).
792     * @return the new element, to allow you to add content to it, e.g.:
793     *  {@code parent.appendElement("h1").attr("id", "header").text("Welcome");}
794     */
795    public Element appendElement(String tagName) {
796        return appendElement(tagName, tag.namespace());
797    }
798
799    /**
800     * Create a new element by tag name and namespace, add it as this Element's last child.
801     *
802     * @param tagName the name of the tag (e.g. {@code div}).
803     * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml})
804     * @return the new element, in the specified namespace
805     */
806    public Element appendElement(String tagName, String namespace) {
807        Parser parser = NodeUtils.parser(this);
808        Element child = new Element(parser.tagSet().valueOf(tagName, namespace, parser.settings()), baseUri());
809        appendChild(child);
810        return child;
811    }
812
813    /**
814     * Create a new element by tag name, and add it as this Element's first child.
815     *
816     * @param tagName the name of the tag (e.g. {@code div}).
817     * @return the new element, to allow you to add content to it, e.g.:
818     *  {@code parent.prependElement("h1").attr("id", "header").text("Welcome");}
819     */
820    public Element prependElement(String tagName) {
821        return prependElement(tagName, tag.namespace());
822    }
823
824    /**
825     * Create a new element by tag name and namespace, and add it as this Element's first child.
826     *
827     * @param tagName the name of the tag (e.g. {@code div}).
828     * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml})
829     * @return the new element, in the specified namespace
830     */
831    public Element prependElement(String tagName, String namespace) {
832        Parser parser = NodeUtils.parser(this);
833        Element child = new Element(parser.tagSet().valueOf(tagName, namespace, parser.settings()), baseUri());
834        prependChild(child);
835        return child;
836    }
837
838    /**
839     * Create and append a new TextNode to this element.
840     *
841     * @param text the (un-encoded) text to add
842     * @return this element
843     */
844    public Element appendText(String text) {
845        Validate.notNull(text);
846        TextNode node = new TextNode(text);
847        appendChild(node);
848        return this;
849    }
850
851    /**
852     * Create and prepend a new TextNode to this element.
853     *
854     * @param text the decoded text to add
855     * @return this element
856     */
857    public Element prependText(String text) {
858        Validate.notNull(text);
859        TextNode node = new TextNode(text);
860        prependChild(node);
861        return this;
862    }
863
864    /**
865     * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children.
866     * @param html HTML to add inside this element, after the existing HTML
867     * @return this element
868     * @see #html(String)
869     */
870    public Element append(String html) {
871        Validate.notNull(html);
872        List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri());
873        addChildren(nodes.toArray(new Node[0]));
874        return this;
875    }
876
877    /**
878     * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children.
879     * @param html HTML to add inside this element, before the existing HTML
880     * @return this element
881     * @see #html(String)
882     */
883    public Element prepend(String html) {
884        Validate.notNull(html);
885        List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri());
886        addChildren(0, nodes.toArray(new Node[0]));
887        return this;
888    }
889
890    /**
891     * Insert the specified HTML into the DOM before this element (as a preceding sibling).
892     *
893     * @param html HTML to add before this element
894     * @return this element, for chaining
895     * @see #after(String)
896     */
897    @Override
898    public Element before(String html) {
899        return (Element) super.before(html);
900    }
901
902    /**
903     * Insert the specified node into the DOM before this node (as a preceding sibling).
904     * @param node to add before this element
905     * @return this Element, for chaining
906     * @see #after(Node)
907     */
908    @Override
909    public Element before(Node node) {
910        return (Element) super.before(node);
911    }
912
913    /**
914     * Insert the specified HTML into the DOM after this element (as a following sibling).
915     *
916     * @param html HTML to add after this element
917     * @return this element, for chaining
918     * @see #before(String)
919     */
920    @Override
921    public Element after(String html) {
922        return (Element) super.after(html);
923    }
924
925    /**
926     * Insert the specified node into the DOM after this node (as a following sibling).
927     * @param node to add after this element
928     * @return this element, for chaining
929     * @see #before(Node)
930     */
931    @Override
932    public Element after(Node node) {
933        return (Element) super.after(node);
934    }
935
936    /**
937     * Remove all the element's child nodes. Any attributes are left as-is. Each child node has its parent set to
938     * {@code null}.
939     * @return this element
940     */
941    @Override
942    public Element empty() {
943        // Detach each of the children -> parent links:
944        for (Node child : childNodes) {
945            child.parentNode = null;
946        }
947        childNodes.clear();
948        return this;
949    }
950
951    /**
952     * Wrap the supplied HTML around this element.
953     *
954     * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
955     * @return this element, for chaining.
956     */
957    @Override
958    public Element wrap(String html) {
959        return (Element) super.wrap(html);
960    }
961
962    /**
963     Gets an #id selector for this element, if it has a unique ID. Otherwise, returns an empty string.
964
965     @param ownerDoc the document that owns this element, if there is one
966     */
967    private String uniqueIdSelector(@Nullable Document ownerDoc) {
968        String id = id();
969        if (!id.isEmpty()) { // check if the ID is unique and matches this
970            String idSel = "#" + escapeCssIdentifier(id);
971            if (ownerDoc != null) {
972                Elements els = ownerDoc.select(idSel);
973                if (els.size() == 1 && els.get(0) == this) return idSel;
974            } else {
975                return idSel;
976            }
977        }
978        return EmptyString;
979    }
980
981    /**
982     Get a CSS selector that will uniquely select this element.
983     <p>
984     If the element has an ID, returns #id; otherwise returns the parent (if any) CSS selector, followed by
985     {@literal '>'}, followed by a unique selector for the element (tag.class.class:nth-child(n)).
986     </p>
987
988     @return the CSS Path that can be used to retrieve the element in a selector.
989     */
990    public String cssSelector() {
991        Document ownerDoc = ownerDocument();
992        String idSel = uniqueIdSelector(ownerDoc);
993        if (!idSel.isEmpty()) return idSel;
994
995        // No unique ID, work up the parent stack and find either a unique ID to hang from, or just a GP > Parent > Child chain
996        StringBuilder selector = StringUtil.borrowBuilder();
997        Element el = this;
998        while (el != null && !(el instanceof Document)) {
999            idSel = el.uniqueIdSelector(ownerDoc);
1000            if (!idSel.isEmpty()) {
1001                selector.insert(0, idSel);
1002                break; // found a unique ID to use as ancestor; stop
1003            }
1004            selector.insert(0, el.cssSelectorComponent());
1005            el = el.parent();
1006        }
1007        return StringUtil.releaseBuilder(selector);
1008    }
1009
1010    private String cssSelectorComponent() {
1011        // Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag
1012        String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|");
1013        StringBuilder selector = StringUtil.borrowBuilder().append(tagName);
1014        String classes = classNames().stream().map(TokenQueue::escapeCssIdentifier)
1015                .collect(StringUtil.joining("."));
1016        if (!classes.isEmpty())
1017            selector.append('.').append(classes);
1018
1019        if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node
1020            return StringUtil.releaseBuilder(selector);
1021
1022        selector.insert(0, " > ");
1023        if (parent().select(selector.toString()).size() > 1)
1024            selector.append(String.format(
1025                ":nth-child(%d)", elementSiblingIndex() + 1));
1026
1027        return StringUtil.releaseBuilder(selector);
1028    }
1029
1030    /**
1031     * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling
1032     * of itself, so will not be included in the returned list.
1033     * @return sibling elements
1034     */
1035    public Elements siblingElements() {
1036        if (parentNode == null)
1037            return new Elements(0);
1038
1039        List<Element> elements = parent().childElementsList();
1040        Elements siblings = new Elements(elements.size() - 1);
1041        for (Element el: elements)
1042            if (el != this)
1043                siblings.add(el);
1044        return siblings;
1045    }
1046
1047    /**
1048     * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s,
1049     * the {@code nextElementSibling} of the first {@code p} is the second {@code p}.
1050     * <p>
1051     * This is similar to {@link #nextSibling()}, but specifically finds only Elements
1052     * </p>
1053     * @return the next element, or null if there is no next element
1054     * @see #previousElementSibling()
1055     */
1056    public @Nullable Element nextElementSibling() {
1057        Node next = this;
1058        while ((next = next.nextSibling()) != null) {
1059            if (next instanceof Element) return (Element) next;
1060        }
1061        return null;
1062    }
1063
1064    /**
1065     * Get each of the sibling elements that come after this element.
1066     *
1067     * @return each of the element siblings after this element, or an empty list if there are no next sibling elements
1068     */
1069    public Elements nextElementSiblings() {
1070        return nextElementSiblings(true);
1071    }
1072
1073    /**
1074     * Gets the previous element sibling of this element.
1075     * @return the previous element, or null if there is no previous element
1076     * @see #nextElementSibling()
1077     */
1078    public @Nullable Element previousElementSibling() {
1079        Node prev = this;
1080        while ((prev = prev.previousSibling()) != null) {
1081            if (prev instanceof Element) return (Element) prev;
1082        }
1083        return null;
1084    }
1085
1086    /**
1087     * Get each of the element siblings before this element.
1088     *
1089     * @return the previous element siblings, or an empty list if there are none.
1090     */
1091    public Elements previousElementSiblings() {
1092        return nextElementSiblings(false);
1093    }
1094
1095    private Elements nextElementSiblings(boolean next) {
1096        Elements els = new Elements();
1097        if (parentNode == null)
1098            return  els;
1099        els.add(this);
1100        return next ?  els.nextAll() : els.prevAll();
1101    }
1102
1103    /**
1104     * Gets the first Element sibling of this element. That may be this element.
1105     * @return the first sibling that is an element (aka the parent's first element child)
1106     */
1107    public Element firstElementSibling() {
1108        if (parent() != null) {
1109            //noinspection DataFlowIssue (not nullable, would be this is no other sibs)
1110            return parent().firstElementChild();
1111        } else
1112            return this; // orphan is its own first sibling
1113    }
1114
1115    /**
1116     * Get the list index of this element in its element sibling list. I.e. if this is the first element
1117     * sibling, returns 0.
1118     * @return position in element sibling list
1119     */
1120    public int elementSiblingIndex() {
1121       if (parent() == null) return 0;
1122       return indexInList(this, parent().childElementsList());
1123    }
1124
1125    /**
1126     * Gets the last element sibling of this element. That may be this element.
1127     * @return the last sibling that is an element (aka the parent's last element child)
1128     */
1129    public Element lastElementSibling() {
1130        if (parent() != null) {
1131            //noinspection DataFlowIssue (not nullable, would be this if no other sibs)
1132            return parent().lastElementChild();
1133        } else
1134            return this;
1135    }
1136
1137    private static <E extends Element> int indexInList(Element search, List<E> elements) {
1138        final int size = elements.size();
1139        for (int i = 0; i < size; i++) {
1140            if (elements.get(i) == search)
1141                return i;
1142        }
1143        return 0;
1144    }
1145
1146    /**
1147     Gets the first child of this Element that is an Element, or {@code null} if there is none.
1148     @return the first Element child node, or null.
1149     @see #firstChild()
1150     @see #lastElementChild()
1151     @since 1.15.2
1152     */
1153    public @Nullable Element firstElementChild() {
1154        Node child = firstChild();
1155        while (child != null) {
1156            if (child instanceof Element) return (Element) child;
1157            child = child.nextSibling();
1158        }
1159        return null;
1160    }
1161
1162    /**
1163     Gets the last child of this Element that is an Element, or @{code null} if there is none.
1164     @return the last Element child node, or null.
1165     @see #lastChild()
1166     @see #firstElementChild()
1167     @since 1.15.2
1168     */
1169    public @Nullable Element lastElementChild() {
1170        Node child = lastChild();
1171        while (child != null) {
1172            if (child instanceof Element) return (Element) child;
1173            child = child.previousSibling();
1174        }
1175        return null;
1176    }
1177
1178    // DOM type methods
1179
1180    /**
1181     * Finds elements, including and recursively under this element, with the specified tag name.
1182     * @param tagName The tag name to search for (case insensitively).
1183     * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
1184     */
1185    public Elements getElementsByTag(String tagName) {
1186        Validate.notEmpty(tagName);
1187        tagName = normalize(tagName);
1188
1189        return Collector.collect(new Evaluator.Tag(tagName), this);
1190    }
1191
1192    /**
1193     * Find an element by ID, including or under this element.
1194     * <p>
1195     * Note that this finds the first matching ID, starting with this element. If you search down from a different
1196     * starting point, it is possible to find a different element by ID. For unique element by ID within a Document,
1197     * use {@link Document#getElementById(String)}
1198     * @param id The ID to search for.
1199     * @return The first matching element by ID, starting with this element, or null if none found.
1200     */
1201    public @Nullable Element getElementById(String id) {
1202        Validate.notEmpty(id);
1203        return Collector.findFirst(new Evaluator.Id(id), this);
1204    }
1205
1206    /**
1207     * Find elements that have this class, including or under this element. Case-insensitive.
1208     * <p>
1209     * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method
1210     * checks each class, so you can find the above with {@code el.getElementsByClass("header");}.
1211     *
1212     * @param className the name of the class to search for.
1213     * @return elements with the supplied class name, empty if none
1214     * @see #hasClass(String)
1215     * @see #classNames()
1216     */
1217    public Elements getElementsByClass(String className) {
1218        Validate.notEmpty(className);
1219
1220        return Collector.collect(new Evaluator.Class(className), this);
1221    }
1222
1223    /**
1224     * Find elements that have a named attribute set. Case-insensitive.
1225     *
1226     * @param key name of the attribute, e.g. {@code href}
1227     * @return elements that have this attribute, empty if none
1228     */
1229    public Elements getElementsByAttribute(String key) {
1230        Validate.notEmpty(key);
1231        key = key.trim();
1232
1233        return Collector.collect(new Evaluator.Attribute(key), this);
1234    }
1235
1236    /**
1237     * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements
1238     * that have HTML5 datasets.
1239     * @param keyPrefix name prefix of the attribute e.g. {@code data-}
1240     * @return elements that have attribute names that start with the prefix, empty if none.
1241     */
1242    public Elements getElementsByAttributeStarting(String keyPrefix) {
1243        Validate.notEmpty(keyPrefix);
1244        keyPrefix = keyPrefix.trim();
1245
1246        return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this);
1247    }
1248
1249    /**
1250     * Find elements that have an attribute with the specific value. Case-insensitive.
1251     *
1252     * @param key name of the attribute
1253     * @param value value of the attribute
1254     * @return elements that have this attribute with this value, empty if none
1255     */
1256    public Elements getElementsByAttributeValue(String key, String value) {
1257        return Collector.collect(new Evaluator.AttributeWithValue(key, value), this);
1258    }
1259
1260    /**
1261     * Find elements that either do not have this attribute, or have it with a different value. Case-insensitive.
1262     *
1263     * @param key name of the attribute
1264     * @param value value of the attribute
1265     * @return elements that do not have a matching attribute
1266     */
1267    public Elements getElementsByAttributeValueNot(String key, String value) {
1268        return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this);
1269    }
1270
1271    /**
1272     * Find elements that have attributes that start with the value prefix. Case-insensitive.
1273     *
1274     * @param key name of the attribute
1275     * @param valuePrefix start of attribute value
1276     * @return elements that have attributes that start with the value prefix
1277     */
1278    public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) {
1279        return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this);
1280    }
1281
1282    /**
1283     * Find elements that have attributes that end with the value suffix. Case-insensitive.
1284     *
1285     * @param key name of the attribute
1286     * @param valueSuffix end of the attribute value
1287     * @return elements that have attributes that end with the value suffix
1288     */
1289    public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) {
1290        return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this);
1291    }
1292
1293    /**
1294     * Find elements that have attributes whose value contains the match string. Case-insensitive.
1295     *
1296     * @param key name of the attribute
1297     * @param match substring of value to search for
1298     * @return elements that have attributes containing this text
1299     */
1300    public Elements getElementsByAttributeValueContaining(String key, String match) {
1301        return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this);
1302    }
1303
1304    /**
1305     * Find elements that have an attribute whose value matches the supplied regular expression.
1306     * @param key name of the attribute
1307     * @param pattern compiled regular expression to match against attribute values
1308     * @return elements that have attributes matching this regular expression
1309     */
1310    public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) {
1311        return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this);
1312
1313    }
1314
1315    /**
1316     * Find elements that have attributes whose values match the supplied regular expression.
1317     * @param key name of the attribute
1318     * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options.
1319     * @return elements that have attributes matching this regular expression
1320     */
1321    public Elements getElementsByAttributeValueMatching(String key, String regex) {
1322        Pattern pattern;
1323        try {
1324            pattern = Pattern.compile(regex);
1325        } catch (PatternSyntaxException e) {
1326            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1327        }
1328        return getElementsByAttributeValueMatching(key, pattern);
1329    }
1330
1331    /**
1332     * Find elements whose sibling index is less than the supplied index.
1333     * @param index 0-based index
1334     * @return elements less than index
1335     */
1336    public Elements getElementsByIndexLessThan(int index) {
1337        return Collector.collect(new Evaluator.IndexLessThan(index), this);
1338    }
1339
1340    /**
1341     * Find elements whose sibling index is greater than the supplied index.
1342     * @param index 0-based index
1343     * @return elements greater than index
1344     */
1345    public Elements getElementsByIndexGreaterThan(int index) {
1346        return Collector.collect(new Evaluator.IndexGreaterThan(index), this);
1347    }
1348
1349    /**
1350     * Find elements whose sibling index is equal to the supplied index.
1351     * @param index 0-based index
1352     * @return elements equal to index
1353     */
1354    public Elements getElementsByIndexEquals(int index) {
1355        return Collector.collect(new Evaluator.IndexEquals(index), this);
1356    }
1357
1358    /**
1359     * Find elements that contain the specified string. The search is case-insensitive. The text may appear directly
1360     * in the element, or in any of its descendants.
1361     * @param searchText to look for in the element's text
1362     * @return elements that contain the string, case-insensitive.
1363     * @see Element#text()
1364     */
1365    public Elements getElementsContainingText(String searchText) {
1366        return Collector.collect(new Evaluator.ContainsText(searchText), this);
1367    }
1368
1369    /**
1370     * Find elements that directly contain the specified string. The search is case-insensitive. The text must appear directly
1371     * in the element, not in any of its descendants.
1372     * @param searchText to look for in the element's own text
1373     * @return elements that contain the string, case-insensitive.
1374     * @see Element#ownText()
1375     */
1376    public Elements getElementsContainingOwnText(String searchText) {
1377        return Collector.collect(new Evaluator.ContainsOwnText(searchText), this);
1378    }
1379
1380    /**
1381     * Find elements whose text matches the supplied regular expression.
1382     * @param pattern regular expression to match text against
1383     * @return elements matching the supplied regular expression.
1384     * @see Element#text()
1385     */
1386    public Elements getElementsMatchingText(Pattern pattern) {
1387        return Collector.collect(new Evaluator.Matches(pattern), this);
1388    }
1389
1390    /**
1391     * Find elements whose text matches the supplied regular expression.
1392     * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options.
1393     * @return elements matching the supplied regular expression.
1394     * @see Element#text()
1395     */
1396    public Elements getElementsMatchingText(String regex) {
1397        Pattern pattern;
1398        try {
1399            pattern = Pattern.compile(regex);
1400        } catch (PatternSyntaxException e) {
1401            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1402        }
1403        return getElementsMatchingText(pattern);
1404    }
1405
1406    /**
1407     * Find elements whose own text matches the supplied regular expression.
1408     * @param pattern regular expression to match text against
1409     * @return elements matching the supplied regular expression.
1410     * @see Element#ownText()
1411     */
1412    public Elements getElementsMatchingOwnText(Pattern pattern) {
1413        return Collector.collect(new Evaluator.MatchesOwn(pattern), this);
1414    }
1415
1416    /**
1417     * Find elements whose own text matches the supplied regular expression.
1418     * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options.
1419     * @return elements matching the supplied regular expression.
1420     * @see Element#ownText()
1421     */
1422    public Elements getElementsMatchingOwnText(String regex) {
1423        Pattern pattern;
1424        try {
1425            pattern = Pattern.compile(regex);
1426        } catch (PatternSyntaxException e) {
1427            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1428        }
1429        return getElementsMatchingOwnText(pattern);
1430    }
1431
1432    /**
1433     * Find all elements under this element (including self, and children of children).
1434     *
1435     * @return all elements
1436     */
1437    public Elements getAllElements() {
1438        return Collector.collect(new Evaluator.AllElements(), this);
1439    }
1440
1441    /**
1442     Gets the <b>normalized, combined text</b> of this element and all its children. Whitespace is normalized and
1443     trimmed.
1444     <p>For example, given HTML {@code <p>Hello  <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there
1445    now!"}
1446     <p>If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not
1447     children), use {@link #ownText()}
1448     <p>Note that this method returns the textual content that would be presented to a reader. The contents of data
1449     nodes (such as {@code <script>} tags) are not considered text. Use {@link #data()} or {@link #html()} to retrieve
1450     that content.
1451
1452     @return decoded, normalized text, or empty string if none.
1453     @see #wholeText()
1454     @see #ownText()
1455     @see #textNodes()
1456     */
1457    public String text() {
1458        final StringBuilder accum = StringUtil.borrowBuilder();
1459        NodeTraversor.traverse(new TextAccumulator(accum), this);
1460        return StringUtil.releaseBuilder(accum).trim();
1461    }
1462
1463    private static class TextAccumulator implements NodeVisitor {
1464        private final StringBuilder accum;
1465
1466        public TextAccumulator(StringBuilder accum) {
1467            this.accum = accum;
1468        }
1469
1470        @Override public void head(Node node, int depth) {
1471            if (node instanceof TextNode) {
1472                TextNode textNode = (TextNode) node;
1473                appendNormalisedText(accum, textNode);
1474            } else if (node instanceof Element) {
1475                Element element = (Element) node;
1476                if (accum.length() > 0 &&
1477                    (element.isBlock() || element.nameIs("br")) &&
1478                    !lastCharIsWhitespace(accum))
1479                    accum.append(' ');
1480            }
1481        }
1482
1483        @Override public void tail(Node node, int depth) {
1484            // make sure there is a space between block tags and immediately following text nodes or inline elements <div>One</div>Two should be "One Two".
1485            if (node instanceof Element) {
1486                Element element = (Element) node;
1487                Node next = node.nextSibling();
1488                if (!element.tag.isInline() && (next instanceof TextNode || next instanceof Element && ((Element) next).tag.isInline()) && !lastCharIsWhitespace(accum))
1489                    accum.append(' ');
1490            }
1491
1492        }
1493    }
1494
1495    /**
1496     Get the non-normalized, decoded text of this element and its children, including only any newlines and spaces
1497     present in the original source.
1498     @return decoded, non-normalized text
1499     @see #text()
1500     @see #wholeOwnText()
1501     */
1502    public String wholeText() {
1503        return wholeTextOf(nodeStream());
1504    }
1505
1506    private static String wholeTextOf(Stream<Node> stream) {
1507        return stream.map(node -> {
1508            if (node instanceof TextNode) return ((TextNode) node).getWholeText();
1509            if (node.nameIs("br")) return "\n";
1510            return "";
1511        }).collect(StringUtil.joining(""));
1512    }
1513
1514    /**
1515     Get the non-normalized, decoded text of this element, <b>not including</b> any child elements, including any
1516     newlines and spaces present in the original source.
1517     @return decoded, non-normalized text that is a direct child of this Element
1518     @see #text()
1519     @see #wholeText()
1520     @see #ownText()
1521     @since 1.15.1
1522     */
1523    public String wholeOwnText() {
1524        return wholeTextOf(childNodes.stream());
1525    }
1526
1527    /**
1528     * Gets the (normalized) text owned by this element only; does not get the combined text of all children.
1529     * <p>
1530     * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"},
1531     * whereas {@code p.text()} returns {@code "Hello there now!"}.
1532     * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element.
1533     *
1534     * @return decoded text, or empty string if none.
1535     * @see #text()
1536     * @see #textNodes()
1537     */
1538    public String ownText() {
1539        StringBuilder sb = StringUtil.borrowBuilder();
1540        ownText(sb);
1541        return StringUtil.releaseBuilder(sb).trim();
1542    }
1543
1544    private void ownText(StringBuilder accum) {
1545        for (int i = 0; i < childNodeSize(); i++) {
1546            Node child = childNodes.get(i);
1547            if (child instanceof TextNode) {
1548                TextNode textNode = (TextNode) child;
1549                appendNormalisedText(accum, textNode);
1550            } else if (child.nameIs("br") && !lastCharIsWhitespace(accum)) {
1551                accum.append(" ");
1552            }
1553        }
1554    }
1555
1556    private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
1557        String text = textNode.getWholeText();
1558        if (preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode)
1559            accum.append(text);
1560        else
1561            StringUtil.appendNormalisedWhitespace(accum, text, lastCharIsWhitespace(accum));
1562    }
1563
1564    static boolean preserveWhitespace(@Nullable Node node) {
1565        // looks only at this element and five levels up, to prevent recursion & needless stack searches
1566        if (node instanceof Element) {
1567            Element el = (Element) node;
1568            int i = 0;
1569            do {
1570                if (el.tag.preserveWhitespace())
1571                    return true;
1572                el = el.parent();
1573                i++;
1574            } while (i < 6 && el != null);
1575        }
1576        return false;
1577    }
1578
1579    /**
1580     * Set the text of this element. Any existing contents (text or elements) will be cleared.
1581     * <p>As a special case, for {@code <script>} and {@code <style>} tags, the input text will be treated as data,
1582     * not visible text.</p>
1583     * @param text decoded text
1584     * @return this element
1585     */
1586    public Element text(String text) {
1587        Validate.notNull(text);
1588        empty();
1589        // special case for script/style in HTML (or customs): should be data node
1590        if (tag().is(Tag.Data))
1591            appendChild(new DataNode(text));
1592        else
1593            appendChild(new TextNode(text));
1594
1595        return this;
1596    }
1597
1598    /**
1599     Checks if the current element or any of its child elements contain non-whitespace text.
1600     @return {@code true} if the element has non-blank text content, {@code false} otherwise.
1601     */
1602    public boolean hasText() {
1603        AtomicBoolean hasText = new AtomicBoolean(false);
1604        filter((node, depth) -> {
1605            if (node instanceof TextNode) {
1606                TextNode textNode = (TextNode) node;
1607                if (!textNode.isBlank()) {
1608                    hasText.set(true);
1609                    return NodeFilter.FilterResult.STOP;
1610                }
1611            }
1612            return NodeFilter.FilterResult.CONTINUE;
1613        });
1614        return hasText.get();
1615    }
1616
1617    /**
1618     * Get the combined data of this element. Data is e.g. the inside of a {@code <script>} tag. Note that data is NOT the
1619     * text of the element. Use {@link #text()} to get the text that would be visible to a user, and {@code data()}
1620     * for the contents of scripts, comments, CSS styles, etc.
1621     *
1622     * @return the data, or empty string if none
1623     *
1624     * @see #dataNodes()
1625     */
1626    public String data() {
1627        StringBuilder sb = StringUtil.borrowBuilder();
1628        traverse((childNode, depth) -> {
1629            if (childNode instanceof DataNode) {
1630                DataNode data = (DataNode) childNode;
1631                sb.append(data.getWholeData());
1632            } else if (childNode instanceof Comment) {
1633                Comment comment = (Comment) childNode;
1634                sb.append(comment.getData());
1635            } else if (childNode instanceof CDataNode) {
1636                // this shouldn't really happen because the html parser won't see the cdata as anything special when parsing script.
1637                // but in case another type gets through.
1638                CDataNode cDataNode = (CDataNode) childNode;
1639                sb.append(cDataNode.getWholeText());
1640            }
1641        });
1642        return StringUtil.releaseBuilder(sb);
1643    }
1644
1645    /**
1646     * Gets the literal value of this element's "class" attribute, which may include multiple class names, space
1647     * separated. (E.g. on <code>&lt;div class="header gray"&gt;</code> returns, "<code>header gray</code>")
1648     * @return The literal class attribute, or <b>empty string</b> if no class attribute set.
1649     */
1650    public String className() {
1651        return attr("class").trim();
1652    }
1653
1654    /**
1655     * Get each of the element's class names. E.g. on element {@code <div class="header gray">},
1656     * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to
1657     * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them.
1658     * @return set of classnames, empty if no class attribute
1659     */
1660    public Set<String> classNames() {
1661        String[] names = ClassSplit.split(className());
1662        Set<String> classNames = new LinkedHashSet<>(Arrays.asList(names));
1663        classNames.remove(""); // if classNames() was empty, would include an empty class
1664
1665        return classNames;
1666    }
1667
1668    /**
1669     Set the element's {@code class} attribute to the supplied class names.
1670     @param classNames set of classes
1671     @return this element, for chaining
1672     */
1673    public Element classNames(Set<String> classNames) {
1674        Validate.notNull(classNames);
1675        if (classNames.isEmpty()) {
1676            attributes().remove("class");
1677        } else {
1678            attributes().put("class", StringUtil.join(classNames, " "));
1679        }
1680        return this;
1681    }
1682
1683    /**
1684     * Tests if this element has a class. Case-insensitive.
1685     * @param className name of class to check for
1686     * @return true if it does, false if not
1687     */
1688    // performance sensitive
1689    public boolean hasClass(String className) {
1690        if (attributes == null)
1691            return false;
1692
1693        final String classAttr = attributes.getIgnoreCase("class");
1694        final int len = classAttr.length();
1695        final int wantLen = className.length();
1696
1697        if (len == 0 || len < wantLen) {
1698            return false;
1699        }
1700
1701        // if both lengths are equal, only need compare the className with the attribute
1702        if (len == wantLen) {
1703            return className.equalsIgnoreCase(classAttr);
1704        }
1705
1706        // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations)
1707        boolean inClass = false;
1708        int start = 0;
1709        for (int i = 0; i < len; i++) {
1710            if (Character.isWhitespace(classAttr.charAt(i))) {
1711                if (inClass) {
1712                    // white space ends a class name, compare it with the requested one, ignore case
1713                    if (i - start == wantLen && classAttr.regionMatches(true, start, className, 0, wantLen)) {
1714                        return true;
1715                    }
1716                    inClass = false;
1717                }
1718            } else {
1719                if (!inClass) {
1720                    // we're in a class name : keep the start of the substring
1721                    inClass = true;
1722                    start = i;
1723                }
1724            }
1725        }
1726
1727        // check the last entry
1728        if (inClass && len - start == wantLen) {
1729            return classAttr.regionMatches(true, start, className, 0, wantLen);
1730        }
1731
1732        return false;
1733    }
1734
1735    /**
1736     Add a class name to this element's {@code class} attribute.
1737     @param className class name to add
1738     @return this element
1739     */
1740    public Element addClass(String className) {
1741        Validate.notNull(className);
1742
1743        Set<String> classes = classNames();
1744        classes.add(className);
1745        classNames(classes);
1746
1747        return this;
1748    }
1749
1750    /**
1751     Remove a class name from this element's {@code class} attribute.
1752     @param className class name to remove
1753     @return this element
1754     */
1755    public Element removeClass(String className) {
1756        Validate.notNull(className);
1757
1758        Set<String> classes = classNames();
1759        classes.remove(className);
1760        classNames(classes);
1761
1762        return this;
1763    }
1764
1765    /**
1766     Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it.
1767     @param className class name to toggle
1768     @return this element
1769     */
1770    public Element toggleClass(String className) {
1771        Validate.notNull(className);
1772
1773        Set<String> classes = classNames();
1774        if (classes.contains(className))
1775            classes.remove(className);
1776        else
1777            classes.add(className);
1778        classNames(classes);
1779
1780        return this;
1781    }
1782
1783    /**
1784     * Get the value of a form element (input, textarea, etc).
1785     * @return the value of the form element, or empty string if not set.
1786     */
1787    public String val() {
1788        if (elementIs("textarea", NamespaceHtml))
1789            return text();
1790        else
1791            return attr("value");
1792    }
1793
1794    /**
1795     * Set the value of a form element (input, textarea, etc).
1796     * @param value value to set
1797     * @return this element (for chaining)
1798     */
1799    public Element val(String value) {
1800        if (elementIs("textarea", NamespaceHtml))
1801            text(value);
1802        else
1803            attr("value", value);
1804        return this;
1805    }
1806
1807    /**
1808     Get the source range (start and end positions) of the end (closing) tag for this Element. Position tracking must be
1809     enabled prior to parsing the content.
1810     @return the range of the closing tag for this element, or {@code untracked} if its range was not tracked.
1811     @see org.jsoup.parser.Parser#setTrackPosition(boolean)
1812     @see Node#sourceRange()
1813     @see Range#isImplicit()
1814     @since 1.15.2
1815     */
1816    public Range endSourceRange() {
1817        return Range.of(this, false);
1818    }
1819
1820    @Override
1821    void outerHtmlHead(final Appendable accum, Document.OutputSettings out) throws IOException {
1822        String tagName = safeTagName(out.syntax());
1823        accum.append('<').append(tagName);
1824        if (attributes != null) attributes.html(accum, out);
1825
1826        if (childNodes.isEmpty()) {
1827            boolean xmlMode = out.syntax() == xml || !tag.namespace().equals(NamespaceHtml);
1828            if (xmlMode && (tag.is(Tag.SeenSelfClose) || (tag.isKnownTag() && (tag.isEmpty() || tag.isSelfClosing())))) {
1829                accum.append(" />");
1830            } else if (!xmlMode && tag.isEmpty()) { // html void element
1831                accum.append('>');
1832            } else {
1833                accum.append("></").append(tagName).append('>');
1834            }
1835        } else {
1836            accum.append('>');
1837        }
1838    }
1839
1840    @Override
1841    void outerHtmlTail(Appendable accum, Document.OutputSettings out) throws IOException {
1842        if (!childNodes.isEmpty())
1843            accum.append("</").append(safeTagName(out.syntax())).append('>');
1844        // if empty, we have already closed in htmlHead
1845    }
1846
1847    /* If XML syntax, normalizes < to _ in tag name. */
1848    @Nullable private String safeTagName(Document.OutputSettings.Syntax syntax) {
1849        return syntax == xml ? Normalizer.xmlSafeTagName(tagName()) : tagName();
1850    }
1851
1852    /**
1853     * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return
1854     * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.)
1855     *
1856     * @return String of HTML.
1857     * @see #outerHtml()
1858     */
1859    public String html() {
1860        StringBuilder accum = StringUtil.borrowBuilder();
1861        html(accum);
1862        String html = StringUtil.releaseBuilder(accum);
1863        return NodeUtils.outputSettings(this).prettyPrint() ? html.trim() : html;
1864    }
1865
1866    @Override
1867    public <T extends Appendable> T html(T accum) {
1868        Node child = firstChild();
1869        if (child != null) {
1870            Printer printer = Printer.printerFor(child, accum);
1871            while (child != null) {
1872                NodeTraversor.traverse(printer, child);
1873                child = child.nextSibling();
1874            }
1875        }
1876        return accum;
1877    }
1878
1879    /**
1880     * Set this element's inner HTML. Clears the existing HTML first.
1881     * @param html HTML to parse and set into this element
1882     * @return this element
1883     * @see #append(String)
1884     */
1885    public Element html(String html) {
1886        empty();
1887        append(html);
1888        return this;
1889    }
1890
1891    @Override
1892    public Element clone() {
1893        return (Element) super.clone();
1894    }
1895
1896    @Override
1897    public Element shallowClone() {
1898        // simpler than implementing a clone version with no child copy
1899        String baseUri = baseUri();
1900        if (baseUri.isEmpty()) baseUri = null; // saves setting a blank internal attribute
1901        return new Element(tag, baseUri, attributes == null ? null : attributes.clone());
1902    }
1903
1904    @Override
1905    protected Element doClone(@Nullable Node parent) {
1906        Element clone = (Element) super.doClone(parent);
1907        clone.attributes = attributes != null ? attributes.clone() : null;
1908        clone.childNodes = new NodeList(childNodes.size());
1909        clone.childNodes.addAll(childNodes); // the children then get iterated and cloned in Node.clone
1910
1911        return clone;
1912    }
1913
1914    // overrides of Node for call chaining
1915    @Override
1916    public Element clearAttributes() {
1917        if (attributes != null) {
1918            super.clearAttributes(); // keeps internal attributes via iterator
1919            if (attributes.size() == 0)
1920                attributes = null; // only remove entirely if no internal attributes
1921        }
1922
1923        return this;
1924    }
1925
1926    @Override
1927    public Element removeAttr(String attributeKey) {
1928        return (Element) super.removeAttr(attributeKey);
1929    }
1930
1931    @Override
1932    public Element root() {
1933        return (Element) super.root(); // probably a document, but always at least an element
1934    }
1935
1936    @Override
1937    public Element traverse(NodeVisitor nodeVisitor) {
1938        return (Element) super.traverse(nodeVisitor);
1939    }
1940
1941    @Override
1942    public Element forEachNode(Consumer<? super Node> action) {
1943        return (Element) super.forEachNode(action);
1944    }
1945
1946    /**
1947     Perform the supplied action on this Element and each of its descendant Elements, during a depth-first traversal.
1948     Elements may be inspected, changed, added, replaced, or removed.
1949     @param action the function to perform on the element
1950     @see Node#forEachNode(Consumer)
1951     */
1952    @Override
1953    public void forEach(Consumer<? super Element> action) {
1954        stream().forEach(action);
1955    }
1956
1957    /**
1958     Returns an Iterator that iterates this Element and each of its descendant Elements, in document order.
1959     @return an Iterator
1960     */
1961    @Override
1962    public Iterator<Element> iterator() {
1963        return new NodeIterator<>(this, Element.class);
1964    }
1965
1966    @Override
1967    public Element filter(NodeFilter nodeFilter) {
1968        return  (Element) super.filter(nodeFilter);
1969    }
1970
1971    static final class NodeList extends ArrayList<Node> {
1972        public NodeList(int size) {
1973            super(size);
1974        }
1975
1976        int modCount() {
1977            return this.modCount;
1978        }
1979    }
1980}