001package org.jsoup.nodes;
002
003import org.jsoup.helper.ChangeNotifyingArrayList;
004import org.jsoup.helper.Validate;
005import org.jsoup.internal.Normalizer;
006import org.jsoup.internal.StringUtil;
007import org.jsoup.parser.ParseSettings;
008import org.jsoup.parser.Parser;
009import org.jsoup.parser.Tag;
010import org.jsoup.parser.TokenQueue;
011import org.jsoup.select.Collector;
012import org.jsoup.select.Elements;
013import org.jsoup.select.Evaluator;
014import org.jsoup.select.NodeFilter;
015import org.jsoup.select.NodeTraversor;
016import org.jsoup.select.NodeVisitor;
017import org.jsoup.select.QueryParser;
018import org.jsoup.select.Selector;
019import org.jspecify.annotations.Nullable;
020
021import java.io.IOException;
022import java.lang.ref.WeakReference;
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.Collection;
026import java.util.Collections;
027import java.util.Iterator;
028import java.util.LinkedHashSet;
029import java.util.List;
030import java.util.Map;
031import java.util.Set;
032import java.util.concurrent.atomic.AtomicBoolean;
033import java.util.function.Consumer;
034import java.util.regex.Pattern;
035import java.util.regex.PatternSyntaxException;
036import java.util.stream.Collectors;
037import java.util.stream.Stream;
038
039import static org.jsoup.internal.Normalizer.normalize;
040import static org.jsoup.nodes.Document.OutputSettings.Syntax.html;
041import static org.jsoup.nodes.Document.OutputSettings.Syntax.xml;
042import static org.jsoup.nodes.TextNode.lastCharIsWhitespace;
043import static org.jsoup.parser.Parser.NamespaceHtml;
044import static org.jsoup.parser.TokenQueue.escapeCssIdentifier;
045
046/**
047 An HTML Element consists of a tag name, attributes, and child nodes (including text nodes and other elements).
048 <p>
049 From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
050*/
051public class Element extends Node implements Iterable<Element> {
    // Shared empty result returned by childElementsList() when this element has no child nodes.
    private static final List<Element> EmptyChildren = Collections.emptyList();
    // Matches runs of whitespace. Not used in this part of the file; presumably splits class attribute values - confirm at the use site.
    private static final Pattern ClassSplit = Pattern.compile("\\s+");
    // Attribute key under which this element's base URI is stored (written by doSetBaseUri, read by baseUri).
    private static final String BaseUriKey = Attributes.internalKey("baseUri");
    // The element's tag; replaced when the element is renamed via tagName(String, String).
    private Tag tag;
    private @Nullable WeakReference<List<Element>> shadowChildrenRef; // points to child elements shadowed from node children
    // Child nodes; starts as the shared EmptyNodes placeholder until ensureChildNodes() allocates a real list.
    List<Node> childNodes;
    @Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null
059
060    /**
061     * Create a new, standalone element, in the specified namespace.
062     * @param tag tag name
063     * @param namespace namespace for this element
064     */
065    public Element(String tag, String namespace) {
066        this(Tag.valueOf(tag, namespace, ParseSettings.preserveCase), null);
067    }
068
069    /**
070     * Create a new, standalone element, in the HTML namespace.
071     * @param tag tag name
072     * @see #Element(String tag, String namespace)
073     */
074    public Element(String tag) {
075        this(Tag.valueOf(tag, Parser.NamespaceHtml, ParseSettings.preserveCase), "", null);
076    }
077
078    /**
079     * Create a new, standalone Element. (Standalone in that it has no parent.)
080     *
081     * @param tag tag of this element
082     * @param baseUri the base URI (optional, may be null to inherit from parent, or "" to clear parent's)
083     * @param attributes initial attributes (optional, may be null)
084     * @see #appendChild(Node)
085     * @see #appendElement(String)
086     */
087    public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes) {
088        Validate.notNull(tag);
089        childNodes = EmptyNodes;
090        this.attributes = attributes;
091        this.tag = tag;
092        if (baseUri != null)
093            this.setBaseUri(baseUri);
094    }
095
096    /**
097     * Create a new Element from a Tag and a base URI.
098     *
099     * @param tag element tag
100     * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any.
101     * @see Tag#valueOf(String, ParseSettings)
102     */
103    public Element(Tag tag, @Nullable String baseUri) {
104        this(tag, baseUri, null);
105    }
106
107    /**
108     Internal test to check if a nodelist object has been created.
109     */
110    protected boolean hasChildNodes() {
111        return childNodes != EmptyNodes;
112    }
113
114    @Override protected List<Node> ensureChildNodes() {
115        if (childNodes == EmptyNodes) {
116            childNodes = new NodeList(this, 4);
117        }
118        return childNodes;
119    }
120
121    @Override
122    protected boolean hasAttributes() {
123        return attributes != null;
124    }
125
126    @Override
127    public Attributes attributes() {
128        if (attributes == null) // not using hasAttributes, as doesn't clear warning
129            attributes = new Attributes();
130        return attributes;
131    }
132
133    @Override
134    public String baseUri() {
135        return searchUpForAttribute(this, BaseUriKey);
136    }
137
138    private static String searchUpForAttribute(final Element start, final String key) {
139        Element el = start;
140        while (el != null) {
141            if (el.attributes != null && el.attributes.hasKey(key))
142                return el.attributes.get(key);
143            el = el.parent();
144        }
145        return "";
146    }
147
148    @Override
149    protected void doSetBaseUri(String baseUri) {
150        attributes().put(BaseUriKey, baseUri);
151    }
152
153    @Override
154    public int childNodeSize() {
155        return childNodes.size();
156    }
157
158    @Override
159    public String nodeName() {
160        return tag.getName();
161    }
162
163    /**
164     * Get the name of the tag for this element. E.g. {@code div}. If you are using {@link ParseSettings#preserveCase
165     * case preserving parsing}, this will return the source's original case.
166     *
167     * @return the tag name
168     */
169    public String tagName() {
170        return tag.getName();
171    }
172
173    /**
174     * Get the normalized name of this Element's tag. This will always be the lower-cased version of the tag, regardless
175     * of the tag case preserving setting of the parser. For e.g., {@code <DIV>} and {@code <div>} both have a
176     * normal name of {@code div}.
177     * @return normal name
178     */
179    @Override
180    public String normalName() {
181        return tag.normalName();
182    }
183
184    /**
185     Test if this Element has the specified normalized name, and is in the specified namespace.
186     * @param normalName a normalized element name (e.g. {@code div}).
187     * @param namespace the namespace
188     * @return true if the element's normal name matches exactly, and is in the specified namespace
189     * @since 1.17.2
190     */
191    public boolean elementIs(String normalName, String namespace) {
192        return tag.normalName().equals(normalName) && tag.namespace().equals(namespace);
193    }
194
195    /**
196     * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
197     * {@code el.tagName("div");}.
198     *
199     * @param tagName new tag name for this element
200     * @return this element, for chaining
201     * @see Elements#tagName(String)
202     */
203    public Element tagName(String tagName) {
204        return tagName(tagName, tag.namespace());
205    }
206
207    /**
208     * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
209     * {@code el.tagName("div");}.
210     *
211     * @param tagName new tag name for this element
212     * @param namespace the new namespace for this element
213     * @return this element, for chaining
214     * @see Elements#tagName(String)
215     */
216    public Element tagName(String tagName, String namespace) {
217        Validate.notEmptyParam(tagName, "tagName");
218        Validate.notEmptyParam(namespace, "namespace");
219        tag = Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()); // maintains the case option of the original parse
220        return this;
221    }
222
223    /**
224     * Get the Tag for this element.
225     *
226     * @return the tag object
227     */
228    public Tag tag() {
229        return tag;
230    }
231
232    /**
233     * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element
234     * {@code <span> == false}).
235     *
236     * @return true if block, false if not (and thus inline)
237     */
238    public boolean isBlock() {
239        return tag.isBlock();
240    }
241
242    /**
243     * Get the {@code id} attribute of this element.
244     *
245     * @return The id attribute, if present, or an empty string if not.
246     */
247    public String id() {
248        return attributes != null ? attributes.getIgnoreCase("id") :"";
249    }
250
251    /**
252     Set the {@code id} attribute of this element.
253     @param id the ID value to use
254     @return this Element, for chaining
255     */
256    public Element id(String id) {
257        Validate.notNull(id);
258        attr("id", id);
259        return this;
260    }
261
262    /**
263     * Set an attribute value on this element. If this element already has an attribute with the
264     * key, its value is updated; otherwise, a new attribute is added.
265     *
266     * @return this element
267     */
268    @Override public Element attr(String attributeKey, String attributeValue) {
269        super.attr(attributeKey, attributeValue);
270        return this;
271    }
272
273    /**
274     * Set a boolean attribute value on this element. Setting to <code>true</code> sets the attribute value to "" and
275     * marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the attribute
276     * with the same key if it exists.
277     *
278     * @param attributeKey the attribute key
279     * @param attributeValue the attribute value
280     *
281     * @return this element
282     */
283    public Element attr(String attributeKey, boolean attributeValue) {
284        attributes().put(attributeKey, attributeValue);
285        return this;
286    }
287
288    /**
289     Get an Attribute by key. Changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc
290     will cascade back to this Element.
291     @param key the (case-sensitive) attribute key
292     @return the Attribute for this key, or null if not present.
293     @since 1.17.2
294     */
295    @Nullable public Attribute attribute(String key) {
296        return hasAttributes() ? attributes().attribute(key) : null;
297    }
298
299    /**
300     * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
301     * starting with "data-" is included the dataset.
302     * <p>
303     * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
304     * {@code package=jsoup, language=java}.
305     * <p>
306     * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected
307     * in the other map.
308     * <p>
309     * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
310     * @return a map of {@code key=value} custom data attributes.
311     */
312    public Map<String, String> dataset() {
313        return attributes().dataset();
314    }
315
316    @Override @Nullable
317    public final Element parent() {
318        return (Element) parentNode;
319    }
320
321    /**
322     * Get this element's parent and ancestors, up to the document root.
323     * @return this element's stack of parents, starting with the closest first.
324     */
325    public Elements parents() {
326        Elements parents = new Elements();
327        Element parent = this.parent();
328        while (parent != null && !parent.nameIs("#root")) {
329            parents.add(parent);
330            parent = parent.parent();
331        }
332        return parents;
333    }
334
335    /**
336     * Get a child element of this element, by its 0-based index number.
337     * <p>
338     * Note that an element can have both mixed Nodes and Elements as children. This method inspects
339     * a filtered list of children that are elements, and the index is based on that filtered list.
340     * </p>
341     *
342     * @param index the index number of the element to retrieve
343     * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException}
344     * @see #childNode(int)
345     */
346    public Element child(int index) {
347        return childElementsList().get(index);
348    }
349
350    /**
351     * Get the number of child nodes of this element that are elements.
352     * <p>
353     * This method works on the same filtered list like {@link #child(int)}. Use {@link #childNodes()} and {@link
354     * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.)
355     * </p>
356     *
357     * @return the number of child nodes that are elements
358     * @see #children()
359     * @see #child(int)
360     */
361    public int childrenSize() {
362        return childElementsList().size();
363    }
364
365    /**
366     * Get this element's child elements.
367     * <p>
368     * This is effectively a filter on {@link #childNodes()} to get Element nodes.
369     * </p>
370     * @return child elements. If this element has no children, returns an empty list.
371     * @see #childNodes()
372     */
373    public Elements children() {
374        return new Elements(childElementsList());
375    }
376
377    /**
378     * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated.
379     * TODO - think about pulling this out as a helper as there are other shadow lists (like in Attributes) kept around.
380     * @return a list of child elements
381     */
382    List<Element> childElementsList() {
383        if (childNodeSize() == 0)
384            return EmptyChildren; // short circuit creating empty
385
386        List<Element> children;
387        if (shadowChildrenRef == null || (children = shadowChildrenRef.get()) == null) {
388            final int size = childNodes.size();
389            children = new ArrayList<>(size);
390            //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here)
391            for (int i = 0; i < size; i++) {
392                final Node node = childNodes.get(i);
393                if (node instanceof Element)
394                    children.add((Element) node);
395            }
396            shadowChildrenRef = new WeakReference<>(children);
397        }
398        return children;
399    }
400
401    /**
402     * Clears the cached shadow child elements.
403     */
404    @Override
405    void nodelistChanged() {
406        super.nodelistChanged();
407        shadowChildrenRef = null;
408    }
409
410    /**
411     Returns a Stream of this Element and all of its descendant Elements. The stream has document order.
412     @return a stream of this element and its descendants.
413     @see #nodeStream()
414     @since 1.17.1
415     */
416    public Stream<Element> stream() {
417        return NodeUtils.stream(this, Element.class);
418    }
419
420    private <T> List<T> filterNodes(Class<T> clazz) {
421        return childNodes.stream()
422                .filter(clazz::isInstance)
423                .map(clazz::cast)
424                .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
425    }
426
427    /**
428     * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated.
429     * <p>
430     * This is effectively a filter on {@link #childNodes()} to get Text nodes.
431     * @return child text nodes. If this element has no text nodes, returns an
432     * empty list.
433     * </p>
434     * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected:
435     * <ul>
436     *     <li>{@code p.text()} = {@code "One Two Three Four"}</li>
437     *     <li>{@code p.ownText()} = {@code "One Three Four"}</li>
438     *     <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
439     *     <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
440     *     <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
441     * </ul>
442     */
443    public List<TextNode> textNodes() {
444        return filterNodes(TextNode.class);
445    }
446
447    /**
448     * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
449     * <p>
450     * This is effectively a filter on {@link #childNodes()} to get Data nodes.
451     * </p>
452     * @return child data nodes. If this element has no data nodes, returns an
453     * empty list.
454     * @see #data()
455     */
456    public List<DataNode> dataNodes() {
457        return filterNodes(DataNode.class);
458    }
459
460    /**
461     * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
462     * may include this element, or any of its children.
463     * <p>This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
464     * multiple filters can be combined, e.g.:</p>
465     * <ul>
466     * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)
467     * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)
468     * </ul>
469     * <p>See the query syntax documentation in {@link org.jsoup.select.Selector}.</p>
470     * <p>Also known as {@code querySelectorAll()} in the Web DOM.</p>
471     *
472     * @param cssQuery a {@link Selector} CSS-like query
473     * @return an {@link Elements} list containing elements that match the query (empty if none match)
474     * @see Selector selector query syntax
475     * @see #select(Evaluator)
476     * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
477     */
478    public Elements select(String cssQuery) {
479        return Selector.select(cssQuery, this);
480    }
481
482    /**
483     * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but
484     * may be useful if you are running the same query many times (on many documents) and want to save the overhead of
485     * repeatedly parsing the CSS query.
486     * @param evaluator an element evaluator
487     * @return an {@link Elements} list containing elements that match the query (empty if none match)
488     * @see QueryParser#parse(String)
489     */
490    public Elements select(Evaluator evaluator) {
491        return Selector.select(evaluator, this);
492    }
493
494    /**
495     Selects elements from the given root that match the specified {@link Selector} CSS query, with this element as the
496     starting context, and returns them as a lazy Stream. Matched elements may include this element, or any of its
497     children.
498     <p>
499     Unlike {@link #select(String query)}, which returns a complete list of all matching elements, this method returns a
500     {@link Stream} that processes elements lazily as they are needed. The stream operates in a "pull" model — elements
501     are fetched from the root as the stream is traversed. You can use standard {@code Stream} operations such as
502     {@code filter}, {@code map}, or {@code findFirst} to process elements on demand.
503     </p>
504
505     @param cssQuery a {@link Selector} CSS-like query
506     @return a {@link Stream} containing elements that match the query (empty if none match)
507     @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
508     @see Selector selector query syntax
509     @see QueryParser#parse(String)
510     @since 1.19.1
511     */
512    public Stream<Element> selectStream(String cssQuery) {
513        return Selector.selectStream(cssQuery, this);
514    }
515
516    /**
517     Find a Stream of elements that match the supplied Evaluator.
518
519     @param evaluator an element Evaluator
520     @return a {@link Stream} containing elements that match the query (empty if none match)
521     @since 1.19.1
522     */
523    public Stream<Element> selectStream(Evaluator evaluator) {
524        return Selector.selectStream(evaluator, this);
525    }
526
527    /**
528     * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
529     * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
530     * execution stops on the first hit.</p>
531     * <p>Also known as {@code querySelector()} in the Web DOM.</p>
532     * @param cssQuery cssQuery a {@link Selector} CSS-like query
533     * @return the first matching element, or <b>{@code null}</b> if there is no match.
534     * @see #expectFirst(String)
535     */
536    public @Nullable Element selectFirst(String cssQuery) {
537        return Selector.selectFirst(cssQuery, this);
538    }
539
540    /**
541     * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or
542     * {@code null} if none match.
543     *
544     * @param evaluator an element evaluator
545     * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none
546     * match.
547     */
548    public @Nullable Element selectFirst(Evaluator evaluator) {
549        return Collector.findFirst(evaluator, this);
550    }
551
552    /**
553     Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}. This
554     is useful if you want to simply abort processing on a failed match.
555     @param cssQuery a {@link Selector} CSS-like query
556     @return the first matching element
557     @throws IllegalArgumentException if no match is found
558     @since 1.15.2
559     */
560    public Element expectFirst(String cssQuery) {
561        return (Element) Validate.ensureNotNull(
562            Selector.selectFirst(cssQuery, this),
563            parent() != null ?
564                "No elements matched the query '%s' on element '%s'.":
565                "No elements matched the query '%s' in the document."
566            , cssQuery, this.tagName()
567        );
568    }
569
570    /**
571     * Checks if this element matches the given {@link Selector} CSS query. Also knows as {@code matches()} in the Web
572     * DOM.
573     *
574     * @param cssQuery a {@link Selector} CSS query
575     * @return if this element matches the query
576     */
577    public boolean is(String cssQuery) {
578        return is(QueryParser.parse(cssQuery));
579    }
580
581    /**
582     * Check if this element matches the given evaluator.
583     * @param evaluator an element evaluator
584     * @return if this element matches
585     */
586    public boolean is(Evaluator evaluator) {
587        return evaluator.matches(this.root(), this);
588    }
589
590    /**
591     * Find the closest element up the tree of parents that matches the specified CSS query. Will return itself, an
592     * ancestor, or {@code null} if there is no such matching element.
593     * @param cssQuery a {@link Selector} CSS query
594     * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not
595     * found.
596     */
597    public @Nullable Element closest(String cssQuery) {
598        return closest(QueryParser.parse(cssQuery));
599    }
600
601    /**
602     * Find the closest element up the tree of parents that matches the specified evaluator. Will return itself, an
603     * ancestor, or {@code null} if there is no such matching element.
604     * @param evaluator a query evaluator
605     * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not
606     * found.
607     */
608    public @Nullable Element closest(Evaluator evaluator) {
609        Validate.notNull(evaluator);
610        Element el = this;
611        final Element root = root();
612        do {
613            if (evaluator.matches(root, el))
614                return el;
615            el = el.parent();
616        } while (el != null);
617        return null;
618    }
619
620    /**
621     Find Elements that match the supplied {@index XPath} expression.
622     <p>Note that for convenience of writing the Xpath expression, namespaces are disabled, and queries can be
623     expressed using the element's local name only.</p>
624     <p>By default, XPath 1.0 expressions are supported. If you would to use XPath 2.0 or higher, you can provide an
625     alternate XPathFactory implementation:</p>
626     <ol>
627     <li>Add the implementation to your classpath. E.g. to use <a href="https://www.saxonica.com/products/products.xml">Saxon-HE</a>, add <a href="https://mvnrepository.com/artifact/net.sf.saxon/Saxon-HE">net.sf.saxon:Saxon-HE</a> to your build.</li>
628     <li>Set the system property <code>javax.xml.xpath.XPathFactory:jsoup</code> to the implementing classname. E.g.:<br>
629     <code>System.setProperty(W3CDom.XPathFactoryProperty, "net.sf.saxon.xpath.XPathFactoryImpl");</code>
630     </li>
631     </ol>
632
633     @param xpath XPath expression
634     @return matching elements, or an empty list if none match.
635     @see #selectXpath(String, Class)
636     @since 1.14.3
637     */
638    public Elements selectXpath(String xpath) {
639        return new Elements(NodeUtils.selectXpath(xpath, this, Element.class));
640    }
641
642    /**
643     Find Nodes that match the supplied XPath expression.
644     <p>For example, to select TextNodes under {@code p} elements: </p>
645     <pre>List&lt;TextNode&gt; textNodes = doc.selectXpath("//body//p//text()", TextNode.class);</pre>
646     <p>Note that in the jsoup DOM, Attribute objects are not Nodes. To directly select attribute values, do something
647     like:</p>
648     <pre>List&lt;String&gt; hrefs = doc.selectXpath("//a").eachAttr("href");</pre>
649     @param xpath XPath expression
650     @param nodeType the jsoup node type to return
651     @see #selectXpath(String)
652     @return a list of matching nodes
653     @since 1.14.3
654     */
655    public <T extends Node> List<T> selectXpath(String xpath, Class<T> nodeType) {
656        return NodeUtils.selectXpath(xpath, this, nodeType);
657    }
658
659    /**
660     * Insert a node to the end of this Element's children. The incoming node will be re-parented.
661     *
662     * @param child node to add.
663     * @return this Element, for chaining
664     * @see #prependChild(Node)
665     * @see #insertChildren(int, Collection)
666     */
667    public Element appendChild(Node child) {
668        Validate.notNull(child);
669
670        // was - Node#addChildren(child). short-circuits an array create and a loop.
671        reparentChild(child);
672        ensureChildNodes();
673        childNodes.add(child);
674        child.setSiblingIndex(childNodes.size() - 1);
675        return this;
676    }
677
678    /**
679     Insert the given nodes to the end of this Element's children.
680
681     @param children nodes to add
682     @return this Element, for chaining
683     @see #insertChildren(int, Collection)
684     */
685    public Element appendChildren(Collection<? extends Node> children) {
686        insertChildren(-1, children);
687        return this;
688    }
689
690    /**
691     * Add this element to the supplied parent element, as its next child.
692     *
693     * @param parent element to which this element will be appended
694     * @return this element, so that you can continue modifying the element
695     */
696    public Element appendTo(Element parent) {
697        Validate.notNull(parent);
698        parent.appendChild(this);
699        return this;
700    }
701
702    /**
703     * Add a node to the start of this element's children.
704     *
705     * @param child node to add.
706     * @return this element, so that you can add more child nodes or elements.
707     */
708    public Element prependChild(Node child) {
709        Validate.notNull(child);
710
711        addChildren(0, child);
712        return this;
713    }
714
715    /**
716     Insert the given nodes to the start of this Element's children.
717
718     @param children nodes to add
719     @return this Element, for chaining
720     @see #insertChildren(int, Collection)
721     */
722    public Element prependChildren(Collection<? extends Node> children) {
723        insertChildren(0, children);
724        return this;
725    }
726
727
728    /**
729     * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the
730     * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first.
731     *
732     * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
733     * end
734     * @param children child nodes to insert
735     * @return this element, for chaining.
736     */
737    public Element insertChildren(int index, Collection<? extends Node> children) {
738        Validate.notNull(children, "Children collection to be inserted must not be null.");
739        int currentSize = childNodeSize();
740        if (index < 0) index += currentSize +1; // roll around
741        Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds.");
742
743        ArrayList<Node> nodes = new ArrayList<>(children);
744        Node[] nodeArray = nodes.toArray(new Node[0]);
745        addChildren(index, nodeArray);
746        return this;
747    }
748
749    /**
750     * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the
751     * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first.
752     *
753     * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
754     * end
755     * @param children child nodes to insert
756     * @return this element, for chaining.
757     */
758    public Element insertChildren(int index, Node... children) {
759        Validate.notNull(children, "Children collection to be inserted must not be null.");
760        int currentSize = childNodeSize();
761        if (index < 0) index += currentSize +1; // roll around
762        Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds.");
763
764        addChildren(index, children);
765        return this;
766    }
767
768    /**
769     * Create a new element by tag name, and add it as this Element's last child.
770     *
771     * @param tagName the name of the tag (e.g. {@code div}).
772     * @return the new element, to allow you to add content to it, e.g.:
773     *  {@code parent.appendElement("h1").attr("id", "header").text("Welcome");}
774     */
775    public Element appendElement(String tagName) {
776        return appendElement(tagName, tag.namespace());
777    }
778
779    /**
780     * Create a new element by tag name and namespace, add it as this Element's last child.
781     *
782     * @param tagName the name of the tag (e.g. {@code div}).
783     * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml})
784     * @return the new element, in the specified namespace
785     */
786    public Element appendElement(String tagName, String namespace) {
787        Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri());
788        appendChild(child);
789        return child;
790    }
791
792    /**
793     * Create a new element by tag name, and add it as this Element's first child.
794     *
795     * @param tagName the name of the tag (e.g. {@code div}).
796     * @return the new element, to allow you to add content to it, e.g.:
797     *  {@code parent.prependElement("h1").attr("id", "header").text("Welcome");}
798     */
799    public Element prependElement(String tagName) {
800        return prependElement(tagName, tag.namespace());
801    }
802
803    /**
804     * Create a new element by tag name and namespace, and add it as this Element's first child.
805     *
806     * @param tagName the name of the tag (e.g. {@code div}).
807     * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml})
808     * @return the new element, in the specified namespace
809     */
810    public Element prependElement(String tagName, String namespace) {
811        Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri());
812        prependChild(child);
813        return child;
814    }
815
816    /**
817     * Create and append a new TextNode to this element.
818     *
819     * @param text the (un-encoded) text to add
820     * @return this element
821     */
822    public Element appendText(String text) {
823        Validate.notNull(text);
824        TextNode node = new TextNode(text);
825        appendChild(node);
826        return this;
827    }
828
829    /**
830     * Create and prepend a new TextNode to this element.
831     *
832     * @param text the decoded text to add
833     * @return this element
834     */
835    public Element prependText(String text) {
836        Validate.notNull(text);
837        TextNode node = new TextNode(text);
838        prependChild(node);
839        return this;
840    }
841
842    /**
843     * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children.
844     * @param html HTML to add inside this element, after the existing HTML
845     * @return this element
846     * @see #html(String)
847     */
848    public Element append(String html) {
849        Validate.notNull(html);
850        List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri());
851        addChildren(nodes.toArray(new Node[0]));
852        return this;
853    }
854
855    /**
856     * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children.
857     * @param html HTML to add inside this element, before the existing HTML
858     * @return this element
859     * @see #html(String)
860     */
861    public Element prepend(String html) {
862        Validate.notNull(html);
863        List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri());
864        addChildren(0, nodes.toArray(new Node[0]));
865        return this;
866    }
867
868    /**
869     * Insert the specified HTML into the DOM before this element (as a preceding sibling).
870     *
871     * @param html HTML to add before this element
872     * @return this element, for chaining
873     * @see #after(String)
874     */
875    @Override
876    public Element before(String html) {
877        return (Element) super.before(html);
878    }
879
880    /**
881     * Insert the specified node into the DOM before this node (as a preceding sibling).
882     * @param node to add before this element
883     * @return this Element, for chaining
884     * @see #after(Node)
885     */
886    @Override
887    public Element before(Node node) {
888        return (Element) super.before(node);
889    }
890
891    /**
892     * Insert the specified HTML into the DOM after this element (as a following sibling).
893     *
894     * @param html HTML to add after this element
895     * @return this element, for chaining
896     * @see #before(String)
897     */
898    @Override
899    public Element after(String html) {
900        return (Element) super.after(html);
901    }
902
903    /**
904     * Insert the specified node into the DOM after this node (as a following sibling).
905     * @param node to add after this element
906     * @return this element, for chaining
907     * @see #before(Node)
908     */
909    @Override
910    public Element after(Node node) {
911        return (Element) super.after(node);
912    }
913
914    /**
915     * Remove all the element's child nodes. Any attributes are left as-is. Each child node has its parent set to
916     * {@code null}.
917     * @return this element
918     */
919    @Override
920    public Element empty() {
921        // Detach each of the children -> parent links:
922        for (Node child : childNodes) {
923            child.parentNode = null;
924        }
925        childNodes.clear();
926        return this;
927    }
928
929    /**
930     * Wrap the supplied HTML around this element.
931     *
932     * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
933     * @return this element, for chaining.
934     */
935    @Override
936    public Element wrap(String html) {
937        return (Element) super.wrap(html);
938    }
939
940    /**
941     * Get a CSS selector that will uniquely select this element.
942     * <p>
943     * If the element has an ID, returns #id;
944     * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'},
945     * followed by a unique selector for the element (tag.class.class:nth-child(n)).
946     * </p>
947     *
948     * @return the CSS Path that can be used to retrieve the element in a selector.
949     */
950    public String cssSelector() {
951        if (id().length() > 0) {
952            // prefer to return the ID - but check that it's actually unique first!
953            String idSel = "#" + escapeCssIdentifier(id());
954            Document doc = ownerDocument();
955            if (doc != null) {
956                Elements els = doc.select(idSel);
957                if (els.size() == 1 && els.get(0) == this) // otherwise, continue to the nth-child impl
958                    return idSel;
959            } else {
960                return idSel; // no ownerdoc, return the ID selector
961            }
962        }
963
964        StringBuilder selector = StringUtil.borrowBuilder();
965        Element el = this;
966        while (el != null && !(el instanceof Document)) {
967            selector.insert(0, el.cssSelectorComponent());
968            el = el.parent();
969        }
970        return StringUtil.releaseBuilder(selector);
971    }
972
973    private String cssSelectorComponent() {
974        // Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag
975        String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|");
976        StringBuilder selector = StringUtil.borrowBuilder().append(tagName);
977        String classes = classNames().stream().map(TokenQueue::escapeCssIdentifier)
978                .collect(StringUtil.joining("."));
979        if (!classes.isEmpty())
980            selector.append('.').append(classes);
981
982        if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node
983            return StringUtil.releaseBuilder(selector);
984
985        selector.insert(0, " > ");
986        if (parent().select(selector.toString()).size() > 1)
987            selector.append(String.format(
988                ":nth-child(%d)", elementSiblingIndex() + 1));
989
990        return StringUtil.releaseBuilder(selector);
991    }
992
993    /**
994     * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling
995     * of itself, so will not be included in the returned list.
996     * @return sibling elements
997     */
998    public Elements siblingElements() {
999        if (parentNode == null)
1000            return new Elements(0);
1001
1002        List<Element> elements = parent().childElementsList();
1003        Elements siblings = new Elements(elements.size() - 1);
1004        for (Element el: elements)
1005            if (el != this)
1006                siblings.add(el);
1007        return siblings;
1008    }
1009
1010    /**
1011     * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s,
1012     * the {@code nextElementSibling} of the first {@code p} is the second {@code p}.
1013     * <p>
1014     * This is similar to {@link #nextSibling()}, but specifically finds only Elements
1015     * </p>
1016     * @return the next element, or null if there is no next element
1017     * @see #previousElementSibling()
1018     */
1019    public @Nullable Element nextElementSibling() {
1020        Node next = this;
1021        while ((next = next.nextSibling()) != null) {
1022            if (next instanceof Element) return (Element) next;
1023        }
1024        return null;
1025    }
1026
1027    /**
1028     * Get each of the sibling elements that come after this element.
1029     *
1030     * @return each of the element siblings after this element, or an empty list if there are no next sibling elements
1031     */
1032    public Elements nextElementSiblings() {
1033        return nextElementSiblings(true);
1034    }
1035
1036    /**
1037     * Gets the previous element sibling of this element.
1038     * @return the previous element, or null if there is no previous element
1039     * @see #nextElementSibling()
1040     */
1041    public @Nullable Element previousElementSibling() {
1042        Node prev = this;
1043        while ((prev = prev.previousSibling()) != null) {
1044            if (prev instanceof Element) return (Element) prev;
1045        }
1046        return null;
1047    }
1048
1049    /**
1050     * Get each of the element siblings before this element.
1051     *
1052     * @return the previous element siblings, or an empty list if there are none.
1053     */
1054    public Elements previousElementSiblings() {
1055        return nextElementSiblings(false);
1056    }
1057
1058    private Elements nextElementSiblings(boolean next) {
1059        Elements els = new Elements();
1060        if (parentNode == null)
1061            return  els;
1062        els.add(this);
1063        return next ?  els.nextAll() : els.prevAll();
1064    }
1065
1066    /**
1067     * Gets the first Element sibling of this element. That may be this element.
1068     * @return the first sibling that is an element (aka the parent's first element child)
1069     */
1070    public Element firstElementSibling() {
1071        if (parent() != null) {
1072            //noinspection DataFlowIssue (not nullable, would be this is no other sibs)
1073            return parent().firstElementChild();
1074        } else
1075            return this; // orphan is its own first sibling
1076    }
1077
1078    /**
1079     * Get the list index of this element in its element sibling list. I.e. if this is the first element
1080     * sibling, returns 0.
1081     * @return position in element sibling list
1082     */
1083    public int elementSiblingIndex() {
1084       if (parent() == null) return 0;
1085       return indexInList(this, parent().childElementsList());
1086    }
1087
1088    /**
1089     * Gets the last element sibling of this element. That may be this element.
1090     * @return the last sibling that is an element (aka the parent's last element child)
1091     */
1092    public Element lastElementSibling() {
1093        if (parent() != null) {
1094            //noinspection DataFlowIssue (not nullable, would be this if no other sibs)
1095            return parent().lastElementChild();
1096        } else
1097            return this;
1098    }
1099
1100    private static <E extends Element> int indexInList(Element search, List<E> elements) {
1101        final int size = elements.size();
1102        for (int i = 0; i < size; i++) {
1103            if (elements.get(i) == search)
1104                return i;
1105        }
1106        return 0;
1107    }
1108
1109    /**
1110     Gets the first child of this Element that is an Element, or {@code null} if there is none.
1111     @return the first Element child node, or null.
1112     @see #firstChild()
1113     @see #lastElementChild()
1114     @since 1.15.2
1115     */
1116    public @Nullable Element firstElementChild() {
1117        Node child = firstChild();
1118        while (child != null) {
1119            if (child instanceof Element) return (Element) child;
1120            child = child.nextSibling();
1121        }
1122        return null;
1123    }
1124
1125    /**
1126     Gets the last child of this Element that is an Element, or @{code null} if there is none.
1127     @return the last Element child node, or null.
1128     @see #lastChild()
1129     @see #firstElementChild()
1130     @since 1.15.2
1131     */
1132    public @Nullable Element lastElementChild() {
1133        Node child = lastChild();
1134        while (child != null) {
1135            if (child instanceof Element) return (Element) child;
1136            child = child.previousSibling();
1137        }
1138        return null;
1139    }
1140
1141    // DOM type methods
1142
1143    /**
1144     * Finds elements, including and recursively under this element, with the specified tag name.
1145     * @param tagName The tag name to search for (case insensitively).
1146     * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
1147     */
1148    public Elements getElementsByTag(String tagName) {
1149        Validate.notEmpty(tagName);
1150        tagName = normalize(tagName);
1151
1152        return Collector.collect(new Evaluator.Tag(tagName), this);
1153    }
1154
1155    /**
1156     * Find an element by ID, including or under this element.
1157     * <p>
1158     * Note that this finds the first matching ID, starting with this element. If you search down from a different
1159     * starting point, it is possible to find a different element by ID. For unique element by ID within a Document,
1160     * use {@link Document#getElementById(String)}
1161     * @param id The ID to search for.
1162     * @return The first matching element by ID, starting with this element, or null if none found.
1163     */
1164    public @Nullable Element getElementById(String id) {
1165        Validate.notEmpty(id);
1166        return Collector.findFirst(new Evaluator.Id(id), this);
1167    }
1168
1169    /**
1170     * Find elements that have this class, including or under this element. Case-insensitive.
1171     * <p>
1172     * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method
1173     * checks each class, so you can find the above with {@code el.getElementsByClass("header");}.
1174     *
1175     * @param className the name of the class to search for.
1176     * @return elements with the supplied class name, empty if none
1177     * @see #hasClass(String)
1178     * @see #classNames()
1179     */
1180    public Elements getElementsByClass(String className) {
1181        Validate.notEmpty(className);
1182
1183        return Collector.collect(new Evaluator.Class(className), this);
1184    }
1185
1186    /**
1187     * Find elements that have a named attribute set. Case-insensitive.
1188     *
1189     * @param key name of the attribute, e.g. {@code href}
1190     * @return elements that have this attribute, empty if none
1191     */
1192    public Elements getElementsByAttribute(String key) {
1193        Validate.notEmpty(key);
1194        key = key.trim();
1195
1196        return Collector.collect(new Evaluator.Attribute(key), this);
1197    }
1198
1199    /**
1200     * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements
1201     * that have HTML5 datasets.
1202     * @param keyPrefix name prefix of the attribute e.g. {@code data-}
1203     * @return elements that have attribute names that start with the prefix, empty if none.
1204     */
1205    public Elements getElementsByAttributeStarting(String keyPrefix) {
1206        Validate.notEmpty(keyPrefix);
1207        keyPrefix = keyPrefix.trim();
1208
1209        return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this);
1210    }
1211
1212    /**
1213     * Find elements that have an attribute with the specific value. Case-insensitive.
1214     *
1215     * @param key name of the attribute
1216     * @param value value of the attribute
1217     * @return elements that have this attribute with this value, empty if none
1218     */
1219    public Elements getElementsByAttributeValue(String key, String value) {
1220        return Collector.collect(new Evaluator.AttributeWithValue(key, value), this);
1221    }
1222
1223    /**
1224     * Find elements that either do not have this attribute, or have it with a different value. Case-insensitive.
1225     *
1226     * @param key name of the attribute
1227     * @param value value of the attribute
1228     * @return elements that do not have a matching attribute
1229     */
1230    public Elements getElementsByAttributeValueNot(String key, String value) {
1231        return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this);
1232    }
1233
1234    /**
1235     * Find elements that have attributes that start with the value prefix. Case-insensitive.
1236     *
1237     * @param key name of the attribute
1238     * @param valuePrefix start of attribute value
1239     * @return elements that have attributes that start with the value prefix
1240     */
1241    public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) {
1242        return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this);
1243    }
1244
1245    /**
1246     * Find elements that have attributes that end with the value suffix. Case-insensitive.
1247     *
1248     * @param key name of the attribute
1249     * @param valueSuffix end of the attribute value
1250     * @return elements that have attributes that end with the value suffix
1251     */
1252    public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) {
1253        return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this);
1254    }
1255
1256    /**
1257     * Find elements that have attributes whose value contains the match string. Case-insensitive.
1258     *
1259     * @param key name of the attribute
1260     * @param match substring of value to search for
1261     * @return elements that have attributes containing this text
1262     */
1263    public Elements getElementsByAttributeValueContaining(String key, String match) {
1264        return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this);
1265    }
1266
1267    /**
1268     * Find elements that have an attribute whose value matches the supplied regular expression.
1269     * @param key name of the attribute
1270     * @param pattern compiled regular expression to match against attribute values
1271     * @return elements that have attributes matching this regular expression
1272     */
1273    public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) {
1274        return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this);
1275
1276    }
1277
1278    /**
1279     * Find elements that have attributes whose values match the supplied regular expression.
1280     * @param key name of the attribute
1281     * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options.
1282     * @return elements that have attributes matching this regular expression
1283     */
1284    public Elements getElementsByAttributeValueMatching(String key, String regex) {
1285        Pattern pattern;
1286        try {
1287            pattern = Pattern.compile(regex);
1288        } catch (PatternSyntaxException e) {
1289            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1290        }
1291        return getElementsByAttributeValueMatching(key, pattern);
1292    }
1293
1294    /**
1295     * Find elements whose sibling index is less than the supplied index.
1296     * @param index 0-based index
1297     * @return elements less than index
1298     */
1299    public Elements getElementsByIndexLessThan(int index) {
1300        return Collector.collect(new Evaluator.IndexLessThan(index), this);
1301    }
1302
1303    /**
1304     * Find elements whose sibling index is greater than the supplied index.
1305     * @param index 0-based index
1306     * @return elements greater than index
1307     */
1308    public Elements getElementsByIndexGreaterThan(int index) {
1309        return Collector.collect(new Evaluator.IndexGreaterThan(index), this);
1310    }
1311
1312    /**
1313     * Find elements whose sibling index is equal to the supplied index.
1314     * @param index 0-based index
1315     * @return elements equal to index
1316     */
1317    public Elements getElementsByIndexEquals(int index) {
1318        return Collector.collect(new Evaluator.IndexEquals(index), this);
1319    }
1320
1321    /**
1322     * Find elements that contain the specified string. The search is case-insensitive. The text may appear directly
1323     * in the element, or in any of its descendants.
1324     * @param searchText to look for in the element's text
1325     * @return elements that contain the string, case-insensitive.
1326     * @see Element#text()
1327     */
1328    public Elements getElementsContainingText(String searchText) {
1329        return Collector.collect(new Evaluator.ContainsText(searchText), this);
1330    }
1331
1332    /**
1333     * Find elements that directly contain the specified string. The search is case-insensitive. The text must appear directly
1334     * in the element, not in any of its descendants.
1335     * @param searchText to look for in the element's own text
1336     * @return elements that contain the string, case-insensitive.
1337     * @see Element#ownText()
1338     */
1339    public Elements getElementsContainingOwnText(String searchText) {
1340        return Collector.collect(new Evaluator.ContainsOwnText(searchText), this);
1341    }
1342
1343    /**
1344     * Find elements whose text matches the supplied regular expression.
1345     * @param pattern regular expression to match text against
1346     * @return elements matching the supplied regular expression.
1347     * @see Element#text()
1348     */
1349    public Elements getElementsMatchingText(Pattern pattern) {
1350        return Collector.collect(new Evaluator.Matches(pattern), this);
1351    }
1352
1353    /**
1354     * Find elements whose text matches the supplied regular expression.
1355     * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options.
1356     * @return elements matching the supplied regular expression.
1357     * @see Element#text()
1358     */
1359    public Elements getElementsMatchingText(String regex) {
1360        Pattern pattern;
1361        try {
1362            pattern = Pattern.compile(regex);
1363        } catch (PatternSyntaxException e) {
1364            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1365        }
1366        return getElementsMatchingText(pattern);
1367    }
1368
1369    /**
1370     * Find elements whose own text matches the supplied regular expression.
1371     * @param pattern regular expression to match text against
1372     * @return elements matching the supplied regular expression.
1373     * @see Element#ownText()
1374     */
1375    public Elements getElementsMatchingOwnText(Pattern pattern) {
1376        return Collector.collect(new Evaluator.MatchesOwn(pattern), this);
1377    }
1378
1379    /**
1380     * Find elements whose own text matches the supplied regular expression.
1381     * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options.
1382     * @return elements matching the supplied regular expression.
1383     * @see Element#ownText()
1384     */
1385    public Elements getElementsMatchingOwnText(String regex) {
1386        Pattern pattern;
1387        try {
1388            pattern = Pattern.compile(regex);
1389        } catch (PatternSyntaxException e) {
1390            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1391        }
1392        return getElementsMatchingOwnText(pattern);
1393    }
1394
1395    /**
1396     * Find all elements under this element (including self, and children of children).
1397     *
1398     * @return all elements
1399     */
1400    public Elements getAllElements() {
1401        return Collector.collect(new Evaluator.AllElements(), this);
1402    }
1403
1404    /**
1405     Gets the <b>normalized, combined text</b> of this element and all its children. Whitespace is normalized and
1406     trimmed.
1407     <p>For example, given HTML {@code <p>Hello  <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there
1408    now!"}
1409     <p>If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not
1410     children), use {@link #ownText()}
1411     <p>Note that this method returns the textual content that would be presented to a reader. The contents of data
1412     nodes (such as {@code <script>} tags) are not considered text. Use {@link #data()} or {@link #html()} to retrieve
1413     that content.
1414
1415     @return decoded, normalized text, or empty string if none.
1416     @see #wholeText()
1417     @see #ownText()
1418     @see #textNodes()
1419     */
1420    public String text() {
1421        final StringBuilder accum = StringUtil.borrowBuilder();
1422        NodeTraversor.traverse(new TextAccumulator(accum), this);
1423        return StringUtil.releaseBuilder(accum).trim();
1424    }
1425
1426    private static class TextAccumulator implements NodeVisitor {
1427        private final StringBuilder accum;
1428
1429        public TextAccumulator(StringBuilder accum) {
1430            this.accum = accum;
1431        }
1432
1433        @Override public void head(Node node, int depth) {
1434            if (node instanceof TextNode) {
1435                TextNode textNode = (TextNode) node;
1436                appendNormalisedText(accum, textNode);
1437            } else if (node instanceof Element) {
1438                Element element = (Element) node;
1439                if (accum.length() > 0 &&
1440                    (element.isBlock() || element.nameIs("br")) &&
1441                    !lastCharIsWhitespace(accum))
1442                    accum.append(' ');
1443            }
1444        }
1445
1446        @Override public void tail(Node node, int depth) {
1447            // make sure there is a space between block tags and immediately following text nodes or inline elements <div>One</div>Two should be "One Two".
1448            if (node instanceof Element) {
1449                Element element = (Element) node;
1450                Node next = node.nextSibling();
1451                if (element.isBlock() && (next instanceof TextNode || next instanceof Element && !((Element) next).tag.formatAsBlock()) && !lastCharIsWhitespace(accum))
1452                    accum.append(' ');
1453            }
1454
1455        }
1456    }
1457
1458    /**
1459     Get the non-normalized, decoded text of this element and its children, including only any newlines and spaces
1460     present in the original source.
1461     @return decoded, non-normalized text
1462     @see #text()
1463     @see #wholeOwnText()
1464     */
1465    public String wholeText() {
1466        return wholeTextOf(nodeStream());
1467    }
1468
1469    private static String wholeTextOf(Stream<Node> stream) {
1470        return stream.map(node -> {
1471            if (node instanceof TextNode) return ((TextNode) node).getWholeText();
1472            if (node.nameIs("br")) return "\n";
1473            return "";
1474        }).collect(StringUtil.joining(""));
1475    }
1476
1477    /**
1478     Get the non-normalized, decoded text of this element, <b>not including</b> any child elements, including any
1479     newlines and spaces present in the original source.
1480     @return decoded, non-normalized text that is a direct child of this Element
1481     @see #text()
1482     @see #wholeText()
1483     @see #ownText()
1484     @since 1.15.1
1485     */
1486    public String wholeOwnText() {
1487        return wholeTextOf(childNodes.stream());
1488    }
1489
1490    /**
1491     * Gets the (normalized) text owned by this element only; does not get the combined text of all children.
1492     * <p>
1493     * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"},
1494     * whereas {@code p.text()} returns {@code "Hello there now!"}.
1495     * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element.
1496     *
1497     * @return decoded text, or empty string if none.
1498     * @see #text()
1499     * @see #textNodes()
1500     */
1501    public String ownText() {
1502        StringBuilder sb = StringUtil.borrowBuilder();
1503        ownText(sb);
1504        return StringUtil.releaseBuilder(sb).trim();
1505    }
1506
1507    private void ownText(StringBuilder accum) {
1508        for (int i = 0; i < childNodeSize(); i++) {
1509            Node child = childNodes.get(i);
1510            if (child instanceof TextNode) {
1511                TextNode textNode = (TextNode) child;
1512                appendNormalisedText(accum, textNode);
1513            } else if (child.nameIs("br") && !lastCharIsWhitespace(accum)) {
1514                accum.append(" ");
1515            }
1516        }
1517    }
1518
1519    private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
1520        String text = textNode.getWholeText();
1521        if (preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode)
1522            accum.append(text);
1523        else
1524            StringUtil.appendNormalisedWhitespace(accum, text, lastCharIsWhitespace(accum));
1525    }
1526
1527    static boolean preserveWhitespace(@Nullable Node node) {
1528        // looks only at this element and five levels up, to prevent recursion & needless stack searches
1529        if (node instanceof Element) {
1530            Element el = (Element) node;
1531            int i = 0;
1532            do {
1533                if (el.tag.preserveWhitespace())
1534                    return true;
1535                el = el.parent();
1536                i++;
1537            } while (i < 6 && el != null);
1538        }
1539        return false;
1540    }
1541
1542    /**
1543     * Set the text of this element. Any existing contents (text or elements) will be cleared.
1544     * <p>As a special case, for {@code <script>} and {@code <style>} tags, the input text will be treated as data,
1545     * not visible text.</p>
1546     * @param text decoded text
1547     * @return this element
1548     */
1549    public Element text(String text) {
1550        Validate.notNull(text);
1551        empty();
1552        // special case for script/style in HTML: should be data node
1553        Document owner = ownerDocument();
1554        // an alternate impl would be to run through the parser
1555        if (owner != null && owner.parser().isContentForTagData(normalName()))
1556            appendChild(new DataNode(text));
1557        else
1558            appendChild(new TextNode(text));
1559
1560        return this;
1561    }
1562
1563    /**
1564     Checks if the current element or any of its child elements contain non-whitespace text.
1565     @return {@code true} if the element has non-blank text content, {@code false} otherwise.
1566     */
1567    public boolean hasText() {
1568        AtomicBoolean hasText = new AtomicBoolean(false);
1569        filter((node, depth) -> {
1570            if (node instanceof TextNode) {
1571                TextNode textNode = (TextNode) node;
1572                if (!textNode.isBlank()) {
1573                    hasText.set(true);
1574                    return NodeFilter.FilterResult.STOP;
1575                }
1576            }
1577            return NodeFilter.FilterResult.CONTINUE;
1578        });
1579        return hasText.get();
1580    }
1581
1582    /**
1583     * Get the combined data of this element. Data is e.g. the inside of a {@code <script>} tag. Note that data is NOT the
1584     * text of the element. Use {@link #text()} to get the text that would be visible to a user, and {@code data()}
1585     * for the contents of scripts, comments, CSS styles, etc.
1586     *
1587     * @return the data, or empty string if none
1588     *
1589     * @see #dataNodes()
1590     */
1591    public String data() {
1592        StringBuilder sb = StringUtil.borrowBuilder();
1593        traverse((childNode, depth) -> {
1594            if (childNode instanceof DataNode) {
1595                DataNode data = (DataNode) childNode;
1596                sb.append(data.getWholeData());
1597            } else if (childNode instanceof Comment) {
1598                Comment comment = (Comment) childNode;
1599                sb.append(comment.getData());
1600            } else if (childNode instanceof CDataNode) {
1601                // this shouldn't really happen because the html parser won't see the cdata as anything special when parsing script.
1602                // but in case another type gets through.
1603                CDataNode cDataNode = (CDataNode) childNode;
1604                sb.append(cDataNode.getWholeText());
1605            }
1606        });
1607        return StringUtil.releaseBuilder(sb);
1608    }
1609
1610    /**
1611     * Gets the literal value of this element's "class" attribute, which may include multiple class names, space
1612     * separated. (E.g. on <code>&lt;div class="header gray"&gt;</code> returns, "<code>header gray</code>")
1613     * @return The literal class attribute, or <b>empty string</b> if no class attribute set.
1614     */
1615    public String className() {
1616        return attr("class").trim();
1617    }
1618
1619    /**
1620     * Get each of the element's class names. E.g. on element {@code <div class="header gray">},
1621     * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to
1622     * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them.
1623     * @return set of classnames, empty if no class attribute
1624     */
1625    public Set<String> classNames() {
1626        String[] names = ClassSplit.split(className());
1627        Set<String> classNames = new LinkedHashSet<>(Arrays.asList(names));
1628        classNames.remove(""); // if classNames() was empty, would include an empty class
1629
1630        return classNames;
1631    }
1632
1633    /**
1634     Set the element's {@code class} attribute to the supplied class names.
1635     @param classNames set of classes
1636     @return this element, for chaining
1637     */
1638    public Element classNames(Set<String> classNames) {
1639        Validate.notNull(classNames);
1640        if (classNames.isEmpty()) {
1641            attributes().remove("class");
1642        } else {
1643            attributes().put("class", StringUtil.join(classNames, " "));
1644        }
1645        return this;
1646    }
1647
1648    /**
1649     * Tests if this element has a class. Case-insensitive.
1650     * @param className name of class to check for
1651     * @return true if it does, false if not
1652     */
1653    // performance sensitive
1654    public boolean hasClass(String className) {
1655        if (attributes == null)
1656            return false;
1657
1658        final String classAttr = attributes.getIgnoreCase("class");
1659        final int len = classAttr.length();
1660        final int wantLen = className.length();
1661
1662        if (len == 0 || len < wantLen) {
1663            return false;
1664        }
1665
1666        // if both lengths are equal, only need compare the className with the attribute
1667        if (len == wantLen) {
1668            return className.equalsIgnoreCase(classAttr);
1669        }
1670
1671        // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations)
1672        boolean inClass = false;
1673        int start = 0;
1674        for (int i = 0; i < len; i++) {
1675            if (Character.isWhitespace(classAttr.charAt(i))) {
1676                if (inClass) {
1677                    // white space ends a class name, compare it with the requested one, ignore case
1678                    if (i - start == wantLen && classAttr.regionMatches(true, start, className, 0, wantLen)) {
1679                        return true;
1680                    }
1681                    inClass = false;
1682                }
1683            } else {
1684                if (!inClass) {
1685                    // we're in a class name : keep the start of the substring
1686                    inClass = true;
1687                    start = i;
1688                }
1689            }
1690        }
1691
1692        // check the last entry
1693        if (inClass && len - start == wantLen) {
1694            return classAttr.regionMatches(true, start, className, 0, wantLen);
1695        }
1696
1697        return false;
1698    }
1699
1700    /**
1701     Add a class name to this element's {@code class} attribute.
1702     @param className class name to add
1703     @return this element
1704     */
1705    public Element addClass(String className) {
1706        Validate.notNull(className);
1707
1708        Set<String> classes = classNames();
1709        classes.add(className);
1710        classNames(classes);
1711
1712        return this;
1713    }
1714
1715    /**
1716     Remove a class name from this element's {@code class} attribute.
1717     @param className class name to remove
1718     @return this element
1719     */
1720    public Element removeClass(String className) {
1721        Validate.notNull(className);
1722
1723        Set<String> classes = classNames();
1724        classes.remove(className);
1725        classNames(classes);
1726
1727        return this;
1728    }
1729
1730    /**
1731     Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it.
1732     @param className class name to toggle
1733     @return this element
1734     */
1735    public Element toggleClass(String className) {
1736        Validate.notNull(className);
1737
1738        Set<String> classes = classNames();
1739        if (classes.contains(className))
1740            classes.remove(className);
1741        else
1742            classes.add(className);
1743        classNames(classes);
1744
1745        return this;
1746    }
1747
1748    /**
1749     * Get the value of a form element (input, textarea, etc).
1750     * @return the value of the form element, or empty string if not set.
1751     */
1752    public String val() {
1753        if (elementIs("textarea", NamespaceHtml))
1754            return text();
1755        else
1756            return attr("value");
1757    }
1758
1759    /**
1760     * Set the value of a form element (input, textarea, etc).
1761     * @param value value to set
1762     * @return this element (for chaining)
1763     */
1764    public Element val(String value) {
1765        if (elementIs("textarea", NamespaceHtml))
1766            text(value);
1767        else
1768            attr("value", value);
1769        return this;
1770    }
1771
1772    /**
1773     Get the source range (start and end positions) of the end (closing) tag for this Element. Position tracking must be
1774     enabled prior to parsing the content.
1775     @return the range of the closing tag for this element, or {@code untracked} if its range was not tracked.
1776     @see org.jsoup.parser.Parser#setTrackPosition(boolean)
1777     @see Node#sourceRange()
1778     @see Range#isImplicit()
1779     @since 1.15.2
1780     */
1781    public Range endSourceRange() {
1782        return Range.of(this, false);
1783    }
1784
1785    boolean shouldIndent(final Document.OutputSettings out) {
1786        return out.prettyPrint() && isFormatAsBlock(out) && !isInlineable(out) && !preserveWhitespace(parentNode);
1787    }
1788
1789    @Override
1790    void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException {
1791        if (shouldIndent(out)) {
1792            if (accum instanceof StringBuilder) {
1793                if (((StringBuilder) accum).length() > 0)
1794                    indent(accum, depth, out);
1795            } else {
1796                indent(accum, depth, out);
1797            }
1798        }
1799        accum.append('<').append(safeTagName(out.syntax()));
1800        if (attributes != null) attributes.html(accum, out);
1801
1802        // selfclosing includes unknown tags, isEmpty defines tags that are always empty
1803        if (childNodes.isEmpty() && tag.isSelfClosing()) {
1804            if (out.syntax() == html && tag.isEmpty())
1805                accum.append('>');
1806            else
1807                accum.append(" />"); // <img> in html, <img /> in xml
1808        }
1809        else
1810            accum.append('>');
1811    }
1812
1813    @Override
1814    void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
1815        if (!(childNodes.isEmpty() && tag.isSelfClosing())) {
1816            if (out.prettyPrint() && (!childNodes.isEmpty() && (
1817                (tag.formatAsBlock() && !preserveWhitespace(parentNode)) ||
1818                    (out.outline() && (childNodes.size()>1 || (childNodes.size()==1 && (childNodes.get(0) instanceof Element))))
1819            )))
1820                indent(accum, depth, out);
1821            accum.append("</").append(safeTagName(out.syntax())).append('>');
1822        }
1823    }
1824
1825    /* If XML syntax, normalizes < to _ in tag name. */
1826    private String safeTagName(Document.OutputSettings.Syntax syntax) {
1827        return syntax == xml ? Normalizer.xmlSafeTagName(tagName()) : tagName();
1828    }
1829
1830    /**
1831     * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return
1832     * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.)
1833     *
1834     * @return String of HTML.
1835     * @see #outerHtml()
1836     */
1837    public String html() {
1838        StringBuilder accum = StringUtil.borrowBuilder();
1839        html(accum);
1840        String html = StringUtil.releaseBuilder(accum);
1841        return NodeUtils.outputSettings(this).prettyPrint() ? html.trim() : html;
1842    }
1843
1844    @Override
1845    public <T extends Appendable> T html(T appendable) {
1846        final int size = childNodes.size();
1847        for (int i = 0; i < size; i++)
1848            childNodes.get(i).outerHtml(appendable);
1849
1850        return appendable;
1851    }
1852
1853    /**
1854     * Set this element's inner HTML. Clears the existing HTML first.
1855     * @param html HTML to parse and set into this element
1856     * @return this element
1857     * @see #append(String)
1858     */
1859    public Element html(String html) {
1860        empty();
1861        append(html);
1862        return this;
1863    }
1864
1865    @Override
1866    public Element clone() {
1867        return (Element) super.clone();
1868    }
1869
1870    @Override
1871    public Element shallowClone() {
1872        // simpler than implementing a clone version with no child copy
1873        String baseUri = baseUri();
1874        if (baseUri.isEmpty()) baseUri = null; // saves setting a blank internal attribute
1875        return new Element(tag, baseUri, attributes == null ? null : attributes.clone());
1876    }
1877
1878    @Override
1879    protected Element doClone(@Nullable Node parent) {
1880        Element clone = (Element) super.doClone(parent);
1881        clone.attributes = attributes != null ? attributes.clone() : null;
1882        clone.childNodes = new NodeList(clone, childNodes.size());
1883        clone.childNodes.addAll(childNodes); // the children then get iterated and cloned in Node.clone
1884
1885        return clone;
1886    }
1887
1888    // overrides of Node for call chaining
1889    @Override
1890    public Element clearAttributes() {
1891        if (attributes != null) {
1892            super.clearAttributes(); // keeps internal attributes via iterator
1893            if (attributes.size() == 0)
1894                attributes = null; // only remove entirely if no internal attributes
1895        }
1896
1897        return this;
1898    }
1899
1900    @Override
1901    public Element removeAttr(String attributeKey) {
1902        return (Element) super.removeAttr(attributeKey);
1903    }
1904
1905    @Override
1906    public Element root() {
1907        return (Element) super.root(); // probably a document, but always at least an element
1908    }
1909
1910    @Override
1911    public Element traverse(NodeVisitor nodeVisitor) {
1912        return (Element) super.traverse(nodeVisitor);
1913    }
1914
1915    @Override
1916    public Element forEachNode(Consumer<? super Node> action) {
1917        return (Element) super.forEachNode(action);
1918    }
1919
1920    /**
1921     Perform the supplied action on this Element and each of its descendant Elements, during a depth-first traversal.
1922     Elements may be inspected, changed, added, replaced, or removed.
1923     @param action the function to perform on the element
1924     @see Node#forEachNode(Consumer)
1925     */
1926    @Override
1927    public void forEach(Consumer<? super Element> action) {
1928        stream().forEach(action);
1929    }
1930
1931    /**
1932     Returns an Iterator that iterates this Element and each of its descendant Elements, in document order.
1933     @return an Iterator
1934     */
1935    @Override
1936    public Iterator<Element> iterator() {
1937        return new NodeIterator<>(this, Element.class);
1938    }
1939
1940    @Override
1941    public Element filter(NodeFilter nodeFilter) {
1942        return  (Element) super.filter(nodeFilter);
1943    }
1944
1945    private static final class NodeList extends ChangeNotifyingArrayList<Node> {
1946        private final Element owner;
1947
1948        NodeList(Element owner, int initialCapacity) {
1949            super(initialCapacity);
1950            this.owner = owner;
1951        }
1952
1953        @Override public void onContentsChanged() {
1954            owner.nodelistChanged();
1955        }
1956    }
1957
1958    private boolean isFormatAsBlock(Document.OutputSettings out) {
1959        return tag.isBlock() || (parent() != null && parent().tag().formatAsBlock()) || out.outline();
1960    }
1961
1962    private boolean isInlineable(Document.OutputSettings out) {
1963        if (!tag.isInline())
1964            return false;
1965        return (parent() == null || parent().isBlock())
1966            && !isEffectivelyFirst()
1967            && !out.outline()
1968            && !nameIs("br");
1969    }
1970}