001package org.jsoup.nodes;
002
003import org.jsoup.helper.ChangeNotifyingArrayList;
004import org.jsoup.helper.Validate;
005import org.jsoup.internal.StringUtil;
006import org.jsoup.parser.ParseSettings;
007import org.jsoup.parser.Parser;
008import org.jsoup.parser.Tag;
009import org.jsoup.parser.TokenQueue;
010import org.jsoup.select.Collector;
011import org.jsoup.select.Elements;
012import org.jsoup.select.Evaluator;
013import org.jsoup.select.NodeFilter;
014import org.jsoup.select.NodeTraversor;
015import org.jsoup.select.NodeVisitor;
016import org.jsoup.select.QueryParser;
017import org.jsoup.select.Selector;
018import org.jspecify.annotations.Nullable;
019
020import java.io.IOException;
021import java.lang.ref.WeakReference;
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.Collection;
025import java.util.Collections;
026import java.util.LinkedHashSet;
027import java.util.List;
028import java.util.Map;
029import java.util.Set;
030import java.util.concurrent.atomic.AtomicBoolean;
031import java.util.function.Consumer;
032import java.util.regex.Pattern;
033import java.util.regex.PatternSyntaxException;
034import java.util.stream.Collectors;
035import java.util.stream.Stream;
036
037import static org.jsoup.internal.Normalizer.normalize;
038import static org.jsoup.nodes.TextNode.lastCharIsWhitespace;
039import static org.jsoup.parser.Parser.NamespaceHtml;
040import static org.jsoup.parser.TokenQueue.escapeCssIdentifier;
041
042/**
043 An HTML Element consists of a tag name, attributes, and child nodes (including text nodes and other elements).
044 <p>
045 From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
046*/
047public class Element extends Node {
    // Shared empty list handed back by childElementsList() when this element has no child nodes.
    private static final List<Element> EmptyChildren = Collections.emptyList();
    // Pre-compiled pattern matching a run of one or more whitespace characters.
    private static final Pattern ClassSplit = Pattern.compile("\\s+");
    // Internal attribute key under which an element's base URI is stored (see baseUri() / doSetBaseUri()).
    private static final String BaseUriKey = Attributes.internalKey("baseUri");
    // The tag of this element; reassigned when the element is renamed via tagName(String, String).
    private Tag tag;
    private @Nullable WeakReference<List<Element>> shadowChildrenRef; // points to child elements shadowed from node children
    // Child nodes of this element; holds the EmptyNodes placeholder until ensureChildNodes() allocates a real list.
    List<Node> childNodes;
    @Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null
055
056    /**
057     * Create a new, standalone element, in the specified namespace.
058     * @param tag tag name
059     * @param namespace namespace for this element
060     */
061    public Element(String tag, String namespace) {
062        this(Tag.valueOf(tag, namespace, ParseSettings.preserveCase), null);
063    }
064
065    /**
066     * Create a new, standalone element, in the HTML namespace.
067     * @param tag tag name
068     * @see #Element(String tag, String namespace)
069     */
070    public Element(String tag) {
071        this(Tag.valueOf(tag, Parser.NamespaceHtml, ParseSettings.preserveCase), "", null);
072    }
073
074    /**
075     * Create a new, standalone Element. (Standalone in that it has no parent.)
076     *
077     * @param tag tag of this element
078     * @param baseUri the base URI (optional, may be null to inherit from parent, or "" to clear parent's)
079     * @param attributes initial attributes (optional, may be null)
080     * @see #appendChild(Node)
081     * @see #appendElement(String)
082     */
083    public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes) {
084        Validate.notNull(tag);
085        childNodes = EmptyNodes;
086        this.attributes = attributes;
087        this.tag = tag;
088        if (baseUri != null)
089            this.setBaseUri(baseUri);
090    }
091
092    /**
093     * Create a new Element from a Tag and a base URI.
094     *
095     * @param tag element tag
096     * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any.
097     * @see Tag#valueOf(String, ParseSettings)
098     */
099    public Element(Tag tag, @Nullable String baseUri) {
100        this(tag, baseUri, null);
101    }
102
103    /**
104     Internal test to check if a nodelist object has been created.
105     */
106    protected boolean hasChildNodes() {
107        return childNodes != EmptyNodes;
108    }
109
110    protected List<Node> ensureChildNodes() {
111        if (childNodes == EmptyNodes) {
112            childNodes = new NodeList(this, 4);
113        }
114        return childNodes;
115    }
116
117    @Override
118    protected boolean hasAttributes() {
119        return attributes != null;
120    }
121
122    @Override
123    public Attributes attributes() {
124        if (attributes == null) // not using hasAttributes, as doesn't clear warning
125            attributes = new Attributes();
126        return attributes;
127    }
128
129    @Override
130    public String baseUri() {
131        return searchUpForAttribute(this, BaseUriKey);
132    }
133
134    private static String searchUpForAttribute(final Element start, final String key) {
135        Element el = start;
136        while (el != null) {
137            if (el.attributes != null && el.attributes.hasKey(key))
138                return el.attributes.get(key);
139            el = el.parent();
140        }
141        return "";
142    }
143
144    @Override
145    protected void doSetBaseUri(String baseUri) {
146        attributes().put(BaseUriKey, baseUri);
147    }
148
149    @Override
150    public int childNodeSize() {
151        return childNodes.size();
152    }
153
154    @Override
155    public String nodeName() {
156        return tag.getName();
157    }
158
159    /**
160     * Get the name of the tag for this element. E.g. {@code div}. If you are using {@link ParseSettings#preserveCase
161     * case preserving parsing}, this will return the source's original case.
162     *
163     * @return the tag name
164     */
165    public String tagName() {
166        return tag.getName();
167    }
168
169    /**
170     * Get the normalized name of this Element's tag. This will always be the lower-cased version of the tag, regardless
171     * of the tag case preserving setting of the parser. For e.g., {@code <DIV>} and {@code <div>} both have a
172     * normal name of {@code div}.
173     * @return normal name
174     */
175    @Override
176    public String normalName() {
177        return tag.normalName();
178    }
179
180    /**
181     Test if this Element has the specified normalized name, and is in the specified namespace.
182     * @param normalName a normalized element name (e.g. {@code div}).
183     * @param namespace the namespace
184     * @return true if the element's normal name matches exactly, and is in the specified namespace
185     * @since 1.17.2
186     */
187    public boolean elementIs(String normalName, String namespace) {
188        return tag.normalName().equals(normalName) && tag.namespace().equals(namespace);
189    }
190
191    /**
192     * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
193     * {@code el.tagName("div");}.
194     *
195     * @param tagName new tag name for this element
196     * @return this element, for chaining
197     * @see Elements#tagName(String)
198     */
199    public Element tagName(String tagName) {
200        return tagName(tagName, tag.namespace());
201    }
202
203    /**
204     * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
205     * {@code el.tagName("div");}.
206     *
207     * @param tagName new tag name for this element
208     * @param namespace the new namespace for this element
209     * @return this element, for chaining
210     * @see Elements#tagName(String)
211     */
212    public Element tagName(String tagName, String namespace) {
213        Validate.notEmptyParam(tagName, "tagName");
214        Validate.notEmptyParam(namespace, "namespace");
215        tag = Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()); // maintains the case option of the original parse
216        return this;
217    }
218
219    /**
220     * Get the Tag for this element.
221     *
222     * @return the tag object
223     */
224    public Tag tag() {
225        return tag;
226    }
227
228    /**
229     * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element
230     * {@code <span> == false}).
231     *
232     * @return true if block, false if not (and thus inline)
233     */
234    public boolean isBlock() {
235        return tag.isBlock();
236    }
237
238    /**
239     * Get the {@code id} attribute of this element.
240     *
241     * @return The id attribute, if present, or an empty string if not.
242     */
243    public String id() {
244        return attributes != null ? attributes.getIgnoreCase("id") :"";
245    }
246
247    /**
248     Set the {@code id} attribute of this element.
249     @param id the ID value to use
250     @return this Element, for chaining
251     */
252    public Element id(String id) {
253        Validate.notNull(id);
254        attr("id", id);
255        return this;
256    }
257
258    /**
259     * Set an attribute value on this element. If this element already has an attribute with the
260     * key, its value is updated; otherwise, a new attribute is added.
261     *
262     * @return this element
263     */
264    public Element attr(String attributeKey, String attributeValue) {
265        super.attr(attributeKey, attributeValue);
266        return this;
267    }
268
269    /**
270     * Set a boolean attribute value on this element. Setting to <code>true</code> sets the attribute value to "" and
271     * marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the attribute
272     * with the same key if it exists.
273     *
274     * @param attributeKey the attribute key
275     * @param attributeValue the attribute value
276     *
277     * @return this element
278     */
279    public Element attr(String attributeKey, boolean attributeValue) {
280        attributes().put(attributeKey, attributeValue);
281        return this;
282    }
283
284    /**
285     Get an Attribute by key. Changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc
286     will cascade back to this Element.
287     @param key the (case-sensitive) attribute key
288     @return the Attribute for this key, or null if not present.
289     @since 1.17.2
290     */
291    public Attribute attribute(String key) {
292        return hasAttributes() ? attributes().attribute(key) : null;
293    }
294
295    /**
296     * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
297     * starting with "data-" is included the dataset.
298     * <p>
299     * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
300     * {@code package=jsoup, language=java}.
301     * <p>
302     * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected
303     * in the other map.
304     * <p>
305     * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
306     * @return a map of {@code key=value} custom data attributes.
307     */
308    public Map<String, String> dataset() {
309        return attributes().dataset();
310    }
311
312    @Override @Nullable
313    public final Element parent() {
314        return (Element) parentNode;
315    }
316
317    /**
318     * Get this element's parent and ancestors, up to the document root.
319     * @return this element's stack of parents, starting with the closest first.
320     */
321    public Elements parents() {
322        Elements parents = new Elements();
323        Element parent = this.parent();
324        while (parent != null && !parent.nameIs("#root")) {
325            parents.add(parent);
326            parent = parent.parent();
327        }
328        return parents;
329    }
330
331    /**
332     * Get a child element of this element, by its 0-based index number.
333     * <p>
334     * Note that an element can have both mixed Nodes and Elements as children. This method inspects
335     * a filtered list of children that are elements, and the index is based on that filtered list.
336     * </p>
337     *
338     * @param index the index number of the element to retrieve
339     * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException}
340     * @see #childNode(int)
341     */
342    public Element child(int index) {
343        return childElementsList().get(index);
344    }
345
346    /**
347     * Get the number of child nodes of this element that are elements.
348     * <p>
349     * This method works on the same filtered list like {@link #child(int)}. Use {@link #childNodes()} and {@link
350     * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.)
351     * </p>
352     *
353     * @return the number of child nodes that are elements
354     * @see #children()
355     * @see #child(int)
356     */
357    public int childrenSize() {
358        return childElementsList().size();
359    }
360
361    /**
362     * Get this element's child elements.
363     * <p>
364     * This is effectively a filter on {@link #childNodes()} to get Element nodes.
365     * </p>
366     * @return child elements. If this element has no children, returns an empty list.
367     * @see #childNodes()
368     */
369    public Elements children() {
370        return new Elements(childElementsList());
371    }
372
373    /**
374     * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated.
375     * TODO - think about pulling this out as a helper as there are other shadow lists (like in Attributes) kept around.
376     * @return a list of child elements
377     */
378    List<Element> childElementsList() {
379        if (childNodeSize() == 0)
380            return EmptyChildren; // short circuit creating empty
381
382        List<Element> children;
383        if (shadowChildrenRef == null || (children = shadowChildrenRef.get()) == null) {
384            final int size = childNodes.size();
385            children = new ArrayList<>(size);
386            //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here)
387            for (int i = 0; i < size; i++) {
388                final Node node = childNodes.get(i);
389                if (node instanceof Element)
390                    children.add((Element) node);
391            }
392            shadowChildrenRef = new WeakReference<>(children);
393        }
394        return children;
395    }
396
397    /**
398     * Clears the cached shadow child elements.
399     */
400    @Override
401    void nodelistChanged() {
402        super.nodelistChanged();
403        shadowChildrenRef = null;
404    }
405
406    /**
407     Returns a Stream of this Element and all of its descendant Elements. The stream has document order.
408     @return a stream of this element and its descendants.
409     @see #nodeStream()
410     @since 1.17.1
411     */
412    public Stream<Element> stream() {
413        return NodeUtils.stream(this, Element.class);
414    }
415
416    private <T> List<T> filterNodes(Class<T> clazz) {
417        return childNodes.stream()
418                .filter(clazz::isInstance)
419                .map(clazz::cast)
420                .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
421    }
422
423    /**
424     * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated.
425     * <p>
426     * This is effectively a filter on {@link #childNodes()} to get Text nodes.
427     * @return child text nodes. If this element has no text nodes, returns an
428     * empty list.
429     * </p>
430     * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected:
431     * <ul>
432     *     <li>{@code p.text()} = {@code "One Two Three Four"}</li>
433     *     <li>{@code p.ownText()} = {@code "One Three Four"}</li>
434     *     <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
435     *     <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
436     *     <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
437     * </ul>
438     */
439    public List<TextNode> textNodes() {
440        return filterNodes(TextNode.class);
441    }
442
443    /**
444     * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
445     * <p>
446     * This is effectively a filter on {@link #childNodes()} to get Data nodes.
447     * </p>
448     * @return child data nodes. If this element has no data nodes, returns an
449     * empty list.
450     * @see #data()
451     */
452    public List<DataNode> dataNodes() {
453        return filterNodes(DataNode.class);
454    }
455
456    /**
457     * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
458     * may include this element, or any of its children.
459     * <p>This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
460     * multiple filters can be combined, e.g.:</p>
461     * <ul>
462     * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)
463     * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)
464     * </ul>
465     * <p>See the query syntax documentation in {@link org.jsoup.select.Selector}.</p>
466     * <p>Also known as {@code querySelectorAll()} in the Web DOM.</p>
467     *
468     * @param cssQuery a {@link Selector} CSS-like query
469     * @return an {@link Elements} list containing elements that match the query (empty if none match)
470     * @see Selector selector query syntax
471     * @see QueryParser#parse(String)
472     * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
473     */
474    public Elements select(String cssQuery) {
475        return Selector.select(cssQuery, this);
476    }
477
478    /**
479     * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but
480     * may be useful if you are running the same query many times (on many documents) and want to save the overhead of
481     * repeatedly parsing the CSS query.
482     * @param evaluator an element evaluator
483     * @return an {@link Elements} list containing elements that match the query (empty if none match)
484     */
485    public Elements select(Evaluator evaluator) {
486        return Selector.select(evaluator, this);
487    }
488
489    /**
490     * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
491     * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
492     * execution stops on the first hit.</p>
493     * <p>Also known as {@code querySelector()} in the Web DOM.</p>
494     * @param cssQuery cssQuery a {@link Selector} CSS-like query
495     * @return the first matching element, or <b>{@code null}</b> if there is no match.
496     * @see #expectFirst(String)
497     */
498    public @Nullable Element selectFirst(String cssQuery) {
499        return Selector.selectFirst(cssQuery, this);
500    }
501
502    /**
503     * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or
504     * {@code null} if none match.
505     *
506     * @param evaluator an element evaluator
507     * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none
508     * match.
509     */
510    public @Nullable Element selectFirst(Evaluator evaluator) {
511        return Collector.findFirst(evaluator, this);
512    }
513
514    /**
515     Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}. This
516     is useful if you want to simply abort processing on a failed match.
517     @param cssQuery a {@link Selector} CSS-like query
518     @return the first matching element
519     @throws IllegalArgumentException if no match is found
520     @since 1.15.2
521     */
522    public Element expectFirst(String cssQuery) {
523        return (Element) Validate.ensureNotNull(
524            Selector.selectFirst(cssQuery, this),
525            parent() != null ?
526                "No elements matched the query '%s' on element '%s'.":
527                "No elements matched the query '%s' in the document."
528            , cssQuery, this.tagName()
529        );
530    }
531
532    /**
533     * Checks if this element matches the given {@link Selector} CSS query. Also knows as {@code matches()} in the Web
534     * DOM.
535     *
536     * @param cssQuery a {@link Selector} CSS query
537     * @return if this element matches the query
538     */
539    public boolean is(String cssQuery) {
540        return is(QueryParser.parse(cssQuery));
541    }
542
543    /**
544     * Check if this element matches the given evaluator.
545     * @param evaluator an element evaluator
546     * @return if this element matches
547     */
548    public boolean is(Evaluator evaluator) {
549        return evaluator.matches(this.root(), this);
550    }
551
552    /**
553     * Find the closest element up the tree of parents that matches the specified CSS query. Will return itself, an
554     * ancestor, or {@code null} if there is no such matching element.
555     * @param cssQuery a {@link Selector} CSS query
556     * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not
557     * found.
558     */
559    public @Nullable Element closest(String cssQuery) {
560        return closest(QueryParser.parse(cssQuery));
561    }
562
563    /**
564     * Find the closest element up the tree of parents that matches the specified evaluator. Will return itself, an
565     * ancestor, or {@code null} if there is no such matching element.
566     * @param evaluator a query evaluator
567     * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not
568     * found.
569     */
570    public @Nullable Element closest(Evaluator evaluator) {
571        Validate.notNull(evaluator);
572        Element el = this;
573        final Element root = root();
574        do {
575            if (evaluator.matches(root, el))
576                return el;
577            el = el.parent();
578        } while (el != null);
579        return null;
580    }
581
582    /**
583     Find Elements that match the supplied {@index XPath} expression.
584     <p>Note that for convenience of writing the Xpath expression, namespaces are disabled, and queries can be
585     expressed using the element's local name only.</p>
586     <p>By default, XPath 1.0 expressions are supported. If you would to use XPath 2.0 or higher, you can provide an
587     alternate XPathFactory implementation:</p>
588     <ol>
589     <li>Add the implementation to your classpath. E.g. to use <a href="https://www.saxonica.com/products/products.xml">Saxon-HE</a>, add <a href="https://mvnrepository.com/artifact/net.sf.saxon/Saxon-HE">net.sf.saxon:Saxon-HE</a> to your build.</li>
590     <li>Set the system property <code>javax.xml.xpath.XPathFactory:jsoup</code> to the implementing classname. E.g.:<br>
591     <code>System.setProperty(W3CDom.XPathFactoryProperty, "net.sf.saxon.xpath.XPathFactoryImpl");</code>
592     </li>
593     </ol>
594
595     @param xpath XPath expression
596     @return matching elements, or an empty list if none match.
597     @see #selectXpath(String, Class)
598     @since 1.14.3
599     */
600    public Elements selectXpath(String xpath) {
601        return new Elements(NodeUtils.selectXpath(xpath, this, Element.class));
602    }
603
604    /**
605     Find Nodes that match the supplied XPath expression.
606     <p>For example, to select TextNodes under {@code p} elements: </p>
607     <pre>List&lt;TextNode&gt; textNodes = doc.selectXpath("//body//p//text()", TextNode.class);</pre>
608     <p>Note that in the jsoup DOM, Attribute objects are not Nodes. To directly select attribute values, do something
609     like:</p>
610     <pre>List&lt;String&gt; hrefs = doc.selectXpath("//a").eachAttr("href");</pre>
611     @param xpath XPath expression
612     @param nodeType the jsoup node type to return
613     @see #selectXpath(String)
614     @return a list of matching nodes
615     @since 1.14.3
616     */
617    public <T extends Node> List<T> selectXpath(String xpath, Class<T> nodeType) {
618        return NodeUtils.selectXpath(xpath, this, nodeType);
619    }
620
621    /**
622     * Insert a node to the end of this Element's children. The incoming node will be re-parented.
623     *
624     * @param child node to add.
625     * @return this Element, for chaining
626     * @see #prependChild(Node)
627     * @see #insertChildren(int, Collection)
628     */
629    public Element appendChild(Node child) {
630        Validate.notNull(child);
631
632        // was - Node#addChildren(child). short-circuits an array create and a loop.
633        reparentChild(child);
634        ensureChildNodes();
635        childNodes.add(child);
636        child.setSiblingIndex(childNodes.size() - 1);
637        return this;
638    }
639
640    /**
641     Insert the given nodes to the end of this Element's children.
642
643     @param children nodes to add
644     @return this Element, for chaining
645     @see #insertChildren(int, Collection)
646     */
647    public Element appendChildren(Collection<? extends Node> children) {
648        insertChildren(-1, children);
649        return this;
650    }
651
652    /**
653     * Add this element to the supplied parent element, as its next child.
654     *
655     * @param parent element to which this element will be appended
656     * @return this element, so that you can continue modifying the element
657     */
658    public Element appendTo(Element parent) {
659        Validate.notNull(parent);
660        parent.appendChild(this);
661        return this;
662    }
663
664    /**
665     * Add a node to the start of this element's children.
666     *
667     * @param child node to add.
668     * @return this element, so that you can add more child nodes or elements.
669     */
670    public Element prependChild(Node child) {
671        Validate.notNull(child);
672
673        addChildren(0, child);
674        return this;
675    }
676
677    /**
678     Insert the given nodes to the start of this Element's children.
679
680     @param children nodes to add
681     @return this Element, for chaining
682     @see #insertChildren(int, Collection)
683     */
684    public Element prependChildren(Collection<? extends Node> children) {
685        insertChildren(0, children);
686        return this;
687    }
688
689
690    /**
691     * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the
692     * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first.
693     *
694     * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
695     * end
696     * @param children child nodes to insert
697     * @return this element, for chaining.
698     */
699    public Element insertChildren(int index, Collection<? extends Node> children) {
700        Validate.notNull(children, "Children collection to be inserted must not be null.");
701        int currentSize = childNodeSize();
702        if (index < 0) index += currentSize +1; // roll around
703        Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds.");
704
705        ArrayList<Node> nodes = new ArrayList<>(children);
706        Node[] nodeArray = nodes.toArray(new Node[0]);
707        addChildren(index, nodeArray);
708        return this;
709    }
710
711    /**
712     * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the
713     * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first.
714     *
715     * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
716     * end
717     * @param children child nodes to insert
718     * @return this element, for chaining.
719     */
720    public Element insertChildren(int index, Node... children) {
721        Validate.notNull(children, "Children collection to be inserted must not be null.");
722        int currentSize = childNodeSize();
723        if (index < 0) index += currentSize +1; // roll around
724        Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds.");
725
726        addChildren(index, children);
727        return this;
728    }
729
730    /**
731     * Create a new element by tag name, and add it as this Element's last child.
732     *
733     * @param tagName the name of the tag (e.g. {@code div}).
734     * @return the new element, to allow you to add content to it, e.g.:
735     *  {@code parent.appendElement("h1").attr("id", "header").text("Welcome");}
736     */
737    public Element appendElement(String tagName) {
738        return appendElement(tagName, tag.namespace());
739    }
740
741    /**
742     * Create a new element by tag name and namespace, add it as this Element's last child.
743     *
744     * @param tagName the name of the tag (e.g. {@code div}).
745     * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml})
746     * @return the new element, in the specified namespace
747     */
748    public Element appendElement(String tagName, String namespace) {
749        Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri());
750        appendChild(child);
751        return child;
752    }
753
754    /**
755     * Create a new element by tag name, and add it as this Element's first child.
756     *
757     * @param tagName the name of the tag (e.g. {@code div}).
758     * @return the new element, to allow you to add content to it, e.g.:
759     *  {@code parent.prependElement("h1").attr("id", "header").text("Welcome");}
760     */
761    public Element prependElement(String tagName) {
762        return prependElement(tagName, tag.namespace());
763    }
764
765    /**
766     * Create a new element by tag name and namespace, and add it as this Element's first child.
767     *
768     * @param tagName the name of the tag (e.g. {@code div}).
769     * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml})
770     * @return the new element, in the specified namespace
771     */
772    public Element prependElement(String tagName, String namespace) {
773        Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri());
774        prependChild(child);
775        return child;
776    }
777
778    /**
779     * Create and append a new TextNode to this element.
780     *
781     * @param text the (un-encoded) text to add
782     * @return this element
783     */
784    public Element appendText(String text) {
785        Validate.notNull(text);
786        TextNode node = new TextNode(text);
787        appendChild(node);
788        return this;
789    }
790
791    /**
792     * Create and prepend a new TextNode to this element.
793     *
794     * @param text the decoded text to add
795     * @return this element
796     */
797    public Element prependText(String text) {
798        Validate.notNull(text);
799        TextNode node = new TextNode(text);
800        prependChild(node);
801        return this;
802    }
803
804    /**
805     * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children.
806     * @param html HTML to add inside this element, after the existing HTML
807     * @return this element
808     * @see #html(String)
809     */
810    public Element append(String html) {
811        Validate.notNull(html);
812        List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri());
813        addChildren(nodes.toArray(new Node[0]));
814        return this;
815    }
816
817    /**
818     * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children.
819     * @param html HTML to add inside this element, before the existing HTML
820     * @return this element
821     * @see #html(String)
822     */
823    public Element prepend(String html) {
824        Validate.notNull(html);
825        List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri());
826        addChildren(0, nodes.toArray(new Node[0]));
827        return this;
828    }
829
830    /**
831     * Insert the specified HTML into the DOM before this element (as a preceding sibling).
832     *
833     * @param html HTML to add before this element
834     * @return this element, for chaining
835     * @see #after(String)
836     */
837    @Override
838    public Element before(String html) {
839        return (Element) super.before(html);
840    }
841
842    /**
843     * Insert the specified node into the DOM before this node (as a preceding sibling).
844     * @param node to add before this element
845     * @return this Element, for chaining
846     * @see #after(Node)
847     */
848    @Override
849    public Element before(Node node) {
850        return (Element) super.before(node);
851    }
852
853    /**
854     * Insert the specified HTML into the DOM after this element (as a following sibling).
855     *
856     * @param html HTML to add after this element
857     * @return this element, for chaining
858     * @see #before(String)
859     */
860    @Override
861    public Element after(String html) {
862        return (Element) super.after(html);
863    }
864
865    /**
866     * Insert the specified node into the DOM after this node (as a following sibling).
867     * @param node to add after this element
868     * @return this element, for chaining
869     * @see #before(Node)
870     */
871    @Override
872    public Element after(Node node) {
873        return (Element) super.after(node);
874    }
875
876    /**
877     * Remove all the element's child nodes. Any attributes are left as-is. Each child node has its parent set to
878     * {@code null}.
879     * @return this element
880     */
881    @Override
882    public Element empty() {
883        // Detach each of the children -> parent links:
884        for (Node child : childNodes) {
885            child.parentNode = null;
886        }
887        childNodes.clear();
888        return this;
889    }
890
891    /**
892     * Wrap the supplied HTML around this element.
893     *
894     * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
895     * @return this element, for chaining.
896     */
897    @Override
898    public Element wrap(String html) {
899        return (Element) super.wrap(html);
900    }
901
902    /**
903     * Get a CSS selector that will uniquely select this element.
904     * <p>
905     * If the element has an ID, returns #id;
906     * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'},
907     * followed by a unique selector for the element (tag.class.class:nth-child(n)).
908     * </p>
909     *
910     * @return the CSS Path that can be used to retrieve the element in a selector.
911     */
912    public String cssSelector() {
913        if (id().length() > 0) {
914            // prefer to return the ID - but check that it's actually unique first!
915            String idSel = "#" + escapeCssIdentifier(id());
916            Document doc = ownerDocument();
917            if (doc != null) {
918                Elements els = doc.select(idSel);
919                if (els.size() == 1 && els.get(0) == this) // otherwise, continue to the nth-child impl
920                    return idSel;
921            } else {
922                return idSel; // no ownerdoc, return the ID selector
923            }
924        }
925
926        StringBuilder selector = StringUtil.borrowBuilder();
927        Element el = this;
928        while (el != null && !(el instanceof Document)) {
929            selector.insert(0, el.cssSelectorComponent());
930            el = el.parent();
931        }
932        return StringUtil.releaseBuilder(selector);
933    }
934
935    private String cssSelectorComponent() {
936        // Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag
937        String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|");
938        StringBuilder selector = StringUtil.borrowBuilder().append(tagName);
939        String classes = classNames().stream().map(TokenQueue::escapeCssIdentifier)
940                .collect(StringUtil.joining("."));
941        if (!classes.isEmpty())
942            selector.append('.').append(classes);
943
944        if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node
945            return StringUtil.releaseBuilder(selector);
946
947        selector.insert(0, " > ");
948        if (parent().select(selector.toString()).size() > 1)
949            selector.append(String.format(
950                ":nth-child(%d)", elementSiblingIndex() + 1));
951
952        return StringUtil.releaseBuilder(selector);
953    }
954
955    /**
956     * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling
957     * of itself, so will not be included in the returned list.
958     * @return sibling elements
959     */
960    public Elements siblingElements() {
961        if (parentNode == null)
962            return new Elements(0);
963
964        List<Element> elements = parent().childElementsList();
965        Elements siblings = new Elements(elements.size() - 1);
966        for (Element el: elements)
967            if (el != this)
968                siblings.add(el);
969        return siblings;
970    }
971
972    /**
973     * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s,
974     * the {@code nextElementSibling} of the first {@code p} is the second {@code p}.
975     * <p>
976     * This is similar to {@link #nextSibling()}, but specifically finds only Elements
977     * </p>
978     * @return the next element, or null if there is no next element
979     * @see #previousElementSibling()
980     */
981    public @Nullable Element nextElementSibling() {
982        Node next = this;
983        while ((next = next.nextSibling()) != null) {
984            if (next instanceof Element) return (Element) next;
985        }
986        return null;
987    }
988
989    /**
990     * Get each of the sibling elements that come after this element.
991     *
992     * @return each of the element siblings after this element, or an empty list if there are no next sibling elements
993     */
994    public Elements nextElementSiblings() {
995        return nextElementSiblings(true);
996    }
997
998    /**
999     * Gets the previous element sibling of this element.
1000     * @return the previous element, or null if there is no previous element
1001     * @see #nextElementSibling()
1002     */
1003    public @Nullable Element previousElementSibling() {
1004        Node prev = this;
1005        while ((prev = prev.previousSibling()) != null) {
1006            if (prev instanceof Element) return (Element) prev;
1007        }
1008        return null;
1009    }
1010
1011    /**
1012     * Get each of the element siblings before this element.
1013     *
1014     * @return the previous element siblings, or an empty list if there are none.
1015     */
1016    public Elements previousElementSiblings() {
1017        return nextElementSiblings(false);
1018    }
1019
1020    private Elements nextElementSiblings(boolean next) {
1021        Elements els = new Elements();
1022        if (parentNode == null)
1023            return  els;
1024        els.add(this);
1025        return next ?  els.nextAll() : els.prevAll();
1026    }
1027
1028    /**
1029     * Gets the first Element sibling of this element. That may be this element.
1030     * @return the first sibling that is an element (aka the parent's first element child)
1031     */
1032    public Element firstElementSibling() {
1033        if (parent() != null) {
1034            //noinspection DataFlowIssue (not nullable, would be this is no other sibs)
1035            return parent().firstElementChild();
1036        } else
1037            return this; // orphan is its own first sibling
1038    }
1039
1040    /**
1041     * Get the list index of this element in its element sibling list. I.e. if this is the first element
1042     * sibling, returns 0.
1043     * @return position in element sibling list
1044     */
1045    public int elementSiblingIndex() {
1046       if (parent() == null) return 0;
1047       return indexInList(this, parent().childElementsList());
1048    }
1049
1050    /**
1051     * Gets the last element sibling of this element. That may be this element.
1052     * @return the last sibling that is an element (aka the parent's last element child)
1053     */
1054    public Element lastElementSibling() {
1055        if (parent() != null) {
1056            //noinspection DataFlowIssue (not nullable, would be this if no other sibs)
1057            return parent().lastElementChild();
1058        } else
1059            return this;
1060    }
1061
1062    private static <E extends Element> int indexInList(Element search, List<E> elements) {
1063        final int size = elements.size();
1064        for (int i = 0; i < size; i++) {
1065            if (elements.get(i) == search)
1066                return i;
1067        }
1068        return 0;
1069    }
1070
1071    /**
1072     Gets the first child of this Element that is an Element, or {@code null} if there is none.
1073     @return the first Element child node, or null.
1074     @see #firstChild()
1075     @see #lastElementChild()
1076     @since 1.15.2
1077     */
1078    public @Nullable Element firstElementChild() {
1079        Node child = firstChild();
1080        while (child != null) {
1081            if (child instanceof Element) return (Element) child;
1082            child = child.nextSibling();
1083        }
1084        return null;
1085    }
1086
1087    /**
1088     Gets the last child of this Element that is an Element, or @{code null} if there is none.
1089     @return the last Element child node, or null.
1090     @see #lastChild()
1091     @see #firstElementChild()
1092     @since 1.15.2
1093     */
1094    public @Nullable Element lastElementChild() {
1095        Node child = lastChild();
1096        while (child != null) {
1097            if (child instanceof Element) return (Element) child;
1098            child = child.previousSibling();
1099        }
1100        return null;
1101    }
1102
1103    // DOM type methods
1104
1105    /**
1106     * Finds elements, including and recursively under this element, with the specified tag name.
1107     * @param tagName The tag name to search for (case insensitively).
1108     * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
1109     */
1110    public Elements getElementsByTag(String tagName) {
1111        Validate.notEmpty(tagName);
1112        tagName = normalize(tagName);
1113
1114        return Collector.collect(new Evaluator.Tag(tagName), this);
1115    }
1116
1117    /**
1118     * Find an element by ID, including or under this element.
1119     * <p>
1120     * Note that this finds the first matching ID, starting with this element. If you search down from a different
1121     * starting point, it is possible to find a different element by ID. For unique element by ID within a Document,
1122     * use {@link Document#getElementById(String)}
1123     * @param id The ID to search for.
1124     * @return The first matching element by ID, starting with this element, or null if none found.
1125     */
1126    public @Nullable Element getElementById(String id) {
1127        Validate.notEmpty(id);
1128
1129        Elements elements = Collector.collect(new Evaluator.Id(id), this);
1130        if (elements.size() > 0)
1131            return elements.get(0);
1132        else
1133            return null;
1134    }
1135
1136    /**
1137     * Find elements that have this class, including or under this element. Case-insensitive.
1138     * <p>
1139     * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method
1140     * checks each class, so you can find the above with {@code el.getElementsByClass("header");}.
1141     *
1142     * @param className the name of the class to search for.
1143     * @return elements with the supplied class name, empty if none
1144     * @see #hasClass(String)
1145     * @see #classNames()
1146     */
1147    public Elements getElementsByClass(String className) {
1148        Validate.notEmpty(className);
1149
1150        return Collector.collect(new Evaluator.Class(className), this);
1151    }
1152
1153    /**
1154     * Find elements that have a named attribute set. Case-insensitive.
1155     *
1156     * @param key name of the attribute, e.g. {@code href}
1157     * @return elements that have this attribute, empty if none
1158     */
1159    public Elements getElementsByAttribute(String key) {
1160        Validate.notEmpty(key);
1161        key = key.trim();
1162
1163        return Collector.collect(new Evaluator.Attribute(key), this);
1164    }
1165
1166    /**
1167     * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements
1168     * that have HTML5 datasets.
1169     * @param keyPrefix name prefix of the attribute e.g. {@code data-}
1170     * @return elements that have attribute names that start with the prefix, empty if none.
1171     */
1172    public Elements getElementsByAttributeStarting(String keyPrefix) {
1173        Validate.notEmpty(keyPrefix);
1174        keyPrefix = keyPrefix.trim();
1175
1176        return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this);
1177    }
1178
1179    /**
1180     * Find elements that have an attribute with the specific value. Case-insensitive.
1181     *
1182     * @param key name of the attribute
1183     * @param value value of the attribute
1184     * @return elements that have this attribute with this value, empty if none
1185     */
1186    public Elements getElementsByAttributeValue(String key, String value) {
1187        return Collector.collect(new Evaluator.AttributeWithValue(key, value), this);
1188    }
1189
1190    /**
1191     * Find elements that either do not have this attribute, or have it with a different value. Case-insensitive.
1192     *
1193     * @param key name of the attribute
1194     * @param value value of the attribute
1195     * @return elements that do not have a matching attribute
1196     */
1197    public Elements getElementsByAttributeValueNot(String key, String value) {
1198        return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this);
1199    }
1200
1201    /**
1202     * Find elements that have attributes that start with the value prefix. Case-insensitive.
1203     *
1204     * @param key name of the attribute
1205     * @param valuePrefix start of attribute value
1206     * @return elements that have attributes that start with the value prefix
1207     */
1208    public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) {
1209        return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this);
1210    }
1211
1212    /**
1213     * Find elements that have attributes that end with the value suffix. Case-insensitive.
1214     *
1215     * @param key name of the attribute
1216     * @param valueSuffix end of the attribute value
1217     * @return elements that have attributes that end with the value suffix
1218     */
1219    public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) {
1220        return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this);
1221    }
1222
1223    /**
1224     * Find elements that have attributes whose value contains the match string. Case-insensitive.
1225     *
1226     * @param key name of the attribute
1227     * @param match substring of value to search for
1228     * @return elements that have attributes containing this text
1229     */
1230    public Elements getElementsByAttributeValueContaining(String key, String match) {
1231        return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this);
1232    }
1233
1234    /**
1235     * Find elements that have an attribute whose value matches the supplied regular expression.
1236     * @param key name of the attribute
1237     * @param pattern compiled regular expression to match against attribute values
1238     * @return elements that have attributes matching this regular expression
1239     */
1240    public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) {
1241        return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this);
1242
1243    }
1244
1245    /**
1246     * Find elements that have attributes whose values match the supplied regular expression.
1247     * @param key name of the attribute
1248     * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
1249     * @return elements that have attributes matching this regular expression
1250     */
1251    public Elements getElementsByAttributeValueMatching(String key, String regex) {
1252        Pattern pattern;
1253        try {
1254            pattern = Pattern.compile(regex);
1255        } catch (PatternSyntaxException e) {
1256            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1257        }
1258        return getElementsByAttributeValueMatching(key, pattern);
1259    }
1260
1261    /**
1262     * Find elements whose sibling index is less than the supplied index.
1263     * @param index 0-based index
1264     * @return elements less than index
1265     */
1266    public Elements getElementsByIndexLessThan(int index) {
1267        return Collector.collect(new Evaluator.IndexLessThan(index), this);
1268    }
1269
1270    /**
1271     * Find elements whose sibling index is greater than the supplied index.
1272     * @param index 0-based index
1273     * @return elements greater than index
1274     */
1275    public Elements getElementsByIndexGreaterThan(int index) {
1276        return Collector.collect(new Evaluator.IndexGreaterThan(index), this);
1277    }
1278
1279    /**
1280     * Find elements whose sibling index is equal to the supplied index.
1281     * @param index 0-based index
1282     * @return elements equal to index
1283     */
1284    public Elements getElementsByIndexEquals(int index) {
1285        return Collector.collect(new Evaluator.IndexEquals(index), this);
1286    }
1287
1288    /**
1289     * Find elements that contain the specified string. The search is case-insensitive. The text may appear directly
1290     * in the element, or in any of its descendants.
1291     * @param searchText to look for in the element's text
1292     * @return elements that contain the string, case-insensitive.
1293     * @see Element#text()
1294     */
1295    public Elements getElementsContainingText(String searchText) {
1296        return Collector.collect(new Evaluator.ContainsText(searchText), this);
1297    }
1298
1299    /**
1300     * Find elements that directly contain the specified string. The search is case-insensitive. The text must appear directly
1301     * in the element, not in any of its descendants.
1302     * @param searchText to look for in the element's own text
1303     * @return elements that contain the string, case-insensitive.
1304     * @see Element#ownText()
1305     */
1306    public Elements getElementsContainingOwnText(String searchText) {
1307        return Collector.collect(new Evaluator.ContainsOwnText(searchText), this);
1308    }
1309
1310    /**
1311     * Find elements whose text matches the supplied regular expression.
1312     * @param pattern regular expression to match text against
1313     * @return elements matching the supplied regular expression.
1314     * @see Element#text()
1315     */
1316    public Elements getElementsMatchingText(Pattern pattern) {
1317        return Collector.collect(new Evaluator.Matches(pattern), this);
1318    }
1319
1320    /**
1321     * Find elements whose text matches the supplied regular expression.
1322     * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
1323     * @return elements matching the supplied regular expression.
1324     * @see Element#text()
1325     */
1326    public Elements getElementsMatchingText(String regex) {
1327        Pattern pattern;
1328        try {
1329            pattern = Pattern.compile(regex);
1330        } catch (PatternSyntaxException e) {
1331            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1332        }
1333        return getElementsMatchingText(pattern);
1334    }
1335
1336    /**
1337     * Find elements whose own text matches the supplied regular expression.
1338     * @param pattern regular expression to match text against
1339     * @return elements matching the supplied regular expression.
1340     * @see Element#ownText()
1341     */
1342    public Elements getElementsMatchingOwnText(Pattern pattern) {
1343        return Collector.collect(new Evaluator.MatchesOwn(pattern), this);
1344    }
1345
1346    /**
1347     * Find elements whose own text matches the supplied regular expression.
1348     * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as (?i) and (?m) to control regex options.
1349     * @return elements matching the supplied regular expression.
1350     * @see Element#ownText()
1351     */
1352    public Elements getElementsMatchingOwnText(String regex) {
1353        Pattern pattern;
1354        try {
1355            pattern = Pattern.compile(regex);
1356        } catch (PatternSyntaxException e) {
1357            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
1358        }
1359        return getElementsMatchingOwnText(pattern);
1360    }
1361
1362    /**
1363     * Find all elements under this element (including self, and children of children).
1364     *
1365     * @return all elements
1366     */
1367    public Elements getAllElements() {
1368        return Collector.collect(new Evaluator.AllElements(), this);
1369    }
1370
1371    /**
1372     Gets the <b>normalized, combined text</b> of this element and all its children. Whitespace is normalized and
1373     trimmed.
1374     <p>For example, given HTML {@code <p>Hello  <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there
1375    now!"}
1376     <p>If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not
1377     children), use {@link #ownText()}
1378     <p>Note that this method returns the textual content that would be presented to a reader. The contents of data
1379     nodes (such as {@code <script>} tags) are not considered text. Use {@link #data()} or {@link #html()} to retrieve
1380     that content.
1381
1382     @return decoded, normalized text, or empty string if none.
1383     @see #wholeText()
1384     @see #ownText()
1385     @see #textNodes()
1386     */
1387    public String text() {
1388        final StringBuilder accum = StringUtil.borrowBuilder();
1389        NodeTraversor.traverse(new TextAccumulator(accum), this);
1390        return StringUtil.releaseBuilder(accum).trim();
1391    }
1392
1393    private static class TextAccumulator implements NodeVisitor {
1394        private final StringBuilder accum;
1395
1396        public TextAccumulator(StringBuilder accum) {
1397            this.accum = accum;
1398        }
1399
1400        public void head(Node node, int depth) {
1401            if (node instanceof TextNode) {
1402                TextNode textNode = (TextNode) node;
1403                appendNormalisedText(accum, textNode);
1404            } else if (node instanceof Element) {
1405                Element element = (Element) node;
1406                if (accum.length() > 0 &&
1407                    (element.isBlock() || element.nameIs("br")) &&
1408                    !lastCharIsWhitespace(accum))
1409                    accum.append(' ');
1410            }
1411        }
1412
1413        public void tail(Node node, int depth) {
1414            // make sure there is a space between block tags and immediately following text nodes or inline elements <div>One</div>Two should be "One Two".
1415            if (node instanceof Element) {
1416                Element element = (Element) node;
1417                Node next = node.nextSibling();
1418                if (element.isBlock() && (next instanceof TextNode || next instanceof Element && !((Element) next).tag.formatAsBlock()) && !lastCharIsWhitespace(accum))
1419                    accum.append(' ');
1420            }
1421
1422        }
1423    }
1424
1425    /**
1426     Get the non-normalized, decoded text of this element and its children, including only any newlines and spaces
1427     present in the original source.
1428     @return decoded, non-normalized text
1429     @see #text()
1430     @see #wholeOwnText()
1431     */
1432    public String wholeText() {
1433        final StringBuilder accum = StringUtil.borrowBuilder();
1434        nodeStream().forEach(node -> appendWholeText(node, accum));
1435        return StringUtil.releaseBuilder(accum);
1436    }
1437
1438    private static void appendWholeText(Node node, StringBuilder accum) {
1439        if (node instanceof TextNode) {
1440            accum.append(((TextNode) node).getWholeText());
1441        } else if (node.nameIs("br")) {
1442            accum.append("\n");
1443        }
1444    }
1445
1446    /**
1447     Get the non-normalized, decoded text of this element, <b>not including</b> any child elements, including any
1448     newlines and spaces present in the original source.
1449     @return decoded, non-normalized text that is a direct child of this Element
1450     @see #text()
1451     @see #wholeText()
1452     @see #ownText()
1453     @since 1.15.1
1454     */
1455    public String wholeOwnText() {
1456        final StringBuilder accum = StringUtil.borrowBuilder();
1457        final int size = childNodeSize();
1458        for (int i = 0; i < size; i++) {
1459            Node node = childNodes.get(i);
1460            appendWholeText(node, accum);
1461        }
1462
1463        return StringUtil.releaseBuilder(accum);
1464    }
1465
1466    /**
1467     * Gets the (normalized) text owned by this element only; does not get the combined text of all children.
1468     * <p>
1469     * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"},
1470     * whereas {@code p.text()} returns {@code "Hello there now!"}.
1471     * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element.
1472     *
1473     * @return decoded text, or empty string if none.
1474     * @see #text()
1475     * @see #textNodes()
1476     */
1477    public String ownText() {
1478        StringBuilder sb = StringUtil.borrowBuilder();
1479        ownText(sb);
1480        return StringUtil.releaseBuilder(sb).trim();
1481    }
1482
1483    private void ownText(StringBuilder accum) {
1484        for (int i = 0; i < childNodeSize(); i++) {
1485            Node child = childNodes.get(i);
1486            if (child instanceof TextNode) {
1487                TextNode textNode = (TextNode) child;
1488                appendNormalisedText(accum, textNode);
1489            } else if (child.nameIs("br") && !lastCharIsWhitespace(accum)) {
1490                accum.append(" ");
1491            }
1492        }
1493    }
1494
1495    private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
1496        String text = textNode.getWholeText();
1497        if (preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode)
1498            accum.append(text);
1499        else
1500            StringUtil.appendNormalisedWhitespace(accum, text, lastCharIsWhitespace(accum));
1501    }
1502
1503    static boolean preserveWhitespace(@Nullable Node node) {
1504        // looks only at this element and five levels up, to prevent recursion & needless stack searches
1505        if (node instanceof Element) {
1506            Element el = (Element) node;
1507            int i = 0;
1508            do {
1509                if (el.tag.preserveWhitespace())
1510                    return true;
1511                el = el.parent();
1512                i++;
1513            } while (i < 6 && el != null);
1514        }
1515        return false;
1516    }
1517
1518    /**
1519     * Set the text of this element. Any existing contents (text or elements) will be cleared.
1520     * <p>As a special case, for {@code <script>} and {@code <style>} tags, the input text will be treated as data,
1521     * not visible text.</p>
1522     * @param text decoded text
1523     * @return this element
1524     */
1525    public Element text(String text) {
1526        Validate.notNull(text);
1527        empty();
1528        // special case for script/style in HTML: should be data node
1529        Document owner = ownerDocument();
1530        // an alternate impl would be to run through the parser
1531        if (owner != null && owner.parser().isContentForTagData(normalName()))
1532            appendChild(new DataNode(text));
1533        else
1534            appendChild(new TextNode(text));
1535
1536        return this;
1537    }
1538
1539    /**
1540     Checks if the current element or any of its child elements contain non-whitespace text.
1541     @return {@code true} if the element has non-blank text content, {@code false} otherwise.
1542     */
1543    public boolean hasText() {
1544        AtomicBoolean hasText = new AtomicBoolean(false);
1545        filter((node, depth) -> {
1546            if (node instanceof TextNode) {
1547                TextNode textNode = (TextNode) node;
1548                if (!textNode.isBlank()) {
1549                    hasText.set(true);
1550                    return NodeFilter.FilterResult.STOP;
1551                }
1552            }
1553            return NodeFilter.FilterResult.CONTINUE;
1554        });
1555        return hasText.get();
1556    }
1557
1558    /**
1559     * Get the combined data of this element. Data is e.g. the inside of a {@code <script>} tag. Note that data is NOT the
1560     * text of the element. Use {@link #text()} to get the text that would be visible to a user, and {@code data()}
1561     * for the contents of scripts, comments, CSS styles, etc.
1562     *
1563     * @return the data, or empty string if none
1564     *
1565     * @see #dataNodes()
1566     */
1567    public String data() {
1568        StringBuilder sb = StringUtil.borrowBuilder();
1569        traverse((childNode, depth) -> {
1570            if (childNode instanceof DataNode) {
1571                DataNode data = (DataNode) childNode;
1572                sb.append(data.getWholeData());
1573            } else if (childNode instanceof Comment) {
1574                Comment comment = (Comment) childNode;
1575                sb.append(comment.getData());
1576            } else if (childNode instanceof CDataNode) {
1577                // this shouldn't really happen because the html parser won't see the cdata as anything special when parsing script.
1578                // but in case another type gets through.
1579                CDataNode cDataNode = (CDataNode) childNode;
1580                sb.append(cDataNode.getWholeText());
1581            }
1582        });
1583        return StringUtil.releaseBuilder(sb);
1584    }
1585
1586    /**
1587     * Gets the literal value of this element's "class" attribute, which may include multiple class names, space
1588     * separated. (E.g. on <code>&lt;div class="header gray"&gt;</code> returns, "<code>header gray</code>")
1589     * @return The literal class attribute, or <b>empty string</b> if no class attribute set.
1590     */
1591    public String className() {
1592        return attr("class").trim();
1593    }
1594
1595    /**
1596     * Get each of the element's class names. E.g. on element {@code <div class="header gray">},
1597     * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to
1598     * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them.
1599     * @return set of classnames, empty if no class attribute
1600     */
1601    public Set<String> classNames() {
1602        String[] names = ClassSplit.split(className());
1603        Set<String> classNames = new LinkedHashSet<>(Arrays.asList(names));
1604        classNames.remove(""); // if classNames() was empty, would include an empty class
1605
1606        return classNames;
1607    }
1608
1609    /**
1610     Set the element's {@code class} attribute to the supplied class names.
1611     @param classNames set of classes
1612     @return this element, for chaining
1613     */
1614    public Element classNames(Set<String> classNames) {
1615        Validate.notNull(classNames);
1616        if (classNames.isEmpty()) {
1617            attributes().remove("class");
1618        } else {
1619            attributes().put("class", StringUtil.join(classNames, " "));
1620        }
1621        return this;
1622    }
1623
1624    /**
1625     * Tests if this element has a class. Case-insensitive.
1626     * @param className name of class to check for
1627     * @return true if it does, false if not
1628     */
1629    // performance sensitive
1630    public boolean hasClass(String className) {
1631        if (attributes == null)
1632            return false;
1633
1634        final String classAttr = attributes.getIgnoreCase("class");
1635        final int len = classAttr.length();
1636        final int wantLen = className.length();
1637
1638        if (len == 0 || len < wantLen) {
1639            return false;
1640        }
1641
1642        // if both lengths are equal, only need compare the className with the attribute
1643        if (len == wantLen) {
1644            return className.equalsIgnoreCase(classAttr);
1645        }
1646
1647        // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations)
1648        boolean inClass = false;
1649        int start = 0;
1650        for (int i = 0; i < len; i++) {
1651            if (Character.isWhitespace(classAttr.charAt(i))) {
1652                if (inClass) {
1653                    // white space ends a class name, compare it with the requested one, ignore case
1654                    if (i - start == wantLen && classAttr.regionMatches(true, start, className, 0, wantLen)) {
1655                        return true;
1656                    }
1657                    inClass = false;
1658                }
1659            } else {
1660                if (!inClass) {
1661                    // we're in a class name : keep the start of the substring
1662                    inClass = true;
1663                    start = i;
1664                }
1665            }
1666        }
1667
1668        // check the last entry
1669        if (inClass && len - start == wantLen) {
1670            return classAttr.regionMatches(true, start, className, 0, wantLen);
1671        }
1672
1673        return false;
1674    }
1675
1676    /**
1677     Add a class name to this element's {@code class} attribute.
1678     @param className class name to add
1679     @return this element
1680     */
1681    public Element addClass(String className) {
1682        Validate.notNull(className);
1683
1684        Set<String> classes = classNames();
1685        classes.add(className);
1686        classNames(classes);
1687
1688        return this;
1689    }
1690
1691    /**
1692     Remove a class name from this element's {@code class} attribute.
1693     @param className class name to remove
1694     @return this element
1695     */
1696    public Element removeClass(String className) {
1697        Validate.notNull(className);
1698
1699        Set<String> classes = classNames();
1700        classes.remove(className);
1701        classNames(classes);
1702
1703        return this;
1704    }
1705
1706    /**
1707     Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it.
1708     @param className class name to toggle
1709     @return this element
1710     */
1711    public Element toggleClass(String className) {
1712        Validate.notNull(className);
1713
1714        Set<String> classes = classNames();
1715        if (classes.contains(className))
1716            classes.remove(className);
1717        else
1718            classes.add(className);
1719        classNames(classes);
1720
1721        return this;
1722    }
1723
1724    /**
1725     * Get the value of a form element (input, textarea, etc).
1726     * @return the value of the form element, or empty string if not set.
1727     */
1728    public String val() {
1729        if (elementIs("textarea", NamespaceHtml))
1730            return text();
1731        else
1732            return attr("value");
1733    }
1734
1735    /**
1736     * Set the value of a form element (input, textarea, etc).
1737     * @param value value to set
1738     * @return this element (for chaining)
1739     */
1740    public Element val(String value) {
1741        if (elementIs("textarea", NamespaceHtml))
1742            text(value);
1743        else
1744            attr("value", value);
1745        return this;
1746    }
1747
1748    /**
1749     Get the source range (start and end positions) of the end (closing) tag for this Element. Position tracking must be
1750     enabled prior to parsing the content.
1751     @return the range of the closing tag for this element, or {@code untracked} if its range was not tracked.
1752     @see org.jsoup.parser.Parser#setTrackPosition(boolean)
1753     @see Node#sourceRange()
1754     @see Range#isImplicit()
1755     @since 1.15.2
1756     */
1757    public Range endSourceRange() {
1758        return Range.of(this, false);
1759    }
1760
1761    boolean shouldIndent(final Document.OutputSettings out) {
1762        return out.prettyPrint() && isFormatAsBlock(out) && !isInlineable(out) && !preserveWhitespace(parentNode);
1763    }
1764
1765    @Override
1766    void outerHtmlHead(final Appendable accum, int depth, final Document.OutputSettings out) throws IOException {
1767        if (shouldIndent(out)) {
1768            if (accum instanceof StringBuilder) {
1769                if (((StringBuilder) accum).length() > 0)
1770                    indent(accum, depth, out);
1771            } else {
1772                indent(accum, depth, out);
1773            }
1774        }
1775        accum.append('<').append(tagName());
1776        if (attributes != null) attributes.html(accum, out);
1777
1778        // selfclosing includes unknown tags, isEmpty defines tags that are always empty
1779        if (childNodes.isEmpty() && tag.isSelfClosing()) {
1780            if (out.syntax() == Document.OutputSettings.Syntax.html && tag.isEmpty())
1781                accum.append('>');
1782            else
1783                accum.append(" />"); // <img> in html, <img /> in xml
1784        }
1785        else
1786            accum.append('>');
1787    }
1788
1789    @Override
1790    void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
1791        if (!(childNodes.isEmpty() && tag.isSelfClosing())) {
1792            if (out.prettyPrint() && (!childNodes.isEmpty() && (
1793                (tag.formatAsBlock() && !preserveWhitespace(parentNode)) ||
1794                    (out.outline() && (childNodes.size()>1 || (childNodes.size()==1 && (childNodes.get(0) instanceof Element))))
1795            )))
1796                indent(accum, depth, out);
1797            accum.append("</").append(tagName()).append('>');
1798        }
1799    }
1800
1801    /**
1802     * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return
1803     * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.)
1804     *
1805     * @return String of HTML.
1806     * @see #outerHtml()
1807     */
1808    public String html() {
1809        StringBuilder accum = StringUtil.borrowBuilder();
1810        html(accum);
1811        String html = StringUtil.releaseBuilder(accum);
1812        return NodeUtils.outputSettings(this).prettyPrint() ? html.trim() : html;
1813    }
1814
1815    @Override
1816    public <T extends Appendable> T html(T appendable) {
1817        final int size = childNodes.size();
1818        for (int i = 0; i < size; i++)
1819            childNodes.get(i).outerHtml(appendable);
1820
1821        return appendable;
1822    }
1823
1824    /**
1825     * Set this element's inner HTML. Clears the existing HTML first.
1826     * @param html HTML to parse and set into this element
1827     * @return this element
1828     * @see #append(String)
1829     */
1830    public Element html(String html) {
1831        empty();
1832        append(html);
1833        return this;
1834    }
1835
1836    @Override
1837    public Element clone() {
1838        return (Element) super.clone();
1839    }
1840
1841    @Override
1842    public Element shallowClone() {
1843        // simpler than implementing a clone version with no child copy
1844        String baseUri = baseUri();
1845        if (baseUri.isEmpty()) baseUri = null; // saves setting a blank internal attribute
1846        return new Element(tag, baseUri, attributes == null ? null : attributes.clone());
1847    }
1848
1849    @Override
1850    protected Element doClone(@Nullable Node parent) {
1851        Element clone = (Element) super.doClone(parent);
1852        clone.attributes = attributes != null ? attributes.clone() : null;
1853        clone.childNodes = new NodeList(clone, childNodes.size());
1854        clone.childNodes.addAll(childNodes); // the children then get iterated and cloned in Node.clone
1855
1856        return clone;
1857    }
1858
1859    // overrides of Node for call chaining
1860    @Override
1861    public Element clearAttributes() {
1862        if (attributes != null) {
1863            super.clearAttributes(); // keeps internal attributes via iterator
1864            if (attributes.size() == 0)
1865                attributes = null; // only remove entirely if no internal attributes
1866        }
1867
1868        return this;
1869    }
1870
1871    @Override
1872    public Element removeAttr(String attributeKey) {
1873        return (Element) super.removeAttr(attributeKey);
1874    }
1875
1876    @Override
1877    public Element root() {
1878        return (Element) super.root(); // probably a document, but always at least an element
1879    }
1880
1881    @Override
1882    public Element traverse(NodeVisitor nodeVisitor) {
1883        return (Element) super.traverse(nodeVisitor);
1884    }
1885
1886    @Override
1887    public Element forEachNode(Consumer<? super Node> action) {
1888        return (Element) super.forEachNode(action);
1889    }
1890
1891    /**
1892     Perform the supplied action on this Element and each of its descendant Elements, during a depth-first traversal.
1893     Elements may be inspected, changed, added, replaced, or removed.
1894     @param action the function to perform on the element
1895     @return this Element, for chaining
1896     @see Node#forEachNode(Consumer)
1897     @deprecated use {@link #stream()}.{@link Stream#forEach(Consumer) forEach(Consumer)} instead. (Removing this method
1898     so Element can implement Iterable, which this signature conflicts with due to the non-void return.)
1899     */
1900    @Deprecated
1901    public Element forEach(Consumer<? super Element> action) {
1902        stream().forEach(action);
1903        return this;
1904    }
1905
1906    @Override
1907    public Element filter(NodeFilter nodeFilter) {
1908        return  (Element) super.filter(nodeFilter);
1909    }
1910
1911    private static final class NodeList extends ChangeNotifyingArrayList<Node> {
1912        private final Element owner;
1913
1914        NodeList(Element owner, int initialCapacity) {
1915            super(initialCapacity);
1916            this.owner = owner;
1917        }
1918
1919        public void onContentsChanged() {
1920            owner.nodelistChanged();
1921        }
1922    }
1923
1924    private boolean isFormatAsBlock(Document.OutputSettings out) {
1925        return tag.isBlock() || (parent() != null && parent().tag().formatAsBlock()) || out.outline();
1926    }
1927
1928    private boolean isInlineable(Document.OutputSettings out) {
1929        if (!tag.isInline())
1930            return false;
1931        return (parent() == null || parent().isBlock())
1932            && !isEffectivelyFirst()
1933            && !out.outline()
1934            && !nameIs("br");
1935    }
1936}