001package org.jsoup.parser;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.internal.Normalizer;
005import org.jsoup.internal.StringUtil;
006import org.jsoup.nodes.Attributes;
007import org.jsoup.nodes.CDataNode;
008import org.jsoup.nodes.Comment;
009import org.jsoup.nodes.DataNode;
010import org.jsoup.nodes.Document;
011import org.jsoup.nodes.Element;
012import org.jsoup.nodes.FormElement;
013import org.jsoup.nodes.Node;
014import org.jsoup.nodes.TextNode;
015import org.jspecify.annotations.Nullable;
016
017import java.io.Reader;
018import java.util.ArrayList;
019import java.util.List;
020
021import static org.jsoup.internal.StringUtil.inSorted;
022import static org.jsoup.parser.HtmlTreeBuilderState.Constants.InTableFoster;
023import static org.jsoup.parser.HtmlTreeBuilderState.ForeignContent;
024import static org.jsoup.parser.Parser.*;
025
026/**
027 * HTML Tree Builder; creates a DOM from Tokens.
028 */
029public class HtmlTreeBuilder extends TreeBuilder {
030    // tag searches. must be sorted, used in inSorted. HtmlTreeBuilderTest validates they're sorted.
031    static final String[] TagsSearchInScope = new String[]{ // a particular element in scope
032        "applet", "caption", "html", "marquee", "object", "table", "td", "template", "th"
033    };
034    // math and svg namespaces for particular element in scope
035    static final String[]TagSearchInScopeMath = new String[] {
036        "annotation-xml",  "mi", "mn", "mo", "ms", "mtext"
037    };
038    static final String[]TagSearchInScopeSvg = new String[] {
039        "desc", "foreignObject", "title"
040    };
041
042    static final String[] TagSearchList = new String[]{"ol", "ul"};
043    static final String[] TagSearchButton = new String[]{"button"};
044    static final String[] TagSearchTableScope = new String[]{"html", "table"};
045    static final String[] TagSearchSelectScope = new String[]{"optgroup", "option"};
046    static final String[] TagSearchEndTags = new String[]{"dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc"};
047    static final String[] TagThoroughSearchEndTags = new String[]{"caption", "colgroup", "dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc", "tbody", "td", "tfoot", "th", "thead", "tr"};
048    static final String[] TagSearchSpecial = new String[]{
049        "address", "applet", "area", "article", "aside", "base", "basefont", "bgsound", "blockquote", "body", "br",
050        "button", "caption", "center", "col", "colgroup", "dd", "details", "dir", "div", "dl", "dt", "embed",
051        "fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6",
052        "head", "header", "hgroup", "hr", "html", "iframe", "img", "input", "keygen", "li", "link", "listing", "main",
053        "marquee", "menu", "meta", "nav", "noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext",
054        "pre", "script", "search", "section", "select", "source", "style", "summary", "table", "tbody", "td",
055        "template", "textarea", "tfoot", "th", "thead", "title", "tr", "track", "ul", "wbr", "xmp"};
056    static String[] TagSearchSpecialMath = {"annotation-xml", "mi", "mn", "mo", "ms", "mtext"}; // differs to MathML text integration point; adds annotation-xml
057    static final String[] TagMathMlTextIntegration = new String[]{"mi", "mn", "mo", "ms", "mtext"};
058    static final String[] TagSvgHtmlIntegration = new String[]{"desc", "foreignObject", "title"};
059    static final String[] TagFormListed = {
060        "button", "fieldset", "input", "keygen", "object", "output", "select", "textarea"
061    };
062
063    public static final int MaxScopeSearchDepth = 100; // prevents the parser bogging down in exceptionally broken pages
064
065    private HtmlTreeBuilderState state; // the current state
066    private HtmlTreeBuilderState originalState; // original / marked state
067
068    private boolean baseUriSetFromDoc;
069    private @Nullable Element headElement; // the current head element
070    private @Nullable FormElement formElement; // the current form element
071    private @Nullable Element contextElement; // fragment parse root; name only copy of context. could be null even if fragment parsing
072    ArrayList<Element> formattingElements; // active (open) formatting elements
073    private ArrayList<HtmlTreeBuilderState> tmplInsertMode; // stack of Template Insertion modes
074    private List<Token.Character> pendingTableCharacters; // chars in table to be shifted out
075    private Token.EndTag emptyEnd; // reused empty end tag
076
077    private boolean framesetOk; // if ok to go into frameset
078    private boolean fosterInserts; // if next inserts should be fostered
079    private boolean fragmentParsing; // if parsing a fragment of html
080
081    @Override ParseSettings defaultSettings() {
082        return ParseSettings.htmlDefault;
083    }
084
085    @Override
086    HtmlTreeBuilder newInstance() {
087        return new HtmlTreeBuilder();
088    }
089
090    @Override
091    protected void initialiseParse(Reader input, String baseUri, Parser parser) {
092        super.initialiseParse(input, baseUri, parser);
093
094        // this is a bit mucky. todo - probably just create new parser objects to ensure all reset.
095        state = HtmlTreeBuilderState.Initial;
096        originalState = null;
097        baseUriSetFromDoc = false;
098        headElement = null;
099        formElement = null;
100        contextElement = null;
101        formattingElements = new ArrayList<>();
102        tmplInsertMode = new ArrayList<>();
103        pendingTableCharacters = new ArrayList<>();
104        emptyEnd = new Token.EndTag(this);
105        framesetOk = true;
106        fosterInserts = false;
107        fragmentParsing = false;
108    }
109
110    @Override void initialiseParseFragment(@Nullable Element context) {
111        // context may be null
112        state = HtmlTreeBuilderState.Initial;
113        fragmentParsing = true;
114
115        if (context != null) {
116            final String contextName = context.normalName();
117            contextElement = new Element(tagFor(contextName, contextName, defaultNamespace(), settings), baseUri);
118            if (context.ownerDocument() != null) // quirks setup:
119                doc.quirksMode(context.ownerDocument().quirksMode());
120
121            // initialise the tokeniser state:
122            switch (contextName) {
123                case "script":
124                    tokeniser.transition(TokeniserState.ScriptData);
125                    break;
126                case "plaintext":
127                    tokeniser.transition(TokeniserState.PLAINTEXT);
128                    break;
129                case "template":
130                    tokeniser.transition(TokeniserState.Data);
131                    pushTemplateMode(HtmlTreeBuilderState.InTemplate);
132                    break;
133                default:
134                    Tag tag = contextElement.tag();
135                    TokeniserState textState = tag.textState();
136                    if (textState != null)
137                        tokeniser.transition(textState); // style, xmp, title, textarea, etc; or custom
138                    else
139                        tokeniser.transition(TokeniserState.Data);
140            }
141            doc.appendChild(contextElement);
142            push(contextElement);
143            resetInsertionMode();
144
145            // setup form element to nearest form on context (up ancestor chain). ensures form controls are associated
146            // with form correctly
147            Element formSearch = context;
148            while (formSearch != null) {
149                if (formSearch instanceof FormElement) {
150                    formElement = (FormElement) formSearch;
151                    break;
152                }
153                formSearch = formSearch.parent();
154            }
155        }
156    }
157
158    @Override List<Node> completeParseFragment() {
159        if (contextElement != null) {
160            // depending on context and the input html, content may have been added outside of the root el
161            // e.g. context=p, input=div, the div will have been pushed out.
162            List<Node> nodes = contextElement.siblingNodes();
163            if (!nodes.isEmpty())
164                contextElement.insertChildren(-1, nodes);
165            return contextElement.childNodes();
166        }
167        else
168            return doc.childNodes();
169    }
170
171    @Override
172    protected boolean process(Token token) {
173        HtmlTreeBuilderState dispatch = useCurrentOrForeignInsert(token) ? this.state : ForeignContent;
174        return dispatch.process(token, this);
175    }
176
177    boolean useCurrentOrForeignInsert(Token token) {
178        // https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
179        // If the stack of open elements is empty
180        if (stack.isEmpty())
181            return true;
182        final Element el = currentElement();
183        final String ns = el.tag().namespace();
184
185        // If the adjusted current node is an element in the HTML namespace
186        if (NamespaceHtml.equals(ns))
187            return true;
188
189        // If the adjusted current node is a MathML text integration point and the token is a start tag whose tag name is neither "mglyph" nor "malignmark"
190        // If the adjusted current node is a MathML text integration point and the token is a character token
191        if (isMathmlTextIntegration(el)) {
192            if (token.isStartTag()
193                    && !"mglyph".equals(token.asStartTag().normalName)
194                    && !"malignmark".equals(token.asStartTag().normalName))
195                    return true;
196            if (token.isCharacter())
197                    return true;
198        }
199        // If the adjusted current node is a MathML annotation-xml element and the token is a start tag whose tag name is "svg"
200        if (Parser.NamespaceMathml.equals(ns)
201            && el.nameIs("annotation-xml")
202            && token.isStartTag()
203            && "svg".equals(token.asStartTag().normalName))
204            return true;
205
206        // If the adjusted current node is an HTML integration point and the token is a start tag
207        // If the adjusted current node is an HTML integration point and the token is a character token
208        if (isHtmlIntegration(el)
209            && (token.isStartTag() || token.isCharacter()))
210            return true;
211
212        // If the token is an end-of-file token
213        return token.isEOF();
214    }
215
216    static boolean isMathmlTextIntegration(Element el) {
217        /*
218        A node is a MathML text integration point if it is one of the following elements:
219        A MathML mi element
220        A MathML mo element
221        A MathML mn element
222        A MathML ms element
223        A MathML mtext element
224         */
225        return (Parser.NamespaceMathml.equals(el.tag().namespace())
226            && StringUtil.inSorted(el.normalName(), TagMathMlTextIntegration));
227    }
228
229    static boolean isHtmlIntegration(Element el) {
230        /*
231        A node is an HTML integration point if it is one of the following elements:
232        A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "text/html"
233        A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "application/xhtml+xml"
234        An SVG foreignObject element
235        An SVG desc element
236        An SVG title element
237         */
238        if (Parser.NamespaceMathml.equals(el.tag().namespace())
239            && el.nameIs("annotation-xml")) {
240            String encoding = Normalizer.normalize(el.attr("encoding"));
241            if (encoding.equals("text/html") || encoding.equals("application/xhtml+xml"))
242                return true;
243        }
244        // note using .tagName for case-sensitive hit here of foreignObject
245        return Parser.NamespaceSvg.equals(el.tag().namespace()) && StringUtil.in(el.tagName(), TagSvgHtmlIntegration);
246    }
247
248    boolean process(Token token, HtmlTreeBuilderState state) {
249        return state.process(token, this);
250    }
251
252    void transition(HtmlTreeBuilderState state) {
253        this.state = state;
254    }
255
256    HtmlTreeBuilderState state() {
257        return state;
258    }
259
260    void markInsertionMode() {
261        originalState = state;
262    }
263
264    HtmlTreeBuilderState originalState() {
265        return originalState;
266    }
267
268    void framesetOk(boolean framesetOk) {
269        this.framesetOk = framesetOk;
270    }
271
272    boolean framesetOk() {
273        return framesetOk;
274    }
275
276    Document getDocument() {
277        return doc;
278    }
279
280    String getBaseUri() {
281        return baseUri;
282    }
283
284    void maybeSetBaseUri(Element base) {
285        if (baseUriSetFromDoc) // only listen to the first <base href> in parse
286            return;
287
288        String href = base.absUrl("href");
289        if (href.length() != 0) { // ignore <base target> etc
290            baseUri = href;
291            baseUriSetFromDoc = true;
292            doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base, and to update all descendants
293        }
294    }
295
296    boolean isFragmentParsing() {
297        return fragmentParsing;
298    }
299
300    void error(HtmlTreeBuilderState state) {
301        if (parser.getErrors().canAddError())
302            parser.getErrors().add(new ParseError(reader, "Unexpected %s token [%s] when in state [%s]",
303                currentToken.tokenType(), currentToken, state));
304    }
305
306    Element createElementFor(Token.StartTag startTag, String namespace, boolean forcePreserveCase) {
307        // dedupe and normalize the attributes:
308        Attributes attributes = startTag.attributes;
309        if (!forcePreserveCase)
310            attributes = settings.normalizeAttributes(attributes);
311        if (attributes != null && !attributes.isEmpty()) {
312            int dupes = attributes.deduplicate(settings);
313            if (dupes > 0) {
314                error("Dropped duplicate attribute(s) in tag [%s]", startTag.normalName);
315            }
316        }
317
318        Tag tag = tagFor(startTag.name(), startTag.normalName, namespace,
319            forcePreserveCase ? ParseSettings.preserveCase : settings);
320
321        return (tag.normalName().equals("form")) ?
322            new FormElement(tag, null, attributes) :
323            new Element(tag, null, attributes);
324    }
325
326    /** Inserts an HTML element for the given tag) */
327    Element insertElementFor(final Token.StartTag startTag) {
328        Element el = createElementFor(startTag, NamespaceHtml, false);
329        doInsertElement(el);
330
331        // handle self-closing tags. when the spec expects an empty (void) tag, will directly hit insertEmpty, so won't generate this fake end tag.
332        if (startTag.isSelfClosing()) {
333            Tag tag = el.tag();
334            tag.setSeenSelfClose(); // can infer output if in xml syntax
335            if (tag.isKnownTag() && (tag.isEmpty() || tag.isSelfClosing())) {
336                // ok, allow it. effectively a pop, but fiddles with the state. handles empty style, title etc which would otherwise leave us in data state
337                tokeniser.transition(TokeniserState.Data); // handles <script />, otherwise needs breakout steps from script data
338                tokeniser.emit(emptyEnd.reset().name(el.tagName()));  // ensure we get out of whatever state we are in. emitted for yielded processing
339            } else {
340                // error it, and leave the inserted element on
341                tokeniser.error("Tag [%s] cannot be self-closing; not a void tag", tag.normalName());
342            }
343        }
344
345        return el;
346    }
347
348    /**
349     Inserts a foreign element. Preserves the case of the tag name and of the attributes.
350     */
351    Element insertForeignElementFor(final Token.StartTag startTag, String namespace) {
352        Element el = createElementFor(startTag, namespace, true);
353        doInsertElement(el);
354
355        if (startTag.isSelfClosing()) { // foreign els are OK to self-close
356            el.tag().setSeenSelfClose(); // remember this is self-closing for output
357            pop();
358        }
359
360        return el;
361    }
362
363    Element insertEmptyElementFor(Token.StartTag startTag) {
364        Element el = createElementFor(startTag, NamespaceHtml, false);
365        doInsertElement(el);
366        pop();
367        return el;
368    }
369
370    FormElement insertFormElement(Token.StartTag startTag, boolean onStack, boolean checkTemplateStack) {
371        FormElement el = (FormElement) createElementFor(startTag, NamespaceHtml, false);
372
373        if (checkTemplateStack) {
374            if(!onStack("template"))
375                setFormElement(el);
376        } else
377            setFormElement(el);
378
379        doInsertElement(el);
380        if (!onStack) pop();
381        return el;
382    }
383
384    /** Inserts the Element onto the stack. All element inserts must run through this method. Performs any general
385     tests on the Element before insertion.
386     * @param el the Element to insert and make the current element
387     */
388    private void doInsertElement(Element el) {
389        if (formElement != null && el.tag().namespace.equals(NamespaceHtml) && StringUtil.inSorted(el.normalName(), TagFormListed))
390            formElement.addElement(el); // connect form controls to their form element
391
392        // in HTML, the xmlns attribute if set must match what the parser set the tag's namespace to
393        if (parser.getErrors().canAddError() && el.hasAttr("xmlns") && !el.attr("xmlns").equals(el.tag().namespace()))
394            error("Invalid xmlns attribute [%s] on tag [%s]", el.attr("xmlns"), el.tagName());
395
396        if (isFosterInserts() && StringUtil.inSorted(currentElement().normalName(), InTableFoster))
397            insertInFosterParent(el);
398        else
399            currentElement().appendChild(el);
400
401        push(el);
402    }
403
404    void insertCommentNode(Token.Comment token) {
405        Comment node = new Comment(token.getData());
406        currentElement().appendChild(node);
407        onNodeInserted(node);
408    }
409
410    /** Inserts the provided character token into the current element. */
411    void insertCharacterNode(Token.Character characterToken) {
412        Element el = currentElement(); // will be doc if no current element; allows for whitespace to be inserted into the doc root object (not on the stack)
413        insertCharacterToElement(characterToken, el);
414    }
415
416    /** Inserts the provided character token into the provided element. */
417    void insertCharacterToElement(Token.Character characterToken, Element el) {
418        final Node node;
419        final String data = characterToken.getData();
420
421        if (characterToken.isCData())
422            node = new CDataNode(data);
423        else if (el.tag().is(Tag.Data))
424            node = new DataNode(data);
425        else
426            node = new TextNode(data);
427        el.appendChild(node); // doesn't use insertNode, because we don't foster these; and will always have a stack.
428        onNodeInserted(node);
429    }
430
431    ArrayList<Element> getStack() {
432        return stack;
433    }
434
435    boolean onStack(Element el) {
436        return onStack(stack, el);
437    }
438
439    /** Checks if there is an HTML element with the given name on the stack. */
440    boolean onStack(String elName) {
441        return getFromStack(elName) != null;
442    }
443
444    private static final int maxQueueDepth = 256; // an arbitrary tension point between real HTML and crafted pain
445    private static boolean onStack(ArrayList<Element> queue, Element element) {
446        final int bottom = queue.size() - 1;
447        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
448        for (int pos = bottom; pos >= upper; pos--) {
449            Element next = queue.get(pos);
450            if (next == element) {
451                return true;
452            }
453        }
454        return false;
455    }
456
457    /** Gets the nearest (lowest) HTML element with the given name from the stack. */
458    @Nullable
459    Element getFromStack(String elName) {
460        final int bottom = stack.size() - 1;
461        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
462        for (int pos = bottom; pos >= upper; pos--) {
463            Element next = stack.get(pos);
464            if (next.elementIs(elName, NamespaceHtml)) {
465                return next;
466            }
467        }
468        return null;
469    }
470
471    boolean removeFromStack(Element el) {
472        for (int pos = stack.size() -1; pos >= 0; pos--) {
473            Element next = stack.get(pos);
474            if (next == el) {
475                stack.remove(pos);
476                onNodeClosed(el);
477                return true;
478            }
479        }
480        return false;
481    }
482
483    /** Pops the stack until the given HTML element is removed. */
484    @Nullable
485    Element popStackToClose(String elName) {
486        for (int pos = stack.size() -1; pos >= 0; pos--) {
487            Element el = pop();
488            if (el.elementIs(elName, NamespaceHtml)) {
489                return el;
490            }
491        }
492        return null;
493    }
494
495    /** Pops the stack until an element with the supplied name is removed, irrespective of namespace. */
496    @Nullable
497    Element popStackToCloseAnyNamespace(String elName) {
498        for (int pos = stack.size() -1; pos >= 0; pos--) {
499            Element el = pop();
500            if (el.nameIs(elName)) {
501                return el;
502            }
503        }
504        return null;
505    }
506
507    /** Pops the stack until one of the given HTML elements is removed. */
508    void popStackToClose(String... elNames) { // elnames is sorted, comes from Constants
509        for (int pos = stack.size() -1; pos >= 0; pos--) {
510            Element el = pop();
511            if (inSorted(el.normalName(), elNames) && NamespaceHtml.equals(el.tag().namespace())) {
512                break;
513            }
514        }
515    }
516
517    void clearStackToTableContext() {
518        clearStackToContext("table", "template");
519    }
520
521    void clearStackToTableBodyContext() {
522        clearStackToContext("tbody", "tfoot", "thead", "template");
523    }
524
525    void clearStackToTableRowContext() {
526        clearStackToContext("tr", "template");
527    }
528
529    /** Removes elements from the stack until one of the supplied HTML elements is removed. */
530    private void clearStackToContext(String... nodeNames) {
531        for (int pos = stack.size() -1; pos >= 0; pos--) {
532            Element next = stack.get(pos);
533            if (NamespaceHtml.equals(next.tag().namespace()) &&
534                (StringUtil.in(next.normalName(), nodeNames) || next.nameIs("html")))
535                break;
536            else
537                pop();
538        }
539    }
540
541    /**
542     Gets the Element immediately above the supplied element on the stack. Which due to adoption, may not necessarily be
543     its parent.
544
545     @param el
546     @return the Element immediately above the supplied element, or null if there is no such element.
547     */
548    @Nullable Element aboveOnStack(Element el) {
549        assert onStack(el);
550        for (int pos = stack.size() -1; pos >= 0; pos--) {
551            Element next = stack.get(pos);
552            if (next == el) {
553                return stack.get(pos-1);
554            }
555        }
556        return null;
557    }
558
559    void insertOnStackAfter(Element after, Element in) {
560        int i = stack.lastIndexOf(after);
561        Validate.isTrue(i != -1);
562        stack.add(i+1, in);
563    }
564
565    void replaceOnStack(Element out, Element in) {
566        replaceInQueue(stack, out, in);
567    }
568
569    private static void replaceInQueue(ArrayList<Element> queue, Element out, Element in) {
570        int i = queue.lastIndexOf(out);
571        Validate.isTrue(i != -1);
572        queue.set(i, in);
573    }
574
575    /**
576     * Reset the insertion mode, by searching up the stack for an appropriate insertion mode. The stack search depth
577     * is limited to {@link #maxQueueDepth}.
578     * @return true if the insertion mode was actually changed.
579     */
580    boolean resetInsertionMode() {
581        // https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode
582        boolean last = false;
583        final int bottom = stack.size() - 1;
584        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
585        final HtmlTreeBuilderState origState = this.state;
586
587        if (stack.size() == 0) { // nothing left of stack, just get to body
588            transition(HtmlTreeBuilderState.InBody);
589        }
590
591        LOOP: for (int pos = bottom; pos >= upper; pos--) {
592            Element node = stack.get(pos);
593            if (pos == upper) {
594                last = true;
595                if (fragmentParsing)
596                    node = contextElement;
597            }
598            String name = node != null ? node.normalName() : "";
599            if (!NamespaceHtml.equals(node.tag().namespace()))
600                continue; // only looking for HTML elements here
601
602            switch (name) {
603                case "select":
604                    transition(HtmlTreeBuilderState.InSelect);
605                    // todo - should loop up (with some limit) and check for table or template hits
606                    break LOOP;
607                case "td":
608                case "th":
609                    if (!last) {
610                        transition(HtmlTreeBuilderState.InCell);
611                        break LOOP;
612                    }
613                    break;
614                case "tr":
615                    transition(HtmlTreeBuilderState.InRow);
616                    break LOOP;
617                case "tbody":
618                case "thead":
619                case "tfoot":
620                    transition(HtmlTreeBuilderState.InTableBody);
621                    break LOOP;
622                case "caption":
623                    transition(HtmlTreeBuilderState.InCaption);
624                    break LOOP;
625                case "colgroup":
626                    transition(HtmlTreeBuilderState.InColumnGroup);
627                    break LOOP;
628                case "table":
629                    transition(HtmlTreeBuilderState.InTable);
630                    break LOOP;
631                case "template":
632                    HtmlTreeBuilderState tmplState = currentTemplateMode();
633                    Validate.notNull(tmplState, "Bug: no template insertion mode on stack!");
634                    transition(tmplState);
635                    break LOOP;
636                case "head":
637                    if (!last) {
638                        transition(HtmlTreeBuilderState.InHead);
639                        break LOOP;
640                    }
641                    break;
642                case "body":
643                    transition(HtmlTreeBuilderState.InBody);
644                    break LOOP;
645                case "frameset":
646                    transition(HtmlTreeBuilderState.InFrameset);
647                    break LOOP;
648                case "html":
649                    transition(headElement == null ? HtmlTreeBuilderState.BeforeHead : HtmlTreeBuilderState.AfterHead);
650                    break LOOP;
651            }
652            if (last) {
653                transition(HtmlTreeBuilderState.InBody);
654                break;
655            }
656        }
657        return state != origState;
658    }
659
660    /** Places the body back onto the stack and moves to InBody, for cases in AfterBody / AfterAfterBody when more content comes */
661    void resetBody() {
662        if (!onStack("body")) {
663            stack.add(doc.body()); // not onNodeInserted, as already seen
664        }
665        transition(HtmlTreeBuilderState.InBody);
666    }
667
668    // todo: tidy up in specific scope methods
669    private final String[] specificScopeTarget = {null};
670
671    private boolean inSpecificScope(String targetName, String[] baseTypes, String[] extraTypes) {
672        specificScopeTarget[0] = targetName;
673        return inSpecificScope(specificScopeTarget, baseTypes, extraTypes);
674    }
675
676    private boolean inSpecificScope(String[] targetNames, String[] baseTypes, @Nullable String[] extraTypes) {
677        // https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
678        final int bottom = stack.size() -1;
679        final int top = bottom > MaxScopeSearchDepth ? bottom - MaxScopeSearchDepth : 0;
680        // don't walk too far up the tree
681        for (int pos = bottom; pos >= top; pos--) {
682            Element el = stack.get(pos);
683            String elName = el.normalName();
684            // namespace checks - arguments provided are always in html ns, with this bolt-on for math and svg:
685            String ns = el.tag().namespace();
686            if (ns.equals(NamespaceHtml)) {
687                if (inSorted(elName, targetNames))
688                    return true;
689                if (inSorted(elName, baseTypes))
690                    return false;
691                if (extraTypes != null && inSorted(elName, extraTypes))
692                    return false;
693            } else if (baseTypes == TagsSearchInScope) {
694                if (ns.equals(NamespaceMathml) && inSorted(elName, TagSearchInScopeMath))
695                    return false;
696                if (ns.equals(NamespaceSvg) && inSorted(elName, TagSearchInScopeSvg))
697                    return false;
698            }
699        }
700        //Validate.fail("Should not be reachable"); // would end up false because hitting 'html' at root (basetypes)
701        return false;
702    }
703
704    boolean inScope(String[] targetNames) {
705        return inSpecificScope(targetNames, TagsSearchInScope, null);
706    }
707
708    boolean inScope(String targetName) {
709        return inScope(targetName, null);
710    }
711
712    boolean inScope(String targetName, String[] extras) {
713        return inSpecificScope(targetName, TagsSearchInScope, extras);
714        // todo: in mathml namespace: mi, mo, mn, ms, mtext annotation-xml
715        // todo: in svg namespace: forignOjbect, desc, title
716    }
717
718    boolean inListItemScope(String targetName) {
719        return inScope(targetName, TagSearchList);
720    }
721
722    boolean inButtonScope(String targetName) {
723        return inScope(targetName, TagSearchButton);
724    }
725
726    boolean inTableScope(String targetName) {
727        return inSpecificScope(targetName, TagSearchTableScope, null);
728    }
729
730    boolean inSelectScope(String targetName) {
731        for (int pos = stack.size() -1; pos >= 0; pos--) {
732            Element el = stack.get(pos);
733            String elName = el.normalName();
734            if (elName.equals(targetName))
735                return true;
736            if (!inSorted(elName, TagSearchSelectScope)) // all elements except
737                return false;
738        }
739        Validate.fail("Should not be reachable");
740        return false;
741    }
742
743    /** Tests if there is some element on the stack that is not in the provided set. */
744    boolean onStackNot(String[] allowedTags) {
745        final int bottom = stack.size() -1;
746        final int top = bottom > MaxScopeSearchDepth ? bottom - MaxScopeSearchDepth : 0;
747        // don't walk too far up the tree
748
749        for (int pos = bottom; pos >= top; pos--) {
750            final String elName = stack.get(pos).normalName();
751            if (!inSorted(elName, allowedTags))
752                return true;
753        }
754        return false;
755    }
756
757    void setHeadElement(Element headElement) {
758        this.headElement = headElement;
759    }
760
761    Element getHeadElement() {
762        return headElement;
763    }
764
765    boolean isFosterInserts() {
766        return fosterInserts;
767    }
768
769    void setFosterInserts(boolean fosterInserts) {
770        this.fosterInserts = fosterInserts;
771    }
772
773    @Nullable FormElement getFormElement() {
774        return formElement;
775    }
776
777    void setFormElement(FormElement formElement) {
778        this.formElement = formElement;
779    }
780
781    void resetPendingTableCharacters() {
782        pendingTableCharacters.clear();
783    }
784
785    List<Token.Character> getPendingTableCharacters() {
786        return pendingTableCharacters;
787    }
788
789    void addPendingTableCharacters(Token.Character c) {
790        // make a copy of the token to maintain its state (as Tokens are otherwise reset)
791        Token.Character copy = new Token.Character(c);
792        pendingTableCharacters.add(copy);
793    }
794
795    /**
796     13.2.6.3 Closing elements that have implied end tags
797     When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, or an rtc element, the UA must pop the current node off the stack of open elements.
798
799     If a step requires the UA to generate implied end tags but lists an element to exclude from the process, then the UA must perform the above steps as if that element was not in the above list.
800
801     When the steps below require the UA to generate all implied end tags thoroughly, then, while the current node is a caption element, a colgroup element, a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, an rtc element, a tbody element, a td element, a tfoot element, a th element, a thead element, or a tr element, the UA must pop the current node off the stack of open elements.
802
803     @param excludeTag If a step requires the UA to generate implied end tags but lists an element to exclude from the
804     process, then the UA must perform the above steps as if that element was not in the above list.
805     */
806    void generateImpliedEndTags(String excludeTag) {
807        while (inSorted(currentElement().normalName(), TagSearchEndTags)) {
808            if (excludeTag != null && currentElementIs(excludeTag))
809                break;
810            pop();
811        }
812    }
813
814    void generateImpliedEndTags() {
815        generateImpliedEndTags(false);
816    }
817
818    /**
819     Pops HTML elements off the stack according to the implied end tag rules
820     @param thorough if we are thorough (includes table elements etc) or not
821     */
822    void generateImpliedEndTags(boolean thorough) {
823        final String[] search = thorough ? TagThoroughSearchEndTags : TagSearchEndTags;
824        while (NamespaceHtml.equals(currentElement().tag().namespace())
825            && inSorted(currentElement().normalName(), search)) {
826            pop();
827        }
828    }
829
830    void closeElement(String name) {
831        generateImpliedEndTags(name);
832        if (!name.equals(currentElement().normalName())) error(state());
833        popStackToClose(name);
834    }
835
836    static boolean isSpecial(Element el) {
837        String namespace = el.tag().namespace();
838        String name = el.normalName();
839        switch (namespace) {
840            case NamespaceHtml:
841                return inSorted(name, TagSearchSpecial);
842            case Parser.NamespaceMathml:
843                return inSorted(name, TagSearchSpecialMath);
844            case Parser.NamespaceSvg:
845                return inSorted(name, TagSvgHtmlIntegration);
846            default:
847                return false;
848        }
849    }
850
851    Element lastFormattingElement() {
852        return formattingElements.size() > 0 ? formattingElements.get(formattingElements.size()-1) : null;
853    }
854
855    int positionOfElement(Element el){
856        for (int i = 0; i < formattingElements.size(); i++){
857            if (el == formattingElements.get(i))
858                return i;
859        }
860        return -1;
861    }
862
863    Element removeLastFormattingElement() {
864        int size = formattingElements.size();
865        if (size > 0)
866            return formattingElements.remove(size-1);
867        else
868            return null;
869    }
870
871    // active formatting elements
872    void pushActiveFormattingElements(Element in) {
873        checkActiveFormattingElements(in);
874        formattingElements.add(in);
875    }
876
877    void pushWithBookmark(Element in, int bookmark){
878        checkActiveFormattingElements(in);
879        // catch any range errors and assume bookmark is incorrect - saves a redundant range check.
880        try {
881            formattingElements.add(bookmark, in);
882        } catch (IndexOutOfBoundsException e) {
883            formattingElements.add(in);
884        }
885    }
886
887    void checkActiveFormattingElements(Element in){
888        int numSeen = 0;
889        final int size = formattingElements.size() -1;
890        int ceil = size - maxUsedFormattingElements; if (ceil <0) ceil = 0;
891
892        for (int pos = size; pos >= ceil; pos--) {
893            Element el = formattingElements.get(pos);
894            if (el == null) // marker
895                break;
896
897            if (isSameFormattingElement(in, el))
898                numSeen++;
899
900            if (numSeen == 3) {
901                formattingElements.remove(pos);
902                break;
903            }
904        }
905    }
906
907    private static boolean isSameFormattingElement(Element a, Element b) {
908        // same if: same namespace, tag, and attributes. Element.equals only checks tag, might in future check children
909        return a.normalName().equals(b.normalName()) &&
910                // a.namespace().equals(b.namespace()) &&
911                a.attributes().equals(b.attributes());
912        // todo: namespaces
913    }
914
915    void reconstructFormattingElements() {
916        if (stack.size() > maxQueueDepth)
917            return;
918        Element last = lastFormattingElement();
919        if (last == null || onStack(last))
920            return;
921
922        Element entry = last;
923        int size = formattingElements.size();
924        int ceil = size - maxUsedFormattingElements; if (ceil <0) ceil = 0;
925        int pos = size - 1;
926        boolean skip = false;
927        while (true) {
928            if (pos == ceil) { // step 4. if none before, skip to 8
929                skip = true;
930                break;
931            }
932            entry = formattingElements.get(--pos); // step 5. one earlier than entry
933            if (entry == null || onStack(entry)) // step 6 - neither marker nor on stack
934                break; // jump to 8, else continue back to 4
935        }
936        while(true) {
937            if (!skip) // step 7: on later than entry
938                entry = formattingElements.get(++pos);
939            Validate.notNull(entry); // should not occur, as we break at last element
940
941            // 8. create new element from element, 9 insert into current node, onto stack
942            skip = false; // can only skip increment from 4.
943            Element newEl = new Element(tagFor(entry.nodeName(), entry.normalName(), defaultNamespace(), settings), null, entry.attributes().clone());
944            doInsertElement(newEl);
945
946            // 10. replace entry with new entry
947            formattingElements.set(pos, newEl);
948
949            // 11
950            if (pos == size-1) // if not last entry in list, jump to 7
951                break;
952        }
953    }
954    private static final int maxUsedFormattingElements = 12; // limit how many elements get recreated
955
956    void clearFormattingElementsToLastMarker() {
957        while (!formattingElements.isEmpty()) {
958            Element el = removeLastFormattingElement();
959            if (el == null)
960                break;
961        }
962    }
963
964    void removeFromActiveFormattingElements(Element el) {
965        for (int pos = formattingElements.size() -1; pos >= 0; pos--) {
966            Element next = formattingElements.get(pos);
967            if (next == el) {
968                formattingElements.remove(pos);
969                break;
970            }
971        }
972    }
973
974    boolean isInActiveFormattingElements(Element el) {
975        return onStack(formattingElements, el);
976    }
977
978    @Nullable
979    Element getActiveFormattingElement(String nodeName) {
980        for (int pos = formattingElements.size() -1; pos >= 0; pos--) {
981            Element next = formattingElements.get(pos);
982            if (next == null) // scope marker
983                break;
984            else if (next.nameIs(nodeName))
985                return next;
986        }
987        return null;
988    }
989
990    void replaceActiveFormattingElement(Element out, Element in) {
991        replaceInQueue(formattingElements, out, in);
992    }
993
994    void insertMarkerToFormattingElements() {
995        formattingElements.add(null);
996    }
997
998    void insertInFosterParent(Node in) {
999        Element fosterParent;
1000        Element lastTable = getFromStack("table");
1001        boolean isLastTableParent = false;
1002        if (lastTable != null) {
1003            if (lastTable.parent() != null) {
1004                fosterParent = lastTable.parent();
1005                isLastTableParent = true;
1006            } else
1007                fosterParent = aboveOnStack(lastTable);
1008        } else { // no table == frag
1009            fosterParent = stack.get(0);
1010        }
1011
1012        if (isLastTableParent) {
1013            Validate.notNull(lastTable); // last table cannot be null by this point.
1014            lastTable.before(in);
1015        }
1016        else
1017            fosterParent.appendChild(in);
1018    }
1019
1020    // Template Insertion Mode stack
1021    void pushTemplateMode(HtmlTreeBuilderState state) {
1022        tmplInsertMode.add(state);
1023    }
1024
1025    @Nullable HtmlTreeBuilderState popTemplateMode() {
1026        if (tmplInsertMode.size() > 0) {
1027            return tmplInsertMode.remove(tmplInsertMode.size() -1);
1028        } else {
1029            return null;
1030        }
1031    }
1032
1033    int templateModeSize() {
1034        return tmplInsertMode.size();
1035    }
1036
1037    @Nullable HtmlTreeBuilderState currentTemplateMode() {
1038        return (tmplInsertMode.size() > 0) ? tmplInsertMode.get(tmplInsertMode.size() -1)  : null;
1039    }
1040
1041    @Override
1042    public String toString() {
1043        return "TreeBuilder{" +
1044                "currentToken=" + currentToken +
1045                ", state=" + state +
1046                ", currentElement=" + currentElement() +
1047                '}';
1048    }
1049
1050    /** @deprecated this unused internal method will be removed. */
1051    @Deprecated
1052    protected boolean isContentForTagData(final String normalName) {
1053        return (normalName.equals("script") || normalName.equals("style"));
1054    }
1055
1056}