001package org.jsoup.parser;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.internal.Normalizer;
005import org.jsoup.internal.StringUtil;
006import org.jsoup.nodes.Attributes;
007import org.jsoup.nodes.CDataNode;
008import org.jsoup.nodes.Comment;
009import org.jsoup.nodes.DataNode;
010import org.jsoup.nodes.Document;
011import org.jsoup.nodes.Element;
012import org.jsoup.nodes.FormElement;
013import org.jsoup.nodes.Node;
014import org.jsoup.nodes.TextNode;
015import org.jspecify.annotations.Nullable;
016
017import java.io.Reader;
018import java.util.ArrayList;
019import java.util.List;
020
021import static org.jsoup.internal.StringUtil.inSorted;
022import static org.jsoup.parser.HtmlTreeBuilderState.Constants.InTableFoster;
023import static org.jsoup.parser.HtmlTreeBuilderState.ForeignContent;
024import static org.jsoup.parser.Parser.*;
025
026/**
027 * HTML Tree Builder; creates a DOM from Tokens.
028 */
029public class HtmlTreeBuilder extends TreeBuilder {
030    // tag searches. must be sorted, used in inSorted. HtmlTreeBuilderTest validates they're sorted.
031    static final String[] TagsSearchInScope = new String[]{ // a particular element in scope
032        "applet", "caption", "html", "marquee", "object", "table", "td", "template", "th"
033    };
034    // math and svg namespaces for particular element in scope
035    static final String[]TagSearchInScopeMath = new String[] {
036        "annotation-xml",  "mi", "mn", "mo", "ms", "mtext"
037    };
038    static final String[]TagSearchInScopeSvg = new String[] {
039        "desc", "foreignObject", "title"
040    };
041
042    static final String[] TagSearchList = new String[]{"ol", "ul"};
043    static final String[] TagSearchButton = new String[]{"button"};
044    static final String[] TagSearchTableScope = new String[]{"html", "table"};
045    static final String[] TagSearchSelectScope = new String[]{"optgroup", "option"};
046    static final String[] TagSearchEndTags = new String[]{"dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc"};
047    static final String[] TagThoroughSearchEndTags = new String[]{"caption", "colgroup", "dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc", "tbody", "td", "tfoot", "th", "thead", "tr"};
048    static final String[] TagSearchSpecial = new String[]{
049        "address", "applet", "area", "article", "aside", "base", "basefont", "bgsound", "blockquote", "body", "br",
050        "button", "caption", "center", "col", "colgroup", "dd", "details", "dir", "div", "dl", "dt", "embed",
051        "fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6",
052        "head", "header", "hgroup", "hr", "html", "iframe", "img", "input", "keygen", "li", "link", "listing", "main",
053        "marquee", "menu", "meta", "nav", "noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext",
054        "pre", "script", "search", "section", "select", "source", "style", "summary", "table", "tbody", "td",
055        "template", "textarea", "tfoot", "th", "thead", "title", "tr", "track", "ul", "wbr", "xmp"};
056    static String[] TagSearchSpecialMath = {"annotation-xml", "mi", "mn", "mo", "ms", "mtext"}; // differs to MathML text integration point; adds annotation-xml
057    static final String[] TagMathMlTextIntegration = new String[]{"mi", "mn", "mo", "ms", "mtext"};
058    static final String[] TagSvgHtmlIntegration = new String[]{"desc", "foreignObject", "title"};
059    static final String[] TagFormListed = {
060        "button", "fieldset", "input", "keygen", "object", "output", "select", "textarea"
061    };
062
    // Upper bound on how many stack entries the scope searches (inSpecificScope, onStackNot) will walk.
    public static final int MaxScopeSearchDepth = 100; // prevents the parser bogging down in exceptionally broken pages

    private HtmlTreeBuilderState state; // the current state (insertion mode)
    private HtmlTreeBuilderState originalState; // original / marked state; recorded by markInsertionMode()

    private boolean baseUriSetFromDoc; // true once a <base href> has been applied; later ones are ignored
    private @Nullable Element headElement; // the current head element
    private @Nullable FormElement formElement; // the current form element
    private @Nullable Element contextElement; // fragment parse root; name only copy of context. could be null even if fragment parsing
    ArrayList<Element> formattingElements; // active (open) formatting elements
    private ArrayList<HtmlTreeBuilderState> tmplInsertMode; // stack of Template Insertion modes
    private List<Token.Character> pendingTableCharacters; // chars in table to be shifted out
    private Token.EndTag emptyEnd; // reused empty end tag

    private boolean framesetOk; // if ok to go into frameset
    private boolean fosterInserts; // if next inserts should be fostered
    private boolean fragmentParsing; // if parsing a fragment of html
080
081    @Override ParseSettings defaultSettings() {
082        return ParseSettings.htmlDefault;
083    }
084
085    @Override
086    HtmlTreeBuilder newInstance() {
087        return new HtmlTreeBuilder();
088    }
089
090    @Override
091    protected void initialiseParse(Reader input, String baseUri, Parser parser) {
092        super.initialiseParse(input, baseUri, parser);
093
094        // this is a bit mucky. todo - probably just create new parser objects to ensure all reset.
095        state = HtmlTreeBuilderState.Initial;
096        originalState = null;
097        baseUriSetFromDoc = false;
098        headElement = null;
099        formElement = null;
100        contextElement = null;
101        formattingElements = new ArrayList<>();
102        tmplInsertMode = new ArrayList<>();
103        pendingTableCharacters = new ArrayList<>();
104        emptyEnd = new Token.EndTag(this);
105        framesetOk = true;
106        fosterInserts = false;
107        fragmentParsing = false;
108    }
109
110    @Override void initialiseParseFragment(@Nullable Element context) {
111        // context may be null
112        state = HtmlTreeBuilderState.Initial;
113        fragmentParsing = true;
114
115        if (context != null) {
116            final String contextName = context.normalName();
117            contextElement = new Element(tagFor(contextName, contextName, defaultNamespace(), settings), baseUri);
118            if (context.ownerDocument() != null) // quirks setup:
119                doc.quirksMode(context.ownerDocument().quirksMode());
120
121            // initialise the tokeniser state:
122            switch (contextName) {
123                case "script":
124                    tokeniser.transition(TokeniserState.ScriptData);
125                    break;
126                case "plaintext":
127                    tokeniser.transition(TokeniserState.PLAINTEXT);
128                    break;
129                case "template":
130                    tokeniser.transition(TokeniserState.Data);
131                    pushTemplateMode(HtmlTreeBuilderState.InTemplate);
132                    break;
133                default:
134                    Tag tag = contextElement.tag();
135                    TokeniserState textState = tag.textState();
136                    if (textState != null)
137                        tokeniser.transition(textState); // style, xmp, title, textarea, etc; or custom
138                    else
139                        tokeniser.transition(TokeniserState.Data);
140            }
141            doc.appendChild(contextElement);
142            push(contextElement);
143            resetInsertionMode();
144
145            // setup form element to nearest form on context (up ancestor chain). ensures form controls are associated
146            // with form correctly
147            Element formSearch = context;
148            while (formSearch != null) {
149                if (formSearch instanceof FormElement) {
150                    formElement = (FormElement) formSearch;
151                    break;
152                }
153                formSearch = formSearch.parent();
154            }
155        }
156    }
157
158    @Override List<Node> completeParseFragment() {
159        if (contextElement != null) {
160            // depending on context and the input html, content may have been added outside of the root el
161            // e.g. context=p, input=div, the div will have been pushed out.
162            List<Node> nodes = contextElement.siblingNodes();
163            if (!nodes.isEmpty())
164                contextElement.insertChildren(-1, nodes);
165            return contextElement.childNodes();
166        }
167        else
168            return doc.childNodes();
169    }
170
171    @Override
172    protected boolean process(Token token) {
173        HtmlTreeBuilderState dispatch = useCurrentOrForeignInsert(token) ? this.state : ForeignContent;
174        return dispatch.process(token, this);
175    }
176
177    boolean useCurrentOrForeignInsert(Token token) {
178        // https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
179        // If the stack of open elements is empty
180        if (stack.isEmpty())
181            return true;
182        final Element el = currentElement();
183        final String ns = el.tag().namespace();
184
185        // If the adjusted current node is an element in the HTML namespace
186        if (NamespaceHtml.equals(ns))
187            return true;
188
189        // If the adjusted current node is a MathML text integration point and the token is a start tag whose tag name is neither "mglyph" nor "malignmark"
190        // If the adjusted current node is a MathML text integration point and the token is a character token
191        if (isMathmlTextIntegration(el)) {
192            if (token.isStartTag()
193                    && !"mglyph".equals(token.asStartTag().normalName)
194                    && !"malignmark".equals(token.asStartTag().normalName))
195                    return true;
196            if (token.isCharacter())
197                    return true;
198        }
199        // If the adjusted current node is a MathML annotation-xml element and the token is a start tag whose tag name is "svg"
200        if (Parser.NamespaceMathml.equals(ns)
201            && el.nameIs("annotation-xml")
202            && token.isStartTag()
203            && "svg".equals(token.asStartTag().normalName))
204            return true;
205
206        // If the adjusted current node is an HTML integration point and the token is a start tag
207        // If the adjusted current node is an HTML integration point and the token is a character token
208        if (isHtmlIntegration(el)
209            && (token.isStartTag() || token.isCharacter()))
210            return true;
211
212        // If the token is an end-of-file token
213        return token.isEOF();
214    }
215
216    static boolean isMathmlTextIntegration(Element el) {
217        /*
218        A node is a MathML text integration point if it is one of the following elements:
219        A MathML mi element
220        A MathML mo element
221        A MathML mn element
222        A MathML ms element
223        A MathML mtext element
224         */
225        return (Parser.NamespaceMathml.equals(el.tag().namespace())
226            && StringUtil.inSorted(el.normalName(), TagMathMlTextIntegration));
227    }
228
229    static boolean isHtmlIntegration(Element el) {
230        /*
231        A node is an HTML integration point if it is one of the following elements:
232        A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "text/html"
233        A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "application/xhtml+xml"
234        An SVG foreignObject element
235        An SVG desc element
236        An SVG title element
237         */
238        if (Parser.NamespaceMathml.equals(el.tag().namespace())
239            && el.nameIs("annotation-xml")) {
240            String encoding = Normalizer.normalize(el.attr("encoding"));
241            if (encoding.equals("text/html") || encoding.equals("application/xhtml+xml"))
242                return true;
243        }
244        // note using .tagName for case-sensitive hit here of foreignObject
245        return Parser.NamespaceSvg.equals(el.tag().namespace()) && StringUtil.in(el.tagName(), TagSvgHtmlIntegration);
246    }
247
248    boolean process(Token token, HtmlTreeBuilderState state) {
249        return state.process(token, this);
250    }
251
252    void transition(HtmlTreeBuilderState state) {
253        this.state = state;
254    }
255
256    HtmlTreeBuilderState state() {
257        return state;
258    }
259
260    void markInsertionMode() {
261        originalState = state;
262    }
263
264    HtmlTreeBuilderState originalState() {
265        return originalState;
266    }
267
268    void framesetOk(boolean framesetOk) {
269        this.framesetOk = framesetOk;
270    }
271
272    boolean framesetOk() {
273        return framesetOk;
274    }
275
276    Document getDocument() {
277        return doc;
278    }
279
280    String getBaseUri() {
281        return baseUri;
282    }
283
284    void maybeSetBaseUri(Element base) {
285        if (baseUriSetFromDoc) // only listen to the first <base href> in parse
286            return;
287
288        String href = base.absUrl("href");
289        if (href.length() != 0) { // ignore <base target> etc
290            baseUri = href;
291            baseUriSetFromDoc = true;
292            doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base, and to update all descendants
293        }
294    }
295
296    boolean isFragmentParsing() {
297        return fragmentParsing;
298    }
299
300    void error(HtmlTreeBuilderState state) {
301        if (parser.getErrors().canAddError())
302            parser.getErrors().add(new ParseError(reader, "Unexpected %s token [%s] when in state [%s]",
303                currentToken.tokenType(), currentToken, state));
304    }
305
306    Element createElementFor(Token.StartTag startTag, String namespace, boolean forcePreserveCase) {
307        // dedupe and normalize the attributes:
308        Attributes attributes = startTag.attributes;
309        if (!forcePreserveCase)
310            attributes = settings.normalizeAttributes(attributes);
311        if (attributes != null && !attributes.isEmpty()) {
312            int dupes = attributes.deduplicate(settings);
313            if (dupes > 0) {
314                error("Dropped duplicate attribute(s) in tag [%s]", startTag.normalName);
315            }
316        }
317
318        Tag tag = tagFor(startTag.name(), startTag.normalName, namespace,
319            forcePreserveCase ? ParseSettings.preserveCase : settings);
320
321        return (tag.normalName().equals("form")) ?
322            new FormElement(tag, null, attributes) :
323            new Element(tag, null, attributes);
324    }
325
326    /** Inserts an HTML element for the given tag */
327    Element insertElementFor(final Token.StartTag startTag) {
328        Element el = createElementFor(startTag, NamespaceHtml, false);
329        doInsertElement(el);
330
331        // handle self-closing tags. when the spec expects an empty (void) tag, will directly hit insertEmpty, so won't generate this fake end tag.
332        if (startTag.isSelfClosing()) {
333            Tag tag = el.tag();
334            tag.setSeenSelfClose(); // can infer output if in xml syntax
335            if (tag.isKnownTag() && (tag.isEmpty() || tag.isSelfClosing())) {
336                // ok, allow it. effectively a pop, but fiddles with the state. handles empty style, title etc which would otherwise leave us in data state
337                tokeniser.transition(TokeniserState.Data); // handles <script />, otherwise needs breakout steps from script data
338                tokeniser.emit(emptyEnd.reset().name(el.tagName()));  // ensure we get out of whatever state we are in. emitted for yielded processing
339            } else {
340                // error it, and leave the inserted element on
341                tokeniser.error("Tag [%s] cannot be self-closing; not a void tag", tag.normalName());
342            }
343        }
344
345        return el;
346    }
347
348    /**
349     Inserts a foreign element. Preserves the case of the tag name and of the attributes.
350     */
351    Element insertForeignElementFor(final Token.StartTag startTag, String namespace) {
352        Element el = createElementFor(startTag, namespace, true);
353        doInsertElement(el);
354
355        if (startTag.isSelfClosing()) { // foreign els are OK to self-close
356            el.tag().setSeenSelfClose(); // remember this is self-closing for output
357            pop();
358        }
359
360        return el;
361    }
362
363    Element insertEmptyElementFor(Token.StartTag startTag) {
364        Element el = createElementFor(startTag, NamespaceHtml, false);
365        doInsertElement(el);
366        pop();
367        return el;
368    }
369
370    FormElement insertFormElement(Token.StartTag startTag, boolean onStack, boolean checkTemplateStack) {
371        FormElement el = (FormElement) createElementFor(startTag, NamespaceHtml, false);
372
373        if (checkTemplateStack) {
374            if(!onStack("template"))
375                setFormElement(el);
376        } else
377            setFormElement(el);
378
379        doInsertElement(el);
380        if (!onStack) pop();
381        return el;
382    }
383
384    /** Inserts the Element onto the stack. All element inserts must run through this method. Performs any general
385     tests on the Element before insertion.
386     * @param el the Element to insert and make the current element
387     */
388    private void doInsertElement(Element el) {
389        if (formElement != null && el.tag().namespace.equals(NamespaceHtml) && StringUtil.inSorted(el.normalName(), TagFormListed))
390            formElement.addElement(el); // connect form controls to their form element
391
392        // in HTML, the xmlns attribute if set must match what the parser set the tag's namespace to
393        if (parser.getErrors().canAddError() && el.hasAttr("xmlns") && !el.attr("xmlns").equals(el.tag().namespace()))
394            error("Invalid xmlns attribute [%s] on tag [%s]", el.attr("xmlns"), el.tagName());
395
396        if (isFosterInserts() && StringUtil.inSorted(currentElement().normalName(), InTableFoster))
397            insertInFosterParent(el);
398        else
399            currentElement().appendChild(el);
400
401        push(el);
402    }
403
404    void insertCommentNode(Token.Comment token) {
405        Comment node = new Comment(token.getData());
406        currentElement().appendChild(node);
407        onNodeInserted(node);
408    }
409
410    /** Inserts the provided character token into the current element. */
411    void insertCharacterNode(Token.Character characterToken) {
412        Element el = currentElement(); // will be doc if no current element; allows for whitespace to be inserted into the doc root object (not on the stack)
413        insertCharacterToElement(characterToken, el);
414    }
415
416    /** Inserts the provided character token into the provided element. */
417    void insertCharacterToElement(Token.Character characterToken, Element el) {
418        final Node node;
419        final String data = characterToken.getData();
420
421        if (characterToken.isCData())
422            node = new CDataNode(data);
423        else if (el.tag().is(Tag.Data))
424            node = new DataNode(data);
425        else
426            node = new TextNode(data);
427        el.appendChild(node); // doesn't use insertNode, because we don't foster these; and will always have a stack.
428        onNodeInserted(node);
429    }
430
431    ArrayList<Element> getStack() {
432        return stack;
433    }
434
435    boolean onStack(Element el) {
436        return onStack(stack, el);
437    }
438
439    /** Checks if there is an HTML element with the given name on the stack. */
440    boolean onStack(String elName) {
441        return getFromStack(elName) != null;
442    }
443
444    private static final int maxQueueDepth = 256; // an arbitrary tension point between real HTML and crafted pain
445    private static boolean onStack(ArrayList<Element> queue, Element element) {
446        final int bottom = queue.size() - 1;
447        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
448        for (int pos = bottom; pos >= upper; pos--) {
449            Element next = queue.get(pos);
450            if (next == element) {
451                return true;
452            }
453        }
454        return false;
455    }
456
457    /** Gets the nearest (lowest) HTML element with the given name from the stack. */
458    @Nullable
459    Element getFromStack(String elName) {
460        final int bottom = stack.size() - 1;
461        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
462        for (int pos = bottom; pos >= upper; pos--) {
463            Element next = stack.get(pos);
464            if (next.elementIs(elName, NamespaceHtml)) {
465                return next;
466            }
467        }
468        return null;
469    }
470
471    boolean removeFromStack(Element el) {
472        for (int pos = stack.size() -1; pos >= 0; pos--) {
473            Element next = stack.get(pos);
474            if (next == el) {
475                stack.remove(pos);
476                onNodeClosed(el);
477                return true;
478            }
479        }
480        return false;
481    }
482
483    /** Pops the stack until the given HTML element is removed. */
484    @Nullable
485    Element popStackToClose(String elName) {
486        for (int pos = stack.size() -1; pos >= 0; pos--) {
487            Element el = pop();
488            if (el.elementIs(elName, NamespaceHtml)) {
489                return el;
490            }
491        }
492        return null;
493    }
494
495    /** Pops the stack until an element with the supplied name is removed, irrespective of namespace. */
496    @Nullable
497    Element popStackToCloseAnyNamespace(String elName) {
498        for (int pos = stack.size() -1; pos >= 0; pos--) {
499            Element el = pop();
500            if (el.nameIs(elName)) {
501                return el;
502            }
503        }
504        return null;
505    }
506
507    /** Pops the stack until one of the given HTML elements is removed. */
508    void popStackToClose(String... elNames) { // elnames is sorted, comes from Constants
509        for (int pos = stack.size() -1; pos >= 0; pos--) {
510            Element el = pop();
511            if (inSorted(el.normalName(), elNames) && NamespaceHtml.equals(el.tag().namespace())) {
512                break;
513            }
514        }
515    }
516
517    void clearStackToTableContext() {
518        clearStackToContext("table", "template");
519    }
520
521    void clearStackToTableBodyContext() {
522        clearStackToContext("tbody", "tfoot", "thead", "template");
523    }
524
525    void clearStackToTableRowContext() {
526        clearStackToContext("tr", "template");
527    }
528
529    /** Removes elements from the stack until one of the supplied HTML elements is removed. */
530    private void clearStackToContext(String... nodeNames) {
531        for (int pos = stack.size() -1; pos >= 0; pos--) {
532            Element next = stack.get(pos);
533            if (NamespaceHtml.equals(next.tag().namespace()) &&
534                (StringUtil.in(next.normalName(), nodeNames) || next.nameIs("html")))
535                break;
536            else
537                pop();
538        }
539    }
540
541    /**
542     Gets the Element immediately above the supplied element on the stack. Which due to adoption, may not necessarily be
543     its parent.
544
545     @param el
546     @return the Element immediately above the supplied element, or null if there is no such element.
547     */
548    @Nullable Element aboveOnStack(Element el) {
549        assert onStack(el);
550        for (int pos = stack.size() -1; pos >= 0; pos--) {
551            Element next = stack.get(pos);
552            if (next == el) {
553                return stack.get(pos-1);
554            }
555        }
556        return null;
557    }
558
559    void insertOnStackAfter(Element after, Element in) {
560        int i = stack.lastIndexOf(after);
561        Validate.isTrue(i != -1);
562        stack.add(i+1, in);
563    }
564
565    void replaceOnStack(Element out, Element in) {
566        replaceInQueue(stack, out, in);
567    }
568
569    private static void replaceInQueue(ArrayList<Element> queue, Element out, Element in) {
570        int i = queue.lastIndexOf(out);
571        Validate.isTrue(i != -1);
572        queue.set(i, in);
573    }
574
575    /**
576     * Reset the insertion mode, by searching up the stack for an appropriate insertion mode. The stack search depth
577     * is limited to {@link #maxQueueDepth}.
578     * @return true if the insertion mode was actually changed.
579     */
580    boolean resetInsertionMode() {
581        // https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode
582        boolean last = false;
583        final int bottom = stack.size() - 1;
584        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
585        final HtmlTreeBuilderState origState = this.state;
586
587        if (stack.size() == 0) { // nothing left of stack, just get to body
588            transition(HtmlTreeBuilderState.InBody);
589        }
590
591        LOOP: for (int pos = bottom; pos >= upper; pos--) {
592            Element node = stack.get(pos);
593            if (pos == upper) {
594                last = true;
595                if (fragmentParsing)
596                    node = contextElement;
597            }
598            String name = node != null ? node.normalName() : "";
599            if (!NamespaceHtml.equals(node.tag().namespace()))
600                continue; // only looking for HTML elements here
601
602            switch (name) {
603                case "select":
604                    transition(HtmlTreeBuilderState.InSelect);
605                    // todo - should loop up (with some limit) and check for table or template hits
606                    break LOOP;
607                case "td":
608                case "th":
609                    if (!last) {
610                        transition(HtmlTreeBuilderState.InCell);
611                        break LOOP;
612                    }
613                    break;
614                case "tr":
615                    transition(HtmlTreeBuilderState.InRow);
616                    break LOOP;
617                case "tbody":
618                case "thead":
619                case "tfoot":
620                    transition(HtmlTreeBuilderState.InTableBody);
621                    break LOOP;
622                case "caption":
623                    transition(HtmlTreeBuilderState.InCaption);
624                    break LOOP;
625                case "colgroup":
626                    transition(HtmlTreeBuilderState.InColumnGroup);
627                    break LOOP;
628                case "table":
629                    transition(HtmlTreeBuilderState.InTable);
630                    break LOOP;
631                case "template":
632                    HtmlTreeBuilderState tmplState = currentTemplateMode();
633                    Validate.notNull(tmplState, "Bug: no template insertion mode on stack!");
634                    transition(tmplState);
635                    break LOOP;
636                case "head":
637                    if (!last) {
638                        transition(HtmlTreeBuilderState.InHead);
639                        break LOOP;
640                    }
641                    break;
642                case "body":
643                    transition(HtmlTreeBuilderState.InBody);
644                    break LOOP;
645                case "frameset":
646                    transition(HtmlTreeBuilderState.InFrameset);
647                    break LOOP;
648                case "html":
649                    transition(headElement == null ? HtmlTreeBuilderState.BeforeHead : HtmlTreeBuilderState.AfterHead);
650                    break LOOP;
651            }
652            if (last) {
653                transition(HtmlTreeBuilderState.InBody);
654                break;
655            }
656        }
657        return state != origState;
658    }
659
660    /** Places the body back onto the stack and moves to InBody, for cases in AfterBody / AfterAfterBody when more content comes */
661    void resetBody() {
662        if (!onStack("body")) {
663            stack.add(doc.body()); // not onNodeInserted, as already seen
664        }
665        transition(HtmlTreeBuilderState.InBody);
666    }
667
668    // todo: tidy up in specific scope methods
669    private final String[] specificScopeTarget = {null};
670
671    private boolean inSpecificScope(String targetName, String[] baseTypes, String[] extraTypes) {
672        specificScopeTarget[0] = targetName;
673        return inSpecificScope(specificScopeTarget, baseTypes, extraTypes);
674    }
675
676    private boolean inSpecificScope(String[] targetNames, String[] baseTypes, @Nullable String[] extraTypes) {
677        // https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
678        final int bottom = stack.size() -1;
679        final int top = bottom > MaxScopeSearchDepth ? bottom - MaxScopeSearchDepth : 0;
680        // don't walk too far up the tree
681        for (int pos = bottom; pos >= top; pos--) {
682            Element el = stack.get(pos);
683            String elName = el.normalName();
684            // namespace checks - arguments provided are always in html ns, with this bolt-on for math and svg:
685            String ns = el.tag().namespace();
686            if (ns.equals(NamespaceHtml)) {
687                if (inSorted(elName, targetNames))
688                    return true;
689                if (inSorted(elName, baseTypes))
690                    return false;
691                if (extraTypes != null && inSorted(elName, extraTypes))
692                    return false;
693            } else if (baseTypes == TagsSearchInScope) {
694                if (ns.equals(NamespaceMathml) && inSorted(elName, TagSearchInScopeMath))
695                    return false;
696                if (ns.equals(NamespaceSvg) && inSorted(elName, TagSearchInScopeSvg))
697                    return false;
698            }
699        }
700        //Validate.fail("Should not be reachable"); // would end up false because hitting 'html' at root (basetypes)
701        return false;
702    }
703
704    boolean inScope(String[] targetNames) {
705        return inSpecificScope(targetNames, TagsSearchInScope, null);
706    }
707
708    boolean inScope(String targetName) {
709        return inScope(targetName, null);
710    }
711
712    boolean inScope(String targetName, String[] extras) {
713        return inSpecificScope(targetName, TagsSearchInScope, extras);
714    }
715
716    boolean inListItemScope(String targetName) {
717        return inScope(targetName, TagSearchList);
718    }
719
720    boolean inButtonScope(String targetName) {
721        return inScope(targetName, TagSearchButton);
722    }
723
724    boolean inTableScope(String targetName) {
725        return inSpecificScope(targetName, TagSearchTableScope, null);
726    }
727
728    boolean inSelectScope(String targetName) {
729        for (int pos = stack.size() -1; pos >= 0; pos--) {
730            Element el = stack.get(pos);
731            String elName = el.normalName();
732            if (elName.equals(targetName))
733                return true;
734            if (!inSorted(elName, TagSearchSelectScope)) // all elements except
735                return false;
736        }
737        Validate.fail("Should not be reachable");
738        return false;
739    }
740
741    /** Tests if there is some element on the stack that is not in the provided set. */
742    boolean onStackNot(String[] allowedTags) {
743        final int bottom = stack.size() -1;
744        final int top = bottom > MaxScopeSearchDepth ? bottom - MaxScopeSearchDepth : 0;
745        // don't walk too far up the tree
746
747        for (int pos = bottom; pos >= top; pos--) {
748            final String elName = stack.get(pos).normalName();
749            if (!inSorted(elName, allowedTags))
750                return true;
751        }
752        return false;
753    }
754
755    void setHeadElement(Element headElement) {
756        this.headElement = headElement;
757    }
758
759    Element getHeadElement() {
760        return headElement;
761    }
762
763    boolean isFosterInserts() {
764        return fosterInserts;
765    }
766
767    void setFosterInserts(boolean fosterInserts) {
768        this.fosterInserts = fosterInserts;
769    }
770
771    @Nullable FormElement getFormElement() {
772        return formElement;
773    }
774
775    void setFormElement(FormElement formElement) {
776        this.formElement = formElement;
777    }
778
779    void resetPendingTableCharacters() {
780        pendingTableCharacters.clear();
781    }
782
783    List<Token.Character> getPendingTableCharacters() {
784        return pendingTableCharacters;
785    }
786
787    void addPendingTableCharacters(Token.Character c) {
788        // make a copy of the token to maintain its state (as Tokens are otherwise reset)
789        Token.Character copy = new Token.Character(c);
790        pendingTableCharacters.add(copy);
791    }
792
793    /**
794     13.2.6.3 Closing elements that have implied end tags
795     When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, or an rtc element, the UA must pop the current node off the stack of open elements.
796
797     If a step requires the UA to generate implied end tags but lists an element to exclude from the process, then the UA must perform the above steps as if that element was not in the above list.
798
799     When the steps below require the UA to generate all implied end tags thoroughly, then, while the current node is a caption element, a colgroup element, a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, an rtc element, a tbody element, a td element, a tfoot element, a th element, a thead element, or a tr element, the UA must pop the current node off the stack of open elements.
800
801     @param excludeTag If a step requires the UA to generate implied end tags but lists an element to exclude from the
802     process, then the UA must perform the above steps as if that element was not in the above list.
803     */
804    void generateImpliedEndTags(String excludeTag) {
805        while (inSorted(currentElement().normalName(), TagSearchEndTags)) {
806            if (excludeTag != null && currentElementIs(excludeTag))
807                break;
808            pop();
809        }
810    }
811
812    void generateImpliedEndTags() {
813        generateImpliedEndTags(false);
814    }
815
816    /**
817     Pops HTML elements off the stack according to the implied end tag rules
818     @param thorough if we are thorough (includes table elements etc) or not
819     */
820    void generateImpliedEndTags(boolean thorough) {
821        final String[] search = thorough ? TagThoroughSearchEndTags : TagSearchEndTags;
822        while (NamespaceHtml.equals(currentElement().tag().namespace())
823            && inSorted(currentElement().normalName(), search)) {
824            pop();
825        }
826    }
827
828    void closeElement(String name) {
829        generateImpliedEndTags(name);
830        if (!name.equals(currentElement().normalName())) error(state());
831        popStackToClose(name);
832    }
833
834    static boolean isSpecial(Element el) {
835        String namespace = el.tag().namespace();
836        String name = el.normalName();
837        switch (namespace) {
838            case NamespaceHtml:
839                return inSorted(name, TagSearchSpecial);
840            case Parser.NamespaceMathml:
841                return inSorted(name, TagSearchSpecialMath);
842            case Parser.NamespaceSvg:
843                return inSorted(name, TagSvgHtmlIntegration);
844            default:
845                return false;
846        }
847    }
848
849    Element lastFormattingElement() {
850        return formattingElements.size() > 0 ? formattingElements.get(formattingElements.size()-1) : null;
851    }
852
853    int positionOfElement(Element el){
854        for (int i = 0; i < formattingElements.size(); i++){
855            if (el == formattingElements.get(i))
856                return i;
857        }
858        return -1;
859    }
860
861    Element removeLastFormattingElement() {
862        int size = formattingElements.size();
863        if (size > 0)
864            return formattingElements.remove(size-1);
865        else
866            return null;
867    }
868
869    // active formatting elements
870    void pushActiveFormattingElements(Element in) {
871        checkActiveFormattingElements(in);
872        formattingElements.add(in);
873    }
874
875    void pushWithBookmark(Element in, int bookmark){
876        checkActiveFormattingElements(in);
877        // catch any range errors and assume bookmark is incorrect - saves a redundant range check.
878        try {
879            formattingElements.add(bookmark, in);
880        } catch (IndexOutOfBoundsException e) {
881            formattingElements.add(in);
882        }
883    }
884
885    void checkActiveFormattingElements(Element in){
886        int numSeen = 0;
887        final int size = formattingElements.size() -1;
888        int ceil = size - maxUsedFormattingElements; if (ceil <0) ceil = 0;
889
890        for (int pos = size; pos >= ceil; pos--) {
891            Element el = formattingElements.get(pos);
892            if (el == null) // marker
893                break;
894
895            if (isSameFormattingElement(in, el))
896                numSeen++;
897
898            if (numSeen == 3) {
899                formattingElements.remove(pos);
900                break;
901            }
902        }
903    }
904
905    private static boolean isSameFormattingElement(Element a, Element b) {
906        // same if: same namespace, tag, and attributes. Element.equals only checks tag, might in future check children
907        return a.normalName().equals(b.normalName()) &&
908                // a.namespace().equals(b.namespace()) &&
909                a.attributes().equals(b.attributes());
910        // todo: namespaces
911    }
912
913    void reconstructFormattingElements() {
914        if (stack.size() > maxQueueDepth)
915            return;
916        Element last = lastFormattingElement();
917        if (last == null || onStack(last))
918            return;
919
920        Element entry = last;
921        int size = formattingElements.size();
922        int ceil = size - maxUsedFormattingElements; if (ceil <0) ceil = 0;
923        int pos = size - 1;
924        boolean skip = false;
925        while (true) {
926            if (pos == ceil) { // step 4. if none before, skip to 8
927                skip = true;
928                break;
929            }
930            entry = formattingElements.get(--pos); // step 5. one earlier than entry
931            if (entry == null || onStack(entry)) // step 6 - neither marker nor on stack
932                break; // jump to 8, else continue back to 4
933        }
934        while(true) {
935            if (!skip) // step 7: on later than entry
936                entry = formattingElements.get(++pos);
937            Validate.notNull(entry); // should not occur, as we break at last element
938
939            // 8. create new element from element, 9 insert into current node, onto stack
940            skip = false; // can only skip increment from 4.
941            Element newEl = new Element(tagFor(entry.nodeName(), entry.normalName(), defaultNamespace(), settings), null, entry.attributes().clone());
942            doInsertElement(newEl);
943
944            // 10. replace entry with new entry
945            formattingElements.set(pos, newEl);
946
947            // 11
948            if (pos == size-1) // if not last entry in list, jump to 7
949                break;
950        }
951    }
952    private static final int maxUsedFormattingElements = 12; // limit how many elements get recreated
953
954    void clearFormattingElementsToLastMarker() {
955        while (!formattingElements.isEmpty()) {
956            Element el = removeLastFormattingElement();
957            if (el == null)
958                break;
959        }
960    }
961
962    void removeFromActiveFormattingElements(Element el) {
963        for (int pos = formattingElements.size() -1; pos >= 0; pos--) {
964            Element next = formattingElements.get(pos);
965            if (next == el) {
966                formattingElements.remove(pos);
967                break;
968            }
969        }
970    }
971
972    boolean isInActiveFormattingElements(Element el) {
973        return onStack(formattingElements, el);
974    }
975
976    @Nullable
977    Element getActiveFormattingElement(String nodeName) {
978        for (int pos = formattingElements.size() -1; pos >= 0; pos--) {
979            Element next = formattingElements.get(pos);
980            if (next == null) // scope marker
981                break;
982            else if (next.nameIs(nodeName))
983                return next;
984        }
985        return null;
986    }
987
988    void replaceActiveFormattingElement(Element out, Element in) {
989        replaceInQueue(formattingElements, out, in);
990    }
991
992    void insertMarkerToFormattingElements() {
993        formattingElements.add(null);
994    }
995
996    void insertInFosterParent(Node in) {
997        Element fosterParent;
998        Element lastTable = getFromStack("table");
999        boolean isLastTableParent = false;
1000        if (lastTable != null) {
1001            if (lastTable.parent() != null) {
1002                fosterParent = lastTable.parent();
1003                isLastTableParent = true;
1004            } else
1005                fosterParent = aboveOnStack(lastTable);
1006        } else { // no table == frag
1007            fosterParent = stack.get(0);
1008        }
1009
1010        if (isLastTableParent) {
1011            Validate.notNull(lastTable); // last table cannot be null by this point.
1012            lastTable.before(in);
1013        }
1014        else
1015            fosterParent.appendChild(in);
1016    }
1017
1018    // Template Insertion Mode stack
1019    void pushTemplateMode(HtmlTreeBuilderState state) {
1020        tmplInsertMode.add(state);
1021    }
1022
1023    @Nullable HtmlTreeBuilderState popTemplateMode() {
1024        if (tmplInsertMode.size() > 0) {
1025            return tmplInsertMode.remove(tmplInsertMode.size() -1);
1026        } else {
1027            return null;
1028        }
1029    }
1030
1031    int templateModeSize() {
1032        return tmplInsertMode.size();
1033    }
1034
1035    @Nullable HtmlTreeBuilderState currentTemplateMode() {
1036        return (tmplInsertMode.size() > 0) ? tmplInsertMode.get(tmplInsertMode.size() -1)  : null;
1037    }
1038
1039    @Override
1040    public String toString() {
1041        return "TreeBuilder{" +
1042                "currentToken=" + currentToken +
1043                ", state=" + state +
1044                ", currentElement=" + currentElement() +
1045                '}';
1046    }
1047
1048}