001package org.jsoup.parser;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.internal.Normalizer;
005import org.jsoup.internal.StringUtil;
006import org.jsoup.nodes.Attributes;
007import org.jsoup.nodes.CDataNode;
008import org.jsoup.nodes.Comment;
009import org.jsoup.nodes.DataNode;
010import org.jsoup.nodes.Document;
011import org.jsoup.nodes.Element;
012import org.jsoup.nodes.FormElement;
013import org.jsoup.nodes.Node;
014import org.jsoup.nodes.TextNode;
015import org.jspecify.annotations.Nullable;
016
017import java.io.Reader;
018import java.io.StringReader;
019import java.util.ArrayList;
020import java.util.List;
021
022import static org.jsoup.internal.StringUtil.inSorted;
023import static org.jsoup.parser.HtmlTreeBuilderState.Constants.InTableFoster;
024import static org.jsoup.parser.HtmlTreeBuilderState.ForeignContent;
025import static org.jsoup.parser.Parser.NamespaceHtml;
026
027/**
028 * HTML Tree Builder; creates a DOM from Tokens.
029 */
030public class HtmlTreeBuilder extends TreeBuilder {
    // tag searches. must be sorted, used in inSorted. HtmlTreeBuilderTest validates they're sorted.
    // Boundary tags for the general scope search: hitting one of these before the target ends the search (see inScope).
    static final String[] TagsSearchInScope = new String[]{"applet", "caption", "html", "marquee", "object", "table", "td", "th"};
    // Extra boundary tags added to the general set for the list-item scope search (see inListItemScope).
    static final String[] TagSearchList = new String[]{"ol", "ul"};
    // Extra boundary tag added to the general set for the button scope search (see inButtonScope).
    static final String[] TagSearchButton = new String[]{"button"};
    // Boundary tags for the table scope search (see inTableScope).
    static final String[] TagSearchTableScope = new String[]{"html", "table"};
    // The only tags that inSelectScope will step over while looking for its target.
    static final String[] TagSearchSelectScope = new String[]{"optgroup", "option"};
    // NOTE(review): presumably the tags popped when generating implied end tags -- not referenced in this section; confirm at the usage site.
    static final String[] TagSearchEndTags = new String[]{"dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc"};
    // NOTE(review): presumably the tags popped when generating implied end tags "thoroughly" -- confirm at the usage site.
    static final String[] TagThoroughSearchEndTags = new String[]{"caption", "colgroup", "dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc", "tbody", "td", "tfoot", "th", "thead", "tr"};
    // NOTE(review): looks like the HTML spec's "special" element category -- not referenced in this section; confirm at the usage site.
    static final String[] TagSearchSpecial = new String[]{"address", "applet", "area", "article", "aside", "base", "basefont", "bgsound",
        "blockquote", "body", "br", "button", "caption", "center", "col", "colgroup", "command", "dd",
        "details", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "form",
        "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
        "iframe", "img", "input", "isindex", "li", "link", "listing", "marquee", "menu", "meta", "nav",
        "noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script",
        "section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
        "title", "tr", "ul", "wbr", "xmp"};
    // MathML element names treated as text integration points (see isMathmlTextIntegration).
    static final String[] TagMathMlTextIntegration = new String[]{"mi", "mn", "mo", "ms", "mtext"};
    // SVG element names treated as HTML integration points; matched case-sensitively against tagName (see isHtmlIntegration).
    static final String[] TagSvgHtmlIntegration = new String[]{"desc", "foreignObject", "title"};

    // Upper bound on how far up the stack the scope searches (inSpecificScope, onStackNot) will walk.
    public static final int MaxScopeSearchDepth = 100; // prevents the parser bogging down in exceptionally broken pages

    private HtmlTreeBuilderState state; // the current state
    private HtmlTreeBuilderState originalState; // original / marked state

    private boolean baseUriSetFromDoc; // true once a <base href> has set the base URI; later ones are then ignored
    private @Nullable Element headElement; // the current head element
    private @Nullable FormElement formElement; // the current form element
    private @Nullable Element contextElement; // fragment parse root; name only copy of context. could be null even if fragment parsing
    private ArrayList<Element> formattingElements; // active (open) formatting elements
    private ArrayList<HtmlTreeBuilderState> tmplInsertMode; // stack of Template Insertion modes
    private List<Token.Character> pendingTableCharacters; // chars in table to be shifted out
    private Token.EndTag emptyEnd; // reused empty end tag

    private boolean framesetOk; // if ok to go into frameset
    private boolean fosterInserts; // if next inserts should be fostered
    private boolean fragmentParsing; // if parsing a fragment of html
067
068    @Override ParseSettings defaultSettings() {
069        return ParseSettings.htmlDefault;
070    }
071
072    @Override
073    HtmlTreeBuilder newInstance() {
074        return new HtmlTreeBuilder();
075    }
076
077    @Override
078    protected void initialiseParse(Reader input, String baseUri, Parser parser) {
079        super.initialiseParse(input, baseUri, parser);
080
081        // this is a bit mucky. todo - probably just create new parser objects to ensure all reset.
082        state = HtmlTreeBuilderState.Initial;
083        originalState = null;
084        baseUriSetFromDoc = false;
085        headElement = null;
086        formElement = null;
087        contextElement = null;
088        formattingElements = new ArrayList<>();
089        tmplInsertMode = new ArrayList<>();
090        pendingTableCharacters = new ArrayList<>();
091        emptyEnd = new Token.EndTag(this);
092        framesetOk = true;
093        fosterInserts = false;
094        fragmentParsing = false;
095    }
096
097    @Override void initialiseParseFragment(@Nullable Element context) {
098        // context may be null
099        state = HtmlTreeBuilderState.Initial;
100        fragmentParsing = true;
101
102        if (context != null) {
103            final String contextName = context.normalName();
104            contextElement = new Element(tagFor(contextName, settings), baseUri);
105            if (context.ownerDocument() != null) // quirks setup:
106                doc.quirksMode(context.ownerDocument().quirksMode());
107
108            // initialise the tokeniser state:
109            switch (contextName) {
110                case "title":
111                case "textarea":
112                    tokeniser.transition(TokeniserState.Rcdata);
113                    break;
114                case "iframe":
115                case "noembed":
116                case "noframes":
117                case "style":
118                case "xmp":
119                    tokeniser.transition(TokeniserState.Rawtext);
120                    break;
121                case "script":
122                    tokeniser.transition(TokeniserState.ScriptData);
123                    break;
124                case "plaintext":
125                    tokeniser.transition(TokeniserState.PLAINTEXT);
126                    break;
127                case "template":
128                    tokeniser.transition(TokeniserState.Data);
129                    pushTemplateMode(HtmlTreeBuilderState.InTemplate);
130                    break;
131                default:
132                    tokeniser.transition(TokeniserState.Data);
133            }
134            doc.appendChild(contextElement);
135            push(contextElement);
136            resetInsertionMode();
137
138            // setup form element to nearest form on context (up ancestor chain). ensures form controls are associated
139            // with form correctly
140            Element formSearch = context;
141            while (formSearch != null) {
142                if (formSearch instanceof FormElement) {
143                    formElement = (FormElement) formSearch;
144                    break;
145                }
146                formSearch = formSearch.parent();
147            }
148        }
149    }
150
151    @Override List<Node> completeParseFragment() {
152        if (contextElement != null) {
153            // depending on context and the input html, content may have been added outside of the root el
154            // e.g. context=p, input=div, the div will have been pushed out.
155            List<Node> nodes = contextElement.siblingNodes();
156            if (!nodes.isEmpty())
157                contextElement.insertChildren(-1, nodes);
158            return contextElement.childNodes();
159        }
160        else
161            return doc.childNodes();
162    }
163
164    @Override
165    protected boolean process(Token token) {
166        HtmlTreeBuilderState dispatch = useCurrentOrForeignInsert(token) ? this.state : ForeignContent;
167        return dispatch.process(token, this);
168    }
169
170    boolean useCurrentOrForeignInsert(Token token) {
171        // https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
172        // If the stack of open elements is empty
173        if (stack.isEmpty())
174            return true;
175        final Element el = currentElement();
176        final String ns = el.tag().namespace();
177
178        // If the adjusted current node is an element in the HTML namespace
179        if (NamespaceHtml.equals(ns))
180            return true;
181
182        // If the adjusted current node is a MathML text integration point and the token is a start tag whose tag name is neither "mglyph" nor "malignmark"
183        // If the adjusted current node is a MathML text integration point and the token is a character token
184        if (isMathmlTextIntegration(el)) {
185            if (token.isStartTag()
186                    && !"mglyph".equals(token.asStartTag().normalName)
187                    && !"malignmark".equals(token.asStartTag().normalName))
188                    return true;
189            if (token.isCharacter())
190                    return true;
191        }
192        // If the adjusted current node is a MathML annotation-xml element and the token is a start tag whose tag name is "svg"
193        if (Parser.NamespaceMathml.equals(ns)
194            && el.nameIs("annotation-xml")
195            && token.isStartTag()
196            && "svg".equals(token.asStartTag().normalName))
197            return true;
198
199        // If the adjusted current node is an HTML integration point and the token is a start tag
200        // If the adjusted current node is an HTML integration point and the token is a character token
201        if (isHtmlIntegration(el)
202            && (token.isStartTag() || token.isCharacter()))
203            return true;
204
205        // If the token is an end-of-file token
206        return token.isEOF();
207    }
208
209    static boolean isMathmlTextIntegration(Element el) {
210        /*
211        A node is a MathML text integration point if it is one of the following elements:
212        A MathML mi element
213        A MathML mo element
214        A MathML mn element
215        A MathML ms element
216        A MathML mtext element
217         */
218        return (Parser.NamespaceMathml.equals(el.tag().namespace())
219            && StringUtil.inSorted(el.normalName(), TagMathMlTextIntegration));
220    }
221
222    static boolean isHtmlIntegration(Element el) {
223        /*
224        A node is an HTML integration point if it is one of the following elements:
225        A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "text/html"
226        A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "application/xhtml+xml"
227        An SVG foreignObject element
228        An SVG desc element
229        An SVG title element
230         */
231        if (Parser.NamespaceMathml.equals(el.tag().namespace())
232            && el.nameIs("annotation-xml")) {
233            String encoding = Normalizer.normalize(el.attr("encoding"));
234            if (encoding.equals("text/html") || encoding.equals("application/xhtml+xml"))
235                return true;
236        }
237        if (Parser.NamespaceSvg.equals(el.tag().namespace())
238            && StringUtil.in(el.tagName(), TagSvgHtmlIntegration)) // note using .tagName for case-sensitive hit here of foreignObject
239            return true;
240
241        return false;
242    }
243
244    boolean process(Token token, HtmlTreeBuilderState state) {
245        return state.process(token, this);
246    }
247
248    void transition(HtmlTreeBuilderState state) {
249        this.state = state;
250    }
251
252    HtmlTreeBuilderState state() {
253        return state;
254    }
255
256    void markInsertionMode() {
257        originalState = state;
258    }
259
260    HtmlTreeBuilderState originalState() {
261        return originalState;
262    }
263
264    void framesetOk(boolean framesetOk) {
265        this.framesetOk = framesetOk;
266    }
267
268    boolean framesetOk() {
269        return framesetOk;
270    }
271
272    Document getDocument() {
273        return doc;
274    }
275
276    String getBaseUri() {
277        return baseUri;
278    }
279
280    void maybeSetBaseUri(Element base) {
281        if (baseUriSetFromDoc) // only listen to the first <base href> in parse
282            return;
283
284        String href = base.absUrl("href");
285        if (href.length() != 0) { // ignore <base target> etc
286            baseUri = href;
287            baseUriSetFromDoc = true;
288            doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base, and to update all descendants
289        }
290    }
291
292    boolean isFragmentParsing() {
293        return fragmentParsing;
294    }
295
296    void error(HtmlTreeBuilderState state) {
297        if (parser.getErrors().canAddError())
298            parser.getErrors().add(new ParseError(reader, "Unexpected %s token [%s] when in state [%s]",
299                currentToken.tokenType(), currentToken, state));
300    }
301
302    Element createElementFor(Token.StartTag startTag, String namespace, boolean forcePreserveCase) {
303        // dedupe and normalize the attributes:
304        Attributes attributes = startTag.attributes;
305        if (!forcePreserveCase)
306            attributes = settings.normalizeAttributes(attributes);
307        if (attributes != null && !attributes.isEmpty()) {
308            int dupes = attributes.deduplicate(settings);
309            if (dupes > 0) {
310                error("Dropped duplicate attribute(s) in tag [%s]", startTag.normalName);
311            }
312        }
313
314        Tag tag = tagFor(startTag.tagName, namespace,
315            forcePreserveCase ? ParseSettings.preserveCase : settings);
316
317        return (tag.normalName().equals("form")) ?
318            new FormElement(tag, null, attributes) :
319            new Element(tag, null, attributes);
320    }
321
322    /** Inserts an HTML element for the given tag) */
323    Element insertElementFor(final Token.StartTag startTag) {
324        Element el = createElementFor(startTag, NamespaceHtml, false);
325        doInsertElement(el, startTag);
326
327        // handle self-closing tags. when the spec expects an empty tag, will directly hit insertEmpty, so won't generate this fake end tag.
328        if (startTag.isSelfClosing()) {
329            Tag tag = el.tag();
330            if (tag.isKnownTag()) {
331                if (!tag.isEmpty())
332                    tokeniser.error("Tag [%s] cannot be self closing; not a void tag", tag.normalName());
333                // else: ok
334            }
335            else { // unknown tag: remember this is self-closing, for output
336                tag.setSelfClosing();
337            }
338
339            // effectively a pop, but fiddles with the state. handles empty style, title etc which would otherwise leave us in data state
340            tokeniser.transition(TokeniserState.Data); // handles <script />, otherwise needs breakout steps from script data
341            tokeniser.emit(emptyEnd.reset().name(el.tagName()));  // ensure we get out of whatever state we are in. emitted for yielded processing
342        }
343
344        return el;
345    }
346
347    /**
348     Inserts a foreign element. Preserves the case of the tag name and of the attributes.
349     */
350    Element insertForeignElementFor(final Token.StartTag startTag, String namespace) {
351        Element el = createElementFor(startTag, namespace, true);
352        doInsertElement(el, startTag);
353
354        if (startTag.isSelfClosing()) {
355            el.tag().setSelfClosing(); // remember this is self-closing for output
356            pop();
357        }
358
359        return el;
360    }
361
362    Element insertEmptyElementFor(Token.StartTag startTag) {
363        Element el = createElementFor(startTag, NamespaceHtml, false);
364        doInsertElement(el, startTag);
365        pop();
366        return el;
367    }
368
369    FormElement insertFormElement(Token.StartTag startTag, boolean onStack, boolean checkTemplateStack) {
370        FormElement el = (FormElement) createElementFor(startTag, NamespaceHtml, false);
371
372        if (checkTemplateStack) {
373            if(!onStack("template"))
374                setFormElement(el);
375        } else
376            setFormElement(el);
377
378        doInsertElement(el, startTag);
379        if (!onStack) pop();
380        return el;
381    }
382
383    /** Inserts the Element onto the stack. All element inserts must run through this method. Performs any general
384     tests on the Element before insertion.
385     * @param el the Element to insert and make the current element
386     * @param token the token this element was parsed from. If null, uses a zero-width current token as intrinsic insert
387     */
388    private void doInsertElement(Element el, @Nullable Token token) {
389        if (el.tag().isFormListed() && formElement != null)
390            formElement.addElement(el); // connect form controls to their form element
391
392        // in HTML, the xmlns attribute if set must match what the parser set the tag's namespace to
393        if (parser.getErrors().canAddError() && el.hasAttr("xmlns") && !el.attr("xmlns").equals(el.tag().namespace()))
394            error("Invalid xmlns attribute [%s] on tag [%s]", el.attr("xmlns"), el.tagName());
395
396        if (isFosterInserts() && StringUtil.inSorted(currentElement().normalName(), InTableFoster))
397            insertInFosterParent(el);
398        else
399            currentElement().appendChild(el);
400
401        push(el);
402    }
403
404    void insertCommentNode(Token.Comment token) {
405        Comment node = new Comment(token.getData());
406        currentElement().appendChild(node);
407        onNodeInserted(node);
408    }
409
410    /** Inserts the provided character token into the current element. */
411    void insertCharacterNode(Token.Character characterToken) {
412        Element el = currentElement(); // will be doc if no current element; allows for whitespace to be inserted into the doc root object (not on the stack)
413        insertCharacterToElement(characterToken, el);
414    }
415
416    /** Inserts the provided character token into the provided element. */
417    void insertCharacterToElement(Token.Character characterToken, Element el) {
418        final Node node;
419        final String tagName = el.normalName();
420        final String data = characterToken.getData();
421
422        if (characterToken.isCData())
423            node = new CDataNode(data);
424        else if (isContentForTagData(tagName))
425            node = new DataNode(data);
426        else
427            node = new TextNode(data);
428        el.appendChild(node); // doesn't use insertNode, because we don't foster these; and will always have a stack.
429        onNodeInserted(node);
430    }
431
432    ArrayList<Element> getStack() {
433        return stack;
434    }
435
436    boolean onStack(Element el) {
437        return onStack(stack, el);
438    }
439
440    /** Checks if there is an HTML element with the given name on the stack. */
441    boolean onStack(String elName) {
442        return getFromStack(elName) != null;
443    }
444
445    private static final int maxQueueDepth = 256; // an arbitrary tension point between real HTML and crafted pain
446    private static boolean onStack(ArrayList<Element> queue, Element element) {
447        final int bottom = queue.size() - 1;
448        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
449        for (int pos = bottom; pos >= upper; pos--) {
450            Element next = queue.get(pos);
451            if (next == element) {
452                return true;
453            }
454        }
455        return false;
456    }
457
458    /** Gets the nearest (lowest) HTML element with the given name from the stack. */
459    @Nullable
460    Element getFromStack(String elName) {
461        final int bottom = stack.size() - 1;
462        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
463        for (int pos = bottom; pos >= upper; pos--) {
464            Element next = stack.get(pos);
465            if (next.elementIs(elName, NamespaceHtml)) {
466                return next;
467            }
468        }
469        return null;
470    }
471
472    boolean removeFromStack(Element el) {
473        for (int pos = stack.size() -1; pos >= 0; pos--) {
474            Element next = stack.get(pos);
475            if (next == el) {
476                stack.remove(pos);
477                onNodeClosed(el);
478                return true;
479            }
480        }
481        return false;
482    }
483
484    /** Pops the stack until the given HTML element is removed. */
485    @Nullable
486    Element popStackToClose(String elName) {
487        for (int pos = stack.size() -1; pos >= 0; pos--) {
488            Element el = pop();
489            if (el.elementIs(elName, NamespaceHtml)) {
490                return el;
491            }
492        }
493        return null;
494    }
495
496    /** Pops the stack until an element with the supplied name is removed, irrespective of namespace. */
497    @Nullable
498    Element popStackToCloseAnyNamespace(String elName) {
499        for (int pos = stack.size() -1; pos >= 0; pos--) {
500            Element el = pop();
501            if (el.nameIs(elName)) {
502                return el;
503            }
504        }
505        return null;
506    }
507
508    /** Pops the stack until one of the given HTML elements is removed. */
509    void popStackToClose(String... elNames) { // elnames is sorted, comes from Constants
510        for (int pos = stack.size() -1; pos >= 0; pos--) {
511            Element el = pop();
512            if (inSorted(el.normalName(), elNames) && NamespaceHtml.equals(el.tag().namespace())) {
513                break;
514            }
515        }
516    }
517
518    void clearStackToTableContext() {
519        clearStackToContext("table", "template");
520    }
521
522    void clearStackToTableBodyContext() {
523        clearStackToContext("tbody", "tfoot", "thead", "template");
524    }
525
526    void clearStackToTableRowContext() {
527        clearStackToContext("tr", "template");
528    }
529
530    /** Removes elements from the stack until one of the supplied HTML elements is removed. */
531    private void clearStackToContext(String... nodeNames) {
532        for (int pos = stack.size() -1; pos >= 0; pos--) {
533            Element next = stack.get(pos);
534            if (NamespaceHtml.equals(next.tag().namespace()) &&
535                (StringUtil.in(next.normalName(), nodeNames) || next.nameIs("html")))
536                break;
537            else
538                pop();
539        }
540    }
541
542    @Nullable Element aboveOnStack(Element el) {
543        assert onStack(el);
544        for (int pos = stack.size() -1; pos >= 0; pos--) {
545            Element next = stack.get(pos);
546            if (next == el) {
547                return stack.get(pos-1);
548            }
549        }
550        return null;
551    }
552
553    void insertOnStackAfter(Element after, Element in) {
554        int i = stack.lastIndexOf(after);
555        Validate.isTrue(i != -1);
556        stack.add(i+1, in);
557    }
558
559    void replaceOnStack(Element out, Element in) {
560        replaceInQueue(stack, out, in);
561    }
562
563    private static void replaceInQueue(ArrayList<Element> queue, Element out, Element in) {
564        int i = queue.lastIndexOf(out);
565        Validate.isTrue(i != -1);
566        queue.set(i, in);
567    }
568
569    /**
570     * Reset the insertion mode, by searching up the stack for an appropriate insertion mode. The stack search depth
571     * is limited to {@link #maxQueueDepth}.
572     * @return true if the insertion mode was actually changed.
573     */
574    boolean resetInsertionMode() {
575        // https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode
576        boolean last = false;
577        final int bottom = stack.size() - 1;
578        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
579        final HtmlTreeBuilderState origState = this.state;
580
581        if (stack.size() == 0) { // nothing left of stack, just get to body
582            transition(HtmlTreeBuilderState.InBody);
583        }
584
585        LOOP: for (int pos = bottom; pos >= upper; pos--) {
586            Element node = stack.get(pos);
587            if (pos == upper) {
588                last = true;
589                if (fragmentParsing)
590                    node = contextElement;
591            }
592            String name = node != null ? node.normalName() : "";
593            if (!NamespaceHtml.equals(node.tag().namespace()))
594                continue; // only looking for HTML elements here
595
596            switch (name) {
597                case "select":
598                    transition(HtmlTreeBuilderState.InSelect);
599                    // todo - should loop up (with some limit) and check for table or template hits
600                    break LOOP;
601                case "td":
602                case "th":
603                    if (!last) {
604                        transition(HtmlTreeBuilderState.InCell);
605                        break LOOP;
606                    }
607                    break;
608                case "tr":
609                    transition(HtmlTreeBuilderState.InRow);
610                    break LOOP;
611                case "tbody":
612                case "thead":
613                case "tfoot":
614                    transition(HtmlTreeBuilderState.InTableBody);
615                    break LOOP;
616                case "caption":
617                    transition(HtmlTreeBuilderState.InCaption);
618                    break LOOP;
619                case "colgroup":
620                    transition(HtmlTreeBuilderState.InColumnGroup);
621                    break LOOP;
622                case "table":
623                    transition(HtmlTreeBuilderState.InTable);
624                    break LOOP;
625                case "template":
626                    HtmlTreeBuilderState tmplState = currentTemplateMode();
627                    Validate.notNull(tmplState, "Bug: no template insertion mode on stack!");
628                    transition(tmplState);
629                    break LOOP;
630                case "head":
631                    if (!last) {
632                        transition(HtmlTreeBuilderState.InHead);
633                        break LOOP;
634                    }
635                    break;
636                case "body":
637                    transition(HtmlTreeBuilderState.InBody);
638                    break LOOP;
639                case "frameset":
640                    transition(HtmlTreeBuilderState.InFrameset);
641                    break LOOP;
642                case "html":
643                    transition(headElement == null ? HtmlTreeBuilderState.BeforeHead : HtmlTreeBuilderState.AfterHead);
644                    break LOOP;
645            }
646            if (last) {
647                transition(HtmlTreeBuilderState.InBody);
648                break;
649            }
650        }
651        return state != origState;
652    }
653
654    /** Places the body back onto the stack and moves to InBody, for cases in AfterBody / AfterAfterBody when more content comes */
655    void resetBody() {
656        if (!onStack("body")) {
657            stack.add(doc.body()); // not onNodeInserted, as already seen
658        }
659        transition(HtmlTreeBuilderState.InBody);
660    }
661
662    // todo: tidy up in specific scope methods
663    private final String[] specificScopeTarget = {null};
664
665    private boolean inSpecificScope(String targetName, String[] baseTypes, String[] extraTypes) {
666        specificScopeTarget[0] = targetName;
667        return inSpecificScope(specificScopeTarget, baseTypes, extraTypes);
668    }
669
670    private boolean inSpecificScope(String[] targetNames, String[] baseTypes, @Nullable String[] extraTypes) {
671        // https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
672        final int bottom = stack.size() -1;
673        final int top = bottom > MaxScopeSearchDepth ? bottom - MaxScopeSearchDepth : 0;
674        // don't walk too far up the tree
675
676        for (int pos = bottom; pos >= top; pos--) {
677            Element el = stack.get(pos);
678            if (!el.tag().namespace().equals(NamespaceHtml)) continue;
679
680            final String elName = el.normalName();
681            if (inSorted(elName, targetNames))
682                return true;
683            if (inSorted(elName, baseTypes))
684                return false;
685            if (extraTypes != null && inSorted(elName, extraTypes))
686                return false;
687        }
688        //Validate.fail("Should not be reachable"); // would end up false because hitting 'html' at root (basetypes)
689        return false;
690    }
691
692    boolean inScope(String[] targetNames) {
693        return inSpecificScope(targetNames, TagsSearchInScope, null);
694    }
695
696    boolean inScope(String targetName) {
697        return inScope(targetName, null);
698    }
699
700    boolean inScope(String targetName, String[] extras) {
701        return inSpecificScope(targetName, TagsSearchInScope, extras);
702        // todo: in mathml namespace: mi, mo, mn, ms, mtext annotation-xml
703        // todo: in svg namespace: forignOjbect, desc, title
704    }
705
706    boolean inListItemScope(String targetName) {
707        return inScope(targetName, TagSearchList);
708    }
709
710    boolean inButtonScope(String targetName) {
711        return inScope(targetName, TagSearchButton);
712    }
713
714    boolean inTableScope(String targetName) {
715        return inSpecificScope(targetName, TagSearchTableScope, null);
716    }
717
718    boolean inSelectScope(String targetName) {
719        for (int pos = stack.size() -1; pos >= 0; pos--) {
720            Element el = stack.get(pos);
721            String elName = el.normalName();
722            if (elName.equals(targetName))
723                return true;
724            if (!inSorted(elName, TagSearchSelectScope)) // all elements except
725                return false;
726        }
727        Validate.fail("Should not be reachable");
728        return false;
729    }
730
731    /** Tests if there is some element on the stack that is not in the provided set. */
732    boolean onStackNot(String[] allowedTags) {
733        final int bottom = stack.size() -1;
734        final int top = bottom > MaxScopeSearchDepth ? bottom - MaxScopeSearchDepth : 0;
735        // don't walk too far up the tree
736
737        for (int pos = bottom; pos >= top; pos--) {
738            final String elName = stack.get(pos).normalName();
739            if (!inSorted(elName, allowedTags))
740                return true;
741        }
742        return false;
743    }
744
745    void setHeadElement(Element headElement) {
746        this.headElement = headElement;
747    }
748
749    Element getHeadElement() {
750        return headElement;
751    }
752
753    boolean isFosterInserts() {
754        return fosterInserts;
755    }
756
757    void setFosterInserts(boolean fosterInserts) {
758        this.fosterInserts = fosterInserts;
759    }
760
761    @Nullable FormElement getFormElement() {
762        return formElement;
763    }
764
765    void setFormElement(FormElement formElement) {
766        this.formElement = formElement;
767    }
768
769    void resetPendingTableCharacters() {
770        pendingTableCharacters.clear();
771    }
772
773    List<Token.Character> getPendingTableCharacters() {
774        return pendingTableCharacters;
775    }
776
777    void addPendingTableCharacters(Token.Character c) {
778        // make a clone of the token to maintain its state (as Tokens are otherwise reset)
779        Token.Character clone = c.clone();
780        pendingTableCharacters.add(clone);
781    }
782
783    /**
784     13.2.6.3 Closing elements that have implied end tags
785     When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, or an rtc element, the UA must pop the current node off the stack of open elements.
786
787     If a step requires the UA to generate implied end tags but lists an element to exclude from the process, then the UA must perform the above steps as if that element was not in the above list.
788
789     When the steps below require the UA to generate all implied end tags thoroughly, then, while the current node is a caption element, a colgroup element, a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, an rtc element, a tbody element, a td element, a tfoot element, a th element, a thead element, or a tr element, the UA must pop the current node off the stack of open elements.
790
791     @param excludeTag If a step requires the UA to generate implied end tags but lists an element to exclude from the
792     process, then the UA must perform the above steps as if that element was not in the above list.
793     */
794    void generateImpliedEndTags(String excludeTag) {
795        while (inSorted(currentElement().normalName(), TagSearchEndTags)) {
796            if (excludeTag != null && currentElementIs(excludeTag))
797                break;
798            pop();
799        }
800    }
801
802    void generateImpliedEndTags() {
803        generateImpliedEndTags(false);
804    }
805
806    /**
807     Pops HTML elements off the stack according to the implied end tag rules
808     @param thorough if we are thorough (includes table elements etc) or not
809     */
810    void generateImpliedEndTags(boolean thorough) {
811        final String[] search = thorough ? TagThoroughSearchEndTags : TagSearchEndTags;
812        while (NamespaceHtml.equals(currentElement().tag().namespace())
813            && inSorted(currentElement().normalName(), search)) {
814            pop();
815        }
816    }
817
818    void closeElement(String name) {
819        generateImpliedEndTags(name);
820        if (!name.equals(currentElement().normalName())) error(state());
821        popStackToClose(name);
822    }
823
824    static boolean isSpecial(Element el) {
825        // todo: mathml's mi, mo, mn
826        // todo: svg's foreigObject, desc, title
827        String name = el.normalName();
828        return inSorted(name, TagSearchSpecial);
829    }
830
831    Element lastFormattingElement() {
832        return formattingElements.size() > 0 ? formattingElements.get(formattingElements.size()-1) : null;
833    }
834
835    int positionOfElement(Element el){
836        for (int i = 0; i < formattingElements.size(); i++){
837            if (el == formattingElements.get(i))
838                return i;
839        }
840        return -1;
841    }
842
843    Element removeLastFormattingElement() {
844        int size = formattingElements.size();
845        if (size > 0)
846            return formattingElements.remove(size-1);
847        else
848            return null;
849    }
850
851    // active formatting elements
852    void pushActiveFormattingElements(Element in) {
853        checkActiveFormattingElements(in);
854        formattingElements.add(in);
855    }
856
857    void pushWithBookmark(Element in, int bookmark){
858        checkActiveFormattingElements(in);
859        // catch any range errors and assume bookmark is incorrect - saves a redundant range check.
860        try {
861            formattingElements.add(bookmark, in);
862        } catch (IndexOutOfBoundsException e) {
863            formattingElements.add(in);
864        }
865    }
866
867    void checkActiveFormattingElements(Element in){
868        int numSeen = 0;
869        final int size = formattingElements.size() -1;
870        int ceil = size - maxUsedFormattingElements; if (ceil <0) ceil = 0;
871
872        for (int pos = size; pos >= ceil; pos--) {
873            Element el = formattingElements.get(pos);
874            if (el == null) // marker
875                break;
876
877            if (isSameFormattingElement(in, el))
878                numSeen++;
879
880            if (numSeen == 3) {
881                formattingElements.remove(pos);
882                break;
883            }
884        }
885    }
886
887    private static boolean isSameFormattingElement(Element a, Element b) {
888        // same if: same namespace, tag, and attributes. Element.equals only checks tag, might in future check children
889        return a.normalName().equals(b.normalName()) &&
890                // a.namespace().equals(b.namespace()) &&
891                a.attributes().equals(b.attributes());
892        // todo: namespaces
893    }
894
895    void reconstructFormattingElements() {
896        if (stack.size() > maxQueueDepth)
897            return;
898        Element last = lastFormattingElement();
899        if (last == null || onStack(last))
900            return;
901
902        Element entry = last;
903        int size = formattingElements.size();
904        int ceil = size - maxUsedFormattingElements; if (ceil <0) ceil = 0;
905        int pos = size - 1;
906        boolean skip = false;
907        while (true) {
908            if (pos == ceil) { // step 4. if none before, skip to 8
909                skip = true;
910                break;
911            }
912            entry = formattingElements.get(--pos); // step 5. one earlier than entry
913            if (entry == null || onStack(entry)) // step 6 - neither marker nor on stack
914                break; // jump to 8, else continue back to 4
915        }
916        while(true) {
917            if (!skip) // step 7: on later than entry
918                entry = formattingElements.get(++pos);
919            Validate.notNull(entry); // should not occur, as we break at last element
920
921            // 8. create new element from element, 9 insert into current node, onto stack
922            skip = false; // can only skip increment from 4.
923            Element newEl = new Element(tagFor(entry.normalName(), settings), null, entry.attributes().clone());
924            doInsertElement(newEl, null);
925
926            // 10. replace entry with new entry
927            formattingElements.set(pos, newEl);
928
929            // 11
930            if (pos == size-1) // if not last entry in list, jump to 7
931                break;
932        }
933    }
934    private static final int maxUsedFormattingElements = 12; // limit how many elements get recreated
935
936    void clearFormattingElementsToLastMarker() {
937        while (!formattingElements.isEmpty()) {
938            Element el = removeLastFormattingElement();
939            if (el == null)
940                break;
941        }
942    }
943
944    void removeFromActiveFormattingElements(Element el) {
945        for (int pos = formattingElements.size() -1; pos >= 0; pos--) {
946            Element next = formattingElements.get(pos);
947            if (next == el) {
948                formattingElements.remove(pos);
949                break;
950            }
951        }
952    }
953
954    boolean isInActiveFormattingElements(Element el) {
955        return onStack(formattingElements, el);
956    }
957
958    @Nullable
959    Element getActiveFormattingElement(String nodeName) {
960        for (int pos = formattingElements.size() -1; pos >= 0; pos--) {
961            Element next = formattingElements.get(pos);
962            if (next == null) // scope marker
963                break;
964            else if (next.nameIs(nodeName))
965                return next;
966        }
967        return null;
968    }
969
970    void replaceActiveFormattingElement(Element out, Element in) {
971        replaceInQueue(formattingElements, out, in);
972    }
973
974    void insertMarkerToFormattingElements() {
975        formattingElements.add(null);
976    }
977
978    void insertInFosterParent(Node in) {
979        Element fosterParent;
980        Element lastTable = getFromStack("table");
981        boolean isLastTableParent = false;
982        if (lastTable != null) {
983            if (lastTable.parent() != null) {
984                fosterParent = lastTable.parent();
985                isLastTableParent = true;
986            } else
987                fosterParent = aboveOnStack(lastTable);
988        } else { // no table == frag
989            fosterParent = stack.get(0);
990        }
991
992        if (isLastTableParent) {
993            Validate.notNull(lastTable); // last table cannot be null by this point.
994            lastTable.before(in);
995        }
996        else
997            fosterParent.appendChild(in);
998    }
999
1000    // Template Insertion Mode stack
1001    void pushTemplateMode(HtmlTreeBuilderState state) {
1002        tmplInsertMode.add(state);
1003    }
1004
1005    @Nullable HtmlTreeBuilderState popTemplateMode() {
1006        if (tmplInsertMode.size() > 0) {
1007            return tmplInsertMode.remove(tmplInsertMode.size() -1);
1008        } else {
1009            return null;
1010        }
1011    }
1012
1013    int templateModeSize() {
1014        return tmplInsertMode.size();
1015    }
1016
1017    @Nullable HtmlTreeBuilderState currentTemplateMode() {
1018        return (tmplInsertMode.size() > 0) ? tmplInsertMode.get(tmplInsertMode.size() -1)  : null;
1019    }
1020
1021    @Override
1022    public String toString() {
1023        return "TreeBuilder{" +
1024                "currentToken=" + currentToken +
1025                ", state=" + state +
1026                ", currentElement=" + currentElement() +
1027                '}';
1028    }
1029
1030    @Override protected boolean isContentForTagData(final String normalName) {
1031        return (normalName.equals("script") || normalName.equals("style"));
1032    }
1033}