001package org.jsoup.parser;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.internal.Normalizer;
005import org.jsoup.internal.SharedConstants;
006
007import java.util.HashMap;
008import java.util.Map;
009import java.util.Objects;
010import java.util.function.Consumer;
011
012/**
013 * Tag capabilities.
014 *
015 * @author Jonathan Hedley, jonathan@hedley.net
016 */
017public class Tag implements Cloneable {
018    private static final Map<String, Tag> Tags = new HashMap<>(); // map of known tags
019
020    private String tagName;
021    private final String normalName; // always the lower case version of this tag, regardless of case preservation mode
022    private String namespace;
023    private boolean isBlock = true; // block
024    private boolean formatAsBlock = true; // should be formatted as a block
025    private boolean empty = false; // can hold nothing; e.g. img
026    private boolean selfClosing = false; // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
027    private boolean preserveWhitespace = false; // for pre, textarea, script etc
028    private boolean formList = false; // a control that appears in forms: input, textarea, output etc
029    private boolean formSubmit = false; // a control that can be submitted in a form: input etc
030
031    private Tag(String tagName, String namespace) {
032        this.tagName = tagName;
033        normalName = Normalizer.lowerCase(tagName);
034        this.namespace = namespace;
035    }
036
037    /**
038     * Get this tag's name.
039     *
040     * @return the tag's name
041     */
042    public String getName() {
043        return tagName;
044    }
045
046    /**
047     * Get this tag's normalized (lowercased) name.
048     * @return the tag's normal name.
049     */
050    public String normalName() {
051        return normalName;
052    }
053
054    public String namespace() {
055        return namespace;
056    }
057
058    /**
059     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
060     * <p>
061     * Pre-defined tags (p, div etc) will be ==, but unknown tags are not registered and will only .equals().
062     * </p>
063     * 
064     * @param tagName Name of tag, e.g. "p". Case-insensitive.
065     * @param namespace the namespace for the tag.
066     * @param settings used to control tag name sensitivity
067     * @return The tag, either defined or new generic.
068     */
069    public static Tag valueOf(String tagName, String namespace, ParseSettings settings) {
070        Validate.notEmpty(tagName);
071        Validate.notNull(namespace);
072        Tag tag = Tags.get(tagName);
073        if (tag != null && tag.namespace.equals(namespace))
074            return tag;
075
076        tagName = settings.normalizeTag(tagName); // the name we'll use
077        Validate.notEmpty(tagName);
078        String normalName = Normalizer.lowerCase(tagName); // the lower-case name to get tag settings off
079        tag = Tags.get(normalName);
080        if (tag != null && tag.namespace.equals(namespace)) {
081            if (settings.preserveTagCase() && !tagName.equals(normalName)) {
082                tag = tag.clone(); // get a new version vs the static one, so name update doesn't reset all
083                tag.tagName = tagName;
084            }
085            return tag;
086        }
087
088        // not defined: create default; go anywhere, do anything! (incl be inside a <p>)
089        tag = new Tag(tagName, namespace);
090        tag.isBlock = false;
091
092        return tag;
093    }
094
095    /**
096     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
097     * <p>
098     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
099     * </p>
100     *
101     * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
102     * @return The tag, either defined or new generic.
103     * @see #valueOf(String tagName, String namespace, ParseSettings settings)
104     */
105    public static Tag valueOf(String tagName) {
106        return valueOf(tagName, Parser.NamespaceHtml, ParseSettings.preserveCase);
107    }
108
109    /**
110     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
111     * <p>
112     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
113     * </p>
114     *
115     * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
116     * @param settings used to control tag name sensitivity
117     * @return The tag, either defined or new generic.
118     * @see #valueOf(String tagName, String namespace, ParseSettings settings)
119     */
120    public static Tag valueOf(String tagName, ParseSettings settings) {
121        return valueOf(tagName, Parser.NamespaceHtml, settings);
122    }
123
124    /**
125     * Gets if this is a block tag.
126     *
127     * @return if block tag
128     */
129    public boolean isBlock() {
130        return isBlock;
131    }
132
133    /**
134     * Gets if this tag should be formatted as a block (or as inline)
135     *
136     * @return if should be formatted as block or inline
137     */
138    public boolean formatAsBlock() {
139        return formatAsBlock;
140    }
141
142    /**
143     * Gets if this tag is an inline tag.
144     *
145     * @return if this tag is an inline tag.
146     */
147    public boolean isInline() {
148        return !isBlock;
149    }
150
151    /**
152     * Get if this is an empty tag
153     *
154     * @return if this is an empty tag
155     */
156    public boolean isEmpty() {
157        return empty;
158    }
159
160    /**
161     * Get if this tag is self-closing.
162     *
163     * @return if this tag should be output as self-closing.
164     */
165    public boolean isSelfClosing() {
166        return empty || selfClosing;
167    }
168
169    /**
170     * Get if this is a pre-defined tag, or was auto created on parsing.
171     *
172     * @return if a known tag
173     */
174    public boolean isKnownTag() {
175        return Tags.containsKey(tagName);
176    }
177
178    /**
179     * Check if this tagname is a known tag.
180     *
181     * @param tagName name of tag
182     * @return if known HTML tag
183     */
184    public static boolean isKnownTag(String tagName) {
185        return Tags.containsKey(tagName);
186    }
187
188    /**
189     * Get if this tag should preserve whitespace within child text nodes.
190     *
191     * @return if preserve whitespace
192     */
193    public boolean preserveWhitespace() {
194        return preserveWhitespace;
195    }
196
197    /**
198     * Get if this tag represents a control associated with a form. E.g. input, textarea, output
199     * @return if associated with a form
200     */
201    public boolean isFormListed() {
202        return formList;
203    }
204
205    /**
206     * Get if this tag represents an element that should be submitted with a form. E.g. input, option
207     * @return if submittable with a form
208     */
209    public boolean isFormSubmittable() {
210        return formSubmit;
211    }
212
213    Tag setSelfClosing() {
214        selfClosing = true;
215        return this;
216    }
217
218    @Override
219    public boolean equals(Object o) {
220        if (this == o) return true;
221        if (!(o instanceof Tag)) return false;
222
223        Tag tag = (Tag) o;
224
225        if (!tagName.equals(tag.tagName)) return false;
226        if (empty != tag.empty) return false;
227        if (formatAsBlock != tag.formatAsBlock) return false;
228        if (isBlock != tag.isBlock) return false;
229        if (preserveWhitespace != tag.preserveWhitespace) return false;
230        if (selfClosing != tag.selfClosing) return false;
231        if (formList != tag.formList) return false;
232        return formSubmit == tag.formSubmit;
233    }
234
235    @Override
236    public int hashCode() {
237        return Objects.hash(tagName, isBlock, formatAsBlock, empty, selfClosing, preserveWhitespace,
238            formList, formSubmit);
239    }
240
241    @Override
242    public String toString() {
243        return tagName;
244    }
245
246    @Override
247    protected Tag clone() {
248        try {
249            return (Tag) super.clone();
250        } catch (CloneNotSupportedException e) {
251            throw new RuntimeException(e);
252        }
253    }
254
255    // internal static initialisers:
256    // prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other sources
257    private static final String[] blockTags = {
258            "html", "head", "body", "frameset", "script", "noscript", "style", "meta", "link", "title", "frame",
259            "noframes", "section", "nav", "aside", "hgroup", "header", "footer", "p", "h1", "h2", "h3", "h4", "h5", "h6",
260            "ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins",
261            "del", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th",
262            "td", "video", "audio", "canvas", "details", "menu", "plaintext", "template", "article", "main",
263            "svg", "math", "center", "template",
264            "dir", "applet", "marquee", "listing" // deprecated but still known / special handling
265    };
266    private static final String[] inlineTags = {
267            "object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd",
268            "var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "rtc", "a", "img", "br", "wbr", "map", "q",
269            "sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "optgroup",
270            "option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track",
271            "summary", "command", "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track",
272            "data", "bdi", "s", "strike", "nobr",
273            "rb", // deprecated but still known / special handling
274            "text", // in SVG NS
275            "mi", "mo", "msup", "mn", "mtext" // in MathML NS, to ensure inline
276    };
277    private static final String[] emptyTags = {
278            "meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command",
279            "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track"
280    };
281    // todo - rework this to format contents as inline; and update html emitter in Element. Same output, just neater.
282    private static final String[] formatAsInlineTags = {
283            "title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th", "td", "script", "style",
284            "ins", "del", "s", "button"
285    };
286    private static final String[] preserveWhitespaceTags = {
287            "pre", "plaintext", "title", "textarea"
288            // script is not here as it is a data node, which always preserve whitespace
289    };
290    // todo: I think we just need submit tags, and can scrub listed
291    private static final String[] formListedTags = {
292            "button", "fieldset", "input", "keygen", "object", "output", "select", "textarea"
293    };
294    private static final String[] formSubmitTags = SharedConstants.FormSubmitTags;
295
296    private static final Map<String, String[]> namespaces = new HashMap<>();
297    static {
298        namespaces.put(Parser.NamespaceMathml, new String[]{"math", "mi", "mo", "msup", "mn", "mtext"});
299        namespaces.put(Parser.NamespaceSvg, new String[]{"svg", "text"});
300        // We don't need absolute coverage here as other cases will be inferred by the HtmlTreeBuilder
301    }
302
303    private static void setupTags(String[] tagNames, Consumer<Tag> tagModifier) {
304        for (String tagName : tagNames) {
305            Tag tag = Tags.get(tagName);
306            if (tag == null) {
307                tag = new Tag(tagName, Parser.NamespaceHtml);
308                Tags.put(tag.tagName, tag);
309            }
310            tagModifier.accept(tag);
311        }
312    }
313
314    static {
315        setupTags(blockTags, tag -> {
316            tag.isBlock = true;
317            tag.formatAsBlock = true;
318        });
319
320        setupTags(inlineTags, tag -> {
321            tag.isBlock = false;
322            tag.formatAsBlock = false;
323        });
324
325        setupTags(emptyTags, tag -> tag.empty = true);
326        setupTags(formatAsInlineTags, tag -> tag.formatAsBlock = false);
327        setupTags(preserveWhitespaceTags, tag -> tag.preserveWhitespace = true);
328        setupTags(formListedTags, tag -> tag.formList = true);
329        setupTags(formSubmitTags, tag -> tag.formSubmit = true);
330        for (Map.Entry<String, String[]> ns : namespaces.entrySet()) {
331            setupTags(ns.getValue(), tag -> tag.namespace = ns.getKey());
332        }
333    }
334}