001package org.jsoup.parser;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.internal.Normalizer;
005import org.jsoup.internal.SharedConstants;
006
007import java.util.HashMap;
008import java.util.Map;
009import java.util.Objects;
010import java.util.function.Consumer;
011
012/**
013 * Tag capabilities.
014 *
015 * @author Jonathan Hedley, jonathan@hedley.net
016 */
017public class Tag implements Cloneable {
018    private static final Map<String, Tag> Tags = new HashMap<>(); // map of known tags
019
020    private String tagName;
021    private final String normalName; // always the lower case version of this tag, regardless of case preservation mode
022    private String namespace;
023    private boolean isBlock = true; // block
024    private boolean formatAsBlock = true; // should be formatted as a block
025    private boolean empty = false; // can hold nothing; e.g. img
026    private boolean selfClosing = false; // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
027    private boolean preserveWhitespace = false; // for pre, textarea, script etc
028    private boolean formList = false; // a control that appears in forms: input, textarea, output etc
029    private boolean formSubmit = false; // a control that can be submitted in a form: input etc
030
031    private Tag(String tagName, String normalName, String namespace) {
032        this.tagName = tagName;
033        this.normalName = normalName;
034        this.namespace = namespace;
035    }
036
037    /**
038     * Get this tag's name.
039     *
040     * @return the tag's name
041     */
042    public String getName() {
043        return tagName;
044    }
045
046    /**
047     * Get this tag's normalized (lowercased) name.
048     * @return the tag's normal name.
049     */
050    public String normalName() {
051        return normalName;
052    }
053
054    public String namespace() {
055        return namespace;
056    }
057
058    /**
059     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
060     * <p>
061     * Pre-defined tags (p, div etc) will be ==, but unknown tags are not registered and will only .equals().
062     * </p>
063     * 
064     * @param tagName Name of tag, e.g. "p". Case-insensitive.
065     * @param namespace the namespace for the tag.
066     * @param settings used to control tag name sensitivity
067     * @return The tag, either defined or new generic.
068     */
069    public static Tag valueOf(String tagName, String namespace, ParseSettings settings) {
070        return valueOf(tagName, ParseSettings.normalName(tagName), namespace, settings);
071    }
072
073    /** Tag.valueOf with the normalName via the token.normalName, to save redundant lower-casing passes. */
074    static Tag valueOf(String tagName, String normalName, String namespace, ParseSettings settings) {
075        Validate.notNull(tagName);
076        tagName = tagName.trim();
077        Validate.notEmpty(tagName);
078        Validate.notNull(namespace);
079        Tag tag = Tags.get(tagName);
080        if (tag != null && tag.namespace.equals(namespace))
081            return tag;
082
083        tagName = settings.preserveTagCase() ? tagName : normalName;
084        tag = Tags.get(normalName);
085        if (tag != null && tag.namespace.equals(namespace)) {
086            if (settings.preserveTagCase() && !tagName.equals(normalName)) {
087                tag = tag.clone(); // get a new version vs the static one, so name update doesn't reset all
088                tag.tagName = tagName;
089            }
090            return tag;
091        }
092
093        // not defined: create default; go anywhere, do anything! (incl be inside a <p>)
094        tag = new Tag(tagName, normalName, namespace);
095        tag.isBlock = false;
096
097        return tag;
098    }
099
100
101    /**
102     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
103     * <p>
104     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
105     * </p>
106     *
107     * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
108     * @return The tag, either defined or new generic.
109     * @see #valueOf(String tagName, String namespace, ParseSettings settings)
110     */
111    public static Tag valueOf(String tagName) {
112        return valueOf(tagName, Parser.NamespaceHtml, ParseSettings.preserveCase);
113    }
114
115    /**
116     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
117     * <p>
118     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
119     * </p>
120     *
121     * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
122     * @param settings used to control tag name sensitivity
123     * @return The tag, either defined or new generic.
124     * @see #valueOf(String tagName, String namespace, ParseSettings settings)
125     */
126    public static Tag valueOf(String tagName, ParseSettings settings) {
127        return valueOf(tagName, Parser.NamespaceHtml, settings);
128    }
129
130    /**
131     * Gets if this is a block tag.
132     *
133     * @return if block tag
134     */
135    public boolean isBlock() {
136        return isBlock;
137    }
138
139    /**
140     * Gets if this tag should be formatted as a block (or as inline)
141     *
142     * @return if should be formatted as block or inline
143     */
144    public boolean formatAsBlock() {
145        return formatAsBlock;
146    }
147
148    /**
149     * Gets if this tag is an inline tag.
150     *
151     * @return if this tag is an inline tag.
152     */
153    public boolean isInline() {
154        return !isBlock;
155    }
156
157    /**
158     * Get if this is an empty tag
159     *
160     * @return if this is an empty tag
161     */
162    public boolean isEmpty() {
163        return empty;
164    }
165
166    /**
167     * Get if this tag is self-closing.
168     *
169     * @return if this tag should be output as self-closing.
170     */
171    public boolean isSelfClosing() {
172        return empty || selfClosing;
173    }
174
175    /**
176     * Get if this is a pre-defined tag, or was auto created on parsing.
177     *
178     * @return if a known tag
179     */
180    public boolean isKnownTag() {
181        return Tags.containsKey(tagName);
182    }
183
184    /**
185     * Check if this tagname is a known tag.
186     *
187     * @param tagName name of tag
188     * @return if known HTML tag
189     */
190    public static boolean isKnownTag(String tagName) {
191        return Tags.containsKey(tagName);
192    }
193
194    /**
195     * Get if this tag should preserve whitespace within child text nodes.
196     *
197     * @return if preserve whitespace
198     */
199    public boolean preserveWhitespace() {
200        return preserveWhitespace;
201    }
202
203    /**
204     * Get if this tag represents a control associated with a form. E.g. input, textarea, output
205     * @return if associated with a form
206     */
207    public boolean isFormListed() {
208        return formList;
209    }
210
211    /**
212     * Get if this tag represents an element that should be submitted with a form. E.g. input, option
213     * @return if submittable with a form
214     */
215    public boolean isFormSubmittable() {
216        return formSubmit;
217    }
218
219    Tag setSelfClosing() {
220        selfClosing = true;
221        return this;
222    }
223
224    @Override
225    public boolean equals(Object o) {
226        if (this == o) return true;
227        if (!(o instanceof Tag)) return false;
228
229        Tag tag = (Tag) o;
230
231        if (!tagName.equals(tag.tagName)) return false;
232        if (empty != tag.empty) return false;
233        if (formatAsBlock != tag.formatAsBlock) return false;
234        if (isBlock != tag.isBlock) return false;
235        if (preserveWhitespace != tag.preserveWhitespace) return false;
236        if (selfClosing != tag.selfClosing) return false;
237        if (formList != tag.formList) return false;
238        return formSubmit == tag.formSubmit;
239    }
240
241    @Override
242    public int hashCode() {
243        return Objects.hash(tagName, isBlock, formatAsBlock, empty, selfClosing, preserveWhitespace,
244            formList, formSubmit);
245    }
246
247    @Override
248    public String toString() {
249        return tagName;
250    }
251
252    @Override
253    protected Tag clone() {
254        try {
255            return (Tag) super.clone();
256        } catch (CloneNotSupportedException e) {
257            throw new RuntimeException(e);
258        }
259    }
260
261    // internal static initialisers:
262    // prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other sources
263    private static final String[] blockTags = {
264            "html", "head", "body", "frameset", "script", "noscript", "style", "meta", "link", "title", "frame",
265            "noframes", "section", "nav", "aside", "hgroup", "header", "footer", "p", "h1", "h2", "h3", "h4", "h5", "h6",
266            "ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins",
267            "del", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th",
268            "td", "video", "audio", "canvas", "details", "menu", "plaintext", "template", "article", "main",
269            "svg", "math", "center", "template",
270            "dir", "applet", "marquee", "listing" // deprecated but still known / special handling
271    };
272    private static final String[] inlineTags = {
273            "object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd",
274            "var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "rtc", "a", "img", "br", "wbr", "map", "q",
275            "sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "optgroup",
276            "option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track",
277            "summary", "command", "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track",
278            "data", "bdi", "s", "strike", "nobr",
279            "rb", // deprecated but still known / special handling
280            "text", // in SVG NS
281            "mi", "mo", "msup", "mn", "mtext" // in MathML NS, to ensure inline
282    };
283    private static final String[] emptyTags = {
284            "meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command",
285            "device", "area", "basefont", "bgsound", "menuitem", "param", "source", "track"
286    };
287    // todo - rework this to format contents as inline; and update html emitter in Element. Same output, just neater.
288    private static final String[] formatAsInlineTags = {
289            "title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th", "td", "script", "style",
290            "ins", "del", "s", "button"
291    };
292    private static final String[] preserveWhitespaceTags = {
293            "pre", "plaintext", "title", "textarea"
294            // script is not here as it is a data node, which always preserve whitespace
295    };
296    // todo: I think we just need submit tags, and can scrub listed
297    private static final String[] formListedTags = {
298            "button", "fieldset", "input", "keygen", "object", "output", "select", "textarea"
299    };
300    private static final String[] formSubmitTags = SharedConstants.FormSubmitTags;
301
302    private static final Map<String, String[]> namespaces = new HashMap<>();
303    static {
304        namespaces.put(Parser.NamespaceMathml, new String[]{"math", "mi", "mo", "msup", "mn", "mtext"});
305        namespaces.put(Parser.NamespaceSvg, new String[]{"svg", "text"});
306        // We don't need absolute coverage here as other cases will be inferred by the HtmlTreeBuilder
307    }
308
309    private static void setupTags(String[] tagNames, Consumer<Tag> tagModifier) {
310        for (String tagName : tagNames) {
311            Tag tag = Tags.get(tagName);
312            if (tag == null) {
313                tag = new Tag(tagName, tagName, Parser.NamespaceHtml);
314                Tags.put(tag.tagName, tag);
315            }
316            tagModifier.accept(tag);
317        }
318    }
319
320    static {
321        setupTags(blockTags, tag -> {
322            tag.isBlock = true;
323            tag.formatAsBlock = true;
324        });
325
326        setupTags(inlineTags, tag -> {
327            tag.isBlock = false;
328            tag.formatAsBlock = false;
329        });
330
331        setupTags(emptyTags, tag -> tag.empty = true);
332        setupTags(formatAsInlineTags, tag -> tag.formatAsBlock = false);
333        setupTags(preserveWhitespaceTags, tag -> tag.preserveWhitespace = true);
334        setupTags(formListedTags, tag -> tag.formList = true);
335        setupTags(formSubmitTags, tag -> tag.formSubmit = true);
336        for (Map.Entry<String, String[]> ns : namespaces.entrySet()) {
337            setupTags(ns.getValue(), tag -> tag.namespace = ns.getKey());
338        }
339    }
340}