001package org.jsoup.parser;
002
003import org.jsoup.internal.StringUtil;
004import org.jspecify.annotations.Nullable;
005
006import java.util.Objects;
007
008import static org.jsoup.parser.Parser.NamespaceHtml;
009
010/**
011 A Tag represents an Element's name and configured options, common throughout the Document. Options may affect the parse
012 and output.
013
014 @see TagSet
015 @see Parser#tagSet(TagSet) */
016public class Tag implements Cloneable {
017    /** Tag option: the tag is known (specifically defined). This impacts if options may need to be inferred (when not
018     known) in, e.g., the pretty-printer. Set when a tag is added to a TagSet, or when settings are set(). */
019    public static int Known = 1;
020    /** Tag option: the tag is a void tag (e.g. {@code <img>}), that can contain no children, and in HTML does not require closing. */
021    public static int Void = 1 << 1;
022    /** Tag option: the tag is a block tag (e.g. {@code <div>}, {@code <p>}). Causes the element to be indented when pretty-printing. If not a block, it is inline. */
023    public static int Block = 1 << 2;
024    /** Tag option: the tag is a block tag that will only hold inline tags (e.g. {@code <p>}); used for formatting. (Must also set Block.) */
025    public static int InlineContainer = 1 << 3;
026    /** Tag option: the tag can self-close (e.g. <foo />). */
027    public static int SelfClose = 1 << 4;
028    /** Tag option: the tag has been seen self-closing in this parse. */
029    public static int SeenSelfClose = 1 << 5;
030    /** Tag option: the tag preserves whitespace (e.g. {@code <pre>}). */
031    public static int PreserveWhitespace = 1 << 6;
032    /** Tag option: the tag is an RCDATA element that can have text and character references (e.g. {@code <title>}, {@code <textarea>}). */
033    public static int RcData = 1 << 7;
034    /** Tag option: the tag is a Data element that can have text but not character references (e.g. {@code <style>}, {@code <script>}). */
035    public static int Data = 1 << 8;
036    /** Tag option: the tag's value will be included when submitting a form (e.g. {@code <input>}). */
037    public static int FormSubmittable = 1 << 9;
038
039    String namespace;
040    String tagName;
041    String normalName; // always the lower case version of this tag, regardless of case preservation mode
042    int options = 0;
043
044    /**
045     Create a new Tag, with the given name and namespace.
046     <p>The tag is not implicitly added to any TagSet.</p>
047     @param tagName the name of the tag. Case-sensitive.
048     @param namespace the namespace for the tag.
049     @see TagSet#valueOf(String, String)
050     @since 1.20.1
051     */
052    public Tag(String tagName, String namespace) {
053        this(tagName, ParseSettings.normalName(tagName), namespace);
054    }
055
056    /**
057     Create a new Tag, with the given name, in the HTML namespace.
058     <p>The tag is not implicitly added to any TagSet.</p>
059     @param tagName the name of the tag. Case-sensitive.
060     @see TagSet#valueOf(String, String)
061     @since 1.20.1
062     */
063    public Tag(String tagName) {
064        this(tagName, ParseSettings.normalName(tagName), NamespaceHtml);
065    }
066
067    /** Path for TagSet defaults, no options set; normal name is already LC. */
068    Tag(String tagName, String normalName, String namespace) {
069        this.tagName = tagName;
070        this.normalName = normalName;
071        this.namespace = namespace;
072    }
073
074    /**
075     * Get this tag's name.
076     *
077     * @return the tag's name
078     */
079    public String getName() {
080        return tagName;
081    }
082
083    /**
084     Get this tag's name.
085     @return the tag's name
086     */
087    public String name() {
088        return tagName;
089    }
090
091    /**
092     Change the tag's name. As Tags are reused throughout a Document, this will change the name for all uses of this tag.
093     @param tagName the new name of the tag. Case-sensitive.
094     @return this tag
095     @since 1.20.1
096     */
097    public Tag name(String tagName) {
098        this.tagName = tagName;
099        this.normalName = ParseSettings.normalName(tagName);
100        return this;
101    }
102
103    /**
104     Get this tag's prefix, if it has one; else the empty string.
105     <p>For example, {@code <book:title>} has prefix {@code book}, and tag name {@code book:title}.</p>
106     @return the tag's prefix
107     @since 1.20.1
108     */
109    public String prefix() {
110        int pos = tagName.indexOf(':');
111        if (pos == -1) return "";
112        else return tagName.substring(0, pos);
113    }
114
115    /**
116     Get this tag's local name. The local name is the name without the prefix (if any).
117     <p>For exmaple, {@code <book:title>} has local name {@code title}, and tag name {@code book:title}.</p>
118     @return the tag's local name
119     @since 1.20.1
120     */
121    public String localName() {
122        int pos = tagName.indexOf(':');
123        if (pos == -1) return tagName;
124        else return tagName.substring(pos + 1);
125    }
126
127    /**
128     * Get this tag's normalized (lowercased) name.
129     * @return the tag's normal name.
130     */
131    public String normalName() {
132        return normalName;
133    }
134
135    /**
136     Get this tag's namespace.
137     @return the tag's namespace
138     */
139    public String namespace() {
140        return namespace;
141    }
142
143    /**
144     Set the tag's namespace. As Tags are reused throughout a Document, this will change the namespace for all uses of this tag.
145     @param namespace the new namespace of the tag.
146     @return this tag
147     @since 1.20.1
148     */
149    public Tag namespace(String namespace) {
150        this.namespace = namespace;
151        return this;
152    }
153
154    /**
155     Set an option on this tag.
156     <p>Once a tag has a setting applied, it will be considered a known tag.</p>
157     @param option the option to set
158     @return this tag
159     @since 1.20.1
160     */
161    public Tag set(int option) {
162        options |= option;
163        options |= Tag.Known; // considered known if touched
164        return this;
165    }
166
167    /**
168     Test if an option is set on this tag.
169
170     @param option the option to test
171     @return true if the option is set
172     @since 1.20.1
173     */
174    public boolean is(int option) {
175        return (options & option) != 0;
176    }
177
178    /**
179     Clear (unset) an option from this tag.
180     @param option the option to clear
181     @return this tag
182     @since 1.20.1
183     */
184    public Tag clear(int option) {
185        options &= ~option;
186        // considered known if touched, unless explicitly clearing known
187        if (option != Tag.Known) options |= Tag.Known;
188        return this;
189    }
190
191    /**
192     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
193     * <p>
194     * Pre-defined tags (p, div etc) will be ==, but unknown tags are not registered and will only .equals().
195     * </p>
196     * 
197     * @param tagName Name of tag, e.g. "p". Case-insensitive.
198     * @param namespace the namespace for the tag.
199     * @param settings used to control tag name sensitivity
200     * @see TagSet
201     * @return The tag, either defined or new generic.
202     */
203    public static Tag valueOf(String tagName, String namespace, ParseSettings settings) {
204        return TagSet.Html().valueOf(tagName, ParseSettings.normalName(tagName), namespace, settings.preserveTagCase());
205    }
206
207    /**
208     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
209     * <p>
210     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
211     * </p>
212     *
213     * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
214     * @return The tag, either defined or new generic.
215     * @see #valueOf(String tagName, String namespace, ParseSettings settings)
216     */
217    public static Tag valueOf(String tagName) {
218        return valueOf(tagName, NamespaceHtml, ParseSettings.preserveCase);
219    }
220
221    /**
222     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
223     * <p>
224     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
225     * </p>
226     *
227     * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
228     * @param settings used to control tag name sensitivity
229     * @return The tag, either defined or new generic.
230     * @see #valueOf(String tagName, String namespace, ParseSettings settings)
231     */
232    public static Tag valueOf(String tagName, ParseSettings settings) {
233        return valueOf(tagName, NamespaceHtml, settings);
234    }
235
236    /**
237     * Gets if this is a block tag.
238     *
239     * @return if block tag
240     */
241    public boolean isBlock() {
242        return (options & Block) != 0;
243    }
244
245    /**
246     Get if this is an InlineContainer tag.
247
248     @return true if an InlineContainer (which formats children as inline).
249     @deprecated setting is only used within the Printer. Will be removed in 1.21.
250     */
251    @Deprecated public boolean formatAsBlock() {
252        return (options & InlineContainer) != 0;
253    }
254
255    /**
256     * Gets if this tag is an inline tag. Just the opposite of isBlock.
257     *
258     * @return if this tag is an inline tag.
259     */
260    public boolean isInline() {
261        return (options & Block) == 0;
262    }
263
264    /**
265     Get if this is void (aka empty) tag.
266
267     @return true if this is a void tag
268     */
269    public boolean isEmpty() {
270        return (options & Void) != 0;
271    }
272
273    /**
274     * Get if this tag is self-closing.
275     *
276     * @return if this tag should be output as self-closing.
277     */
278    public boolean isSelfClosing() {
279        return (options & SelfClose) != 0 || (options & Void) != 0;
280    }
281
282    /**
283     * Get if this is a pre-defined tag in the TagSet, or was auto created on parsing.
284     *
285     * @return if a known tag
286     */
287    public boolean isKnownTag() {
288        return (options & Known) != 0;
289    }
290
291    /**
292     * Check if this tag name is a known HTML tag.
293     *
294     * @param tagName name of tag
295     * @return if known HTML tag
296     */
297    public static boolean isKnownTag(String tagName) {
298        return TagSet.HtmlTagSet.get(tagName, NamespaceHtml) != null;
299    }
300
301    /**
302     * Get if this tag should preserve whitespace within child text nodes.
303     *
304     * @return if preserve whitespace
305     */
306    public boolean preserveWhitespace() {
307        return (options & PreserveWhitespace) != 0;
308    }
309
310    /**
311     * Get if this tag represents a control associated with a form. E.g. input, textarea, output
312     * @return if associated with a form
313     * @deprecated this method is internal to HtmlTreeBuilder only, and will be removed in 1.21.1.
314     */
315    @Deprecated public boolean isFormListed() {
316        return namespace.equals(NamespaceHtml) && StringUtil.inSorted(normalName, HtmlTreeBuilder.TagFormListed);
317    }
318
319    /**
320     * Get if this tag represents an element that should be submitted with a form. E.g. input, option
321     * @return if submittable with a form
322     */
323    public boolean isFormSubmittable() {
324        return (options &= FormSubmittable) != 0;
325    }
326
327    void setSeenSelfClose() {
328        options |= Tag.SeenSelfClose; // does not change known status
329    }
330
331    /**
332     If this Tag uses a specific text TokeniserState for its content, returns that; otherwise null.
333     */
334    @Nullable TokeniserState textState() {
335        if (is(RcData)) return TokeniserState.Rcdata;
336        if (is(Data))   return TokeniserState.Rawtext;
337        else            return null;
338    }
339
340    @Override
341    public boolean equals(Object o) {
342        if (this == o) return true;
343        if (!(o instanceof Tag)) return false;
344        Tag tag = (Tag) o;
345        return Objects.equals(tagName, tag.tagName) &&
346            Objects.equals(namespace, tag.namespace) &&
347            Objects.equals(normalName, tag.normalName) &&
348            options == tag.options;
349    }
350
351    @Override
352    public int hashCode() {
353        return Objects.hash(tagName, namespace, normalName, options);
354    }
355
356    @Override
357    public String toString() {
358        return tagName;
359    }
360
361    @Override
362    protected Tag clone() {
363        try {
364            return (Tag) super.clone();
365        } catch (CloneNotSupportedException e) {
366            throw new RuntimeException(e);
367        }
368    }
369
370
371}