package org.jsoup.parser;

import org.jsoup.internal.StringUtil;
import org.jspecify.annotations.Nullable;

import java.util.Objects;

import static org.jsoup.parser.Parser.NamespaceHtml;

/**
 A Tag represents an Element's name and configured options, common throughout the Document. Options may affect the parse
 and output.
 <p>Options are stored as a bit set in a single {@code int}; each {@code Tag.*} option constant is one bit, and may be
 combined with {@code |} when calling {@link #set(int)}, {@link #clear(int)}, or {@link #is(int)}.</p>

 @see TagSet
 @see Parser#tagSet(TagSet) */
public class Tag implements Cloneable {
    // NOTE(review): the option constants below are mutable public statics. They are left non-final here so that the
    // public API of this class is unchanged; consider making them final in a follow-up.

    /** Tag option: the tag is known (specifically defined). This impacts if options may need to be inferred (when not
     known) in, e.g., the pretty-printer. Set when a tag is added to a TagSet, or when settings are set(). */
    public static int Known = 1;
    /** Tag option: the tag is a void tag (e.g. {@code <img>}), that can contain no children, and in HTML does not require closing. */
    public static int Void = 1 << 1;
    /** Tag option: the tag is a block tag (e.g. {@code <div>}, {@code <p>}). Causes the element to be indented when pretty-printing. If not a block, it is inline. */
    public static int Block = 1 << 2;
    /** Tag option: the tag is a block tag that will only hold inline tags (e.g. {@code <p>}); used for formatting. (Must also set Block.) */
    public static int InlineContainer = 1 << 3;
    /** Tag option: the tag can self-close (e.g. {@code <foo />}). */
    public static int SelfClose = 1 << 4;
    /** Tag option: the tag has been seen self-closing in this parse. */
    public static int SeenSelfClose = 1 << 5;
    /** Tag option: the tag preserves whitespace (e.g. {@code <pre>}). */
    public static int PreserveWhitespace = 1 << 6;
    /** Tag option: the tag is an RCDATA element that can have text and character references (e.g. {@code <title>}, {@code <textarea>}). */
    public static int RcData = 1 << 7;
    /** Tag option: the tag is a Data element that can have text but not character references (e.g. {@code <style>}, {@code <script>}). */
    public static int Data = 1 << 8;
    /** Tag option: the tag's value will be included when submitting a form (e.g. {@code <input>}). */
    public static int FormSubmittable = 1 << 9;

    String namespace;
    String tagName;
    String normalName; // always the lower case version of this tag, regardless of case preservation mode
    int options = 0; // bit set of the Tag.* option constants

    /**
     Create a new Tag, with the given name and namespace.
     <p>The tag is not implicitly added to any TagSet.</p>
     @param tagName the name of the tag. Case-sensitive.
     @param namespace the namespace for the tag.
     @see TagSet#valueOf(String, String)
     @since 1.20.1
     */
    public Tag(String tagName, String namespace) {
        this(tagName, ParseSettings.normalName(tagName), namespace);
    }

    /**
     Create a new Tag, with the given name, in the HTML namespace.
     <p>The tag is not implicitly added to any TagSet.</p>
     @param tagName the name of the tag. Case-sensitive.
     @see TagSet#valueOf(String, String)
     @since 1.20.1
     */
    public Tag(String tagName) {
        this(tagName, ParseSettings.normalName(tagName), NamespaceHtml);
    }

    /** Path for TagSet defaults, no options set; normal name is already LC. */
    Tag(String tagName, String normalName, String namespace) {
        this.tagName = tagName;
        this.normalName = normalName;
        this.namespace = namespace;
    }

    /**
     * Get this tag's name.
     *
     * @return the tag's name
     */
    public String getName() {
        return tagName;
    }

    /**
     Get this tag's name.
     @return the tag's name
     */
    public String name() {
        return tagName;
    }

    /**
     Change the tag's name. As Tags are reused throughout a Document, this will change the name for all uses of this tag.
     <p>The normal name is recomputed from the new name.</p>
     @param tagName the new name of the tag. Case-sensitive.
     @return this tag
     @since 1.20.1
     */
    public Tag name(String tagName) {
        this.tagName = tagName;
        this.normalName = ParseSettings.normalName(tagName);
        return this;
    }

    /**
     Get this tag's prefix, if it has one; else the empty string.
     <p>For example, {@code <book:title>} has prefix {@code book}, and tag name {@code book:title}.</p>
     @return the tag's prefix
     @since 1.20.1
     */
    public String prefix() {
        int pos = tagName.indexOf(':');
        if (pos == -1) return "";
        else return tagName.substring(0, pos);
    }

    /**
     Get this tag's local name. The local name is the name without the prefix (if any).
     <p>For example, {@code <book:title>} has local name {@code title}, and tag name {@code book:title}.</p>
     @return the tag's local name
     @since 1.20.1
     */
    public String localName() {
        int pos = tagName.indexOf(':');
        if (pos == -1) return tagName;
        else return tagName.substring(pos + 1);
    }

    /**
     * Get this tag's normalized (lowercased) name.
     * @return the tag's normal name.
     */
    public String normalName() {
        return normalName;
    }

    /**
     Get this tag's namespace.
     @return the tag's namespace
     */
    public String namespace() {
        return namespace;
    }

    /**
     Set the tag's namespace. As Tags are reused throughout a Document, this will change the namespace for all uses of this tag.
     @param namespace the new namespace of the tag.
     @return this tag
     @since 1.20.1
     */
    public Tag namespace(String namespace) {
        this.namespace = namespace;
        return this;
    }

    /**
     Set an option on this tag.
     <p>Once a tag has a setting applied, it will be considered a known tag.</p>
     @param option the option to set
     @return this tag
     @since 1.20.1
     */
    public Tag set(int option) {
        options |= option;
        options |= Tag.Known; // considered known if touched
        return this;
    }

    /**
     Test if an option is set on this tag.
     <p>If {@code option} combines several option bits, this returns true when any one of them is set.</p>

     @param option the option to test
     @return true if the option is set
     @since 1.20.1
     */
    public boolean is(int option) {
        return (options & option) != 0;
    }

    /**
     Clear (unset) an option from this tag.
     <p>Clearing any option other than exactly {@link #Known} marks the tag as known.</p>
     @param option the option to clear
     @return this tag
     @since 1.20.1
     */
    public Tag clear(int option) {
        options &= ~option;
        // considered known if touched, unless explicitly clearing known
        if (option != Tag.Known) options |= Tag.Known;
        return this;
    }

    /**
     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
     * <p>
     * Pre-defined tags (p, div etc) will be ==, but unknown tags are not registered and will only .equals().
     * </p>
     *
     * @param tagName Name of tag, e.g. "p". Case-insensitive.
     * @param namespace the namespace for the tag.
     * @param settings used to control tag name sensitivity
     * @see TagSet
     * @return The tag, either defined or new generic.
     */
    public static Tag valueOf(String tagName, String namespace, ParseSettings settings) {
        return TagSet.Html().valueOf(tagName, ParseSettings.normalName(tagName), namespace, settings.preserveTagCase());
    }

    /**
     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
     * <p>
     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
     * </p>
     *
     * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
     * @return The tag, either defined or new generic.
     * @see #valueOf(String tagName, String namespace, ParseSettings settings)
     */
    public static Tag valueOf(String tagName) {
        return valueOf(tagName, NamespaceHtml, ParseSettings.preserveCase);
    }

    /**
     * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
     * <p>
     * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
     * </p>
     *
     * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>.
     * @param settings used to control tag name sensitivity
     * @return The tag, either defined or new generic.
     * @see #valueOf(String tagName, String namespace, ParseSettings settings)
     */
    public static Tag valueOf(String tagName, ParseSettings settings) {
        return valueOf(tagName, NamespaceHtml, settings);
    }

    /**
     * Gets if this is a block tag.
     *
     * @return if block tag
     */
    public boolean isBlock() {
        return (options & Block) != 0;
    }

    /**
     Get if this is an InlineContainer tag.

     @return true if an InlineContainer (which formats children as inline).
     @deprecated setting is only used within the Printer. Will be removed in 1.21.
     */
    @Deprecated public boolean formatAsBlock() {
        return (options & InlineContainer) != 0;
    }

    /**
     * Gets if this tag is an inline tag. Just the opposite of isBlock.
     *
     * @return if this tag is an inline tag.
     */
    public boolean isInline() {
        return (options & Block) == 0;
    }

    /**
     Get if this is void (aka empty) tag.

     @return true if this is a void tag
     */
    public boolean isEmpty() {
        return (options & Void) != 0;
    }

    /**
     * Get if this tag is self-closing. True when either the SelfClose or the Void option is set.
     *
     * @return if this tag should be output as self-closing.
     */
    public boolean isSelfClosing() {
        return (options & SelfClose) != 0 || (options & Void) != 0;
    }

    /**
     * Get if this is a pre-defined tag in the TagSet, or was auto created on parsing.
     *
     * @return if a known tag
     */
    public boolean isKnownTag() {
        return (options & Known) != 0;
    }

    /**
     * Check if this tag name is a known HTML tag.
     *
     * @param tagName name of tag
     * @return if known HTML tag
     */
    public static boolean isKnownTag(String tagName) {
        return TagSet.HtmlTagSet.get(tagName, NamespaceHtml) != null;
    }

    /**
     * Get if this tag should preserve whitespace within child text nodes.
     *
     * @return if preserve whitespace
     */
    public boolean preserveWhitespace() {
        return (options & PreserveWhitespace) != 0;
    }

    /**
     * Get if this tag represents a control associated with a form. E.g. input, textarea, output
     * @return if associated with a form
     * @deprecated this method is internal to HtmlTreeBuilder only, and will be removed in 1.21.1.
     */
    @Deprecated public boolean isFormListed() {
        return namespace.equals(NamespaceHtml) && StringUtil.inSorted(normalName, HtmlTreeBuilder.TagFormListed);
    }

    /**
     * Get if this tag represents an element that should be submitted with a form. E.g. input, option
     * @return if submittable with a form
     */
    public boolean isFormSubmittable() {
        // Read-only mask test. This must be '&', never '&=': a compound assignment here would overwrite options and
        // silently drop every other flag on this (shared) tag each time the getter is called.
        return (options & FormSubmittable) != 0;
    }

    /** Records that this tag has been seen self-closing in the current parse. */
    void setSeenSelfClose() {
        options |= Tag.SeenSelfClose; // does not change known status
    }

    /**
     If this Tag uses a specific text TokeniserState for its content, returns that; otherwise null.
     <p>RcData is checked before Data, so it wins if both options are set.</p>
     */
    @Nullable TokeniserState textState() {
        if (is(RcData)) return TokeniserState.Rcdata;
        if (is(Data)) return TokeniserState.Rawtext;
        else return null;
    }

    /**
     Tags are equal when their tag name, namespace, normal name, and full option set all match.
     @param o the object to compare to
     @return true if {@code o} is a Tag with the same names, namespace, and options
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (!(o instanceof Tag)) return false;
        Tag tag = (Tag) o;
        return Objects.equals(tagName, tag.tagName) &&
            Objects.equals(namespace, tag.namespace) &&
            Objects.equals(normalName, tag.normalName) &&
            options == tag.options;
    }

    /**
     Hash over the same fields used by {@link #equals(Object)}. As those fields are mutable, the hash changes if the
     tag's name, namespace, or options are changed.
     @return the hash code
     */
    @Override
    public int hashCode() {
        return Objects.hash(tagName, namespace, normalName, options);
    }

    /**
     Get the tag's name.
     @return the tag name, as per {@link #name()}
     */
    @Override
    public String toString() {
        return tagName;
    }

    /**
     Create a copy of this tag, via {@link Object#clone()}.
     @return the cloned tag
     */
    @Override
    protected Tag clone() {
        try {
            return (Tag) super.clone();
        } catch (CloneNotSupportedException e) {
            // not expected, as this class implements Cloneable
            throw new RuntimeException(e);
        }
    }


}