001package org.jsoup.nodes; 002 003import org.jsoup.SerializationException; 004import org.jsoup.helper.Validate; 005import org.jsoup.internal.Normalizer; 006import org.jsoup.internal.SharedConstants; 007import org.jsoup.internal.StringUtil; 008import org.jsoup.nodes.Document.OutputSettings.Syntax; 009import org.jspecify.annotations.Nullable; 010 011import java.io.IOException; 012import java.util.Arrays; 013import java.util.Map; 014import java.util.Objects; 015import java.util.regex.Pattern; 016 017/** 018 A single key + value attribute. (Only used for presentation.) 019 */ 020public class Attribute implements Map.Entry<String, String>, Cloneable { 021 private static final String[] booleanAttributes = { 022 "allowfullscreen", "async", "autofocus", "checked", "compact", "declare", "default", "defer", "disabled", 023 "formnovalidate", "hidden", "inert", "ismap", "itemscope", "multiple", "muted", "nohref", "noresize", 024 "noshade", "novalidate", "nowrap", "open", "readonly", "required", "reversed", "seamless", "selected", 025 "sortable", "truespeed", "typemustmatch" 026 }; 027 028 private String key; 029 @Nullable private String val; 030 @Nullable Attributes parent; // used to update the holding Attributes when the key / value is changed via this interface 031 032 /** 033 * Create a new attribute from unencoded (raw) key and value. 034 * @param key attribute key; case is preserved. 035 * @param value attribute value (may be null) 036 * @see #createFromEncoded 037 */ 038 public Attribute(String key, @Nullable String value) { 039 this(key, value, null); 040 } 041 042 /** 043 * Create a new attribute from unencoded (raw) key and value. 044 * @param key attribute key; case is preserved. 045 * @param val attribute value (may be null) 046 * @param parent the containing Attributes (this Attribute is not automatically added to said Attributes) 047 * @see #createFromEncoded*/ 048 public Attribute(String key, @Nullable String val, @Nullable Attributes parent) { 049 Validate.notNull(key); 050 key = key.trim(); 051 Validate.notEmpty(key); // trimming could potentially make empty, so validate here 052 this.key = key; 053 this.val = val; 054 this.parent = parent; 055 } 056 057 /** 058 Get the attribute's key (aka name). 059 @return the attribute key 060 */ 061 @Override 062 public String getKey() { 063 return key; 064 } 065 066 /** 067 Set the attribute key; case is preserved. 068 @param key the new key; must not be null 069 */ 070 public void setKey(String key) { 071 Validate.notNull(key); 072 key = key.trim(); 073 Validate.notEmpty(key); // trimming could potentially make empty, so validate here 074 if (parent != null) { 075 int i = parent.indexOfKey(this.key); 076 if (i != Attributes.NotFound) { 077 String oldKey = parent.keys[i]; 078 parent.keys[i] = key; 079 080 // if tracking source positions, update the key in the range map 081 Map<String, Range.AttributeRange> ranges = parent.getRanges(); 082 if (ranges != null) { 083 Range.AttributeRange range = ranges.remove(oldKey); 084 ranges.put(key, range); 085 } 086 } 087 } 088 this.key = key; 089 } 090 091 /** 092 Get the attribute value. Will return an empty string if the value is not set. 093 @return the attribute value 094 */ 095 @Override 096 public String getValue() { 097 return Attributes.checkNotNull(val); 098 } 099 100 /** 101 * Check if this Attribute has a value. Set boolean attributes have no value. 102 * @return if this is a boolean attribute / attribute without a value 103 */ 104 public boolean hasDeclaredValue() { 105 return val != null; 106 } 107 108 /** 109 Set the attribute value. 110 @param val the new attribute value; may be null (to set an enabled boolean attribute) 111 @return the previous value (if was null; an empty string) 112 */ 113 @Override public String setValue(@Nullable String val) { 114 String oldVal = this.val; 115 if (parent != null) { 116 int i = parent.indexOfKey(this.key); 117 if (i != Attributes.NotFound) { 118 oldVal = parent.get(this.key); // trust the container more 119 parent.vals[i] = val; 120 } 121 } 122 this.val = val; 123 return Attributes.checkNotNull(oldVal); 124 } 125 126 /** 127 Get this attribute's key prefix, if it has one; else the empty string. 128 <p>For example, the attribute {@code og:title} has prefix {@code og}, and local {@code title}.</p> 129 130 @return the tag's prefix 131 @since 1.20.1 132 */ 133 public String prefix() { 134 int pos = key.indexOf(':'); 135 if (pos == -1) return ""; 136 else return key.substring(0, pos); 137 } 138 139 /** 140 Get this attribute's local name. The local name is the name without the prefix (if any). 141 <p>For example, the attribute key {@code og:title} has local name {@code title}.</p> 142 143 @return the tag's local name 144 @since 1.20.1 145 */ 146 public String localName() { 147 int pos = key.indexOf(':'); 148 if (pos == -1) return key; 149 else return key.substring(pos + 1); 150 } 151 152 /** 153 Get this attribute's namespace URI, if the attribute was prefixed with a defined namespace name. Otherwise, returns 154 the empty string. These will only be defined if using the XML parser. 155 @return the tag's namespace URI, or empty string if not defined 156 @since 1.20.1 157 */ 158 public String namespace() { 159 // set as el.attributes.userData(SharedConstants.XmlnsAttr + prefix, ns) 160 if (parent != null) { 161 String ns = (String) parent.userData(SharedConstants.XmlnsAttr + prefix()); 162 if (ns != null) 163 return ns; 164 } 165 return ""; 166 } 167 168 /** 169 Get the HTML representation of this attribute; e.g. {@code href="index.html"}. 170 @return HTML 171 */ 172 public String html() { 173 StringBuilder sb = StringUtil.borrowBuilder(); 174 175 try { 176 html(sb, (new Document("")).outputSettings()); 177 } catch(IOException exception) { 178 throw new SerializationException(exception); 179 } 180 return StringUtil.releaseBuilder(sb); 181 } 182 183 /** 184 Get the source ranges (start to end positions) in the original input source from which this attribute's <b>name</b> 185 and <b>value</b> were parsed. 186 <p>Position tracking must be enabled prior to parsing the content.</p> 187 @return the ranges for the attribute's name and value, or {@code untracked} if the attribute does not exist or its range 188 was not tracked. 189 @see org.jsoup.parser.Parser#setTrackPosition(boolean) 190 @see Attributes#sourceRange(String) 191 @see Node#sourceRange() 192 @see Element#endSourceRange() 193 @since 1.17.1 194 */ 195 public Range.AttributeRange sourceRange() { 196 if (parent == null) return Range.AttributeRange.UntrackedAttr; 197 return parent.sourceRange(key); 198 } 199 200 protected void html(Appendable accum, Document.OutputSettings out) throws IOException { 201 html(key, val, accum, out); 202 } 203 204 protected static void html(String key, @Nullable String val, Appendable accum, Document.OutputSettings out) throws IOException { 205 key = getValidKey(key, out.syntax()); 206 if (key == null) return; // can't write it :( 207 htmlNoValidate(key, val, accum, out); 208 } 209 210 static void htmlNoValidate(String key, @Nullable String val, Appendable accum, Document.OutputSettings out) throws IOException { 211 // structured like this so that Attributes can check we can write first, so it can add whitespace correctly 212 accum.append(key); 213 if (!shouldCollapseAttribute(key, val, out)) { 214 accum.append("=\""); 215 Entities.escape(accum, Attributes.checkNotNull(val), out, Entities.ForAttribute); // preserves whitespace 216 accum.append('"'); 217 } 218 } 219 220 private static final Pattern xmlKeyReplace = Pattern.compile("[^-a-zA-Z0-9_:.]+"); 221 private static final Pattern htmlKeyReplace = Pattern.compile("[\\x00-\\x1f\\x7f-\\x9f \"'/=]+"); 222 /** 223 * Get a valid attribute key for the given syntax. If the key is not valid, it will be coerced into a valid key. 224 * @param key the original attribute key 225 * @param syntax HTML or XML 226 * @return the original key if it's valid; a key with invalid characters replaced with "_" otherwise; or null if a valid key could not be created. 227 */ 228 @Nullable public static String getValidKey(String key, Syntax syntax) { 229 if (syntax == Syntax.xml && !isValidXmlKey(key)) { 230 key = xmlKeyReplace.matcher(key).replaceAll("_"); 231 return isValidXmlKey(key) ? key : null; // null if could not be coerced 232 } 233 else if (syntax == Syntax.html && !isValidHtmlKey(key)) { 234 key = htmlKeyReplace.matcher(key).replaceAll("_"); 235 return isValidHtmlKey(key) ? key : null; // null if could not be coerced 236 } 237 return key; 238 } 239 240 // perf critical in html() so using manual scan vs regex: 241 // note that we aren't using anything in supplemental space, so OK to iter charAt 242 private static boolean isValidXmlKey(String key) { 243 // =~ [a-zA-Z_:][-a-zA-Z0-9_:.]* 244 final int length = key.length(); 245 if (length == 0) return false; 246 char c = key.charAt(0); 247 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c == ':')) 248 return false; 249 for (int i = 1; i < length; i++) { 250 c = key.charAt(i); 251 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '-' || c == '_' || c == ':' || c == '.')) 252 return false; 253 } 254 return true; 255 } 256 257 private static boolean isValidHtmlKey(String key) { 258 // =~ [\x00-\x1f\x7f-\x9f "'/=]+ 259 final int length = key.length(); 260 if (length == 0) return false; 261 for (int i = 0; i < length; i++) { 262 char c = key.charAt(i); 263 if ((c <= 0x1f) || (c >= 0x7f && c <= 0x9f) || c == ' ' || c == '"' || c == '\'' || c == '/' || c == '=') 264 return false; 265 } 266 return true; 267 } 268 269 /** 270 Get the string representation of this attribute, implemented as {@link #html()}. 271 @return string 272 */ 273 @Override 274 public String toString() { 275 return html(); 276 } 277 278 /** 279 * Create a new Attribute from an unencoded key and a HTML attribute encoded value. 280 * @param unencodedKey assumes the key is not encoded, as can be only run of simple \w chars. 281 * @param encodedValue HTML attribute encoded value 282 * @return attribute 283 */ 284 public static Attribute createFromEncoded(String unencodedKey, String encodedValue) { 285 String value = Entities.unescape(encodedValue, true); 286 return new Attribute(unencodedKey, value, null); // parent will get set when Put 287 } 288 289 protected boolean isDataAttribute() { 290 return isDataAttribute(key); 291 } 292 293 protected static boolean isDataAttribute(String key) { 294 return key.startsWith(Attributes.dataPrefix) && key.length() > Attributes.dataPrefix.length(); 295 } 296 297 /** 298 * Collapsible if it's a boolean attribute and value is empty or same as name 299 * 300 * @param out output settings 301 * @return Returns whether collapsible or not 302 */ 303 protected final boolean shouldCollapseAttribute(Document.OutputSettings out) { 304 return shouldCollapseAttribute(key, val, out); 305 } 306 307 // collapse unknown foo=null, known checked=null, checked="", checked=checked; write out others 308 protected static boolean shouldCollapseAttribute(final String key, @Nullable final String val, final Document.OutputSettings out) { 309 return (out.syntax() == Syntax.html && 310 (val == null || (val.isEmpty() || val.equalsIgnoreCase(key)) && Attribute.isBooleanAttribute(key))); 311 } 312 313 /** 314 * Checks if this attribute name is defined as a boolean attribute in HTML5 315 */ 316 public static boolean isBooleanAttribute(final String key) { 317 return Arrays.binarySearch(booleanAttributes, Normalizer.lowerCase(key)) >= 0; 318 } 319 320 @Override 321 public boolean equals(@Nullable Object o) { // note parent not considered 322 if (this == o) return true; 323 if (o == null || getClass() != o.getClass()) return false; 324 Attribute attribute = (Attribute) o; 325 return Objects.equals(key, attribute.key) && Objects.equals(val, attribute.val); 326 } 327 328 @Override 329 public int hashCode() { // note parent not considered 330 return Objects.hash(key, val); 331 } 332 333 @Override 334 public Attribute clone() { 335 try { 336 return (Attribute) super.clone(); 337 } catch (CloneNotSupportedException e) { 338 throw new RuntimeException(e); 339 } 340 } 341}