package org.jsoup.nodes;

import org.jsoup.helper.Validate;
import org.jsoup.internal.QuietAppendable;
import org.jsoup.internal.SharedConstants;
import org.jsoup.internal.StringUtil;
import org.jsoup.parser.ParseSettings;
import org.jspecify.annotations.Nullable;

import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;

import static org.jsoup.internal.Normalizer.lowerCase;
import static org.jsoup.internal.SharedConstants.AttrRangeKey;
import static org.jsoup.nodes.Range.AttributeRange.UntrackedAttr;

/**
 * The attributes of an Element.
 * <p>
 * During parsing, attributes with the same name in an element are deduplicated, according to the configured parser's
 * attribute case-sensitive setting. It is possible to have duplicate attributes subsequently if
 * {@link #add(String, String)} vs {@link #put(String, String)} is used.
 * </p>
 * <p>
 * Attribute name and value comparisons are generally <b>case sensitive</b>. By default for HTML, attribute names are
 * normalized to lower-case on parsing. That means you should use lower-case strings when referring to attributes by
 * name.
 * </p>
 *
 * @author Jonathan Hedley, jonathan@hedley.net
 */
public class Attributes implements Iterable<Attribute>, Cloneable {
    // Indicates an internal key. Can't be set via HTML. (It could be set via accessor, but not too worried about
    // that. Suppressed from list, iter.)
    static final char InternalPrefix = '/';

    // The Attributes object is only created on the first use of an attribute; the Element will just have a null
    // Attribute slot otherwise
    protected static final String dataPrefix = "data-";
    private static final int InitialCapacity = 3; // sampling found mean count when attrs present = 1.49; 1.08 overall. 2.6:1 don't have any attrs.

    // manages the key/val arrays
    private static final int GrowthFactor = 2;
    static final int NotFound = -1;
    private static final String EmptyString = "";

    // the number of instance fields is kept as low as possible giving an object size of 24 bytes
    private int size = 0; // number of slots used (not total capacity, which is keys.length)
    @Nullable String[] keys = new String[InitialCapacity]; // keys is not null, but contents may be. Same for vals
    @Nullable Object[] vals = new Object[InitialCapacity]; // Genericish: all non-internal attribute values must be Strings and are cast on access.
    // todo - make keys iterable without creating Attribute objects

    /**
     Ensures the key and value arrays can hold at least {@code minNewSize} entries, growing both if required.
     @param minNewSize the required capacity; must not be less than the current size
     */
    private void checkCapacity(int minNewSize) {
        Validate.isTrue(minNewSize >= size);
        int curCap = keys.length;
        if (curCap >= minNewSize)
            return;
        // arrays smaller than InitialCapacity (e.g. trimmed by clone()) are first brought back up to InitialCapacity
        int newCap = curCap >= InitialCapacity ? size * GrowthFactor : InitialCapacity;
        if (minNewSize > newCap)
            newCap = minNewSize;

        keys = Arrays.copyOf(keys, newCap);
        vals = Arrays.copyOf(vals, newCap);
    }

    /**
     Finds the index of the first slot holding this key (case-sensitive).
     @param key key to find (not null)
     @return the slot index, or {@link #NotFound}
     */
    int indexOfKey(String key) {
        Validate.notNull(key);
        for (int i = 0; i < size; i++) {
            if (key.equals(keys[i]))
                return i;
        }
        return NotFound;
    }

    /**
     Finds the index of the first slot holding this key, ignoring case.
     @param key key to find (not null)
     @return the slot index, or {@link #NotFound}
     */
    private int indexOfKeyIgnoreCase(String key) {
        Validate.notNull(key);
        for (int i = 0; i < size; i++) {
            if (key.equalsIgnoreCase(keys[i]))
                return i;
        }
        return NotFound;
    }

    // we track boolean attributes as null in values - they're just keys. so returns empty for consumers
    // casts to String, so only for non-internal attributes
    static String checkNotNull(@Nullable Object val) {
        return val == null ? EmptyString : (String) val;
    }

    /**
     Get an attribute value by key.
     @param key the (case-sensitive) attribute key
     @return the attribute value if set; or empty string if not set (or a boolean attribute).
     @see #hasKey(String)
     */
    public String get(String key) {
        int i = indexOfKey(key);
        return i == NotFound ? EmptyString : checkNotNull(vals[i]);
    }

    /**
     Get an Attribute by key. The Attribute will remain connected to these Attributes, so changes made via
     {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc will cascade back to these Attributes and
     their owning Element.
     @param key the (case-sensitive) attribute key
     @return the Attribute for this key, or null if not present.
     @since 1.17.2
     */
    @Nullable public Attribute attribute(String key) {
        int i = indexOfKey(key);
        return i == NotFound ? null : new Attribute(key, checkNotNull(vals[i]), this);
    }

    /**
     * Get an attribute's value by case-insensitive key
     * @param key the attribute name
     * @return the first matching attribute value if set; or empty string if not set (or a boolean attribute).
     */
    public String getIgnoreCase(String key) {
        int i = indexOfKeyIgnoreCase(key);
        return i == NotFound ? EmptyString : checkNotNull(vals[i]);
    }

    /**
     * Adds a new attribute. Will produce duplicates if the key already exists.
     * @param key attribute key
     * @param value attribute value (may be null, for a boolean attribute)
     * @return these attributes, for chaining
     * @see Attributes#put(String, String)
     */
    public Attributes add(String key, @Nullable String value) {
        addObject(key, value);
        return this;
    }

    // appends a key/value pair without checking for an existing key; value may be a non-String for internal attributes
    private void addObject(String key, @Nullable Object value) {
        checkCapacity(size + 1);
        keys[size] = key;
        vals[size] = value;
        size++;
    }

    /**
     * Set a new attribute, or replace an existing one by key.
     * @param key case sensitive attribute key (not null)
     * @param value attribute value (which can be null, to set a true boolean attribute)
     * @return these attributes, for chaining
     */
    public Attributes put(String key, @Nullable String value) {
        Validate.notNull(key);
        int i = indexOfKey(key);
        if (i != NotFound)
            vals[i] = value;
        else
            add(key, value);
        return this;
    }

    /**
     Get the map holding any user-data associated with these Attributes. Will be created empty on first use. Held as
     an internal attribute, not a field member, to reduce the memory footprint of Attributes when not used. Can hold
     arbitrary objects; use for source ranges, connecting W3C nodes to Elements, etc.
     * @return the map holding user-data
     */
    Map<String, Object> userData() {
        final Map<String, Object> userData;
        int i = indexOfKey(SharedConstants.UserDataKey);
        if (i == NotFound) {
            userData = new HashMap<>();
            addObject(SharedConstants.UserDataKey, userData);
        } else {
            //noinspection unchecked
            userData = (Map<String, Object>) vals[i];
        }
        assert userData != null;
        return userData;
    }

    /**
     Get an arbitrary user-data object by key.
     * @param key case-sensitive key to the object.
     * @return the object associated to this key, or {@code null} if not found.
     * @see #userData(String key, Object val)
     * @since 1.17.1
     */
    @Nullable
    public Object userData(String key) {
        Validate.notNull(key);
        if (!hasKey(SharedConstants.UserDataKey)) return null; // no user data exists
        Map<String, Object> userData = userData();
        return userData.get(key);
    }

    /**
     Set an arbitrary user-data object by key. Will be treated as an internal attribute, so will not be emitted in HTML.
     * @param key case-sensitive key
     * @param value object value
     * @return these attributes
     * @see #userData(String key)
     * @since 1.17.1
     */
    public Attributes userData(String key, Object value) {
        Validate.notNull(key);
        userData().put(key, value);
        return this;
    }

    /**
     Set a new attribute, or replace the first existing one whose key matches ignoring case. When an existing
     attribute is replaced, its key is updated to the supplied casing.
     @param key case-insensitive attribute key
     @param value attribute value (may be null, for a boolean attribute)
     */
    void putIgnoreCase(String key, @Nullable String value) {
        int i = indexOfKeyIgnoreCase(key);
        if (i != NotFound) {
            vals[i] = value;
            String old = keys[i];
            assert old != null;
            if (!old.equals(key)) // case changed, update
                keys[i] = key;
        }
        else
            add(key, value);
    }

    /**
     * Set a new boolean attribute. Removes the attribute if the value is false.
     * @param key attribute key. When {@code value} is true the key is matched <b>case insensitively</b> against any
     * existing attribute; when {@code value} is false the removal is delegated to the case-sensitive
     * {@link #remove(String)}.
     * @param value attribute value
     * @return these attributes, for chaining
     */
    public Attributes put(String key, boolean value) {
        if (value)
            putIgnoreCase(key, null);
        else
            remove(key);
        return this;
    }

    /**
     Set a new attribute, or replace an existing one by key. The supplied Attribute's parent is set to these Attributes.
     @param attribute attribute with case-sensitive key
     @return these attributes, for chaining
     */
    public Attributes put(Attribute attribute) {
        Validate.notNull(attribute);
        put(attribute.getKey(), attribute.getValue());
        attribute.parent = this;
        return this;
    }

    // removes and shifts up
    @SuppressWarnings("AssignmentToNull")
    private void remove(int index) {
        Validate.isFalse(index >= size);
        int shifted = size - index - 1;
        if (shifted > 0) {
            System.arraycopy(keys, index + 1, keys, index, shifted);
            System.arraycopy(vals, index + 1, vals, index, shifted);
        }
        size--;
        keys[size] = null; // release hold
        vals[size] = null;
    }

    /**
     Remove an attribute by key. <b>Case sensitive.</b>
     @param key attribute key to remove
     */
    public void remove(String key) {
        int i = indexOfKey(key);
        if (i != NotFound)
            remove(i);
    }

    /**
     Remove an attribute by key. <b>Case insensitive.</b>
     @param key attribute key to remove
     */
    public void removeIgnoreCase(String key) {
        int i = indexOfKeyIgnoreCase(key);
        if (i != NotFound)
            remove(i);
    }

    /**
     Tests if these attributes contain an attribute with this key.
     @param key case-sensitive key to check for
     @return true if key exists, false otherwise
     */
    public boolean hasKey(String key) {
        return indexOfKey(key) != NotFound;
    }

    /**
     Tests if these attributes contain an attribute with this key.
     @param key case-insensitive key to check for
     @return true if key exists, false otherwise
     */
    public boolean hasKeyIgnoreCase(String key) {
        return indexOfKeyIgnoreCase(key) != NotFound;
    }

    /**
     * Check if these attributes contain an attribute with a value for this key.
     * @param key case-sensitive key to check for
     * @return true if key exists, and it has a value
     */
    public boolean hasDeclaredValueForKey(String key) {
        int i = indexOfKey(key);
        return i != NotFound && vals[i] != null;
    }

    /**
     * Check if these attributes contain an attribute with a value for this key.
     * @param key case-insensitive key to check for
     * @return true if key exists, and it has a value
     */
    public boolean hasDeclaredValueForKeyIgnoreCase(String key) {
        int i = indexOfKeyIgnoreCase(key);
        return i != NotFound && vals[i] != null;
    }

    /**
     Get the number of attributes in this set, including any jsoup internal-only attributes. Internal attributes are
     excluded from the {@link #html()}, {@link #asList()}, and {@link #iterator()} methods.
     @return size
     */
    public int size() {
        return size;
        // todo - exclude internal attributes from this count - maintain size, count of internals
    }

    /**
     * Test if this Attributes list is empty (size==0).
     * @return true if there are no attributes (internal attributes included in the count)
     */
    public boolean isEmpty() {
        return size == 0;
    }

    /**
     Add all the attributes from the incoming set to this set.
     @param incoming attributes to add to these attributes.
     */
    public void addAll(Attributes incoming) {
        if (incoming.size() == 0)
            return;
        checkCapacity(size + incoming.size);

        boolean needsPut = size != 0; // if this set is empty, no need to check existing set, so can add() vs put()
        // (and save bashing on the indexOfKey()
        for (Attribute attr : incoming) {
            if (needsPut)
                put(attr);
            else
                add(attr.getKey(), attr.getValue());
        }
    }

    /**
     Get the source ranges (start to end position) in the original input source from which this attribute's <b>name</b>
     and <b>value</b> were parsed.
     <p>Position tracking must be enabled prior to parsing the content.</p>
     @param key the attribute name
     @return the ranges for the attribute's name and value, or {@code untracked} if the attribute does not exist or its range
     was not tracked.
     @see org.jsoup.parser.Parser#setTrackPosition(boolean)
     @see Attribute#sourceRange()
     @see Node#sourceRange()
     @see Element#endSourceRange()
     @since 1.17.1
     */
    public Range.AttributeRange sourceRange(String key) {
        if (!hasKey(key)) return UntrackedAttr;
        Map<String, Range.AttributeRange> ranges = getRanges();
        if (ranges == null) return UntrackedAttr;
        Range.AttributeRange range = ranges.get(key);
        return range != null ? range : UntrackedAttr;
    }

    /** Get the Ranges, if tracking is enabled; null otherwise. */
    @Nullable Map<String, Range.AttributeRange> getRanges() {
        //noinspection unchecked
        return (Map<String, Range.AttributeRange>) userData(AttrRangeKey);
    }

    /**
     Set the source ranges (start to end position) from which this attribute's <b>name</b> and <b>value</b> were parsed.
     @param key the attribute name
     @param range the range for the attribute's name and value
     @return these attributes, for chaining
     @since 1.18.2
     */
    public Attributes sourceRange(String key, Range.AttributeRange range) {
        Validate.notNull(key);
        Validate.notNull(range);
        Map<String, Range.AttributeRange> ranges = getRanges();
        if (ranges == null) {
            ranges = new HashMap<>();
            userData(AttrRangeKey, ranges);
        }
        ranges.put(key, range);
        return this;
    }


    /**
     Get an iterator over the non-internal attributes. The iterator throws {@link ConcurrentModificationException} if
     the number of attributes changes during iteration other than through {@link Iterator#remove()}.
     @return an iterator of Attributes, each connected to this set
     */
    @Override
    public Iterator<Attribute> iterator() {
        //noinspection ReturnOfInnerClass
        return new Iterator<Attribute>() {
            int expectedSize = size;
            int i = 0; // index of the next slot to examine
            int lastReturned = NotFound; // slot of the attribute most recently returned by next(); NotFound if none or already removed

            @Override
            public boolean hasNext() {
                checkModified();
                while (i < size) {
                    String key = keys[i];
                    assert key != null;
                    if (isInternalKey(key)) // skip over internal keys
                        i++;
                    else
                        break;
                }

                return i < size;
            }

            @Override
            public Attribute next() {
                // go via hasNext() so that internal keys are skipped even if the caller did not call hasNext() first;
                // internal values may not be Strings, so must never reach the cast below
                if (!hasNext()) throw new NoSuchElementException();
                String key = keys[i];
                assert key != null;
                final Attribute attr = new Attribute(key, (String) vals[i], Attributes.this);
                lastReturned = i;
                i++;
                return attr;
            }

            private void checkModified() {
                if (size != expectedSize) throw new ConcurrentModificationException("Use Iterator#remove() instead to remove attributes while iterating.");
            }

            @Override
            public void remove() {
                if (lastReturned == NotFound) throw new IllegalStateException("next() must be called before remove()");
                checkModified();
                // remove the slot that next() handed out; hasNext() may since have moved i past internal keys, so
                // rewinding i by one (rather than removing at i - 1) keeps the cursor on the same logical entry
                Attributes.this.remove(lastReturned);
                i--;
                lastReturned = NotFound;
                expectedSize--;
            }
        };
    }

    /**
     Get the attributes as a List, for iteration.
     @return an unmodifiable List holding a snapshot of the non-internal attributes. Later additions to or removals
     from these Attributes are not reflected in the returned list.
     */
    public List<Attribute> asList() {
        ArrayList<Attribute> list = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            String key = keys[i];
            assert key != null;
            if (isInternalKey(key))
                continue; // skip internal keys
            Attribute attr = new Attribute(key, (String) vals[i], Attributes.this);
            list.add(attr);
        }
        return Collections.unmodifiableList(list);
    }

    /**
     * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys
     * starting with {@code data-}.
     * @return map of custom data attributes.
     */
    public Map<String, String> dataset() {
        return new Dataset(this);
    }

    /**
     Get the HTML representation of these attributes.
     @return HTML
     */
    public String html() {
        StringBuilder sb = StringUtil.borrowBuilder();
        html(QuietAppendable.wrap(sb), new Document.OutputSettings()); // output settings a bit funky, but this html() seldom used
        return StringUtil.releaseBuilder(sb);
    }

    // appends each non-internal attribute whose key validates for the output syntax, each preceded by a space
    final void html(final QuietAppendable accum, final Document.OutputSettings out) {
        final int sz = size;
        for (int i = 0; i < sz; i++) {
            String key = keys[i];
            assert key != null;
            if (isInternalKey(key))
                continue;
            final String validated = Attribute.getValidKey(key, out.syntax());
            if (validated != null)
                Attribute.htmlNoValidate(validated, (String) vals[i], accum.append(' '), out);
        }
    }

    @Override
    public String toString() {
        return html();
    }

    /**
     * Checks if these attributes are equal to another set of attributes, by comparing the two sets. Note that the order
     * of the attributes does not impact this equality (as per the Map interface equals()).
     * @param o attributes to compare with
     * @return if both sets of attributes have the same content
     */
    @Override
    public boolean equals(@Nullable Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        Attributes that = (Attributes) o;
        if (size != that.size) return false;
        for (int i = 0; i < size; i++) {
            String key = keys[i];
            assert key != null;
            int thatI = that.indexOfKey(key);
            if (thatI == NotFound || !Objects.equals(vals[i], that.vals[thatI]))
                return false;
        }
        return true;
    }

    /**
     * Calculates the hashcode of these attributes, by iterating all attributes and summing their hashcodes. Only the
     * used slots contribute, and the sum does not depend on attribute order, so that attribute sets which are
     * {@link #equals(Object) equal} (same keys and values, in any order, regardless of spare array capacity) produce the
     * same hashcode.
     * @return calculated hashcode
     */
    @Override
    public int hashCode() {
        int result = size;
        for (int i = 0; i < size; i++) {
            // per-entry hash combined as in Map.Entry; summed so that ordering is irrelevant
            result += Objects.hashCode(keys[i]) ^ Objects.hashCode(vals[i]);
        }
        return result;
    }

    /**
     Creates a copy of these attributes. The key and value arrays are copied (trimmed to the current size), and the
     clone receives its own copy of the user-data map (whose contents are copied shallowly). These (source)
     Attributes are not modified.
     @return the cloned Attributes
     */
    @Override
    public Attributes clone() {
        Attributes clone;
        try {
            clone = (Attributes) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }
        clone.size = size;
        clone.keys = Arrays.copyOf(keys, size);
        clone.vals = Arrays.copyOf(vals, size);

        // make a copy of the user data map. (Contents are shallow). The copy goes into the clone, leaving this
        // instance's map (and any references callers hold to it) untouched. Slot order is identical in both.
        int i = indexOfKey(SharedConstants.UserDataKey);
        if (i != NotFound) {
            //noinspection unchecked
            clone.vals[i] = new HashMap<>((Map<String, Object>) vals[i]);
        }

        return clone;
    }

    /**
     * Internal method. Lowercases all (non-internal) keys.
     */
    public void normalize() {
        for (int i = 0; i < size; i++) {
            assert keys[i] != null;
            String key = keys[i];
            assert key != null;
            if (!isInternalKey(key))
                keys[i] = lowerCase(key);
        }
    }

    /**
     * Internal method. Removes duplicate attribute by name. Settings for case sensitivity of key names.
     * @param settings case sensitivity
     * @return number of removed dupes
     */
    public int deduplicate(ParseSettings settings) {
        if (isEmpty())
            return 0;
        boolean preserve = settings.preserveAttributeCase();
        int dupes = 0;
        for (int i = 0; i < size; i++) {
            String keyI = keys[i];
            assert keyI != null;
            for (int j = i + 1; j < size; j++) {
                if ((preserve && keyI.equals(keys[j])) || (!preserve && keyI.equalsIgnoreCase(keys[j]))) {
                    dupes++;
                    remove(j);
                    j--; // the following entries shifted down into slot j; re-examine it
                }
            }
        }
        return dupes;
    }

    /** A Map of the {@code data-} attributes, backed by the owning Attributes; keys are exposed without the prefix. */
    private static class Dataset extends AbstractMap<String, String> {
        private final Attributes attributes;

        private Dataset(Attributes attributes) {
            this.attributes = attributes;
        }

        @Override
        public Set<Entry<String, String>> entrySet() {
            return new EntrySet();
        }

        @Override
        public String put(String key, String value) {
            String dataKey = dataKey(key);
            String oldValue = attributes.hasKey(dataKey) ? attributes.get(dataKey) : null;
            attributes.put(dataKey, value);
            return oldValue;
        }

        private class EntrySet extends AbstractSet<Map.Entry<String, String>> {

            @Override
            public Iterator<Map.Entry<String, String>> iterator() {
                return new DatasetIterator();
            }

            @Override
            public int size() {
                int count = 0;
                Iterator<Entry<String, String>> iter = new DatasetIterator();
                while (iter.hasNext()) {
                    iter.next();
                    count++;
                }
                return count;
            }
        }

        /**
         Iterates the data attributes of the backing Attributes. {@link #remove()} must be called directly after
         {@link #next()}: once {@link #hasNext()} has advanced the backing iterator, the returned entry can no longer
         be removed through this iterator and {@link IllegalStateException} is thrown.
         */
        private class DatasetIterator implements Iterator<Map.Entry<String, String>> {
            private final Iterator<Attribute> attrIter = attributes.iterator();
            @Nullable private Attribute attr; // the data attribute located by hasNext() / handed out by next()
            private boolean pending = false; // attr was located by hasNext() but not yet returned by next()
            private boolean removable = false; // attrIter is still positioned on the attr last returned by next()

            @Override public boolean hasNext() {
                if (pending) return true; // already located; do not advance again
                while (attrIter.hasNext()) {
                    Attribute candidate = attrIter.next();
                    removable = false; // backing iterator has moved past the last returned entry
                    if (candidate.isDataAttribute()) {
                        attr = candidate;
                        pending = true;
                        return true;
                    }
                }
                return false;
            }

            @Override public Entry<String, String> next() {
                if (!hasNext()) throw new NoSuchElementException();
                Attribute current = attr;
                assert current != null;
                pending = false;
                removable = true;
                return new Attribute(current.getKey().substring(dataPrefix.length()), current.getValue());
            }

            @Override public void remove() {
                if (!removable) throw new IllegalStateException("remove() must directly follow next()");
                // remove via the backing iterator, so that its modification tracking stays in sync
                attrIter.remove();
                removable = false;
            }
        }
    }

    // prefixes the key with "data-"
    private static String dataKey(String key) {
        return dataPrefix + key;
    }

    // prefixes the key with the internal marker
    static String internalKey(String key) {
        return InternalPrefix + key;
    }

    // an internal key is longer than one char and starts with the internal marker
    static boolean isInternalKey(String key) {
        return key.length() > 1 && key.charAt(0) == InternalPrefix;
    }
}