001package org.jsoup.nodes; 002 003import org.jsoup.SerializationException; 004import org.jsoup.helper.Validate; 005import org.jsoup.internal.SharedConstants; 006import org.jsoup.internal.StringUtil; 007import org.jsoup.parser.ParseSettings; 008import org.jspecify.annotations.Nullable; 009 010import java.io.IOException; 011import java.util.AbstractMap; 012import java.util.AbstractSet; 013import java.util.ArrayList; 014import java.util.Arrays; 015import java.util.Collections; 016import java.util.ConcurrentModificationException; 017import java.util.HashMap; 018import java.util.Iterator; 019import java.util.List; 020import java.util.Map; 021import java.util.NoSuchElementException; 022import java.util.Objects; 023import java.util.Set; 024 025import static org.jsoup.internal.Normalizer.lowerCase; 026import static org.jsoup.internal.SharedConstants.AttrRangeKey; 027import static org.jsoup.nodes.Range.AttributeRange.UntrackedAttr; 028 029/** 030 * The attributes of an Element. 031 * <p> 032 * During parsing, attributes in with the same name in an element are deduplicated, according to the configured parser's 033 * attribute case-sensitive setting. It is possible to have duplicate attributes subsequently if 034 * {@link #add(String, String)} vs {@link #put(String, String)} is used. 035 * </p> 036 * <p> 037 * Attribute name and value comparisons are generally <b>case sensitive</b>. By default for HTML, attribute names are 038 * normalized to lower-case on parsing. That means you should use lower-case strings when referring to attributes by 039 * name. 040 * </p> 041 * 042 * @author Jonathan Hedley, jonathan@hedley.net 043 */ 044public class Attributes implements Iterable<Attribute>, Cloneable { 045 // Indicates an internal key. Can't be set via HTML. (It could be set via accessor, but not too worried about 046 // that. Suppressed from list, iter.) 047 static final char InternalPrefix = '/'; 048 049 // The Attributes object is only created on the first use of an attribute; the Element will just have a null 050 // Attribute slot otherwise 051 protected static final String dataPrefix = "data-"; 052 private static final int InitialCapacity = 3; // sampling found mean count when attrs present = 1.49; 1.08 overall. 2.6:1 don't have any attrs. 053 054 // manages the key/val arrays 055 private static final int GrowthFactor = 2; 056 static final int NotFound = -1; 057 private static final String EmptyString = ""; 058 059 // the number of instance fields is kept as low as possible giving an object size of 24 bytes 060 private int size = 0; // number of slots used (not total capacity, which is keys.length) 061 @Nullable String[] keys = new String[InitialCapacity]; // keys is not null, but contents may be. Same for vals 062 @Nullable Object[] vals = new Object[InitialCapacity]; // Genericish: all non-internal attribute values must be Strings and are cast on access. 063 // todo - make keys iterable without creating Attribute objects 064 065 // check there's room for more 066 private void checkCapacity(int minNewSize) { 067 Validate.isTrue(minNewSize >= size); 068 int curCap = keys.length; 069 if (curCap >= minNewSize) 070 return; 071 int newCap = curCap >= InitialCapacity ? size * GrowthFactor : InitialCapacity; 072 if (minNewSize > newCap) 073 newCap = minNewSize; 074 075 keys = Arrays.copyOf(keys, newCap); 076 vals = Arrays.copyOf(vals, newCap); 077 } 078 079 int indexOfKey(String key) { 080 Validate.notNull(key); 081 for (int i = 0; i < size; i++) { 082 if (key.equals(keys[i])) 083 return i; 084 } 085 return NotFound; 086 } 087 088 private int indexOfKeyIgnoreCase(String key) { 089 Validate.notNull(key); 090 for (int i = 0; i < size; i++) { 091 if (key.equalsIgnoreCase(keys[i])) 092 return i; 093 } 094 return NotFound; 095 } 096 097 // we track boolean attributes as null in values - they're just keys. so returns empty for consumers 098 // casts to String, so only for non-internal attributes 099 static String checkNotNull(@Nullable Object val) { 100 return val == null ? EmptyString : (String) val; 101 } 102 103 /** 104 Get an attribute value by key. 105 @param key the (case-sensitive) attribute key 106 @return the attribute value if set; or empty string if not set (or a boolean attribute). 107 @see #hasKey(String) 108 */ 109 public String get(String key) { 110 int i = indexOfKey(key); 111 return i == NotFound ? EmptyString : checkNotNull(vals[i]); 112 } 113 114 /** 115 Get an Attribute by key. The Attribute will remain connected to these Attributes, so changes made via 116 {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc will cascade back to these Attributes and 117 their owning Element. 118 @param key the (case-sensitive) attribute key 119 @return the Attribute for this key, or null if not present. 120 @since 1.17.2 121 */ 122 @Nullable public Attribute attribute(String key) { 123 int i = indexOfKey(key); 124 return i == NotFound ? null : new Attribute(key, checkNotNull(vals[i]), this); 125 } 126 127 /** 128 * Get an attribute's value by case-insensitive key 129 * @param key the attribute name 130 * @return the first matching attribute value if set; or empty string if not set (ora boolean attribute). 131 */ 132 public String getIgnoreCase(String key) { 133 int i = indexOfKeyIgnoreCase(key); 134 return i == NotFound ? EmptyString : checkNotNull(vals[i]); 135 } 136 137 /** 138 * Adds a new attribute. Will produce duplicates if the key already exists. 139 * @see Attributes#put(String, String) 140 */ 141 public Attributes add(String key, @Nullable String value) { 142 addObject(key, value); 143 return this; 144 } 145 146 private void addObject(String key, @Nullable Object value) { 147 checkCapacity(size + 1); 148 keys[size] = key; 149 vals[size] = value; 150 size++; 151 } 152 153 /** 154 * Set a new attribute, or replace an existing one by key. 155 * @param key case sensitive attribute key (not null) 156 * @param value attribute value (which can be null, to set a true boolean attribute) 157 * @return these attributes, for chaining 158 */ 159 public Attributes put(String key, @Nullable String value) { 160 Validate.notNull(key); 161 int i = indexOfKey(key); 162 if (i != NotFound) 163 vals[i] = value; 164 else 165 add(key, value); 166 return this; 167 } 168 169 /** 170 Get the map holding any user-data associated with these Attributes. Will be created empty on first use. Held as 171 an internal attribute, not a field member, to reduce the memory footprint of Attributes when not used. Can hold 172 arbitrary objects; use for source ranges, connecting W3C nodes to Elements, etc. 173 * @return the map holding user-data 174 */ 175 Map<String, Object> userData() { 176 final Map<String, Object> userData; 177 int i = indexOfKey(SharedConstants.UserDataKey); 178 if (i == NotFound) { 179 userData = new HashMap<>(); 180 addObject(SharedConstants.UserDataKey, userData); 181 } else { 182 //noinspection unchecked 183 userData = (Map<String, Object>) vals[i]; 184 } 185 assert userData != null; 186 return userData; 187 } 188 189 /** 190 Get an arbitrary user-data object by key. 191 * @param key case-sensitive key to the object. 192 * @return the object associated to this key, or {@code null} if not found. 193 * @see #userData(String key, Object val) 194 * @since 1.17.1 195 */ 196 @Nullable 197 public Object userData(String key) { 198 Validate.notNull(key); 199 if (!hasKey(SharedConstants.UserDataKey)) return null; // no user data exists 200 Map<String, Object> userData = userData(); 201 return userData.get(key); 202 } 203 204 /** 205 Set an arbitrary user-data object by key. Will be treated as an internal attribute, so will not be emitted in HTML. 206 * @param key case-sensitive key 207 * @param value object value 208 * @return these attributes 209 * @see #userData(String key) 210 * @since 1.17.1 211 */ 212 public Attributes userData(String key, Object value) { 213 Validate.notNull(key); 214 userData().put(key, value); 215 return this; 216 } 217 218 void putIgnoreCase(String key, @Nullable String value) { 219 int i = indexOfKeyIgnoreCase(key); 220 if (i != NotFound) { 221 vals[i] = value; 222 String old = keys[i]; 223 assert old != null; 224 if (!old.equals(key)) // case changed, update 225 keys[i] = key; 226 } 227 else 228 add(key, value); 229 } 230 231 /** 232 * Set a new boolean attribute. Removes the attribute if the value is false. 233 * @param key case <b>insensitive</b> attribute key 234 * @param value attribute value 235 * @return these attributes, for chaining 236 */ 237 public Attributes put(String key, boolean value) { 238 if (value) 239 putIgnoreCase(key, null); 240 else 241 remove(key); 242 return this; 243 } 244 245 /** 246 Set a new attribute, or replace an existing one by key. 247 @param attribute attribute with case-sensitive key 248 @return these attributes, for chaining 249 */ 250 public Attributes put(Attribute attribute) { 251 Validate.notNull(attribute); 252 put(attribute.getKey(), attribute.getValue()); 253 attribute.parent = this; 254 return this; 255 } 256 257 // removes and shifts up 258 @SuppressWarnings("AssignmentToNull") 259 private void remove(int index) { 260 Validate.isFalse(index >= size); 261 int shifted = size - index - 1; 262 if (shifted > 0) { 263 System.arraycopy(keys, index + 1, keys, index, shifted); 264 System.arraycopy(vals, index + 1, vals, index, shifted); 265 } 266 size--; 267 keys[size] = null; // release hold 268 vals[size] = null; 269 } 270 271 /** 272 Remove an attribute by key. <b>Case sensitive.</b> 273 @param key attribute key to remove 274 */ 275 public void remove(String key) { 276 int i = indexOfKey(key); 277 if (i != NotFound) 278 remove(i); 279 } 280 281 /** 282 Remove an attribute by key. <b>Case insensitive.</b> 283 @param key attribute key to remove 284 */ 285 public void removeIgnoreCase(String key) { 286 int i = indexOfKeyIgnoreCase(key); 287 if (i != NotFound) 288 remove(i); 289 } 290 291 /** 292 Tests if these attributes contain an attribute with this key. 293 @param key case-sensitive key to check for 294 @return true if key exists, false otherwise 295 */ 296 public boolean hasKey(String key) { 297 return indexOfKey(key) != NotFound; 298 } 299 300 /** 301 Tests if these attributes contain an attribute with this key. 302 @param key key to check for 303 @return true if key exists, false otherwise 304 */ 305 public boolean hasKeyIgnoreCase(String key) { 306 return indexOfKeyIgnoreCase(key) != NotFound; 307 } 308 309 /** 310 * Check if these attributes contain an attribute with a value for this key. 311 * @param key key to check for 312 * @return true if key exists, and it has a value 313 */ 314 public boolean hasDeclaredValueForKey(String key) { 315 int i = indexOfKey(key); 316 return i != NotFound && vals[i] != null; 317 } 318 319 /** 320 * Check if these attributes contain an attribute with a value for this key. 321 * @param key case-insensitive key to check for 322 * @return true if key exists, and it has a value 323 */ 324 public boolean hasDeclaredValueForKeyIgnoreCase(String key) { 325 int i = indexOfKeyIgnoreCase(key); 326 return i != NotFound && vals[i] != null; 327 } 328 329 /** 330 Get the number of attributes in this set, including any jsoup internal-only attributes. Internal attributes are 331 excluded from the {@link #html()}, {@link #asList()}, and {@link #iterator()} methods. 332 @return size 333 */ 334 public int size() { 335 return size; 336 // todo - exclude internal attributes from this count - maintain size, count of internals 337 } 338 339 /** 340 * Test if this Attributes list is empty (size==0). 341 */ 342 public boolean isEmpty() { 343 return size == 0; 344 } 345 346 /** 347 Add all the attributes from the incoming set to this set. 348 @param incoming attributes to add to these attributes. 349 */ 350 public void addAll(Attributes incoming) { 351 if (incoming.size() == 0) 352 return; 353 checkCapacity(size + incoming.size); 354 355 boolean needsPut = size != 0; // if this set is empty, no need to check existing set, so can add() vs put() 356 // (and save bashing on the indexOfKey() 357 for (Attribute attr : incoming) { 358 if (needsPut) 359 put(attr); 360 else 361 add(attr.getKey(), attr.getValue()); 362 } 363 } 364 365 /** 366 Get the source ranges (start to end position) in the original input source from which this attribute's <b>name</b> 367 and <b>value</b> were parsed. 368 <p>Position tracking must be enabled prior to parsing the content.</p> 369 @param key the attribute name 370 @return the ranges for the attribute's name and value, or {@code untracked} if the attribute does not exist or its range 371 was not tracked. 372 @see org.jsoup.parser.Parser#setTrackPosition(boolean) 373 @see Attribute#sourceRange() 374 @see Node#sourceRange() 375 @see Element#endSourceRange() 376 @since 1.17.1 377 */ 378 public Range.AttributeRange sourceRange(String key) { 379 if (!hasKey(key)) return UntrackedAttr; 380 Map<String, Range.AttributeRange> ranges = getRanges(); 381 if (ranges == null) return Range.AttributeRange.UntrackedAttr; 382 Range.AttributeRange range = ranges.get(key); 383 return range != null ? range : Range.AttributeRange.UntrackedAttr; 384 } 385 386 /** Get the Ranges, if tracking is enabled; null otherwise. */ 387 @Nullable Map<String, Range.AttributeRange> getRanges() { 388 //noinspection unchecked 389 return (Map<String, Range.AttributeRange>) userData(AttrRangeKey); 390 } 391 392 /** 393 Set the source ranges (start to end position) from which this attribute's <b>name</b> and <b>value</b> were parsed. 394 @param key the attribute name 395 @param range the range for the attribute's name and value 396 @return these attributes, for chaining 397 @since 1.18.2 398 */ 399 public Attributes sourceRange(String key, Range.AttributeRange range) { 400 Validate.notNull(key); 401 Validate.notNull(range); 402 Map<String, Range.AttributeRange> ranges = getRanges(); 403 if (ranges == null) { 404 ranges = new HashMap<>(); 405 userData(AttrRangeKey, ranges); 406 } 407 ranges.put(key, range); 408 return this; 409 } 410 411 412 @Override 413 public Iterator<Attribute> iterator() { 414 //noinspection ReturnOfInnerClass 415 return new Iterator<Attribute>() { 416 int expectedSize = size; 417 int i = 0; 418 419 @Override 420 public boolean hasNext() { 421 checkModified(); 422 while (i < size) { 423 String key = keys[i]; 424 assert key != null; 425 if (isInternalKey(key)) // skip over internal keys 426 i++; 427 else 428 break; 429 } 430 431 return i < size; 432 } 433 434 @Override 435 public Attribute next() { 436 checkModified(); 437 if (i >= size) throw new NoSuchElementException(); 438 String key = keys[i]; 439 assert key != null; 440 final Attribute attr = new Attribute(key, (String) vals[i], Attributes.this); 441 i++; 442 return attr; 443 } 444 445 private void checkModified() { 446 if (size != expectedSize) throw new ConcurrentModificationException("Use Iterator#remove() instead to remove attributes while iterating."); 447 } 448 449 @Override 450 public void remove() { 451 Attributes.this.remove(--i); // next() advanced, so rewind 452 expectedSize--; 453 } 454 }; 455 } 456 457 /** 458 Get the attributes as a List, for iteration. 459 @return a view of the attributes as an unmodifiable List. 460 */ 461 public List<Attribute> asList() { 462 ArrayList<Attribute> list = new ArrayList<>(size); 463 for (int i = 0; i < size; i++) { 464 String key = keys[i]; 465 assert key != null; 466 if (isInternalKey(key)) 467 continue; // skip internal keys 468 Attribute attr = new Attribute(key, (String) vals[i], Attributes.this); 469 list.add(attr); 470 } 471 return Collections.unmodifiableList(list); 472 } 473 474 /** 475 * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys 476 * starting with {@code data-}. 477 * @return map of custom data attributes. 478 */ 479 public Map<String, String> dataset() { 480 return new Dataset(this); 481 } 482 483 /** 484 Get the HTML representation of these attributes. 485 @return HTML 486 */ 487 public String html() { 488 StringBuilder sb = StringUtil.borrowBuilder(); 489 try { 490 html(sb, (new Document("")).outputSettings()); // output settings a bit funky, but this html() seldom used 491 } catch (IOException e) { // ought never happen 492 throw new SerializationException(e); 493 } 494 return StringUtil.releaseBuilder(sb); 495 } 496 497 final void html(final Appendable accum, final Document.OutputSettings out) throws IOException { 498 final int sz = size; 499 for (int i = 0; i < sz; i++) { 500 String key = keys[i]; 501 assert key != null; 502 if (isInternalKey(key)) 503 continue; 504 final String validated = Attribute.getValidKey(key, out.syntax()); 505 if (validated != null) 506 Attribute.htmlNoValidate(validated, (String) vals[i], accum.append(' '), out); 507 } 508 } 509 510 @Override 511 public String toString() { 512 return html(); 513 } 514 515 /** 516 * Checks if these attributes are equal to another set of attributes, by comparing the two sets. Note that the order 517 * of the attributes does not impact this equality (as per the Map interface equals()). 518 * @param o attributes to compare with 519 * @return if both sets of attributes have the same content 520 */ 521 @Override 522 public boolean equals(@Nullable Object o) { 523 if (this == o) return true; 524 if (o == null || getClass() != o.getClass()) return false; 525 526 Attributes that = (Attributes) o; 527 if (size != that.size) return false; 528 for (int i = 0; i < size; i++) { 529 String key = keys[i]; 530 assert key != null; 531 int thatI = that.indexOfKey(key); 532 if (thatI == NotFound || !Objects.equals(vals[i], that.vals[thatI])) 533 return false; 534 } 535 return true; 536 } 537 538 /** 539 * Calculates the hashcode of these attributes, by iterating all attributes and summing their hashcodes. 540 * @return calculated hashcode 541 */ 542 @Override 543 public int hashCode() { 544 int result = size; 545 result = 31 * result + Arrays.hashCode(keys); 546 result = 31 * result + Arrays.hashCode(vals); 547 return result; 548 } 549 550 @Override 551 public Attributes clone() { 552 Attributes clone; 553 try { 554 clone = (Attributes) super.clone(); 555 } catch (CloneNotSupportedException e) { 556 throw new RuntimeException(e); 557 } 558 clone.size = size; 559 clone.keys = Arrays.copyOf(keys, size); 560 clone.vals = Arrays.copyOf(vals, size); 561 return clone; 562 } 563 564 /** 565 * Internal method. Lowercases all (non-internal) keys. 566 */ 567 public void normalize() { 568 for (int i = 0; i < size; i++) { 569 assert keys[i] != null; 570 String key = keys[i]; 571 assert key != null; 572 if (!isInternalKey(key)) 573 keys[i] = lowerCase(key); 574 } 575 } 576 577 /** 578 * Internal method. Removes duplicate attribute by name. Settings for case sensitivity of key names. 579 * @param settings case sensitivity 580 * @return number of removed dupes 581 */ 582 public int deduplicate(ParseSettings settings) { 583 if (isEmpty()) 584 return 0; 585 boolean preserve = settings.preserveAttributeCase(); 586 int dupes = 0; 587 for (int i = 0; i < size; i++) { 588 String keyI = keys[i]; 589 assert keyI != null; 590 for (int j = i + 1; j < size; j++) { 591 if ((preserve && keyI.equals(keys[j])) || (!preserve && keyI.equalsIgnoreCase(keys[j]))) { 592 dupes++; 593 remove(j); 594 j--; 595 } 596 } 597 } 598 return dupes; 599 } 600 601 private static class Dataset extends AbstractMap<String, String> { 602 private final Attributes attributes; 603 604 private Dataset(Attributes attributes) { 605 this.attributes = attributes; 606 } 607 608 @Override 609 public Set<Entry<String, String>> entrySet() { 610 return new EntrySet(); 611 } 612 613 @Override 614 public String put(String key, String value) { 615 String dataKey = dataKey(key); 616 String oldValue = attributes.hasKey(dataKey) ? attributes.get(dataKey) : null; 617 attributes.put(dataKey, value); 618 return oldValue; 619 } 620 621 private class EntrySet extends AbstractSet<Map.Entry<String, String>> { 622 623 @Override 624 public Iterator<Map.Entry<String, String>> iterator() { 625 return new DatasetIterator(); 626 } 627 628 @Override 629 public int size() { 630 int count = 0; 631 Iterator<Entry<String, String>> iter = new DatasetIterator(); 632 while (iter.hasNext()) 633 count++; 634 return count; 635 } 636 } 637 638 private class DatasetIterator implements Iterator<Map.Entry<String, String>> { 639 private final Iterator<Attribute> attrIter = attributes.iterator(); 640 private Attribute attr; 641 @Override public boolean hasNext() { 642 while (attrIter.hasNext()) { 643 attr = attrIter.next(); 644 if (attr.isDataAttribute()) return true; 645 } 646 return false; 647 } 648 649 @Override public Entry<String, String> next() { 650 return new Attribute(attr.getKey().substring(dataPrefix.length()), attr.getValue()); 651 } 652 653 @Override public void remove() { 654 attributes.remove(attr.getKey()); 655 } 656 } 657 } 658 659 private static String dataKey(String key) { 660 return dataPrefix + key; 661 } 662 663 static String internalKey(String key) { 664 return InternalPrefix + key; 665 } 666 667 static boolean isInternalKey(String key) { 668 return key.length() > 1 && key.charAt(0) == InternalPrefix; 669 } 670}