Skip to content

Commit

Permalink
Attribute accessor, and maintain source range after key change
Browse files Browse the repository at this point in the history
Fixes #2069
Fixes #2070
  • Loading branch information
jhy committed Nov 29, 2023
1 parent daef8bb commit b4751ad
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 4 deletions.
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

## 1.17.2 (Pending)

### Improvements
* Added `Element.attribute(String)` and `Attributes.attribute(String)` to more simply obtain an `Attribute` object.
[2069](https://github.com/jhy/jsoup/issues/2069)
* If source tracking is on, and an Attribute's key is changed (via `Attribute.setKey(String)`), the source range is
now preserved and remains available via `Attribute.sourceRange()`. [2070](https://github.com/jhy/jsoup/issues/2070)

### Bug Fixes

* When tracking the source position of attributes, if source attribute name was mix-cased but the parser was
Expand Down
12 changes: 10 additions & 2 deletions src/main/java/org/jsoup/nodes/Attribute.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import org.jsoup.SerializationException;
import org.jsoup.helper.Validate;
import org.jsoup.internal.Normalizer;
import org.jsoup.internal.SharedConstants;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Document.OutputSettings.Syntax;
import org.jspecify.annotations.Nullable;
Expand Down Expand Up @@ -72,8 +71,17 @@ public void setKey(String key) {
Validate.notEmpty(key); // trimming could potentially make empty, so validate here
if (parent != null) {
int i = parent.indexOfKey(this.key);
if (i != Attributes.NotFound)
if (i != Attributes.NotFound) {
String oldKey = parent.keys[i];
parent.keys[i] = key;

// if tracking source positions, update the key in the range map
Map<String, Range.AttributeRange> ranges = parent.getRanges();
if (ranges != null) {
Range.AttributeRange range = ranges.remove(oldKey);
ranges.put(key, range);
}
}
}
this.key = key;
}
Expand Down
23 changes: 21 additions & 2 deletions src/main/java/org/jsoup/nodes/Attributes.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,19 @@ public String get(String key) {
return i == NotFound ? EmptyString : checkNotNull(vals[i]);
}

/**
 Look up a single attribute by key and return it as an {@link Attribute} object. The returned Attribute stays
 attached to these Attributes, so edits made through it (for example {@link Attribute#setKey(String)} or
 {@link Attribute#setValue(String)}) are written back here and, in turn, to the owning Element.
 @param key the attribute key to find; matching is case-sensitive
 @return the matching Attribute, or {@code null} when no attribute has this key
 @since 1.17.2
 */
public Attribute attribute(String key) {
    final int index = indexOfKey(key);
    if (index == NotFound) {
        return null;
    }
    return new Attribute(key, checkNotNull(vals[index]), this);
}

/**
* Get an attribute's value by case-insensitive key
* @param key the attribute name
Expand Down Expand Up @@ -360,13 +373,19 @@ Get the source ranges (start to end position) in the original input source from
*/
public Range.AttributeRange sourceRange(String key) {
if (!hasKey(key)) return UntrackedAttr;
//noinspection unchecked
Map<String, Range.AttributeRange> ranges = (Map<String, Range.AttributeRange>) userData(AttrRangeKey);
Map<String, Range.AttributeRange> ranges = getRanges();
if (ranges == null) return Range.AttributeRange.UntrackedAttr;
Range.AttributeRange range = ranges.get(key);
return range != null ? range : Range.AttributeRange.UntrackedAttr;
}

/** Fetch the map of attribute key to source range kept in the user data under {@code AttrRangeKey}; null when tracking is not enabled. */
@Nullable Map<String, Range.AttributeRange> getRanges() {
    //noinspection unchecked
    final Map<String, Range.AttributeRange> ranges = (Map<String, Range.AttributeRange>) userData(AttrRangeKey);
    return ranges;
}


@Override
public Iterator<Attribute> iterator() {
return new Iterator<Attribute>() {
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/org/jsoup/nodes/Element.java
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,17 @@ public Element attr(String attributeKey, boolean attributeValue) {
return this;
}

/**
 Look up one of this Element's attributes by key and return it as an {@link Attribute} object. Edits made through
 the returned object (for example {@link Attribute#setKey(String)} or {@link Attribute#setValue(String)}) are
 applied back to this Element.
 @param key the attribute key to find; matching is case-sensitive
 @return the matching Attribute, or {@code null} when this Element has no attribute with that key
 @since 1.17.2
 */
public Attribute attribute(String key) {
    // Check hasAttributes() first so attributes() is only called when there is something to search.
    if (!hasAttributes()) {
        return null;
    }
    return attributes().attribute(key);
}

/**
* Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
* starting with "data-" is included the dataset.
Expand Down
19 changes: 19 additions & 0 deletions src/test/java/org/jsoup/nodes/ElementTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2865,4 +2865,23 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) {
doc.outputSettings().escapeMode(Entities.EscapeMode.extended);
assertEquals("Foo&nbsp;&succ;", doc.body().html()); // succ is alias for Succeeds, and first hit in entities
}

@Test void attribute() {
    Document doc = Jsoup.parse("<p CLASS='yes'>One</p>");
    Element para = doc.expectFirst("p");

    // the HTML parse lower-cases attribute names, so the key to look up is "class"
    Attribute classAttr = para.attribute("class");
    assertNotNull(classAttr);
    assertEquals("class", classAttr.getKey());
    assertEquals("yes", classAttr.getValue());
    // parsed without a tracking parser, so no source position is recorded
    assertFalse(classAttr.sourceRange().nameRange().start().isTracked());

    // lookup is case-sensitive: there is no attribute stored under the upper-case key
    assertNull(para.attribute("CLASS"));

    // setKey keeps the case it is given; both changes must reach the owning element
    classAttr.setKey("CLASS");
    classAttr.setValue("YES");

    assertEquals("<p CLASS=\"YES\">One</p>", para.outerHtml());
    assertEquals("CLASS=\"YES\"", classAttr.html());
}
}
46 changes: 46 additions & 0 deletions src/test/java/org/jsoup/parser/PositionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,52 @@ private void printRange(Node node) {
assertEquals("h1:0-9~12-17; id:4-6=7-8; #text:9-12; #text:17-18; h2:18-27~30-35; id:22-24=25-26; #text:27-30; h10:35-40~43-49; #text:40-43; ", track.toString());
}

@Test void updateKeyMaintainsRangeLc() {
    // Parsed with the tracking HTML parser; the attribute is looked up by its lower-cased name.
    Document doc = Jsoup.parse("<p xsi:CLASS=On>One</p>", TrackingHtmlParser);
    Element para = doc.expectFirst("p");
    Attribute tracked = para.attribute("xsi:class");
    assertNotNull(tracked);

    final String rangeBefore = "1,4:3-1,13:12=1,14:13-1,16:15";
    assertEquals(rangeBefore, tracked.sourceRange().toString());

    // renaming the key must not lose the recorded source range
    tracked.setKey("class");
    assertEquals(rangeBefore, tracked.sourceRange().toString());
    assertEquals("class=\"On\"", tracked.html());
}

@Test void updateKeyMaintainsRangeUc() {
    // Parsed with the tracking XML parser; this test looks the attribute up with its original upper-case name.
    String html = "<p xsi:CLASS=On>One</p>";
    Document doc = Jsoup.parse(html, TrackingXmlParser);
    Element p = doc.expectFirst("p");
    Attribute attr = p.attribute("xsi:CLASS");
    assertNotNull(attr);

    String expectedRange = "1,4:3-1,13:12=1,14:13-1,16:15";
    assertEquals(expectedRange, attr.sourceRange().toString());

    // first rename: the source range must follow the attribute to its new key
    attr.setKey("class");
    assertEquals(expectedRange, attr.sourceRange().toString());
    assertEquals("class=\"On\"", attr.html());

    // second rename: the range must survive repeated key changes
    attr.setKey("CLASSY");
    assertEquals(expectedRange, attr.sourceRange().toString());
    assertEquals("CLASSY=\"On\"", attr.html());

    // a value change leaves the range untouched and is visible on the owning element
    attr.setValue("To");
    assertEquals(expectedRange, attr.sourceRange().toString());
    assertEquals("CLASSY=\"To\"", attr.html());

    assertEquals("<p CLASSY=\"To\">One</p>", p.outerHtml());

    p.attr("CLASSY", "Tree");
    assertEquals(expectedRange, attr.sourceRange().toString());
    assertEquals("CLASSY=\"To\"", attr.html()); // changes in this direction do not get to the attribute as it's not connected that way

    // a fresh lookup sees the element's current value and still reports the original source range
    Attribute attr2 = p.attribute("CLASSY");
    assertNotNull(attr2); // fail with a clear assertion rather than an NPE if the lookup regresses
    assertEquals("CLASSY=\"Tree\"", attr2.html());
    assertEquals(expectedRange, attr2.sourceRange().toString());
}

static void accumulateAttributePositions(Node node, StringBuilder sb) {
if (node instanceof LeafNode) return; // leafnode pseudo attributes are not tracked
for (Attribute attribute : node.attributes()) {
Expand Down

0 comments on commit b4751ad

Please sign in to comment.