Skip to content

Commit

Permalink
Extend Elements to update the backing DOM on set(), remove(), et al (#…
Browse files Browse the repository at this point in the history
…2017)

Fixes #1522
  • Loading branch information
jhy authored Oct 24, 2023
1 parent 8b6e745 commit 61ac59b
Show file tree
Hide file tree
Showing 4 changed files with 296 additions and 3 deletions.
5 changes: 5 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
jsoup changelog

Release 1.17.1 [PENDING]
* Improvement: in the Elements list, added direct support for `#set(index, element)`, `#remove(index)`,
`#remove(object)`, `#clear()`, `#removeAll(collection)`, `#retainAll(collection)`, `#removeIf(filter)`,
`#replaceAll(operator)`. These methods update the original DOM, as well as the Elements list.

Release 1.16.2 [20-Oct-2023]
* Improvement: optimized the performance of complex CSS selectors, by adding a cost-based query planner. Evaluators
are sorted by their relative execution cost, and executed in order of lower to higher cost. This speeds the
Expand Down
2 changes: 2 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@
<ignore>java.util.function.Supplier</ignore>
<ignore>java.lang.ThreadLocal</ignore>
<ignore>java.io.UncheckedIOException</ignore>
<ignore>java.util.function.Predicate</ignore>
<ignore>java.util.function.UnaryOperator</ignore>
</ignores>
<!-- ^ Provided by https://developer.android.com/studio/write/java8-support#library-desugaring
Possibly OK to remove androidscents; keep for now to validate other additions are supported. -->
Expand Down
128 changes: 125 additions & 3 deletions src/main/java/org/jsoup/select/Elements.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,17 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.function.Predicate;
import java.util.function.UnaryOperator;

/**
A list of {@link Element}s, with methods that act on every element in the list.
<p>
To get an {@code Elements} object, use the {@link Element#select(String)} method.
</p>
<p>To get an {@code Elements} object, use the {@link Element#select(String)} method.</p>
<p>Methods that {@link #set(int, Element) set}, {@link #remove(int) remove}, or {@link #replaceAll(UnaryOperator)
replace} Elements in the list will also act on the underlying {@link org.jsoup.nodes.Document DOM}.</p>
@author Jonathan Hedley, [email protected] */
public class Elements extends ArrayList<Element> {
Expand Down Expand Up @@ -431,6 +434,7 @@ public Elements empty() {

/**
* Remove each matched element from the DOM. This is similar to setting the outer HTML of each element to nothing.
* <p>The elements will still be retained in this list, in case further processing of them is desired.</p>
* <p>
* E.g. HTML: {@code <div><p>Hello</p> <p>there</p> <img /></div>}<br>
* <code>doc.select("p").remove();</code><br>
Expand All @@ -440,6 +444,7 @@ public Elements empty() {
* @return this, for chaining
* @see Element#empty()
* @see #empty()
* @see #clear()
*/
public Elements remove() {
for (Element element : this) {
Expand Down Expand Up @@ -683,4 +688,121 @@ private <T extends Node> List<T> childNodesOfType(Class<T> tClass) {
return nodes;
}

// list methods that update the DOM:

/**
 Swaps the Element held at the given position of this list for the supplied one, and performs the same replacement
 in the DOM by replacing the previous Element with the new one.
 * @param index position in this list of the Element to replace
 * @param element the Element to place at that position; must not be null
 * @return the Element that previously occupied the position
 * @since 1.17.1
 */
@Override public Element set(int index, Element element) {
    Validate.notNull(element);
    // Update the list first; the displaced element is then swapped out of the DOM.
    final Element previous = super.set(index, element);
    previous.replaceWith(element);
    return previous;
}

/**
 Removes the Element at the given position from this list, and also removes it from the DOM.
 * @param index position in this list of the Element to remove
 * @return the Element that was removed
 * @since 1.17.1
 */
@Override public Element remove(int index) {
    // Drop it from the list, then detach the same element from the DOM.
    final Element removed = super.remove(index);
    removed.remove();
    return removed;
}

/**
 Removes the given Element from this list and from the DOM, if this list contains it.
 * @param o the element to remove from this list, if present
 * @return {@code true} if this list contained the Element (and it was removed), otherwise {@code false}
 * @since 1.17.1
 */
@Override public boolean remove(Object o) {
    final int position = super.indexOf(o);
    if (position < 0) {
        return false; // not in this list: nothing to remove
    }
    // Delegate to the index-based removal, which also removes the element from the DOM.
    remove(position);
    return true;
}

/**
 Empties this list, after first removing each of its elements from the DOM.
 * @since 1.17.1
 * @see #remove()
 */
@Override public void clear() {
    // DOM removal has to run while the elements are still in the list.
    this.remove();
    super.clear();
}

/**
 Removes from this list, and from the DOM, every element of this list that is contained in the specified
 collection.
 <p>This list is walked by index (rather than iterating over {@code c}), so it is safe to pass this list itself as
 the argument, e.g. {@code els.removeAll(els)}; and every occurrence of a matching element is removed, not only the
 first.</p>
 * @param c collection containing elements to be removed from this list
 * @return {@code true} if elements were removed from this list
 * @since 1.17.1
 */
@Override public boolean removeAll(Collection<?> c) {
    boolean anyRemoved = false;
    // Walk backwards so that removing at i does not shift the indices still to be visited.
    for (int i = size() - 1; i >= 0; i--) {
        if (c.contains(get(i))) {
            remove(i); // the index-based remove also removes the element from the DOM
            anyRemoved = true;
        }
    }
    return anyRemoved;
}

/**
 Keeps in this list, and in the DOM, only those elements of this list that are also present in the specified
 collection. Put another way: any element that is in this list but not in the specified collection is removed from
 this list and from the DOM.
 * @param c collection containing elements to be retained in this list
 * @return {@code true} if elements were removed from this list
 * @since 1.17.1
 */
@Override public boolean retainAll(Collection<?> c) {
    boolean changed = false;
    final Iterator<Element> cursor = this.iterator();
    while (cursor.hasNext()) {
        final Element candidate = cursor.next();
        if (c.contains(candidate)) {
            continue; // present in c: keep it
        }
        cursor.remove();
        changed = true;
    }
    return changed;
}

/**
 Removes from this list, and from the DOM, every element of this list that matches the given filter.
 * @param filter a predicate which returns {@code true} for elements to be removed
 * @return {@code true} if elements were removed from this list
 * @since 1.17.1
 */
@Override public boolean removeIf(Predicate<? super Element> filter) {
    boolean changed = false;
    final Iterator<Element> cursor = this.iterator();
    while (cursor.hasNext()) {
        final Element candidate = cursor.next();
        if (!filter.test(candidate)) {
            continue; // filter rejected it: keep it
        }
        cursor.remove();
        changed = true;
    }
    return changed;
}

/**
 Applies the operator to each element of this list in turn, storing the result back at the same position via
 {@link #set(int, Element)}, so that the DOM is updated as well as this list.
 * @param operator the operator to apply to each element
 * @since 1.17.1
 */
@Override public void replaceAll(UnaryOperator<Element> operator) {
    int position = 0;
    while (position < this.size()) {
        final Element replacement = operator.apply(this.get(position));
        this.set(position, replacement);
        position++;
    }
}
}
164 changes: 164 additions & 0 deletions src/test/java/org/jsoup/select/ElementsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
import org.jsoup.nodes.TextNode;
import org.junit.jupiter.api.Test;

import java.util.Iterator;
import java.util.List;

import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertFalse;

/**
Tests for Elements.
Expand Down Expand Up @@ -435,4 +437,166 @@ public void tail(Node node, int depth) {
assertEquals("http://example.com/bar", absAttrs.get(1));
assertEquals("http://example.com", absAttrs.get(2));
}

// set(index, element) must swap the element both in the Elements list and in the document.
@Test public void setElementByIndex() {
    Document doc = Jsoup.parse("<p>One<p>Two<p>Three");
    Element newP = doc.createElement("p").text("New").attr("id", "new");

    Elements ps = doc.select("p");
    Element two = ps.get(1);
    Element old = ps.set(1, newP);
    assertSame(two, old); // (expected, actual): set returns the element that was displaced
    assertSame(newP, ps.get(1)); // replaced in list
    assertEquals("<p>One</p>\n<p id=\"new\">New</p>\n<p>Three</p>", doc.body().html()); // replaced in dom
}

// remove(index) must drop the element both from the Elements list and from the document.
@Test public void removeElementByIndex() {
    Document doc = Jsoup.parse("<p>One<p>Two<p>Three");

    Elements ps = doc.select("p");
    Element two = ps.get(1);
    assertTrue(ps.contains(two));
    Element old = ps.remove(1);
    assertSame(two, old); // (expected, actual): remove returns the element that was removed

    assertEquals(2, ps.size()); // removed from list
    assertFalse(ps.contains(old));
    assertEquals("<p>One</p>\n<p>Three</p>", doc.body().html()); // removed from dom
}

// remove(Object) on an element that is in the list drops it from the list and from the document.
@Test public void removeElementByObject() {
    final Document document = Jsoup.parse("<p>One<p>Two<p>Three");
    final Elements paragraphs = document.select("p");
    final Element second = paragraphs.get(1);

    assertTrue(paragraphs.contains(second));
    assertTrue(paragraphs.remove(second));

    // gone from the list
    assertEquals(2, paragraphs.size());
    assertFalse(paragraphs.contains(second));
    // gone from the dom
    assertEquals("<p>One</p>\n<p>Three</p>", document.body().html());
}

// remove(Object) with something that is not in the list returns false and leaves list and document untouched.
@Test public void removeElementObjectNoops() {
    final Document document = Jsoup.parse("<p>One<p>Two<p>Three");
    final String htmlBefore = document.html();
    final Element detached = document.createElement("p").text("New");

    final Elements paragraphs = document.select("p");
    final int sizeBefore = paragraphs.size();

    assertFalse(paragraphs.remove(detached)); // an element that is not in the list
    assertFalse(paragraphs.remove(detached.childNodes())); // an object that is not an element at all

    assertEquals(htmlBefore, document.html());
    assertEquals(sizeBefore, paragraphs.size());
}

// clear() empties the list and removes its elements from the document.
@Test public void clear() {
    final Document document = Jsoup.parse("<p>One</p><p>Two</p><div>Three</div>");
    final Elements paragraphs = document.select("p");
    assertEquals(2, paragraphs.size());

    paragraphs.clear();

    assertEquals(0, paragraphs.size()); // list is empty
    assertEquals(0, document.select("p").size()); // and a fresh select finds none in the dom
}

// removeAll(collection) removes the matching elements from the list and the document, and only those.
@Test public void removeAll() {
    final Document document = Jsoup.parse("<p>One<p>Two<p>Three<p>Four</p><div>Div");
    final Elements paragraphs = document.select("p");
    assertEquals(4, paragraphs.size());
    final Elements middle = document.select("p:gt(0):lt(3)"); // Two and Three
    assertEquals(2, middle.size());

    final boolean changed = paragraphs.removeAll(middle);
    assertEquals(2, paragraphs.size());
    assertTrue(changed);
    assertEquals(2, middle.size()); // the argument collection itself is left as it was

    // removing elements that are not in the list is a no-op
    final Elements divisions = document.select("div");
    assertEquals(1, divisions.size());
    assertFalse(paragraphs.removeAll(divisions));
    assertEquals(2, paragraphs.size());

    assertEquals("<p>One</p>\n<p>Four</p>\n<div>\n Div\n</div>", document.body().html());
}

// retainAll(collection) keeps only the matching elements in the list and the document.
@Test public void retainAll() {
    final String expectedHtml = "<p>Two</p>\n<p>Three</p>\n<div>\n Div\n</div>";

    final Document document = Jsoup.parse("<p>One<p>Two<p>Three<p>Four</p><div>Div");
    final Elements paragraphs = document.select("p");
    assertEquals(4, paragraphs.size());
    final Elements middle = document.select("p:gt(0):lt(3)"); // Two and Three
    assertEquals(2, middle.size());

    final boolean changed = paragraphs.retainAll(middle);
    assertEquals(2, paragraphs.size());
    assertTrue(changed);
    assertEquals(2, middle.size());

    assertEquals(expectedHtml, document.body().html());

    // retaining against a collection that holds everything already in the list changes nothing
    final Elements reselected = document.select("p");
    assertFalse(middle.retainAll(reselected));

    assertEquals(expectedHtml, document.body().html());
}

// Removing through the list's iterator must also remove the element from the document.
@Test public void iteratorRemovesFromDom() {
    final Document document = Jsoup.parse("<p>One<p>Two<p>Three<p>Four");
    final Elements paragraphs = document.select("p");
    assertEquals(4, paragraphs.size());

    final Iterator<Element> cursor = paragraphs.iterator();
    while (cursor.hasNext()) {
        final Element current = cursor.next();
        if (current.text().contains("Two")) {
            cursor.remove();
        }
    }

    assertEquals(3, paragraphs.size());
    assertEquals("<p>One</p>\n<p>Three</p>\n<p>Four</p>", document.body().html());
}

// removeIf(filter) removes matches from the list and the document, and reports whether anything was removed.
@Test public void removeIf() {
    final String expectedHtml = "<p>One</p>\n<p>Three</p>\n<p>Four</p>";

    final Document document = Jsoup.parse("<p>One<p>Two<p>Three<p>Four");
    final Elements paragraphs = document.select("p");
    assertEquals(4, paragraphs.size());

    assertTrue(paragraphs.removeIf(el -> el.text().contains("Two")));
    assertEquals(3, paragraphs.size());
    assertEquals(expectedHtml, document.body().html());

    // a filter that matches nothing returns false and leaves the document alone
    assertFalse(paragraphs.removeIf(el -> el.text().contains("Five")));
    assertEquals(expectedHtml, document.body().html());
}

// The removeIf filter may read the list that is being filtered while the removal is in progress.
@Test public void removeIfSupportsConcurrentRead() {
    final Document document = Jsoup.parse("<p>One<p>Two<p>Three<p>Four");
    final Elements paragraphs = document.select("p");
    assertEquals(4, paragraphs.size());

    // every element is (still) in the list when tested, so all of them get removed
    assertTrue(paragraphs.removeIf(paragraphs::contains));

    assertEquals(0, paragraphs.size());
    assertEquals("", document.body().html());
}

// replaceAll(operator) swaps every element for the operator's result, in the list and in the document.
@Test public void replaceAll() {
    final Document document = Jsoup.parse("<p>One<p>Two<p>Three<p>Four");
    final Elements paragraphs = document.select("p");
    assertEquals(4, paragraphs.size());

    // turn each paragraph into a div carrying the same text
    paragraphs.replaceAll(el -> document.createElement("div").text(el.text()));

    // the list now holds the replacements
    for (Element replaced : paragraphs) {
        assertEquals("div", replaced.tagName());
    }

    // and so does the dom
    final String normalizedHtml = TextUtil.normalizeSpaces(document.body().html());
    assertEquals("<div> One</div><div> Two</div><div> Three</div><div> Four</div>", normalizedHtml);
}
}

0 comments on commit 61ac59b

Please sign in to comment.