Skip to content

Commit

Permalink
Added more select methods, tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Isira-Seneviratne committed Jul 23, 2024
1 parent cb74941 commit e1e35bb
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 28 deletions.
144 changes: 119 additions & 25 deletions src/main/java/org/jsoup/nodes/Element.java
Original file line number Diff line number Diff line change
Expand Up @@ -1143,10 +1143,19 @@ private static <E extends Element> int indexInList(Element search, List<E> eleme
* @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
*/
public Elements getElementsByTag(String tagName) {
    // Delegate the search to the stream variant, then gather the matches into an Elements list.
    final Stream<Element> matches = getElementStreamByTag(tagName);
    return matches.collect(Collectors.toCollection(Elements::new));
}

/**
 * Finds elements, including and recursively under this element, with the specified tag name.
 * @param tagName The tag name to search for (case insensitively).
 * @return a stream of elements. Will be empty if this element and none of its children match.
 */
public Stream<Element> getElementStreamByTag(String tagName) {
    Validate.notEmpty(tagName);
    // The tag name is normalized before it is handed to the Tag evaluator.
    return selectStream(new Evaluator.Tag(normalize(tagName)));
}

/**
Expand All @@ -1171,40 +1180,76 @@ public Elements getElementsByTag(String tagName) {
* checks each class, so you can find the above with {@code el.getElementsByClass("header");}.
*
* @param className the name of the class to search for.
* @return a list of elements with the supplied class name, empty if none
* @see #hasClass(String)
* @see #classNames()
*/
public Elements getElementsByClass(String className) {
    // Delegate the search to the stream variant, then gather the matches into an Elements list.
    final Stream<Element> matches = getElementStreamByClass(className);
    return matches.collect(Collectors.toCollection(Elements::new));
}

/**
 * Find elements that have this class, including or under this element. Case-insensitive.
 * <p>
 * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method
 * checks each class, so you can find the above with {@code el.getElementStreamByClass("header");}.
 *
 * @param className the name of the class to search for.
 * @return a stream of elements with the supplied class name
 * @see #hasClass(String)
 * @see #classNames()
 */
public Stream<Element> getElementStreamByClass(String className) {
    Validate.notEmpty(className);
    return selectStream(new Evaluator.Class(className));
}

/**
 * Find elements that have a named attribute set. Case-insensitive.
 *
 * @param key name of the attribute, e.g. {@code href}
 * @return a list of elements that have this attribute, empty if none
 */
public Elements getElementsByAttribute(String key) {
    // Delegate the search to the stream variant, then gather the matches into an Elements list.
    final Stream<Element> matches = getElementStreamByAttribute(key);
    return matches.collect(Collectors.toCollection(Elements::new));
}

/**
 * Find elements that have a named attribute set. Case-insensitive.
 *
 * @param key name of the attribute, e.g. {@code href}
 * @return a stream of elements that have this attribute
 */
public Stream<Element> getElementStreamByAttribute(String key) {
    Validate.notEmpty(key);
    // Surrounding whitespace is stripped from the key before matching.
    return selectStream(new Evaluator.Attribute(key.trim()));
}

/**
 * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements
 * that have HTML5 datasets.
 * @param keyPrefix name prefix of the attribute e.g. {@code data-}
 * @return a list of elements that have attribute names that start with the prefix, empty if none.
 */
public Elements getElementsByAttributeStarting(String keyPrefix) {
    // Delegate the search to the stream variant, then gather the matches into an Elements list.
    final Stream<Element> matches = getElementStreamByAttributeStarting(keyPrefix);
    return matches.collect(Collectors.toCollection(Elements::new));
}

/**
 * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements
 * that have HTML5 datasets.
 * @param keyPrefix name prefix of the attribute e.g. {@code data-}
 * @return a stream of elements that have attribute names that start with the prefix
 */
public Stream<Element> getElementStreamByAttributeStarting(String keyPrefix) {
    Validate.notEmpty(keyPrefix);
    // Surrounding whitespace is stripped from the prefix before matching.
    return selectStream(new Evaluator.AttributeStarting(keyPrefix.trim()));
}

/**
Expand Down Expand Up @@ -1270,7 +1315,6 @@ public Elements getElementsByAttributeValueContaining(String key, String match)
*/
public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) {
    // Build the evaluator for the attribute key and value pattern, then collect matches starting at this element.
    final Evaluator valueMatcher = new Evaluator.AttributeWithValueMatching(key, pattern);
    return Collector.collect(valueMatcher, this);
}

/**
Expand Down Expand Up @@ -1341,27 +1385,47 @@ public Elements getElementsContainingOwnText(String searchText) {
/**
 * Find elements whose text matches the supplied regular expression.
 * @param pattern regular expression to match text against
 * @return a list of elements matching the supplied regular expression.
 * @see Element#text()
 */
public Elements getElementsMatchingText(Pattern pattern) {
    // Delegate the search to the stream variant and gather the matches into an Elements list.
    return getElementStreamMatchingText(pattern)
        .collect(Collectors.toCollection(Elements::new));
}

/**
 * Find elements whose text matches the supplied regular expression, as a stream.
 * @param pattern regular expression to match text against
 * @return a stream of elements matching the supplied regular expression.
 * @see Element#text()
 */
public Stream<Element> getElementStreamMatchingText(Pattern pattern) {
    final Evaluator textMatcher = new Evaluator.Matches(pattern);
    return selectStream(textMatcher);
}

/**
 * Find elements whose text matches the supplied regular expression.
 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options.
 * @return a list of elements matching the supplied regular expression.
 * @see Element#text()
 */
public Elements getElementsMatchingText(String regex) {
    // Pattern compilation is handled by the stream variant; here the matches are only gathered into a list.
    return getElementStreamMatchingText(regex)
        .collect(Collectors.toCollection(Elements::new));
}

/**
 * Find elements whose text matches the supplied regular expression.
 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options.
 * @return a stream of elements matching the supplied regular expression.
 * @throws IllegalArgumentException if {@code regex} is not a valid regular expression
 * @see Element#text()
 */
public Stream<Element> getElementStreamMatchingText(String regex) {
    try {
        return getElementStreamMatchingText(Pattern.compile(regex));
    } catch (PatternSyntaxException e) {
        // Rethrow as an argument error, keeping the offending regex and the original cause.
        throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
    }
}

/**
Expand All @@ -1371,32 +1435,62 @@ public Elements getElementsMatchingText(String regex) {
* @see Element#ownText()
*/
public Elements getElementsMatchingOwnText(Pattern pattern) {
    // Delegate the search to the stream variant and gather the matches into an Elements list.
    return getElementStreamMatchingOwnText(pattern)
        .collect(Collectors.toCollection(Elements::new));
}

/**
 * Find elements whose own text matches the supplied regular expression, as a stream.
 * @param pattern regular expression to match text against
 * @return a stream of elements matching the supplied regular expression.
 * @see Element#ownText()
 */
public Stream<Element> getElementStreamMatchingOwnText(Pattern pattern) {
    final Evaluator ownTextMatcher = new Evaluator.MatchesOwn(pattern);
    return selectStream(ownTextMatcher);
}

/**
 * Find elements whose own text matches the supplied regular expression.
 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options.
 * @return a list of elements matching the supplied regular expression.
 * @see Element#ownText()
 */
public Elements getElementsMatchingOwnText(String regex) {
    // Pattern compilation is handled by the stream variant; here the matches are only gathered into a list.
    return getElementStreamMatchingOwnText(regex)
        .collect(Collectors.toCollection(Elements::new));
}

/**
 * Find elements whose own text matches the supplied regular expression.
 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options.
 * @return a stream of elements matching the supplied regular expression.
 * @throws IllegalArgumentException if {@code regex} is not a valid regular expression
 * @see Element#ownText()
 */
public Stream<Element> getElementStreamMatchingOwnText(String regex) {
    try {
        return getElementStreamMatchingOwnText(Pattern.compile(regex));
    } catch (PatternSyntaxException e) {
        // Rethrow as an argument error, keeping the offending regex and the original cause.
        throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
    }
}

/**
 * Find all elements under this element (including self, and children of children).
 *
 * @return a list of all elements
 */
public Elements getAllElements() {
    // Delegate the traversal to the stream variant and gather the result into an Elements list.
    return getAllElementsStream()
        .collect(Collectors.toCollection(Elements::new));
}

/**
 * Find all elements under this element (including self, and children of children), as a stream.
 *
 * @return a stream of all elements
 */
public Stream<Element> getAllElementsStream() {
    final Evaluator matchEverything = new Evaluator.AllElements();
    return selectStream(matchEverything);
}

/**
Expand Down Expand Up @@ -1892,7 +1986,7 @@ protected Element doClone(@Nullable Node parent) {
public Element clearAttributes() {
if (attributes != null) {
super.clearAttributes(); // keeps internal attributes via iterator
if (attributes.size() == 0)
if (attributes.isEmpty())
attributes = null; // only remove entirely if no internal attributes
}

Expand Down
46 changes: 43 additions & 3 deletions src/test/java/org/jsoup/nodes/ElementTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.junit.jupiter.api.Assertions.*;
Expand Down Expand Up @@ -88,6 +89,28 @@ public void getElementsByTagName() {
assertEquals(0, empty.size());
}

@Test
public void getElementStreamByTagName() {
    Document doc = Jsoup.parse(reference);
    List<Element> divs = doc.getElementStreamByTag("div").collect(Collectors.toList());
    assertEquals(2, divs.size());
    assertEquals("div1", divs.get(0).id());
    assertEquals("div2", divs.get(1).id());

    List<Element> ps = doc.getElementStreamByTag("p").collect(Collectors.toList());
    assertEquals(2, ps.size());
    assertEquals("Hello", ((TextNode) ps.get(0).childNode(0)).getWholeText());
    assertEquals("Another ", ((TextNode) ps.get(1).childNode(0)).getWholeText());
    // The same elements must be found when the tag name is given in upper case.
    List<Element> ps2 = doc.getElementStreamByTag("P").collect(Collectors.toList());
    assertEquals(ps, ps2);

    Element img = doc.getElementStreamByTag("img").findFirst().orElse(null);
    // Fail with a clear assertion rather than a NullPointerException if no img is found.
    assertNotNull(img);
    assertEquals("foo.png", img.attr("src"));

    // Use the stream variant here: findFirst() is a Stream operation, not an Elements method.
    Element empty = doc.getElementStreamByTag("wtf").findFirst().orElse(null);
    assertNull(empty);
}

@Test
public void getNamespacedElementsByTag() {
Document doc = Jsoup.parse("<div><abc:def id=1>Hello</abc:def></div>");
Expand Down Expand Up @@ -354,6 +377,18 @@ public void testGetElementsWithAttribute() {
assertEquals(0, none.size());
}

@Test
public void testGetElementStreamWithAttribute() {
    Document doc = Jsoup.parse("<div style='bold'><p title=qux><p><b style></b></p></div>");

    // Two elements carry a style attribute: the div (with a value) and the b (without one).
    Stream<Element> styledStream = doc.getElementStreamByAttribute("style");
    List<Element> styled = styledStream.collect(Collectors.toList());
    assertEquals(2, styled.size());
    assertEquals("div", styled.get(0).tagName());
    assertEquals("b", styled.get(1).tagName());

    // No element has a class attribute, so the stream yields nothing.
    Stream<Element> classedStream = doc.getElementStreamByAttribute("class");
    Element firstClassed = classedStream.findFirst().orElse(null);
    assertNull(firstClassed);
}

@Test
public void testGetElementsWithAttributeDash() {
Document doc = Jsoup.parse("<meta http-equiv=content-type value=utf8 id=1> <meta name=foo content=bar id=2> <div http-equiv=content-type value=utf8 id=3>");
Expand Down Expand Up @@ -1454,9 +1489,7 @@ public void testChainedRemoveAttributes() {
public void testLoopedRemoveAttributes() {
    String html = "<a one two three four>Text</a><p foo>Two</p>";
    Document doc = Jsoup.parse(html);
    // Clear the attributes of every element in the document, in a single pass.
    doc.getAllElementsStream().forEach(Element::clearAttributes);

    assertEquals("<a>Text</a>\n<p>Two</p>", doc.body().html());
}
Expand Down Expand Up @@ -2721,6 +2754,13 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) {
assertEquals("1", els.get(0).id());
}

@Test void getElementStreamMatchingOwnText() {
    Document doc = Jsoup.parse("<div id=1>One</div><div>Two</div>");

    // Only the first div has own text starting with a capital O.
    Stream<Element> matches = doc.body().getElementStreamMatchingOwnText("O\\w+");
    Element firstMatch = matches.findFirst().orElse(null);

    assertNotNull(firstMatch);
    assertEquals("1", firstMatch.id());
}

@Test void getElementsMatchingOwnTextValidation() {
Document doc = Jsoup.parse(reference);
Throwable ex = assertThrows(IllegalArgumentException.class,
Expand Down

0 comments on commit e1e35bb

Please sign in to comment.