Skip to content

Commit

Permalink
In :has(), check siblings vs descendants if required
Browse files Browse the repository at this point in the history
Fixes #2137
  • Loading branch information
jhy committed Jul 3, 2024
1 parent ad76a83 commit c3963d4
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 9 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
correctly. [2142](https://github.com/jhy/jsoup/issues/2142)
* When using `W3CDom` with a custom output Document, a Null Pointer Exception would be
thrown. [2114](https://github.com/jhy/jsoup/pull/2114)
* The `:has()` selector did not match correctly when using sibling combinators
(e.g. `h1:has(+h2)`). [2137](https://github.com/jhy/jsoup/issues/2137)

---

Expand Down
40 changes: 31 additions & 9 deletions src/main/java/org/jsoup/select/StructuralEvaluator.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,20 +55,42 @@ static class Has extends StructuralEvaluator {
ThreadLocal.withInitial(() -> new NodeIterator<>(new Element("html"), Element.class));
// the element here is just a placeholder so this can be final - gets set in restart()

private final boolean checkSiblings; // evaluating against siblings (or children)

/**
 * Creates a :has() evaluator around the supplied sub-clause evaluator, and records up front whether that
 * sub-clause needs to be tested against siblings rather than descendants.
 *
 * @param evaluator the evaluator for the :has() sub-clause
 */
public Has(Evaluator evaluator) {
    super(evaluator);
    // decided once at construction so matches() does not have to re-inspect the evaluator on every call
    this.checkSiblings = evalWantsSiblings(evaluator);
}

/**
 * Tests whether {@code element} satisfies this :has() clause, by running the wrapped evaluator against candidate
 * elements with {@code element} passed as the evaluation root. Returns true on the first candidate that matches,
 * and false if none do. The {@code root} argument is not read in this body.
 *
 * NOTE(review): this span appears to be a flattened diff hunk - the unconditional descendant walk at the top and
 * the checkSiblings branch below look like the before/after versions of the same body, and the braces do not
 * balance as shown. Confirm against the real file before relying on the exact statement order.
 */
@Override public boolean matches(Element root, Element element) {
// for :has, we only want to match children (or below), not the input element. And we want to minimize GCs
NodeIterator<Element> it = ThreadElementIter.get();

it.restart(element);
while (it.hasNext()) {
Element el = it.next();
if (el == element) continue; // don't match self, only descendants
if (evaluator.matches(element, el))
return true;
if (checkSiblings) { // evaluating against siblings
// candidates run from the first element sibling through to the last; the wrapped evaluator decides which qualify
for (Element sib = element.firstElementSibling(); sib != null; sib = sib.nextElementSibling()) {
if (sib != element && evaluator.matches(element, sib)) { // don't match against self
return true;
}
}
} else {
// otherwise we only want to match children (or below), and not the input element. And we want to minimize GCs so reusing the Iterator obj
NodeIterator<Element> it = ThreadElementIter.get();
// point the reused iterator at this element before walking
it.restart(element);
while (it.hasNext()) {
Element el = it.next();
if (el == element) continue; // don't match self, only descendants
if (evaluator.matches(element, el))
return true;
}
}
return false;
}

/*
 Decides whether a :has sub-clause should be evaluated against sibling elements instead of nested elements.
 Only a CombiningEvaluator can answer yes, and only when one of its direct member evaluators is a PreviousSibling
 or an ImmediatePreviousSibling. Members are inspected one level deep; anything else yields false.
 */
private static boolean evalWantsSiblings(Evaluator eval) {
    if (!(eval instanceof CombiningEvaluator))
        return false; // a non-combining evaluator carries no sibling combinator to look for

    for (Evaluator member : ((CombiningEvaluator) eval).evaluators) {
        boolean siblingCombinator = member instanceof PreviousSibling || member instanceof ImmediatePreviousSibling;
        if (siblingCombinator)
            return true;
    }
    return false;
}
Expand Down
15 changes: 15 additions & 0 deletions src/test/java/org/jsoup/select/SelectorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,21 @@ public void testByAttributeStarting(Locale locale) {
assertEquals("Two", divs.first().text());
}

@Test public void testHasSibling() {
    // https://github.com/jhy/jsoup/issues/2137
    Document headings = Jsoup.parse("<h1 id=1>One</h1> <h2>Two</h2> <h1>Three</h1>");

    // "+" combinator inside :has() - only the h1 with id 1 is expected
    Elements withNextH2 = headings.select("h1:has(+h2)");
    assertSelectedIds(withNextH2, "1");

    // "~" combinator inside :has() - again only the h1 with id 1 is expected
    Elements withLaterH1 = headings.select("h1:has(~h1)");
    assertSelectedIds(withLaterH1, "1");

    // nested with sibling
    Document nested = Jsoup.parse("<div id=1><p><i>One</i><i>Two</p><p><i>Three</p></div> <div><p><i>Four</div>");
    Elements outerDivs = nested.select("div:has(p:has(i:has(~i)))");
    assertSelectedIds(outerDivs, "1");
}

@MultiLocaleTest
public void testPseudoContains(Locale locale) {
Locale.setDefault(locale);
Expand Down

0 comments on commit c3963d4

Please sign in to comment.