Skip to content

Commit

Permalink
Fix StackOverflowErrors on extremely deeply nested Elements
Browse files Browse the repository at this point in the history
Removed recursion on parent() in cssSelector, which could overflow.

Refactored ImmediateParent into a run of parents, so that a very deep chain of And(ImmediateParent, eval) evaluators would not be constructed, which could stack overflow when evaluated.

Fixes #2001
  • Loading branch information
jhy committed Sep 28, 2023
1 parent 17df30a commit 9170b1d
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 7 deletions.
4 changes: 4 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ Release 1.16.2 [PENDING]
* Bugfix: in Jsoup.connect(), a DELETE method request did not support a request body.
<https://github.com/jhy/jsoup/issues/1972>

* Bugfix: when calling Element.cssSelector() on an extremely deeply nested element, a StackOverflowError could occur.
Further, a StackOverflowError may occur when running the query.
<https://github.com/jhy/jsoup/issues/2001>

* Change: removed previously deprecated methods Document#normalise, Element#forEach(org.jsoup.helper.Consumer<>),
Node#forEach(org.jsoup.helper.Consumer<>), and the org.jsoup.helper.Consumer interface; the latter being a
previously required compatibility shim prior to Android's de-sugaring support.
Expand Down
12 changes: 11 additions & 1 deletion src/main/java/org/jsoup/nodes/Element.java
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,16 @@ public String cssSelector() {
}
}

StringBuilder selector = StringUtil.borrowBuilder();
Element el = this;
while (el != null && !(el instanceof Document)) {
selector.insert(0, el.cssSelectorComponent());
el = el.parent();
}
return StringUtil.releaseBuilder(selector);
}

private String cssSelectorComponent() {
// Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag
String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|");
StringBuilder selector = StringUtil.borrowBuilder().append(tagName);
Expand All @@ -859,7 +869,7 @@ public String cssSelector() {
selector.append(String.format(
":nth-child(%d)", elementSiblingIndex() + 1));

return parent().cssSelector() + StringUtil.releaseBuilder(selector);
return StringUtil.releaseBuilder(selector);
}

/**
Expand Down
6 changes: 5 additions & 1 deletion src/main/java/org/jsoup/select/QueryParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.jsoup.select.StructuralEvaluator.ImmediateParentRun;
import static org.jsoup.internal.Normalizer.normalize;

/**
Expand Down Expand Up @@ -107,7 +108,10 @@ private void combinator(char combinator) {
// for most combinators: change the current eval into an AND of the current eval and the new eval
switch (combinator) {
case '>':
currentEval = new CombiningEvaluator.And(new StructuralEvaluator.ImmediateParent(currentEval), newEval);
ImmediateParentRun run = currentEval instanceof ImmediateParentRun ?
(ImmediateParentRun) currentEval : new ImmediateParentRun(currentEval);
run.add(newEval);
currentEval = run;
break;
case ' ':
currentEval = new CombiningEvaluator.And(new StructuralEvaluator.Parent(currentEval), newEval);
Expand Down
48 changes: 48 additions & 0 deletions src/main/java/org/jsoup/select/StructuralEvaluator.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package org.jsoup.select;

import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;

import java.util.ArrayList;
import java.util.IdentityHashMap;

/**
Expand Down Expand Up @@ -139,6 +141,10 @@ public String toString() {
}
}

/**
@deprecated replaced by {@link ImmediateParentRun}
*/
@Deprecated
static class ImmediateParent extends StructuralEvaluator {
public ImmediateParent(Evaluator evaluator) {
super(evaluator);
Expand All @@ -163,6 +169,48 @@ public String toString() {
}
}

/**
Holds a list of evaluators for one > two > three immediate parent matches, and the final direct evaluator under
test. To match, these are effectively ANDed together, starting from the last, matching up to the first.
*/
static class ImmediateParentRun extends Evaluator {
final ArrayList<Evaluator> evaluators = new ArrayList<>();
int cost = 2;

public ImmediateParentRun(Evaluator evaluator) {
evaluators.add(evaluator);
cost += evaluator.cost();
}

void add(Evaluator evaluator) {
evaluators.add(evaluator);
cost += evaluator.cost();
}

@Override
public boolean matches(Element root, Element element) {
// evaluate from last to first
for (int i = evaluators.size() -1; i >= 0; --i) {
if (element == null)
return false;
Evaluator eval = evaluators.get(i);
if (!eval.matches(root, element))
return false;
element = element.parent();
}
return true;
}

@Override protected int cost() {
return cost;
}

@Override
public String toString() {
return StringUtil.join(evaluators, " > ");
}
}

static class PreviousSibling extends StructuralEvaluator {
public PreviousSibling(Evaluator evaluator) {
super(evaluator);
Expand Down
24 changes: 24 additions & 0 deletions src/test/java/org/jsoup/nodes/ElementTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2566,6 +2566,30 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) {
assertEquals("div", el.cssSelector());
}

@Test void cssSelectorDoesntStackOverflow() {
// https://github.com/jhy/jsoup/issues/2001
Element element = new Element("element");
Element root = element;

// Create a long chain of elements
for (int i = 0; i < 5000; i++) {
Element elem2 = new Element("element" + i);
element.appendChild(elem2);
element = elem2;
}

String selector = element.cssSelector(); // would overflow in cssSelector parent() recurse
Evaluator eval = QueryParser.parse(selector);

assertEquals(eval.toString(), selector);
assertTrue(selector.startsWith("element > element0 >"));
assertTrue(selector.endsWith("8 > element4999"));

Elements elements = root.select(selector); // would overflow in nested And ImmediateParent chain eval
assertEquals(1, elements.size());
assertEquals(element, elements.first());
}

@Test void orphanSiblings() {
Element el = new Element("div");
assertEquals(0, el.siblingElements().size());
Expand Down
23 changes: 18 additions & 5 deletions src/test/java/org/jsoup/select/QueryParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
* @author Jonathan Hedley
*/
public class QueryParserTest {

@Test public void testConsumeSubQuery() {
Document doc = Jsoup.parse("<html><head>h</head><body>" +
"<li><strong>l1</strong></li>" +
Expand All @@ -25,6 +24,17 @@ public class QueryParserTest {
assertEquals("l2", doc.select(">body>p>strong,>body>*>li>strong").text());
}

@Test public void testImmediateParentRun() {
String query = "div > p > bold.brass";
Evaluator eval1 = QueryParser.parse(query);
assertEquals(query, eval1.toString());

StructuralEvaluator.ImmediateParentRun run = (StructuralEvaluator.ImmediateParentRun) eval1;
assertTrue(run.evaluators.get(0) instanceof Evaluator.Tag);
assertTrue(run.evaluators.get(1) instanceof Evaluator.Tag);
assertTrue(run.evaluators.get(2) instanceof CombiningEvaluator.And);
}

@Test public void testOrGetsCorrectPrecedence() {
// tests that a selector "a b, c d, e f" evals to (a AND b) OR (c AND d) OR (e AND f)"
// top level or, three child ands
Expand All @@ -42,17 +52,20 @@ public class QueryParserTest {
}

@Test public void testParsesMultiCorrectly() {
String query = ".foo > ol, ol > li + li";
String query = ".foo.qux > ol.bar, ol > li + li";
Evaluator eval = QueryParser.parse(query);
assertTrue(eval instanceof CombiningEvaluator.Or);
CombiningEvaluator.Or or = (CombiningEvaluator.Or) eval;
assertEquals(2, or.evaluators.size());

CombiningEvaluator.And andLeft = (CombiningEvaluator.And) or.evaluators.get(0);
StructuralEvaluator.ImmediateParentRun run = (StructuralEvaluator.ImmediateParentRun) or.evaluators.get(0);
CombiningEvaluator.And andRight = (CombiningEvaluator.And) or.evaluators.get(1);

assertEquals(".foo > ol", andLeft.toString());
assertEquals(2, andLeft.evaluators.size());
assertEquals(".foo.qux > ol.bar", run.toString());
assertEquals(2, run.evaluators.size());
Evaluator runAnd = run.evaluators.get(0);
assertTrue(runAnd instanceof CombiningEvaluator.And);
assertEquals(".foo.qux", runAnd.toString());
assertEquals("ol > li + li", andRight.toString());
assertEquals(2, andRight.evaluators.size());
assertEquals(query, eval.toString());
Expand Down

0 comments on commit 9170b1d

Please sign in to comment.