Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Normalize SQL IN(?, ?, ...) statements to "in(?)" to reduce cardinality of db.statement attribute #10564

Merged
merged 6 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

package io.opentelemetry.instrumentation.api.incubator.semconv.db;

import java.util.regex.Pattern;

%%

%final
Expand Down Expand Up @@ -52,6 +54,10 @@ WHITESPACE = [ \t\r\n]+
// max length of the sanitized statement - SQLs longer than this will be trimmed
static final int LIMIT = 32 * 1024;

// Match on strings like "IN(?, ?, ...)"
private static final Pattern IN_STATEMENT_PATTERN = Pattern.compile("(\\sIN\\s*)\\(\\s*\\?\\s*(?:,\\s*\\?\\s*)*+\\)", Pattern.CASE_INSENSITIVE);
private static final String IN_STATEMENT_NORMALIZED = "$1(?)";

private final StringBuilder builder = new StringBuilder();

private void appendCurrentFragment() {
Expand Down Expand Up @@ -278,7 +284,11 @@ WHITESPACE = [ \t\r\n]+
builder.delete(LIMIT, builder.length());
}
String fullStatement = builder.toString();
return operation.getResult(fullStatement);

// Normalize all 'in (?, ?, ...)' statements to in (?) to reduce cardinality
String normalizedStatement = IN_STATEMENT_PATTERN.matcher(fullStatement).replaceAll(IN_STATEMENT_NORMALIZED);

return operation.getResult(normalizedStatement);
}

%}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,19 @@ void randomBytesDontCauseExceptionsOrTimeouts() {
}
}

@Test
public void longInStatementDoesntCauseStackOverflow() {
StringBuilder s = new StringBuilder("select col from table where col in (");
for (int i = 0; i < 10000; i++) {
s.append("?,");
}
s.append("?)");

String sanitized = SqlStatementSanitizer.create(true).sanitize(s.toString()).getFullStatement();

assertThat(sanitized).isEqualTo("select col from table where col in (?)");
}

static class SqlArgs implements ArgumentsProvider {

@Override
Expand Down Expand Up @@ -271,7 +284,11 @@ public Stream<? extends Arguments> provideArguments(ExtensionContext context) th
Arguments.of("select col from table1 as t1, table2 as t2", expect("SELECT", null)),
Arguments.of(
"select col from table where col in (1, 2, 3)",
expect("select col from table where col in (?, ?, ?)", "SELECT", "table")),
expect("select col from table where col in (?)", "SELECT", "table")),
Arguments.of(
"select 'a' IN(x, 'b') from table where col in (1) and z IN( '3', '4' )",
expect(
"select ? IN(x, ?) from table where col in (?) and z IN(?)", "SELECT", "table")),
Arguments.of("select col from table order by col, col2", expect("SELECT", "table")),
Arguments.of("select ąś∂ń© from źćļńĶ order by col, col2", expect("SELECT", "źćļńĶ")),
Arguments.of("select 12345678", expect("select ?", "SELECT", null)),
Expand All @@ -298,6 +315,9 @@ public Stream<? extends Arguments> provideArguments(ExtensionContext context) th
"delete from `my table` where something something", expect("DELETE", "my table")),
Arguments.of(
"delete from \"my table\" where something something", expect("DELETE", "my table")),
Arguments.of(
"delete from foo where x IN (1,2,3)",
expect("delete from foo where x IN (?)", "DELETE", "foo")),
Arguments.of("delete from 12345678", expect("delete from ?", "DELETE", null)),
Arguments.of("delete (((", expect("delete (((", "DELETE", null)),

Expand All @@ -307,6 +327,12 @@ public Stream<? extends Arguments> provideArguments(ExtensionContext context) th
Arguments.of(
"update `my table` set answer=42",
expect("update `my table` set answer=?", "UPDATE", "my table")),
Arguments.of(
"update `my table` set answer=42 where x IN('a', 'b') AND y In ('a', 'b')",
expect(
"update `my table` set answer=? where x IN(?) AND y In (?)",
"UPDATE",
"my table")),
Arguments.of(
"update \"my table\" set answer=42",
expect("update \"my table\" set answer=?", "UPDATE", "my table")),
Expand Down
Loading