Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: safer insert RLS #20323

Merged
merged 2 commits into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions superset/models/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,12 @@
)
from superset.extensions import feature_flag_manager
from superset.jinja_context import BaseTemplateProcessor
from superset.sql_parse import has_table_query, insert_rls, ParsedQuery, sanitize_clause
from superset.sql_parse import (
has_table_query,
insert_rls_in_predicate,
ParsedQuery,
sanitize_clause,
)
from superset.superset_typing import (
AdhocMetric,
Column as ColumnTyping,
Expand Down Expand Up @@ -128,7 +133,7 @@ def validate_adhoc_subquery(
level=ErrorLevel.ERROR,
)
)
statement = insert_rls(statement, database_id, default_schema)
statement = insert_rls_in_predicate(statement, database_id, default_schema)
statements.append(statement)

return ";\n".join(str(statement) for statement in statements)
Expand Down
19 changes: 17 additions & 2 deletions superset/sql_lab.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,12 @@
from superset.models.core import Database
from superset.models.sql_lab import Query
from superset.result_set import SupersetResultSet
from superset.sql_parse import CtasMethod, insert_rls, ParsedQuery
from superset.sql_parse import (
CtasMethod,
insert_rls_as_subquery,
insert_rls_in_predicate,
ParsedQuery,
)
from superset.sqllab.limiting_factor import LimitingFactor
from superset.sqllab.utils import write_ipc_buffer
from superset.utils.celery import session_scope
Expand Down Expand Up @@ -191,7 +196,7 @@ def get_sql_results( # pylint: disable=too-many-arguments
return handle_query_error(ex, query, session)


def execute_sql_statement( # pylint: disable=too-many-arguments
def execute_sql_statement( # pylint: disable=too-many-arguments, too-many-locals
sql_statement: str,
query: Query,
session: Session,
Expand All @@ -205,6 +210,16 @@ def execute_sql_statement( # pylint: disable=too-many-arguments

parsed_query = ParsedQuery(sql_statement)
if is_feature_enabled("RLS_IN_SQLLAB"):
# There are two ways to insert RLS: either replacing the table with a subquery
# that has the RLS, or appending the RLS to the ``WHERE`` clause. The former is
# safer, but not supported in all databases.
insert_rls = (
insert_rls_as_subquery
if database.db_engine_spec.allows_subqueries
and database.db_engine_spec.allows_alias_in_select
else insert_rls_in_predicate
)

# Insert any applicable RLS predicates
parsed_query = ParsedQuery(
str(
Expand Down
109 changes: 106 additions & 3 deletions superset/sql_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
Punctuation,
String,
Whitespace,
Wildcard,
)
from sqlparse.utils import imt

Expand Down Expand Up @@ -660,26 +661,128 @@ def get_rls_for_table(
return None

rls = sqlparse.parse(predicate)[0]
add_table_name(rls, str(dataset))
add_table_name(rls, table.table)

return rls


def insert_rls(
def insert_rls_as_subquery(
    token_list: TokenList,
    database_id: int,
    default_schema: Optional[str],
) -> TokenList:
    """
    Update a statement in place, applying any associated RLS predicates.

    The RLS predicate is applied as a subquery replacing the original table:

        before: SELECT * FROM some_table WHERE 1=1
        after:  SELECT * FROM (
                  SELECT * FROM some_table WHERE some_table.id=42
                ) AS some_table
                WHERE 1=1

    This method is safer than ``insert_rls_in_predicate``, but doesn't work in all
    databases.

    :param token_list: parsed statement (or nested token list) to rewrite; its
        ``tokens`` list is modified in place
    :param database_id: passed through unchanged to ``get_rls_for_table``
    :param default_schema: passed through unchanged to ``get_rls_for_table``
    :returns: the same ``token_list`` object that was passed in
    """
    rls: Optional[TokenList] = None
    state = InsertRLSState.SCANNING
    for token in token_list.tokens:
        # Recurse into child token list
        if isinstance(token, TokenList):
            # Slots are only reassigned, never inserted or removed, so the length
            # of the list being iterated does not change.
            i = token_list.tokens.index(token)
            token_list.tokens[i] = insert_rls_as_subquery(
                token,
                database_id,
                default_schema,
            )

        # Found a source keyword (FROM/JOIN)
        if imt(token, m=[(Keyword, "FROM"), (Keyword, "JOIN")]):
            state = InsertRLSState.SEEN_SOURCE

        # Found identifier/keyword after FROM/JOIN, test for table
        elif state == InsertRLSState.SEEN_SOURCE and (
            isinstance(token, Identifier) or token.ttype == Keyword
        ):
            rls = get_rls_for_table(token, database_id, default_schema)
            # A falsy result means there is nothing to apply for this token.
            if rls:
                # replace table with subquery
                # The subquery alias is the value of the identifier's last
                # sub-token, or the keyword's own text; presumably so the rest of
                # the statement can keep referring to the same name.
                subquery_alias = (
                    token.tokens[-1].value
                    if isinstance(token, Identifier)
                    else token.value
                )
                i = token_list.tokens.index(token)

                # strip alias from table name
                # Everything from the whitespace position reported by
                # ``token_next_by`` onwards is dropped, because the alias is
                # re-attached to the wrapping subquery below.
                if isinstance(token, Identifier) and token.has_alias():
                    whitespace_index = token.token_next_by(t=Whitespace)[0]
                    token.tokens = token.tokens[:whitespace_index]

                # Builds: (SELECT * FROM <table> WHERE <rls>) AS <alias>
                token_list.tokens[i] = Identifier(
                    [
                        Parenthesis(
                            [
                                Token(Punctuation, "("),
                                Token(DML, "SELECT"),
                                Token(Whitespace, " "),
                                Token(Wildcard, "*"),
                                Token(Whitespace, " "),
                                Token(Keyword, "FROM"),
                                Token(Whitespace, " "),
                                token,
                                Token(Whitespace, " "),
                                Where(
                                    [
                                        Token(Keyword, "WHERE"),
                                        Token(Whitespace, " "),
                                        rls,
                                    ]
                                ),
                                Token(Punctuation, ")"),
                            ]
                        ),
                        Token(Whitespace, " "),
                        Token(Keyword, "AS"),
                        Token(Whitespace, " "),
                        Identifier([Token(Name, subquery_alias)]),
                    ]
                )
                state = InsertRLSState.SCANNING

        # Found nothing, leaving source
        elif state == InsertRLSState.SEEN_SOURCE and token.ttype != Whitespace:
            state = InsertRLSState.SCANNING

    return token_list


def insert_rls_in_predicate(
token_list: TokenList,
database_id: int,
default_schema: Optional[str],
) -> TokenList:
"""
Update a statement inplace applying any associated RLS predicates.

The RLS predicate is ``AND``ed to any existing predicates:

before: SELECT * FROM some_table WHERE 1=1
after: SELECT * FROM some_table WHERE ( 1=1) AND some_table.id=42

"""
rls: Optional[TokenList] = None
state = InsertRLSState.SCANNING
for token in token_list.tokens:
# Recurse into child token list
if isinstance(token, TokenList):
i = token_list.tokens.index(token)
token_list.tokens[i] = insert_rls(token, database_id, default_schema)
token_list.tokens[i] = insert_rls_in_predicate(
token,
database_id,
default_schema,
)

# Found a source keyword (FROM/JOIN)
if imt(token, m=[(Keyword, "FROM"), (Keyword, "JOIN")]):
Expand Down
11 changes: 7 additions & 4 deletions tests/unit_tests/sql_lab_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def test_execute_sql_statement_with_rls(
cursor = mocker.MagicMock()
SupersetResultSet = mocker.patch("superset.sql_lab.SupersetResultSet")
mocker.patch(
"superset.sql_lab.insert_rls",
"superset.sql_lab.insert_rls_as_subquery",
return_value=sqlparse.parse("SELECT * FROM sales WHERE organization_id=42")[0],
)
mocker.patch("superset.sql_lab.is_feature_enabled", return_value=True)
Expand All @@ -112,12 +112,12 @@ def test_execute_sql_statement_with_rls(
SupersetResultSet.assert_called_with([(42,)], cursor.description, db_engine_spec)


def test_sql_lab_insert_rls(
def test_sql_lab_insert_rls_as_subquery(
mocker: MockerFixture,
session: Session,
) -> None:
"""
Integration test for `insert_rls`.
Integration test for `insert_rls_as_subquery`.
"""
from flask_appbuilder.security.sqla.models import Role, User

Expand Down Expand Up @@ -213,4 +213,7 @@ def test_sql_lab_insert_rls(
| 2 | 8 |
| 3 | 9 |""".strip()
)
assert query.executed_sql == "SELECT c FROM t WHERE (t.c > 5)\nLIMIT 6"
assert (
query.executed_sql
== "SELECT c FROM (SELECT * FROM t WHERE (t.c > 5)) AS t\nLIMIT 6"
)
Loading
Loading