Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: allow subquery in ad-hoc SQL #19242

Merged
merged 14 commits into from
Mar 18, 2022
1 change: 1 addition & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,7 @@ def _try_json_readsha(filepath: str, length: int) -> Optional[str]:
"ALLOW_FULL_CSV_EXPORT": False,
"UX_BETA": False,
"GENERIC_CHART_AXES": False,
"ALLOW_ADHOC_SUBQUERY": False,
}

# Feature flags may also be set via 'SUPERSET_FEATURE_' prefixed environment vars.
Expand Down
13 changes: 9 additions & 4 deletions superset/connectors/sqla/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
from superset.connectors.sqla.utils import (
get_physical_table_metadata,
get_virtual_table_metadata,
validate_adhoc_subquery,
)
from superset.datasets.models import Dataset as NewDataset
from superset.db_engine_specs.base import BaseEngineSpec, CTE_ALIAS, TimestampExpression
Expand Down Expand Up @@ -885,7 +886,8 @@ def adhoc_metric_to_sqla(
elif expression_type == utils.AdhocMetricExpressionType.SQL:
tp = self.get_template_processor()
expression = tp.process_template(cast(str, metric["sqlExpression"]))
sqla_metric = literal_column(expression)
if validate_adhoc_subquery(expression):
sqla_metric = literal_column(expression)
else:
raise QueryObjectValidationError("Adhoc metric expressionType is invalid")

Expand All @@ -906,9 +908,11 @@ def adhoc_column_to_sqla(
"""
label = utils.get_column_name(col)
expression = col["sqlExpression"]
sqla_metric = None
if template_processor and expression:
expression = template_processor.process_template(expression)
sqla_metric = literal_column(expression)
if expression and validate_adhoc_subquery(expression):
sqla_metric = literal_column(expression)
lilykuang marked this conversation as resolved.
Show resolved Hide resolved
return self.make_sqla_column_compatible(sqla_metric, label)

def make_sqla_column_compatible(
Expand Down Expand Up @@ -1165,7 +1169,7 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma
# if groupby field equals a selected column
elif selected in columns_by_name:
outer = columns_by_name[selected].get_sqla_col()
else:
elif validate_adhoc_subquery(selected):
lilykuang marked this conversation as resolved.
Show resolved Hide resolved
outer = literal_column(f"({selected})")
outer = self.make_sqla_column_compatible(outer, selected)
else:
Expand All @@ -1180,7 +1184,7 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma
for selected in columns:
select_exprs.append(
columns_by_name[selected].get_sqla_col()
if selected in columns_by_name
if selected in columns_by_name and validate_adhoc_subquery(selected)
lilykuang marked this conversation as resolved.
Show resolved Hide resolved
else self.make_sqla_column_compatible(literal_column(selected))
)
metrics_exprs = []
Expand Down Expand Up @@ -1388,6 +1392,7 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma
db_engine_spec.allows_alias_in_select
and db_engine_spec.allows_hidden_cc_in_orderby
and col.name in [select_col.name for select_col in select_exprs]
and validate_adhoc_subquery(str(col.expression))
):
col = literal_column(col.name)
direction = asc if ascending else desc
Expand Down
29 changes: 28 additions & 1 deletion superset/connectors/sqla/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from contextlib import closing
from typing import Dict, List, Optional, TYPE_CHECKING

import sqlparse
from flask_babel import lazy_gettext as _
from sqlalchemy.exc import NoSuchTableError
from sqlalchemy.sql.type_api import TypeEngine
Expand All @@ -28,7 +29,7 @@
)
from superset.models.core import Database
from superset.result_set import SupersetResultSet
from superset.sql_parse import ParsedQuery
from superset.sql_parse import has_table_query, ParsedQuery

if TYPE_CHECKING:
from superset.connectors.sqla.models import SqlaTable
Expand Down Expand Up @@ -119,3 +120,29 @@ def get_virtual_table_metadata(dataset: "SqlaTable") -> List[Dict[str, str]]:
except Exception as ex:
raise SupersetGenericDBErrorException(message=str(ex)) from ex
return cols


def validate_adhoc_subquery(raw_sql: str) -> bool:
"""
check adhoc SQL contains sub-queries or nested sub-queries with table
:param raw_sql: adhoc sql expression
:return True if sql contains no sub-queries or nested sub-queries with table
:raise SupersetSecurityException if sql contains sub-queries or
nested sub-queries with table
"""
# pylint: disable=import-outside-toplevel
from superset import is_feature_enabled

if is_feature_enabled("ALLOW_ADHOC_SUBQUERY"):
return True

for statement in sqlparse.parse(raw_sql):
if has_table_query(statement):
raise SupersetSecurityException(
SupersetError(
error_type=SupersetErrorType.ADHOC_SUBQUERY_NOT_ALLOWED_ERROR,
message=_("Custom SQL fields cannot contain sub-queries."),
level=ErrorLevel.ERROR,
)
)
return True
lilykuang marked this conversation as resolved.
Show resolved Hide resolved
3 changes: 3 additions & 0 deletions superset/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ class SupersetErrorType(str, Enum):
SQLLAB_TIMEOUT_ERROR = "SQLLAB_TIMEOUT_ERROR"
RESULTS_BACKEND_ERROR = "RESULTS_BACKEND_ERROR"
ASYNC_WORKERS_ERROR = "ASYNC_WORKERS_ERROR"
ADHOC_SUBQUERY_NOT_ALLOWED_ERROR = "ADHOC_SUBQUERY_NOT_ALLOWED_ERROR"

# Generic errors
GENERIC_COMMAND_ERROR = "GENERIC_COMMAND_ERROR"
Expand Down Expand Up @@ -138,10 +139,12 @@ class SupersetErrorType(str, Enum):
1034: _("The port number is invalid."),
1035: _("Failed to start remote query on a worker."),
1036: _("The database was deleted."),
1037: _("Custom SQL fields cannot contain sub-queries."),
}


ERROR_TYPES_TO_ISSUE_CODES_MAPPING = {
SupersetErrorType.ADHOC_SUBQUERY_NOT_ALLOWED_ERROR: [1037],
SupersetErrorType.BACKEND_TIMEOUT_ERROR: [1000, 1001],
SupersetErrorType.GENERIC_DB_ENGINE_ERROR: [1002],
SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR: [1003, 1004],
Expand Down
2 changes: 2 additions & 0 deletions tests/unit_tests/sql_parse_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1208,6 +1208,8 @@ def test_sqlparse_issue_652():
("SELECT * FROM (SELECT 1 AS foo, 2 AS bar) ORDER BY foo ASC, bar", False),
("SELECT * FROM other_table", True),
("extract(HOUR from from_unixtime(hour_ts)", False),
("(SELECT * FROM table)", True),
("(SELECT COUNT(DISTINCT name) from birth_names)", True),
],
)
def test_has_table_query(sql: str, expected: bool) -> None:
Expand Down