Skip to content

Commit

Permalink
[fix] Automatically add relevant Jinja methods to cache key if present (
Browse files Browse the repository at this point in the history
#9572)

* [fix] Adding URL params to cache key if present

* [cache] Wrapping Jinja methods

Co-authored-by: John Bodley <john.bodley@airbnb.com>
  • Loading branch information
john-bodley and John Bodley committed Apr 24, 2020
1 parent 7386b01 commit 955a4fe
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 110 deletions.
2 changes: 2 additions & 0 deletions UPDATING.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ assists people when migrating to a new version.

## Next

* [9572](https://github.com/apache/incubator-superset/pull/9572): a change which by defau;t means that the Jinja `current_user_id`, `current_username`, and `url_param` context calls no longer need to be wrapped via `cache_key_wrapper` in order to be included in the cache key. The `cache_key_wrapper` function should only be required for Jinja add-ons.

* [8867](https://github.com/apache/incubator-superset/pull/8867): a change which adds the `tmp_schema_name` column to the `query` table which requires locking the table. Given the `query` table is heavily used performance may be degraded during the migration. Scheduled downtime may be advised.

* [9238](https://github.com/apache/incubator-superset/pull/9238): the config option `TIME_GRAIN_FUNCTIONS` has been renamed to `TIME_GRAIN_EXPRESSIONS` to better reflect the content of the dictionary.
Expand Down
8 changes: 4 additions & 4 deletions docs/sqllab.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,15 @@ Superset's Jinja context:

`Jinja's builtin filters <http://jinja.pocoo.org/docs/dev/templates/>`_ can be also be applied where needed.

.. autofunction:: superset.jinja_context.current_user_id
.. autofunction:: superset.jinja_context.ExtraCache.current_user_id

.. autofunction:: superset.jinja_context.current_username
.. autofunction:: superset.jinja_context.ExtraCache.current_username

.. autofunction:: superset.jinja_context.url_param
.. autofunction:: superset.jinja_context.ExtraCache.url_param

.. autofunction:: superset.jinja_context.filter_values

.. autofunction:: superset.jinja_context.CacheKeyWrapper.cache_key_wrapper
.. autofunction:: superset.jinja_context.ExtraCache.cache_key_wrapper

.. autoclass:: superset.jinja_context.PrestoTemplateProcessor
:members:
Expand Down
25 changes: 12 additions & 13 deletions superset/connectors/sqla/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
from superset.constants import NULL_STRING
from superset.db_engine_specs.base import TimestampExpression
from superset.exceptions import DatabaseNotFound
from superset.jinja_context import get_template_processor
from superset.jinja_context import ExtraCache, get_template_processor
from superset.models.annotations import Annotation
from superset.models.core import Database
from superset.models.helpers import AuditMixinNullable, QueryResult
Expand Down Expand Up @@ -1216,18 +1216,17 @@ def query_datasources_by_name(
def default_query(qry) -> Query:
return qry.filter_by(is_sqllab_view=False)

def has_calls_to_cache_key_wrapper(self, query_obj: Dict[str, Any]) -> bool:
def has_extra_cache_key_calls(self, query_obj: Dict[str, Any]) -> bool:
"""
Detects the presence of calls to `cache_key_wrapper` in items in query_obj that
Detects the presence of calls to `ExtraCache` methods in items in query_obj that
can be templated. If any are present, the query must be evaluated to extract
additional keys for the cache key. This method is needed to avoid executing
the template code unnecessarily, as it may contain expensive calls, e.g. to
extract the latest partition of a database.
additional keys for the cache key. This method is needed to avoid executing the
template code unnecessarily, as it may contain expensive calls, e.g. to extract
the latest partition of a database.
:param query_obj: query object to analyze
:return: True if at least one item calls `cache_key_wrapper`, otherwise False
:return: True if there are call(s) to an `ExtraCache` method, False otherwise
"""
regex = re.compile(r"\{\{.*cache_key_wrapper\(.*\).*\}\}")
templatable_statements: List[str] = []
if self.sql:
templatable_statements.append(self.sql)
Expand All @@ -1239,20 +1238,20 @@ def has_calls_to_cache_key_wrapper(self, query_obj: Dict[str, Any]) -> bool:
if "having" in extras:
templatable_statements.append(extras["having"])
for statement in templatable_statements:
if regex.search(statement):
if ExtraCache.regex.search(statement):
return True
return False

def get_extra_cache_keys(self, query_obj: Dict[str, Any]) -> List[Hashable]:
"""
The cache key of a SqlaTable needs to consider any keys added by the parent class
and any keys added via `cache_key_wrapper`.
The cache key of a SqlaTable needs to consider any keys added by the parent
class and any keys added via `ExtraCache`.
:param query_obj: query object to analyze
:return: True if at least one item calls `cache_key_wrapper`, otherwise False
:return: The extra cache keys
"""
extra_cache_keys = super().get_extra_cache_keys(query_obj)
if self.has_calls_to_cache_key_wrapper(query_obj):
if self.has_extra_cache_key_calls(query_obj):
sqla_query = self.get_sqla_query(**query_obj)
extra_cache_keys += sqla_query.extra_cache_keys
return extra_cache_keys
Expand Down
161 changes: 94 additions & 67 deletions superset/jinja_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"""Defines the templating context for SQL Lab"""
import inspect
import json
import re
from typing import Any, List, Optional, Tuple

from flask import g, request
Expand All @@ -27,51 +28,6 @@
from superset.utils.core import convert_legacy_filters_into_adhoc, merge_extra_filters


def url_param(param: str, default: Optional[str] = None) -> Optional[Any]:
"""Read a url or post parameter and use it in your SQL Lab query
When in SQL Lab, it's possible to add arbitrary URL "query string"
parameters, and use those in your SQL code. For instance you can
alter your url and add `?foo=bar`, as in
`{domain}/superset/sqllab?foo=bar`. Then if your query is something like
SELECT * FROM foo = '{{ url_param('foo') }}', it will be parsed at
runtime and replaced by the value in the URL.
As you create a visualization form this SQL Lab query, you can pass
parameters in the explore view as well as from the dashboard, and
it should carry through to your queries.
Default values for URL parameters can be defined in chart metdata by
adding the key-value pair `url_params: {'foo': 'bar'}`
:param param: the parameter to lookup
:param default: the value to return in the absence of the parameter
"""
if request.args.get(param):
return request.args.get(param, default)
# Supporting POST as well as get
form_data = request.form.get("form_data")
if isinstance(form_data, str):
form_data = json.loads(form_data)
url_params = form_data.get("url_params") or {}
return url_params.get(param, default)
return default


def current_user_id() -> Optional[int]:
"""The id of the user who is currently logged in"""
if hasattr(g, "user") and g.user:
return g.user.id
return None


def current_username() -> Optional[str]:
"""The username of the user who is currently logged in"""
if g.user:
return g.user.username
return None


def filter_values(column: str, default: Optional[str] = None) -> List[str]:
""" Gets a values for a particular filter as a list
Expand Down Expand Up @@ -122,33 +78,63 @@ def filter_values(column: str, default: Optional[str] = None) -> List[str]:
return []


class CacheKeyWrapper: # pylint: disable=too-few-public-methods
""" Dummy class that exposes a method used to store additional values used in
calculation of query object cache keys"""
class ExtraCache:
"""
Dummy class that exposes a method used to store additional values used in
calculation of query object cache keys.
"""

# Regular expression for detecting the presence of templated methods which could
# be added to the cache key.
regex = re.compile(
r"\{\{.*("
r"current_user_id\(.*\)|"
r"current_username\(.*\)|"
r"cache_key_wrapper\(.*\)|"
r"url_param\(.*\)"
r").*\}\}"
)

def __init__(self, extra_cache_keys: Optional[List[Any]] = None):
self.extra_cache_keys = extra_cache_keys

def current_user_id(self, add_to_cache_keys: bool = True) -> Optional[int]:
"""
Return the user ID of the user who is currently logged in.
:param add_to_cache_keys: Whether the value should be included in the cache key
:returns: The user ID
"""

if hasattr(g, "user") and g.user:
if add_to_cache_keys:
self.cache_key_wrapper(g.user.id)
return g.user.id
return None

def current_username(self, add_to_cache_keys: bool = True) -> Optional[str]:
"""
Return the username of the user who is currently logged in.
:param add_to_cache_keys: Whether the value should be included in the cache key
:returns: The username
"""

if g.user:
if add_to_cache_keys:
self.cache_key_wrapper(g.user.username)
return g.user.username
return None

def cache_key_wrapper(self, key: Any) -> Any:
""" Adds values to a list that is added to the query object used for calculating
a cache key.
"""
Adds values to a list that is added to the query object used for calculating a
cache key.
This is needed if the following applies:
- Caching is enabled
- The query is dynamically generated using a jinja template
- A username or similar is used as a filter in the query
Example when using a SQL query as a data source ::
SELECT action, count(*) as times
FROM logs
WHERE logged_in_user = '{{ cache_key_wrapper(current_username()) }}'
GROUP BY action
This will ensure that the query results that were cached by `user_1` will
**not** be seen by `user_2`, as the `cache_key` for the query will be
different. ``cache_key_wrapper`` can be used similarly for regular table data
sources by adding a `Custom SQL` filter.
- A `JINJA_CONTEXT_ADDONS` or similar is used as a filter in the query
:param key: Any value that should be considered when calculating the cache key
:return: the original value ``key`` passed to the function
Expand All @@ -157,6 +143,44 @@ def cache_key_wrapper(self, key: Any) -> Any:
self.extra_cache_keys.append(key)
return key

def url_param(
self, param: str, default: Optional[str] = None, add_to_cache_keys: bool = True
) -> Optional[Any]:
"""
Read a url or post parameter and use it in your SQL Lab query.
When in SQL Lab, it's possible to add arbitrary URL "query string" parameters,
and use those in your SQL code. For instance you can alter your url and add
`?foo=bar`, as in `{domain}/superset/sqllab?foo=bar`. Then if your query is
something like SELECT * FROM foo = '{{ url_param('foo') }}', it will be parsed
at runtime and replaced by the value in the URL.
As you create a visualization form this SQL Lab query, you can pass parameters
in the explore view as well as from the dashboard, and it should carry through
to your queries.
Default values for URL parameters can be defined in chart metadata by adding the
key-value pair `url_params: {'foo': 'bar'}`
:param param: the parameter to lookup
:param default: the value to return in the absence of the parameter
:param add_to_cache_keys: Whether the value should be included in the cache key
:returns: The URL parameters
"""

if request.args.get(param):
return request.args.get(param, default)
# Supporting POST as well as get
form_data = request.form.get("form_data")
if isinstance(form_data, str):
form_data = json.loads(form_data)
url_params = form_data.get("url_params") or {}
result = url_params.get(param, default)
if add_to_cache_keys:
self.cache_key_wrapper(result)
return result
return default


class BaseTemplateProcessor: # pylint: disable=too-few-public-methods
"""Base class for database-specific jinja context
Expand Down Expand Up @@ -190,11 +214,14 @@ def __init__(
self.schema = query.schema
elif table:
self.schema = table.schema

extra_cache = ExtraCache(extra_cache_keys)

self.context = {
"url_param": url_param,
"current_user_id": current_user_id,
"current_username": current_username,
"cache_key_wrapper": CacheKeyWrapper(extra_cache_keys).cache_key_wrapper,
"url_param": extra_cache.url_param,
"current_user_id": extra_cache.current_user_id,
"current_username": extra_cache.current_username,
"cache_key_wrapper": extra_cache.cache_key_wrapper,
"filter_values": filter_values,
"form_data": {},
}
Expand Down
7 changes: 5 additions & 2 deletions superset/views/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from sqlalchemy import and_, func

from superset import db, utils
from superset.jinja_context import current_user_id, current_username
from superset.jinja_context import ExtraCache
from superset.models.dashboard import Dashboard
from superset.models.slice import Slice
from superset.models.sql_lab import SavedQuery
Expand All @@ -36,7 +36,10 @@
def process_template(content):
env = SandboxedEnvironment()
template = env.from_string(content)
context = {"current_user_id": current_user_id, "current_username": current_username}
context = {
"current_user_id": ExtraCache.current_user_id,
"current_username": ExtraCache.current_username,
}
return template.render(context)


Expand Down
1 change: 0 additions & 1 deletion tests/core_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ def test_viz_cache_key(self):
viz = slc.viz
qobj = viz.query_obj()
cache_key = viz.cache_key(qobj)
self.assertEqual(cache_key, viz.cache_key(qobj))

qobj["groupby"] = []
self.assertNotEqual(cache_key, viz.cache_key(qobj))
Expand Down
Loading

0 comments on commit 955a4fe

Please sign in to comment.