Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: refactor SIP-68 db migrations with INSERT SELECT FROM #19421

Merged
merged 2 commits into from
Apr 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 40 additions & 32 deletions superset/columns/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

These models are not fully implemented, and shouldn't be used yet.
"""

import sqlalchemy as sa
from flask_appbuilder import Model

Expand All @@ -33,6 +32,8 @@
ImportExportMixin,
)

# Default value for the ``type`` column of ``Column`` when no raw type is given.
# NOTE(review): the identifier is misspelled ("UNKOWN" vs. "UNKNOWN"); presumably
# other modules reference it by this name — confirm before renaming.
UNKOWN_TYPE = "UNKNOWN"


class Column(
Model,
Expand All @@ -52,51 +53,58 @@ class Column(

id = sa.Column(sa.Integer, primary_key=True)

# Assuming the column is an aggregation, is it additive? Useful for determining which
# aggregations can be done on the metric. Eg, ``COUNT(DISTINCT user_id)`` is not
# additive, so it shouldn't be used in a ``SUM``.
is_additive = sa.Column(sa.Boolean, default=False)

# Is this column an aggregation (metric)?
is_aggregation = sa.Column(sa.Boolean, default=False)

is_filterable = sa.Column(sa.Boolean, nullable=False, default=True)
is_dimensional = sa.Column(sa.Boolean, nullable=False, default=False)

# Is an increase desired? Useful for displaying the results of A/B tests, or setting
# up alerts. Eg, this is true for "revenue", but false for "latency".
is_increase_desired = sa.Column(sa.Boolean, default=True)

# Column is managed externally and should be read-only inside Superset
is_managed_externally = sa.Column(sa.Boolean, nullable=False, default=False)

# Is this column a partition? Useful for scheduling queries and previewing the latest
# data.
is_partition = sa.Column(sa.Boolean, default=False)

# Does the expression point directly to a physical column?
is_physical = sa.Column(sa.Boolean, default=True)

# Is this a spatial column? This could be leveraged in the future for spatial
# visualizations.
is_spatial = sa.Column(sa.Boolean, default=False)

# Is this a time column? Useful for plotting time series.
is_temporal = sa.Column(sa.Boolean, default=False)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A-Z reordering


# We use ``sa.Text`` for these attributes because (1) in modern databases the
# performance is the same as ``VARCHAR``[1] and (2) because some table names can be
# **really** long (eg, Google Sheets URLs).
#
# [1] https://www.postgresql.org/docs/9.1/datatype-character.html
name = sa.Column(sa.Text)
type = sa.Column(sa.Text)
# Raw type as returned and used by db engine.
type = sa.Column(sa.Text, default=UNKOWN_TYPE)

# Columns are defined by expressions. For tables, these are the actual columns names,
# and should match the ``name`` attribute. For datasets, these can be any valid SQL
# expression. If the SQL expression is an aggregation the column is a metric,
# otherwise it's a computed column.
expression = sa.Column(sa.Text)

# Does the expression point directly to a physical column?
is_physical = sa.Column(sa.Boolean, default=True)
unit = sa.Column(sa.Text)

# Additional metadata describing the column.
description = sa.Column(sa.Text)
warning_text = sa.Column(sa.Text)
unit = sa.Column(sa.Text)

# Is this a time column? Useful for plotting time series.
is_temporal = sa.Column(sa.Boolean, default=False)

# Is this a spatial column? This could be leveraged in the future for spatial
# visualizations.
is_spatial = sa.Column(sa.Boolean, default=False)

# Is this column a partition? Useful for scheduling queries and previewing the latest
# data.
is_partition = sa.Column(sa.Boolean, default=False)

# Is this column an aggregation (metric)?
is_aggregation = sa.Column(sa.Boolean, default=False)

# Assuming the column is an aggregation, is it additive? Useful for determining which
# aggregations can be done on the metric. Eg, ``COUNT(DISTINCT user_id)`` is not
# additive, so it shouldn't be used in a ``SUM``.
is_additive = sa.Column(sa.Boolean, default=False)

# Is an increase desired? Useful for displaying the results of A/B tests, or setting
# up alerts. Eg, this is true for "revenue", but false for "latency".
is_increase_desired = sa.Column(sa.Boolean, default=True)

# Column is managed externally and should be read-only inside Superset
is_managed_externally = sa.Column(sa.Boolean, nullable=False, default=False)
external_url = sa.Column(sa.Text, nullable=True)

def __repr__(self) -> str:
    """Return a short debug representation that identifies the row by ``id``."""
    return "<Column id={}>".format(self.id)
6 changes: 3 additions & 3 deletions superset/connectors/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from superset.models.slice import Slice
from superset.superset_typing import FilterValue, FilterValues, QueryObjectDict
from superset.utils import core as utils
from superset.utils.core import GenericDataType
from superset.utils.core import GenericDataType, MediumText

METRIC_FORM_DATA_PARAMS = [
"metric",
Expand Down Expand Up @@ -586,7 +586,7 @@ class BaseColumn(AuditMixinNullable, ImportExportMixin):
type = Column(Text)
groupby = Column(Boolean, default=True)
filterable = Column(Boolean, default=True)
description = Column(Text)
description = Column(MediumText())
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MediumText is the current type for these fields; they were changed in DB migrations at some point. Updating the models for consistency.

is_dttm = None

# [optional] Set this to support import/export functionality
Expand Down Expand Up @@ -672,7 +672,7 @@ class BaseMetric(AuditMixinNullable, ImportExportMixin):
metric_name = Column(String(255), nullable=False)
verbose_name = Column(String(1024))
metric_type = Column(String(32))
description = Column(Text)
description = Column(MediumText())
d3format = Column(String(128))
warning_text = Column(Text)

Expand Down
Loading