[connectors] Make names non-nullable
John Bodley committed Mar 22, 2019
1 parent c1c8e50 commit b581b49
Showing 7 changed files with 212 additions and 16 deletions.
18 changes: 13 additions & 5 deletions UPDATING.md
@@ -27,11 +27,6 @@ assists people when migrating to a new version.
run `pip install superset[presto]` and/or `pip install superset[hive]` as
required.

* [5445](https://github.com/apache/incubator-superset/pull/5445) : a change
which prevents encoding of empty string from form data in the datanbase.
This involves a non-schema changing migration which does potentially impact
a large number of records. Scheduled downtime may be advised.

## Superset 0.31.0
* boto3 / botocore was removed from the dependency list. If you use s3
as a place to store your SQL Lab result set or Hive uploads, you may
@@ -42,6 +37,19 @@ favor of good old `npm install`. While yarn should still work just fine,
you should probably align to guarantee builds similar to the ones we
use in testing and across the community in general.

* [5445](https://github.com/apache/incubator-superset/pull/5445) : a change
which prevents empty strings from form data being encoded in the database.
This involves a non-schema-changing migration which does potentially impact
a large number of records. Scheduled downtime may be advised.

* [7067](https://github.com/apache/incubator-superset/pull/7067) : related to
5445 above, this change makes certain columns non-nullable and either deletes
erroneous records or mutates ill-defined ones to ensure that the data conforms
to the schema. All mutated records are identifiable via the migration-specific
(`migration_40f150716b13_`) prefix. It is recommended that a manual cleanup
pass be performed to either remove or update violating records (a query
sketch follows this diff).

## Superset 0.30.0
* 0.30.0 includes a db_migration that removes allow_run_sync. This may
require downtime because during the migration if the db is migrated first,
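For the manual cleanup pass recommended above, a query along the following
lines could surface the backfilled placeholder records. This is a minimal
sketch assuming direct SQLAlchemy access to the Superset metadata database;
the connection URI is hypothetical, and the (table, column) pairs mirror the
migration below:

```python
from sqlalchemy import create_engine, text

PREFIX = 'migration_40f150716b13_%'

# Hypothetical URI; point this at your actual metadata database.
engine = create_engine('sqlite:////path/to/superset.db')

# The (table, column) pairs made non-nullable by the migration.
checks = [
    ('columns', 'column_name'),
    ('table_columns', 'column_name'),
    ('datasources', 'datasource_name'),
    ('tables', 'table_name'),
    ('metrics', 'metric_name'),
    ('metrics', 'json'),
    ('sql_metrics', 'metric_name'),
    ('sql_metrics', 'expression'),
]

with engine.connect() as conn:
    for table, column in checks:
        query = text(
            'SELECT id, {col} FROM {tbl} WHERE {col} LIKE :prefix'
            .format(col=column, tbl=table),
        )
        for row in conn.execute(query, {'prefix': PREFIX}):
            # Review each hit, then UPDATE or DELETE it by hand.
            print(table, column, row)
```

Note that rows deleted outright by the migration are already gone; this only
finds the mutated ones.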
4 changes: 2 additions & 2 deletions superset/connectors/base/models.py
@@ -345,7 +345,7 @@ class BaseColumn(AuditMixinNullable, ImportMixin):
__tablename__ = None # {connector_name}_column

id = Column(Integer, primary_key=True)
column_name = Column(String(255))
column_name = Column(String(255), nullable=False)
verbose_name = Column(String(1024))
is_active = Column(Boolean, default=True)
type = Column(String(32))
@@ -409,7 +409,7 @@ class BaseMetric(AuditMixinNullable, ImportMixin):
__tablename__ = None # {connector_name}_metric

id = Column(Integer, primary_key=True)
metric_name = Column(String(512))
metric_name = Column(String(512), nullable=False)
verbose_name = Column(String(1024))
metric_type = Column(String(32))
description = Column(Text)
4 changes: 2 additions & 2 deletions superset/connectors/druid/models.py
@@ -344,7 +344,7 @@ class DruidMetric(Model, BaseMetric):
'DruidDatasource',
backref=backref('metrics', cascade='all, delete-orphan'),
enable_typechecks=False)
json = Column(Text)
json = Column(Text, nullable=False)

export_fields = (
'metric_name', 'verbose_name', 'metric_type', 'datasource_id',
@@ -410,7 +410,7 @@ class DruidDatasource(Model, BaseDatasource):
baselink = 'druiddatasourcemodelview'

# Columns
datasource_name = Column(String(255))
datasource_name = Column(String(255), nullable=False)
is_hidden = Column(Boolean, default=False)
filter_select_enabled = Column(Boolean, default=True) # override default
fetch_values_from = Column(String(100))
4 changes: 2 additions & 2 deletions superset/connectors/sqla/models.py
@@ -209,7 +209,7 @@ class SqlMetric(Model, BaseMetric):
'SqlaTable',
backref=backref('metrics', cascade='all, delete-orphan'),
foreign_keys=[table_id])
expression = Column(Text)
expression = Column(Text, nullable=False)

export_fields = (
'metric_name', 'verbose_name', 'metric_type', 'table_id', 'expression',
@@ -263,7 +263,7 @@ class SqlaTable(Model, BaseDatasource):
__tablename__ = 'tables'
__table_args__ = (UniqueConstraint('database_id', 'table_name'),)

table_name = Column(String(250))
table_name = Column(String(250), nullable=False)
main_dttm_col = Column(String(250))
database_id = Column(Integer, ForeignKey('dbs.id'), nullable=False)
fetch_values_predicate = Column(String(1000))
174 changes: 174 additions & 0 deletions superset/migrations/versions/40f150716b13_non_nullable.py
@@ -0,0 +1,174 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""non-nullable
Revision ID: 40f150716b13
Revises: c82ee8a39623
Create Date: 2019-03-20 11:46:24.211389
"""

# revision identifiers, used by Alembic.
revision = '40f150716b13'
down_revision = 'c82ee8a39623'

from alembic import op
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Text

from superset import db

Base = declarative_base()


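# Minimal inline copies of the models: migrations avoid importing the live
# application models, which may have drifted since this revision was written.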
class BaseColumnMixin(object):
id = Column(Integer, primary_key=True)
column_name = Column(String(255), nullable=False)


class BaseDatasourceMixin(object):
id = Column(Integer, primary_key=True)


class BaseMetricMixin(object):
id = Column(Integer, primary_key=True)
metric_name = Column(String(512), nullable=False)


class DruidColumn(BaseColumnMixin, Base):
__tablename__ = 'columns'

dimension_spec_json = Column(Text)


class DruidDatasource(BaseDatasourceMixin, Base):
__tablename__ = 'datasources'

datasource_name = Column(String(255), nullable=False)


class DruidMetric(BaseMetricMixin, Base):
__tablename__ = 'metrics'

json = Column(Text, nullable=False)


class SqlaTable(BaseDatasourceMixin, Base):
__tablename__ = 'tables'

table_name = Column(String(250), nullable=False)


class SqlMetric(BaseMetricMixin, Base):
__tablename__ = 'sql_metrics'

expression = Column(Text, nullable=False)


class TableColumn(BaseColumnMixin, Base):
__tablename__ = 'table_columns'

expression = Column(Text)


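# Drives the cleanup in upgrade(): 'required' columns are backfilled and made
# non-nullable, while 'optional' columns are only consulted when deciding
# whether an empty row can be deleted outright ('can_delete').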
tables = [
{
'can_delete': True,
'class': DruidColumn,
'optional': ['dimension_spec_json'],
'required': ['column_name'],
},
{
'can_delete': False,
'class': DruidDatasource,
'optional': [],
'required': ['datasource_name'],
},
{
'can_delete': True,
'class': DruidMetric,
'optional': [],
'required': ['metric_name', 'json'],
},
{
'can_delete': True,
'class': SqlMetric,
'optional': [],
'required': ['metric_name', 'expression'],
},
{
'can_delete': False,
'class': SqlaTable,
'optional': [],
'required': ['table_name'],
},
{
'can_delete': True,
'class': TableColumn,
'optional': ['expression'],
'required': ['column_name'],
},
]


def upgrade():
bind = op.get_bind()
session = db.Session(bind=bind)

    # Delete erroneous records (where allowed); otherwise mutate ill-defined records.
for table in tables:
idx = 1

for record in session.query(table['class']).all():
optional = [getattr(record, column) for column in table['optional']]
required = [getattr(record, column) for column in table['required']]

if not all(required):
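                # The row is missing a required value: drop it when deletion
                # is allowed and the row is entirely empty, otherwise backfill
                # a placeholder so the NOT NULL constraint can be applied.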
if table['can_delete'] and not (any(required) or any(optional)):
session.delete(record)
else:
for column in table['required']:
if not getattr(record, column):
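                            # e.g. column_name -> 'migration_40f150716b13_1'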
                            setattr(record, column, f'migration_40f150716b13_{idx}')
idx += 1

session.commit()

# Make the fields non-nullable.
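    # batch_alter_table copies and recreates the table where the backend
    # (notably SQLite) cannot alter a column's nullability in place.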
for table in tables:
with op.batch_alter_table(table['class'].__tablename__) as batch_op:
for column in table['required']:
batch_op.alter_column(
column,
existing_type=getattr(table['class'], column).type,
existing_nullable=True,
nullable=False,
)


def downgrade():

# Make the fields nullable.
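    # Note: placeholder values written by upgrade() are not reverted here;
    # only the nullability constraint is relaxed.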
for table in tables:
with op.batch_alter_table(table['class'].__tablename__) as batch_op:
for column in table['required']:
batch_op.alter_column(
column,
existing_type=getattr(table['class'], column).type,
existing_nullable=False,
nullable=True,
)
22 changes: 18 additions & 4 deletions tests/dict_import_export_tests.py
@@ -72,7 +72,11 @@ def create_table(
'params': json.dumps(params),
'columns': [{'column_name': c}
for c in cols_names],
'metrics': [{'metric_name': c} for c in metric_names],
'metrics': [
{
'metric_name': c,
'expression': 'COUNT(1)',
} for c in metric_names],
}

table = SqlaTable(
@@ -84,7 +88,12 @@ def create_druid_datasource(
for col_name in cols_names:
table.columns.append(TableColumn(column_name=col_name))
for metric_name in metric_names:
table.metrics.append(SqlMetric(metric_name=metric_name))
table.metrics.append(
SqlMetric(
metric_name=metric_name,
expression='COUNT(1)',
),
)
return table, dict_rep

def create_druid_datasource(
@@ -98,7 +107,7 @@ def create_druid_datasource(
'id': id,
'params': json.dumps(params),
'columns': [{'column_name': c} for c in cols_names],
'metrics': [{'metric_name': c} for c in metric_names],
'metrics': [{'metric_name': c, 'json': '{}'} for c in metric_names],
}

datasource = DruidDatasource(
@@ -110,7 +119,12 @@ def create_druid_datasource(
for col_name in cols_names:
datasource.columns.append(DruidColumn(column_name=col_name))
for metric_name in metric_names:
datasource.metrics.append(DruidMetric(metric_name=metric_name))
datasource.metrics.append(
DruidMetric(
metric_name=metric_name,
json='{}',
),
)
return datasource, dict_rep

def get_datasource(self, datasource_id):
2 changes: 1 addition & 1 deletion tests/import_export_tests.py
@@ -130,7 +130,7 @@ def create_druid_datasource(
DruidColumn(column_name=col_name))
for metric_name in metric_names:
datasource.metrics.append(DruidMetric(
metric_name=metric_name))
metric_name=metric_name, json='{}'))
return datasource

def get_slice(self, slc_id):
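
Taken together, the test updates encode the new invariant: metrics must be
created with their defining payload. A minimal sketch of the constructors as
now exercised by the tests (the Druid aggregator JSON shown is illustrative):

```python
from superset.connectors.druid.models import DruidMetric
from superset.connectors.sqla.models import SqlMetric

# A SQL metric now always carries a SQL expression...
metric = SqlMetric(metric_name='count', expression='COUNT(*)')

# ...and a Druid metric always carries its aggregator JSON.
druid_metric = DruidMetric(
    metric_name='count',
    json='{"type": "count", "name": "count"}',
)
```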
