diff --git a/UPDATING.md b/UPDATING.md index 5c7edbe5395a1..3c0318fbadc2d 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -27,11 +27,6 @@ assists people when migrating to a new version. run `pip install superset[presto]` and/or `pip install superset[hive]` as required. -* [5445](https://github.com/apache/incubator-superset/pull/5445) : a change -which prevents encoding of empty string from form data in the datanbase. -This involves a non-schema changing migration which does potentially impact -a large number of records. Scheduled downtime may be advised. - ## Superset 0.31.0 * boto3 / botocore was removed from the dependency list. If you use s3 as a place to store your SQL Lab result set or Hive uploads, you may @@ -42,6 +37,19 @@ favor of good old `npm install`. While yarn should still work just fine, you should probably align to guarantee builds similar to the ones we use in testing and across the community in general. +* [5445](https://github.com/apache/incubator-superset/pull/5445) : a change +which prevents encoding of empty string from form data in the database. +This involves a non-schema changing migration which does potentially impact +a large number of records. Scheduled downtime may be advised. + +* [7067](https://github.com/apache/incubator-superset/pull/7067) : related to +5445 this change makes certain columns non-nullable and either deletes +erroneous or mutates ill-defined records to ensure that the data conforms to +the schema. All mutated records are identifiable via the migration specific +(`migration_40f150716b13_`) prefix. It is recommended that a manual cleanup +pass is performed to either remove or update violating records. + + ## Superset 0.30.0 * 0.30.0 includes a db_migration that removes allow_run_sync. This may require downtime because during the migration if the db is migrated first, diff --git a/superset/connectors/base/models.py b/superset/connectors/base/models.py index 39cc5853d6f66..0e745235ea954 100644 --- a/superset/connectors/base/models.py +++ b/superset/connectors/base/models.py @@ -345,7 +345,7 @@ class BaseColumn(AuditMixinNullable, ImportMixin): __tablename__ = None # {connector_name}_column id = Column(Integer, primary_key=True) - column_name = Column(String(255)) + column_name = Column(String(255), nullable=False) verbose_name = Column(String(1024)) is_active = Column(Boolean, default=True) type = Column(String(32)) @@ -409,7 +409,7 @@ class BaseMetric(AuditMixinNullable, ImportMixin): __tablename__ = None # {connector_name}_metric id = Column(Integer, primary_key=True) - metric_name = Column(String(512)) + metric_name = Column(String(512), nullable=False) verbose_name = Column(String(1024)) metric_type = Column(String(32)) description = Column(Text) diff --git a/superset/connectors/druid/models.py b/superset/connectors/druid/models.py index 577679092c09b..cf4031b437f73 100644 --- a/superset/connectors/druid/models.py +++ b/superset/connectors/druid/models.py @@ -344,7 +344,7 @@ class DruidMetric(Model, BaseMetric): 'DruidDatasource', backref=backref('metrics', cascade='all, delete-orphan'), enable_typechecks=False) - json = Column(Text) + json = Column(Text, nullable=False) export_fields = ( 'metric_name', 'verbose_name', 'metric_type', 'datasource_id', @@ -410,7 +410,7 @@ class DruidDatasource(Model, BaseDatasource): baselink = 'druiddatasourcemodelview' # Columns - datasource_name = Column(String(255)) + datasource_name = Column(String(255), nullable=False) is_hidden = Column(Boolean, default=False) filter_select_enabled = Column(Boolean, default=True) # override default fetch_values_from = Column(String(100)) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 64933708db6b1..5f70a16853649 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -209,7 +209,7 @@ class SqlMetric(Model, BaseMetric): 'SqlaTable', backref=backref('metrics', cascade='all, delete-orphan'), foreign_keys=[table_id]) - expression = Column(Text) + expression = Column(Text, nullable=False) export_fields = ( 'metric_name', 'verbose_name', 'metric_type', 'table_id', 'expression', @@ -263,7 +263,7 @@ class SqlaTable(Model, BaseDatasource): __tablename__ = 'tables' __table_args__ = (UniqueConstraint('database_id', 'table_name'),) - table_name = Column(String(250)) + table_name = Column(String(250), nullable=False) main_dttm_col = Column(String(250)) database_id = Column(Integer, ForeignKey('dbs.id'), nullable=False) fetch_values_predicate = Column(String(1000)) diff --git a/superset/migrations/versions/40f150716b13_non_nullable.py b/superset/migrations/versions/40f150716b13_non_nullable.py new file mode 100644 index 0000000000000..4f326b59a69dc --- /dev/null +++ b/superset/migrations/versions/40f150716b13_non_nullable.py @@ -0,0 +1,174 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""non-nullable + +Revision ID: 40f150716b13 +Revises: c82ee8a39623 +Create Date: 2019-03-20 11:46:24.211389 + +""" + +# revision identifiers, used by Alembic. +revision = '40f150716b13' +down_revision = 'c82ee8a39623' + +from alembic import op +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy import Column, Integer, String, Text + +from superset import db + +Base = declarative_base() + + +class BaseColumnMixin(object): + id = Column(Integer, primary_key=True) + column_name = Column(String(255), nullable=False) + + +class BaseDatasourceMixin(object): + id = Column(Integer, primary_key=True) + + +class BaseMetricMixin(object): + id = Column(Integer, primary_key=True) + metric_name = Column(String(512), nullable=False) + + +class DruidColumn(BaseColumnMixin, Base): + __tablename__ = 'columns' + + dimension_spec_json = Column(Text) + + +class DruidDatasource(BaseDatasourceMixin, Base): + __tablename__ = 'datasources' + + datasource_name = Column(String(255), nullable=False) + + +class DruidMetric(BaseMetricMixin, Base): + __tablename__ = 'metrics' + + json = Column(Text, nullable=False) + + +class SqlaTable(BaseDatasourceMixin, Base): + __tablename__ = 'tables' + + table_name = Column(String(250), nullable=False) + + +class SqlMetric(BaseMetricMixin, Base): + __tablename__ = 'sql_metrics' + + expression = Column(Text, nullable=False) + + +class TableColumn(BaseColumnMixin, Base): + __tablename__ = 'table_columns' + + expression = Column(Text) + + +tables = [ + { + 'can_delete': True, + 'class': DruidColumn, + 'optional': ['dimension_spec_json'], + 'required': ['column_name'], + }, + { + 'can_delete': False, + 'class': DruidDatasource, + 'optional': [], + 'required': ['datasource_name'], + }, + { + 'can_delete': True, + 'class': DruidMetric, + 'optional': [], + 'required': ['metric_name', 'json'], + }, + { + 'can_delete': True, + 'class': SqlMetric, + 'optional': [], + 'required': ['metric_name', 'expression'], + }, + { + 'can_delete': False, + 'class': SqlaTable, + 'optional': [], + 'required': ['table_name'], + }, + { + 'can_delete': True, + 'class': TableColumn, + 'optional': ['expression'], + 'required': ['column_name'], + }, +] + + +def upgrade(): + bind = op.get_bind() + session = db.Session(bind=bind) + + # Delete erroneous records (if allowed) otherwise mutate ill-defined records. + for table in tables: + idx = 1 + + for record in session.query(table['class']).all(): + optional = [getattr(record, column) for column in table['optional']] + required = [getattr(record, column) for column in table['required']] + + if not all(required): + if table['can_delete'] and not (any(required) or any(optional)): + session.delete(record) + else: + for column in table['required']: + if not getattr(record, column): + setattr(record, column, f'migration_40f150716b13_{str(idx)}') + idx += 1 + + session.commit() + + # Make the fields non-nullable. + for table in tables: + with op.batch_alter_table(table['class'].__tablename__) as batch_op: + for column in table['required']: + batch_op.alter_column( + column, + existing_type=getattr(table['class'], column).type, + existing_nullable=True, + nullable=False, + ) + + +def downgrade(): + + # Make the fields nullable. + for table in tables: + with op.batch_alter_table(table['class'].__tablename__) as batch_op: + for column in table['required']: + batch_op.alter_column( + column, + existing_type=getattr(table['class'], column).type, + existing_nullable=False, + nullable=True, + ) diff --git a/tests/dict_import_export_tests.py b/tests/dict_import_export_tests.py index 50cb0f7cbdc8d..43ad2496bf66c 100644 --- a/tests/dict_import_export_tests.py +++ b/tests/dict_import_export_tests.py @@ -72,7 +72,11 @@ def create_table( 'params': json.dumps(params), 'columns': [{'column_name': c} for c in cols_names], - 'metrics': [{'metric_name': c} for c in metric_names], + 'metrics': [ + { + 'metric_name': c, + 'expression': 'COUNT(1)', + } for c in metric_names], } table = SqlaTable( @@ -84,7 +88,12 @@ def create_table( for col_name in cols_names: table.columns.append(TableColumn(column_name=col_name)) for metric_name in metric_names: - table.metrics.append(SqlMetric(metric_name=metric_name)) + table.metrics.append( + SqlMetric( + metric_name=metric_name, + expression='COUNT(1)', + ), + ) return table, dict_rep def create_druid_datasource( @@ -98,7 +107,7 @@ def create_druid_datasource( 'id': id, 'params': json.dumps(params), 'columns': [{'column_name': c} for c in cols_names], - 'metrics': [{'metric_name': c} for c in metric_names], + 'metrics': [{'metric_name': c, 'json': '{}'} for c in metric_names], } datasource = DruidDatasource( @@ -110,7 +119,12 @@ def create_druid_datasource( for col_name in cols_names: datasource.columns.append(DruidColumn(column_name=col_name)) for metric_name in metric_names: - datasource.metrics.append(DruidMetric(metric_name=metric_name)) + datasource.metrics.append( + DruidMetric( + metric_name=metric_name, + json='{}', + ), + ) return datasource, dict_rep def get_datasource(self, datasource_id): diff --git a/tests/import_export_tests.py b/tests/import_export_tests.py index ad1aa908682f0..42ea80f2ad949 100644 --- a/tests/import_export_tests.py +++ b/tests/import_export_tests.py @@ -130,7 +130,7 @@ def create_druid_datasource( DruidColumn(column_name=col_name)) for metric_name in metric_names: datasource.metrics.append(DruidMetric( - metric_name=metric_name)) + metric_name=metric_name, json='{}')) return datasource def get_slice(self, slc_id):