[connectors] Make names non-nullable
John Bodley committed Mar 22, 2019
1 parent c1c8e50 commit b581b49
Showing 7 changed files with 212 additions and 16 deletions.
18 changes: 13 additions & 5 deletions UPDATING.md
@@ -27,11 +27,6 @@ assists people when migrating to a new version.
run `pip install superset[presto]` and/or `pip install superset[hive]` as
required.

* [5445](https://github.com/apache/incubator-superset/pull/5445) : a change
which prevents encoding of empty string from form data in the datanbase.
This involves a non-schema changing migration which does potentially impact
a large number of records. Scheduled downtime may be advised.

## Superset 0.31.0
* boto3 / botocore was removed from the dependency list. If you use s3
as a place to store your SQL Lab result set or Hive uploads, you may
@@ -42,6 +37,19 @@ favor of good old `npm install`. While yarn should still work just fine,
you should probably align to guarantee builds similar to the ones we
use in testing and across the community in general.

* [5445](https://github.com/apache/incubator-superset/pull/5445) : a change
which prevents empty strings from form data being encoded in the database.
This involves a non-schema-changing migration which does potentially impact
a large number of records. Scheduled downtime may be advised.

* [7067](https://github.com/apache/incubator-superset/pull/7067) : related to
5445 above, this change makes certain columns non-nullable and either deletes
erroneous records or mutates ill-defined ones to ensure that the data conforms
to the schema. All mutated records are identifiable via the migration-specific
(`migration_40f150716b13_`) prefix. It is recommended that a manual cleanup
pass be performed to either remove or update violating records (a query
sketch follows this diff).

## Superset 0.30.0
* 0.30.0 includes a db_migration that removes allow_run_sync. This may
require downtime because during the migration if the db is migrated first,
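For the manual cleanup pass recommended above, a query along the following
lines could surface the backfilled placeholder records. This is a minimal
sketch assuming direct SQLAlchemy access to the Superset metadata database;
the connection URI is hypothetical, and the (table, column) pairs mirror the
migration below:

```python
from sqlalchemy import create_engine, text

PREFIX = 'migration_40f150716b13_%'

# Hypothetical URI; point this at your actual metadata database.
engine = create_engine('sqlite:////path/to/superset.db')

# The (table, column) pairs made non-nullable by the migration.
checks = [
    ('columns', 'column_name'),
    ('table_columns', 'column_name'),
    ('datasources', 'datasource_name'),
    ('tables', 'table_name'),
    ('metrics', 'metric_name'),
    ('metrics', 'json'),
    ('sql_metrics', 'metric_name'),
    ('sql_metrics', 'expression'),
]

with engine.connect() as conn:
    for table, column in checks:
        query = text(
            'SELECT id, {col} FROM {tbl} WHERE {col} LIKE :prefix'
            .format(col=column, tbl=table),
        )
        for row in conn.execute(query, {'prefix': PREFIX}):
            # Review each hit, then UPDATE or DELETE it by hand.
            print(table, column, row)
```

Note that rows deleted outright by the migration are already gone; this only
finds the mutated ones.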
4 changes: 2 additions & 2 deletions superset/connectors/base/models.py
@@ -345,7 +345,7 @@ class BaseColumn(AuditMixinNullable, ImportMixin):
__tablename__ = None # {connector_name}_column

id = Column(Integer, primary_key=True)
column_name = Column(String(255))
column_name = Column(String(255), nullable=False)
verbose_name = Column(String(1024))
is_active = Column(Boolean, default=True)
type = Column(String(32))
@@ -409,7 +409,7 @@ class BaseMetric(AuditMixinNullable, ImportMixin):
__tablename__ = None # {connector_name}_metric

id = Column(Integer, primary_key=True)
metric_name = Column(String(512))
metric_name = Column(String(512), nullable=False)
verbose_name = Column(String(1024))
metric_type = Column(String(32))
description = Column(Text)
4 changes: 2 additions & 2 deletions superset/connectors/druid/models.py
@@ -344,7 +344,7 @@ class DruidMetric(Model, BaseMetric):
'DruidDatasource',
backref=backref('metrics', cascade='all, delete-orphan'),
enable_typechecks=False)
json = Column(Text)
json = Column(Text, nullable=False)

export_fields = (
'metric_name', 'verbose_name', 'metric_type', 'datasource_id',
@@ -410,7 +410,7 @@ class DruidDatasource(Model, BaseDatasource):
baselink = 'druiddatasourcemodelview'

# Columns
datasource_name = Column(String(255))
datasource_name = Column(String(255), nullable=False)
is_hidden = Column(Boolean, default=False)
filter_select_enabled = Column(Boolean, default=True) # override default
fetch_values_from = Column(String(100))
4 changes: 2 additions & 2 deletions superset/connectors/sqla/models.py
@@ -209,7 +209,7 @@ class SqlMetric(Model, BaseMetric):
'SqlaTable',
backref=backref('metrics', cascade='all, delete-orphan'),
foreign_keys=[table_id])
expression = Column(Text)
expression = Column(Text, nullable=False)

export_fields = (
'metric_name', 'verbose_name', 'metric_type', 'table_id', 'expression',
@@ -263,7 +263,7 @@ class SqlaTable(Model, BaseDatasource):
__tablename__ = 'tables'
__table_args__ = (UniqueConstraint('database_id', 'table_name'),)

table_name = Column(String(250))
table_name = Column(String(250), nullable=False)
main_dttm_col = Column(String(250))
database_id = Column(Integer, ForeignKey('dbs.id'), nullable=False)
fetch_values_predicate = Column(String(1000))
174 changes: 174 additions & 0 deletions superset/migrations/versions/40f150716b13_non_nullable.py
@@ -0,0 +1,174 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""non-nullable
Revision ID: 40f150716b13
Revises: c82ee8a39623
Create Date: 2019-03-20 11:46:24.211389
"""

# revision identifiers, used by Alembic.
revision = '40f150716b13'
down_revision = 'c82ee8a39623'

from alembic import op
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Text

from superset import db

Base = declarative_base()


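# Minimal inline copies of the models: migrations avoid importing the live
# application models, which may have drifted since this revision was written.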
class BaseColumnMixin(object):
id = Column(Integer, primary_key=True)
column_name = Column(String(255), nullable=False)


class BaseDatasourceMixin(object):
id = Column(Integer, primary_key=True)


class BaseMetricMixin(object):
id = Column(Integer, primary_key=True)
metric_name = Column(String(512), nullable=False)


class DruidColumn(BaseColumnMixin, Base):
__tablename__ = 'columns'

dimension_spec_json = Column(Text)


class DruidDatasource(BaseDatasourceMixin, Base):
__tablename__ = 'datasources'

datasource_name = Column(String(255), nullable=False)


class DruidMetric(BaseMetricMixin, Base):
__tablename__ = 'metrics'

json = Column(Text, nullable=False)


class SqlaTable(BaseDatasourceMixin, Base):
__tablename__ = 'tables'

table_name = Column(String(250), nullable=False)


class SqlMetric(BaseMetricMixin, Base):
__tablename__ = 'sql_metrics'

expression = Column(Text, nullable=False)


class TableColumn(BaseColumnMixin, Base):
__tablename__ = 'table_columns'

expression = Column(Text)


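# Drives the cleanup in upgrade(): 'required' columns are backfilled and made
# non-nullable, while 'optional' columns are only consulted when deciding
# whether an empty row can be deleted outright ('can_delete').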
tables = [
{
'can_delete': True,
'class': DruidColumn,
'optional': ['dimension_spec_json'],
'required': ['column_name'],
},
{
'can_delete': False,
'class': DruidDatasource,
'optional': [],
'required': ['datasource_name'],
},
{
'can_delete': True,
'class': DruidMetric,
'optional': [],
'required': ['metric_name', 'json'],
},
{
'can_delete': True,
'class': SqlMetric,
'optional': [],
'required': ['metric_name', 'expression'],
},
{
'can_delete': False,
'class': SqlaTable,
'optional': [],
'required': ['table_name'],
},
{
'can_delete': True,
'class': TableColumn,
'optional': ['expression'],
'required': ['column_name'],
},
]


def upgrade():
bind = op.get_bind()
session = db.Session(bind=bind)

    # Delete erroneous records (where allowed); otherwise mutate ill-defined records.
for table in tables:
idx = 1

for record in session.query(table['class']).all():
optional = [getattr(record, column) for column in table['optional']]
required = [getattr(record, column) for column in table['required']]

if not all(required):
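                # The row is missing a required value: drop it when deletion
                # is allowed and the row is entirely empty, otherwise backfill
                # a placeholder so the NOT NULL constraint can be applied.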
if table['can_delete'] and not (any(required) or any(optional)):
session.delete(record)
else:
for column in table['required']:
if not getattr(record, column):
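                            # e.g. column_name -> 'migration_40f150716b13_1'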
                            setattr(record, column, f'migration_40f150716b13_{idx}')
idx += 1

session.commit()

# Make the fields non-nullable.
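    # batch_alter_table copies and recreates the table where the backend
    # (notably SQLite) cannot alter a column's nullability in place.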
for table in tables:
with op.batch_alter_table(table['class'].__tablename__) as batch_op:
for column in table['required']:
batch_op.alter_column(
column,
existing_type=getattr(table['class'], column).type,
existing_nullable=True,
nullable=False,
)


def downgrade():

# Make the fields nullable.
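    # Note: placeholder values written by upgrade() are not reverted here;
    # only the nullability constraint is relaxed.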
for table in tables:
with op.batch_alter_table(table['class'].__tablename__) as batch_op:
for column in table['required']:
batch_op.alter_column(
column,
existing_type=getattr(table['class'], column).type,
existing_nullable=False,
nullable=True,
)
22 changes: 18 additions & 4 deletions tests/dict_import_export_tests.py
@@ -72,7 +72,11 @@ def create_table(
'params': json.dumps(params),
'columns': [{'column_name': c}
for c in cols_names],
'metrics': [{'metric_name': c} for c in metric_names],
'metrics': [
{
'metric_name': c,
'expression': 'COUNT(1)',
} for c in metric_names],
}

table = SqlaTable(
@@ -84,7 +88,12 @@ def create_druid_datasource(
for col_name in cols_names:
table.columns.append(TableColumn(column_name=col_name))
for metric_name in metric_names:
table.metrics.append(SqlMetric(metric_name=metric_name))
table.metrics.append(
SqlMetric(
metric_name=metric_name,
expression='COUNT(1)',
),
)
return table, dict_rep

def create_druid_datasource(
@@ -98,7 +107,7 @@ def create_druid_datasource(
'id': id,
'params': json.dumps(params),
'columns': [{'column_name': c} for c in cols_names],
'metrics': [{'metric_name': c} for c in metric_names],
'metrics': [{'metric_name': c, 'json': '{}'} for c in metric_names],
}

datasource = DruidDatasource(
@@ -110,7 +119,12 @@ def create_druid_datasource(
for col_name in cols_names:
datasource.columns.append(DruidColumn(column_name=col_name))
for metric_name in metric_names:
datasource.metrics.append(DruidMetric(metric_name=metric_name))
datasource.metrics.append(
DruidMetric(
metric_name=metric_name,
json='{}',
),
)
return datasource, dict_rep

def get_datasource(self, datasource_id):
2 changes: 1 addition & 1 deletion tests/import_export_tests.py
@@ -130,7 +130,7 @@ def create_druid_datasource(
DruidColumn(column_name=col_name))
for metric_name in metric_names:
datasource.metrics.append(DruidMetric(
metric_name=metric_name))
metric_name=metric_name, json='{}'))
return datasource

def get_slice(self, slc_id):
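
Taken together, the test updates encode the new invariant: metrics must be
created with their defining payload. A minimal sketch of the constructors as
now exercised by the tests (the Druid aggregator JSON shown is illustrative):

```python
from superset.connectors.druid.models import DruidMetric
from superset.connectors.sqla.models import SqlMetric

# A SQL metric now always carries a SQL expression...
metric = SqlMetric(metric_name='count', expression='COUNT(*)')

# ...and a Druid metric always carries its aggregator JSON.
druid_metric = DruidMetric(
    metric_name='count',
    json='{"type": "count", "name": "count"}',
)
```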
