Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[connectors] Make names non-nullable #7067

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions UPDATING.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,6 @@ assists people when migrating to a new version.
run `pip install superset[presto]` and/or `pip install superset[hive]` as
required.

* [5445](https://github.com/apache/incubator-superset/pull/5445) : a change
which prevents encoding of empty string from form data in the datanbase.
This involves a non-schema changing migration which does potentially impact
a large number of records. Scheduled downtime may be advised.

## Superset 0.31.0
* boto3 / botocore was removed from the dependency list. If you use s3
as a place to store your SQL Lab result set or Hive uploads, you may
Expand All @@ -42,6 +37,19 @@ favor of good old `npm install`. While yarn should still work just fine,
you should probably align to guarantee builds similar to the ones we
use in testing and across the community in general.

* [5445](https://github.com/apache/incubator-superset/pull/5445) : a change
which prevents encoding of empty string from form data in the database.
This involves a non-schema changing migration which does potentially impact
a large number of records. Scheduled downtime may be advised.

* [7067](https://github.com/apache/incubator-superset/pull/7067) : related to
5445 this change makes certain columns non-nullable and either deletes
erroneous or mutates ill-defined records to ensure that the data conforms to
the schema. All mutated records are identifiable via the migration specific
(`migration_40f150716b13_`) prefix. It is recommended that a manual cleanup
pass is performed to either remove or update violating records.


## Superset 0.30.0
* 0.30.0 includes a db_migration that removes allow_run_sync. This may
require downtime because during the migration if the db is migrated first,
Expand Down
4 changes: 2 additions & 2 deletions superset/connectors/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ class BaseColumn(AuditMixinNullable, ImportMixin):
__tablename__ = None # {connector_name}_column

id = Column(Integer, primary_key=True)
column_name = Column(String(255))
column_name = Column(String(255), nullable=False)
verbose_name = Column(String(1024))
is_active = Column(Boolean, default=True)
type = Column(String(32))
Expand Down Expand Up @@ -409,7 +409,7 @@ class BaseMetric(AuditMixinNullable, ImportMixin):
__tablename__ = None # {connector_name}_metric

id = Column(Integer, primary_key=True)
metric_name = Column(String(512))
metric_name = Column(String(512), nullable=False)
verbose_name = Column(String(1024))
metric_type = Column(String(32))
description = Column(Text)
Expand Down
4 changes: 2 additions & 2 deletions superset/connectors/druid/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ class DruidMetric(Model, BaseMetric):
'DruidDatasource',
backref=backref('metrics', cascade='all, delete-orphan'),
enable_typechecks=False)
json = Column(Text)
json = Column(Text, nullable=False)

export_fields = (
'metric_name', 'verbose_name', 'metric_type', 'datasource_id',
Expand Down Expand Up @@ -410,7 +410,7 @@ class DruidDatasource(Model, BaseDatasource):
baselink = 'druiddatasourcemodelview'

# Columns
datasource_name = Column(String(255))
datasource_name = Column(String(255), nullable=False)
is_hidden = Column(Boolean, default=False)
filter_select_enabled = Column(Boolean, default=True) # override default
fetch_values_from = Column(String(100))
Expand Down
4 changes: 2 additions & 2 deletions superset/connectors/sqla/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ class SqlMetric(Model, BaseMetric):
'SqlaTable',
backref=backref('metrics', cascade='all, delete-orphan'),
foreign_keys=[table_id])
expression = Column(Text)
expression = Column(Text, nullable=False)

export_fields = (
'metric_name', 'verbose_name', 'metric_type', 'table_id', 'expression',
Expand Down Expand Up @@ -263,7 +263,7 @@ class SqlaTable(Model, BaseDatasource):
__tablename__ = 'tables'
__table_args__ = (UniqueConstraint('database_id', 'table_name'),)

table_name = Column(String(250))
table_name = Column(String(250), nullable=False)
main_dttm_col = Column(String(250))
database_id = Column(Integer, ForeignKey('dbs.id'), nullable=False)
fetch_values_predicate = Column(String(1000))
Expand Down
174 changes: 174 additions & 0 deletions superset/migrations/versions/40f150716b13_non_nullable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""non-nullable

Revision ID: 40f150716b13
Revises: c82ee8a39623
Create Date: 2019-03-20 11:46:24.211389

"""

# revision identifiers, used by Alembic.
revision = '40f150716b13'
down_revision = 'c82ee8a39623'

from alembic import op
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Text

from superset import db

Base = declarative_base()


class BaseColumnMixin(object):
id = Column(Integer, primary_key=True)
column_name = Column(String(255), nullable=False)


class BaseDatasourceMixin(object):
id = Column(Integer, primary_key=True)


class BaseMetricMixin(object):
id = Column(Integer, primary_key=True)
metric_name = Column(String(512), nullable=False)


class DruidColumn(BaseColumnMixin, Base):
__tablename__ = 'columns'

dimension_spec_json = Column(Text)


class DruidDatasource(BaseDatasourceMixin, Base):
__tablename__ = 'datasources'

datasource_name = Column(String(255), nullable=False)


class DruidMetric(BaseMetricMixin, Base):
__tablename__ = 'metrics'

json = Column(Text, nullable=False)


class SqlaTable(BaseDatasourceMixin, Base):
__tablename__ = 'tables'

table_name = Column(String(250), nullable=False)


class SqlMetric(BaseMetricMixin, Base):
__tablename__ = 'sql_metrics'

expression = Column(Text, nullable=False)


class TableColumn(BaseColumnMixin, Base):
__tablename__ = 'table_columns'

expression = Column(Text)


tables = [
{
'can_delete': True,
'class': DruidColumn,
'optional': ['dimension_spec_json'],
'required': ['column_name'],
},
{
'can_delete': False,
'class': DruidDatasource,
'optional': [],
'required': ['datasource_name'],
},
{
'can_delete': True,
'class': DruidMetric,
'optional': [],
'required': ['metric_name', 'json'],
},
{
'can_delete': True,
'class': SqlMetric,
'optional': [],
'required': ['metric_name', 'expression'],
},
{
'can_delete': False,
'class': SqlaTable,
'optional': [],
'required': ['table_name'],
},
{
'can_delete': True,
'class': TableColumn,
'optional': ['expression'],
'required': ['column_name'],
},
]


def upgrade():
bind = op.get_bind()
session = db.Session(bind=bind)

# Delete erroneous records (if allowed) otherwise mutate ill-defined records.
for table in tables:
idx = 1

for record in session.query(table['class']).all():
optional = [getattr(record, column) for column in table['optional']]
required = [getattr(record, column) for column in table['required']]

if not all(required):
if table['can_delete'] and not (any(required) or any(optional)):
session.delete(record)
else:
for column in table['required']:
if not getattr(record, column):
setattr(record, column, f'migration_40f150716b13_{str(idx)}')
idx += 1

session.commit()

# Make the fields non-nullable.
for table in tables:
with op.batch_alter_table(table['class'].__tablename__) as batch_op:
for column in table['required']:
batch_op.alter_column(
column,
existing_type=getattr(table['class'], column).type,
existing_nullable=True,
nullable=False,
)


def downgrade():

# Make the fields nullable.
for table in tables:
with op.batch_alter_table(table['class'].__tablename__) as batch_op:
for column in table['required']:
batch_op.alter_column(
column,
existing_type=getattr(table['class'], column).type,
existing_nullable=False,
nullable=True,
)
22 changes: 18 additions & 4 deletions tests/dict_import_export_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,11 @@ def create_table(
'params': json.dumps(params),
'columns': [{'column_name': c}
for c in cols_names],
'metrics': [{'metric_name': c} for c in metric_names],
'metrics': [
{
'metric_name': c,
'expression': 'COUNT(1)',
} for c in metric_names],
}

table = SqlaTable(
Expand All @@ -84,7 +88,12 @@ def create_table(
for col_name in cols_names:
table.columns.append(TableColumn(column_name=col_name))
for metric_name in metric_names:
table.metrics.append(SqlMetric(metric_name=metric_name))
table.metrics.append(
SqlMetric(
metric_name=metric_name,
expression='COUNT(1)',
),
)
return table, dict_rep

def create_druid_datasource(
Expand All @@ -98,7 +107,7 @@ def create_druid_datasource(
'id': id,
'params': json.dumps(params),
'columns': [{'column_name': c} for c in cols_names],
'metrics': [{'metric_name': c} for c in metric_names],
'metrics': [{'metric_name': c, 'json': '{}'} for c in metric_names],
}

datasource = DruidDatasource(
Expand All @@ -110,7 +119,12 @@ def create_druid_datasource(
for col_name in cols_names:
datasource.columns.append(DruidColumn(column_name=col_name))
for metric_name in metric_names:
datasource.metrics.append(DruidMetric(metric_name=metric_name))
datasource.metrics.append(
DruidMetric(
metric_name=metric_name,
json='{}',
),
)
return datasource, dict_rep

def get_datasource(self, datasource_id):
Expand Down
2 changes: 1 addition & 1 deletion tests/import_export_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def create_druid_datasource(
DruidColumn(column_name=col_name))
for metric_name in metric_names:
datasource.metrics.append(DruidMetric(
metric_name=metric_name))
metric_name=metric_name, json='{}'))
return datasource

def get_slice(self, slc_id):
Expand Down