Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add SSL certificate validation for Druid #9396

Merged
merged 5 commits into from
Mar 27, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,12 @@ The native Druid connector (behind the ``DRUID_IS_ACTIVE`` feature flag)
is slowly getting deprecated in favor of the SQLAlchemy/DBAPI connector made
available in the ``pydruid`` library.

To use a custom SSL certificate to validate HTTPS requests, the certificate
contents can be entered in the ``Root certificate`` field in the Database
dialog. When using a custom certificate, ``pydruid`` will automatically use
the ``https`` scheme. To disable SSL verification, add the following to extras:
``"engine_params": {"connect_args": {"scheme": "https", "ssl_verify_cert": false}}``

Dremio
------

Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ combine_as_imports = true
include_trailing_comma = true
line_length = 88
known_first_party = superset
known_third_party =alembic,backoff,bleach,celery,click,colorama,contextlib2,croniter,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,geohash,geopy,humanize,isodate,jinja2,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parsedatetime,pathlib2,polyline,prison,pyarrow,pyhive,pytz,retry,selenium,setuptools,simplejson,sphinx_rtd_theme,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml
known_third_party =alembic,backoff,bleach,celery,click,colorama,contextlib2,croniter,cryptography,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,geohash,geopy,humanize,isodate,jinja2,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parsedatetime,pathlib2,polyline,prison,pyarrow,pyhive,pytz,retry,selenium,setuptools,simplejson,sphinx_rtd_theme,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wtforms,wtforms_json,yaml
multi_line_output = 3
order_by_type = false

Expand Down
12 changes: 12 additions & 0 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -959,3 +959,15 @@ def mutate_db_for_connection_test(database: "Database") -> None:
:param database: instance to be mutated
"""
return None

@staticmethod
def mutate_connection_args(
database: "Database", connect_args: Dict[str, Any]
) -> None:
"""
Some databases require passing additional non-standard parameters to database
connections, for example client certificates.

:param database: instance to be mutated
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit. Missing connect_args params.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait, connect_args is an attribute of the database isn't it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, or more specifically it's in database.extra, which is in JSON format. The reason it's being passed as a separate argument is that it's already been parsed out of extra. We could optionally inject this into extra, too, but that might be more confusing, as then we'd have to json.loads, mutate and json.dumps again.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mistercrunch I changed this so extras is parsed in db_engine_specs, so that we don't need to mutate the Database instance (this caused the mutations to get persisted in the db when saving the database).

"""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: it's nice to add a :raises: on the docstring when a method can raise an exception

return None
19 changes: 18 additions & 1 deletion superset/db_engine_specs/druid.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import TYPE_CHECKING
import json
from typing import Any, Dict, TYPE_CHECKING

from superset.db_engine_specs.base import BaseEngineSpec
from superset.utils import core as utils

if TYPE_CHECKING:
from superset.connectors.sqla.models import ( # pylint: disable=unused-import
Expand Down Expand Up @@ -47,3 +49,18 @@ class DruidEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
def alter_new_orm_column(cls, orm_col: "TableColumn") -> None:
if orm_col.column_name == "__time":
orm_col.is_dttm = True

@classmethod
def mutate_connection_args(
    cls, database: "Database", connect_args: Dict[str, Any]
) -> None:
    """
    Add HTTPS certificate settings to the connection arguments in place.

    When the database has a server certificate, ``connect_args`` is updated so
    that ``scheme`` is ``"https"`` and ``ssl_verify_cert`` is the path of a
    temporary file holding the certificate. Without a certificate nothing is
    changed.

    :param database: database whose ``server_cert`` attribute is read
    :param connect_args: connection arguments, mutated in place
    """
    certificate = database.server_cert
    if not certificate:
        return
    # The scheme is set before the certificate file is created, so it is
    # already present in ``connect_args`` if writing the certificate raises.
    connect_args["scheme"] = "https"
    connect_args["ssl_verify_cert"] = utils.create_temporary_ssl_cert_file(
        certificate
    )
villebro marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""add certificate to dbs

Revision ID: b5998378c225
Revises: 72428d1ea401
Create Date: 2020-03-25 10:49:10.883065

"""

# revision identifiers, used by Alembic.
revision = "b5998378c225"
down_revision = "72428d1ea401"

from typing import Dict

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects.postgresql.base import PGDialect
from sqlalchemy_utils import EncryptedType


def upgrade():
kwargs: Dict[str, str] = {}
bind = op.get_bind()
if isinstance(bind.dialect, PGDialect):
kwargs["postgresql_using"] = "encrypted_extra::bytea"
op.add_column(
"dbs",
sa.Column("server_cert", EncryptedType(sa.Text()), nullable=True, **kwargs),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense, while we're at it, to create a client_cert field also?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did but took it out, as pydruid doesn't support it yet. However, I opened a PR for that, and will add the client cert field once that PR is merged.

)


def downgrade():
    """Revert this revision by dropping the ``server_cert`` column from ``dbs``."""
    table_name, column_name = "dbs", "server_cert"
    op.drop_column(table_name, column_name)
3 changes: 3 additions & 0 deletions superset/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ class Database(
encrypted_extra = Column(EncryptedType(Text, config["SECRET_KEY"]), nullable=True)
perm = Column(String(1000))
impersonate_user = Column(Boolean, default=False)
server_cert = Column(EncryptedType(Text, config["SECRET_KEY"]), nullable=True)
export_fields = [
"database_name",
"sqlalchemy_uri",
Expand Down Expand Up @@ -299,6 +300,7 @@ def get_sqla_engine(
params["poolclass"] = NullPool

connect_args = params.get("connect_args", {})
self.db_engine_spec.mutate_connection_args(self, connect_args)
configuration = connect_args.get("configuration", {})

# If using Hive, this will set hive.server2.proxy.user=$effective_username
Expand All @@ -309,6 +311,7 @@ def get_sqla_engine(
)
if configuration:
connect_args["configuration"] = configuration
if connect_args:
params["connect_args"] = connect_args

params.update(self.get_encrypted_extra())
Expand Down
1 change: 1 addition & 0 deletions superset/templates/superset/models/database/add.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@
{{ macros.testconn() }}
{{ macros.expand_extra_textarea() }}
{{ macros.expand_encrypted_extra_textarea() }}
{{ macros.expand_server_cert_textarea() }}
{% endblock %}
1 change: 1 addition & 0 deletions superset/templates/superset/models/database/edit.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@
{{ macros.testconn() }}
{{ macros.expand_extra_textarea() }}
{{ macros.expand_encrypted_extra_textarea() }}
{{ macros.expand_server_cert_textarea() }}
{% endblock %}
7 changes: 7 additions & 0 deletions superset/templates/superset/models/database/macros.html
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
impersonate_user: $('#impersonate_user').is(':checked'),
extras: extra ? JSON.parse(extra) : {},
encrypted_extra: encryptedExtra ? JSON.parse(encryptedExtra) : {},
server_cert: $("#server_cert").val(),
})
} catch(parse_error){
alert("Malformed JSON in the extras field: " + parse_error);
Expand Down Expand Up @@ -81,3 +82,9 @@
$('#encrypted_extra').attr('rows', '5');
</script>
{% endmacro %}

{# Emits an inline script that sets the "rows" attribute of the element with id "server_cert" to 5. #}
{% macro expand_server_cert_textarea() %}
<script>
$('#server_cert').attr('rows', '5');
</script>
{% endmacro %}
25 changes: 24 additions & 1 deletion superset/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@
import decimal
import errno
import functools
import hashlib
import json
import logging
import os
import re
import signal
import smtplib
import tempfile
import traceback
import uuid
import zlib
Expand All @@ -45,6 +46,8 @@
import pandas as pd
import parsedatetime
import sqlalchemy as sa
from cryptography import x509
from cryptography.hazmat.backends import default_backend
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
from flask import current_app, flash, Flask, g, Markup, render_template
Expand Down Expand Up @@ -1163,6 +1166,26 @@ def get_username() -> Optional[str]:
return None


def create_temporary_ssl_cert_file(certificate: str) -> str:
    """
    Create a temporary certificate file that can be used to validate HTTPS
    sessions. A certificate is only written to disk once; on subsequent calls,
    only the path of the existing certificate is returned.

    The file lives in the system temporary directory and is named after the MD5
    hex digest of the certificate contents, so identical contents always map to
    the same path.

    :param certificate: The contents of the certificate
    :return: The path to the certificate file
    :raises ValueError: If the contents cannot be loaded as a PEM X.509
        certificate (only checked when the file does not exist yet)
    """
    encoded_certificate = certificate.encode("utf-8")
    filename = hashlib.md5(encoded_certificate).hexdigest()
    path = os.path.join(tempfile.gettempdir(), filename)
    if not os.path.exists(path):
        # Validate certificate prior to persisting to temporary directory
        x509.load_pem_x509_certificate(encoded_certificate, default_backend())
        # The context manager closes the handle even if the write fails,
        # instead of leaking it as a bare open()/close() pair would.
        with open(path, "w") as cert_file:
            cert_file.write(certificate)
    return path


def MediumText() -> Variant:
return Text().with_variant(MEDIUMTEXT(), "mysql")

Expand Down
1 change: 1 addition & 0 deletions superset/views/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1374,6 +1374,7 @@ def testconn(self):
# this is the database instance that will be tested
database = models.Database(
# extras is sent as json, but required to be a string in the Database model
server_cert=request.json.get("server_cert"),
extra=json.dumps(request.json.get("extras", {})),
impersonate_user=request.json.get("impersonate_user"),
encrypted_extra=json.dumps(request.json.get("encrypted_extra", {})),
Expand Down
11 changes: 11 additions & 0 deletions superset/views/database/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class DatabaseMixin:
"allow_multi_schema_metadata_fetch",
"extra",
"encrypted_extra",
"server_cert",
]
search_exclude_columns = (
"password",
Expand All @@ -74,6 +75,7 @@ class DatabaseMixin:
"queries",
"saved_queries",
"encrypted_extra",
"server_cert",
)
edit_columns = add_columns
show_columns = [
Expand Down Expand Up @@ -149,6 +151,11 @@ class DatabaseMixin:
"syntax normally used by SQLAlchemy.",
True,
),
"server_cert": utils.markdown(
"Optional CA_BUNDLE contents to validate HTTPS requests. Only available "
"on certain database engines.",
True,
),
"impersonate_user": _(
"If Presto, all the queries in SQL Lab are going to be executed as the "
"currently logged on user who must have permission to run them.<br/>"
Expand Down Expand Up @@ -183,6 +190,7 @@ class DatabaseMixin:
"cache_timeout": _("Chart Cache Timeout"),
"extra": _("Extra"),
"encrypted_extra": _("Secure Extra"),
"server_cert": _("Root certificate"),
"allow_run_async": _("Asynchronous Query Execution"),
"impersonate_user": _("Impersonate the logged on user"),
"allow_csv_upload": _("Allow Csv Upload"),
Expand All @@ -196,6 +204,9 @@ def _pre_add_update(self, database):
check_sqlalchemy_uri(database.sqlalchemy_uri)
self.check_extra(database)
self.check_encrypted_extra(database)
database.server_cert = (
database.server_cert.strip() if database.server_cert else ""
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should validate the certificate here also, it would give a better user experience, instead of failing when a connection is made

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, I'll break it out into a separate util function.

Copy link
Member

@dpgaspar dpgaspar Mar 26, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"server_cert" is not being validated before persisting to the DB, are we relying on testconn?. It would be nice to add parse_ssl_cert to the pre_add, pre_update hooks

database.set_sqlalchemy_uri(database.sqlalchemy_uri)
security_manager.add_permission_view_menu("database_access", database.perm)
# adding a new database we always want to force refresh schema list
Expand Down
31 changes: 31 additions & 0 deletions tests/utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from superset.utils.core import (
base_json_conv,
convert_legacy_filters_into_adhoc,
create_temporary_ssl_cert_file,
datetime_f,
format_timedelta,
get_iterable,
Expand Down Expand Up @@ -1221,3 +1222,33 @@ def test_build_extra_filters(self):
)
expected = []
self.assertEqual(extra_filters, expected)

def test_ssl_certificate_validation(self):
    # A well-formed PEM certificate must be accepted, and the returned path
    # must contain the expected hash of its contents.
    pem = """-----BEGIN CERTIFICATE-----
MIIDnDCCAoQCCQCrdpcNPCA/eDANBgkqhkiG9w0BAQsFADCBjzELMAkGA1UEBhMC
VVMxEzARBgNVBAgMCkNhbGlmb3JuaWExEjAQBgNVBAcMCVNhbiBNYXRlbzEPMA0G
A1UECgwGUHJlc2V0MRMwEQYDVQQLDApTa3Vua3dvcmtzMRIwEAYDVQQDDAlwcmVz
ZXQuaW8xHTAbBgkqhkiG9w0BCQEWDmluZm9AcHJlc2V0LmlvMB4XDTIwMDMyNjEw
NTE1NFoXDTQwMDMyNjEwNTE1NFowgY8xCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApD
YWxpZm9ybmlhMRIwEAYDVQQHDAlTYW4gTWF0ZW8xDzANBgNVBAoMBlByZXNldDET
MBEGA1UECwwKU2t1bmt3b3JrczESMBAGA1UEAwwJcHJlc2V0LmlvMR0wGwYJKoZI
hvcNAQkBFg5pbmZvQHByZXNldC5pbzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCC
AQoCggEBAKNHQZcu2L/6HvZfzy4Hnm3POeztfO+NJ7OzppAcNlLbTAatUk1YoDbJ
5m5GUW8m7pVEHb76UL6Xxei9MoMVvHGuXqQeZZnNd+DySW/227wkOPYOCVSuDsWD
1EReG+pv/z8CDhdwmMTkDTZUDr0BUR/yc8qTCPdZoalj2muDl+k2J3LSCkelx4U/
2iYhoUQD+lzFS3k7ohAfaGc2aZOlwTITopXHSFfuZ7j9muBOYtU7NgpnCl6WgxYP
1+4ddBIauPTBY2gWfZC2FeOfYEqfsUUXRsw1ehEQf4uxxTKNJTfTuVbdgrTYx5QQ
jrM88WvWdyVnIM7u7/x9bawfGX/b/F0CAwEAATANBgkqhkiG9w0BAQsFAAOCAQEA
XYLLk3T5RWIagNa3DPrMI+SjRm4PAI/RsijtBV+9hrkCXOQ1mvlo/ORniaiemHvF
Kh6u6MTl014+f6Ytg/tx/OzuK2ffo9x44ZV/yqkbSmKD1pGftYNqCnBCN0uo1Gzb
HZ+bTozo+9raFN7OGPgbdBmpQT2c+LG5n+7REobHFb7VLeY2/7BKtxNBRXfIxn4X
+MIhpASwLH5X64a1f9LyuPNMyUvKgzDe7jRdX1JZ7uw/1T//OHGQth0jLiapa6FZ
GwgYUaruSZH51ZtxrJSXKSNBA7asPSBbyOmGptLsw2GTAsoBd5sUR4+hbuVo+1ai
XeA3AKTX/OdYWJvr5YIgeQ==
-----END CERTIFICATE-----"""
    cert_path = create_temporary_ssl_cert_file(pem)
    self.assertIn("5b0df668ac310be3f15c489f303d7d01", cert_path)

    # Prefixing the PEM with junk must make certificate loading fail.
    tampered_pem = "XXX" + pem
    with self.assertRaises(ValueError):
        create_temporary_ssl_cert_file(tampered_pem)