diff --git a/superset/examples/bart_lines.py b/superset/examples/bart_lines.py
index 5e542559e70c..91257058be75 100644
--- a/superset/examples/bart_lines.py
+++ b/superset/examples/bart_lines.py
@@ -23,7 +23,7 @@
 from superset import db
 
 from ..utils.database import get_example_database
-from .helpers import get_example_data, get_table_connector_registry
+from .helpers import get_example_url, get_table_connector_registry
 
 
 def load_bart_lines(only_metadata: bool = False, force: bool = False) -> None:
@@ -34,8 +34,8 @@ def load_bart_lines(only_metadata: bool = False, force: bool = False) -> None:
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        content = get_example_data("bart-lines.json.gz").decode("utf-8")
-        df = pd.read_json(content, encoding="latin-1")
+        url = get_example_url("bart-lines.json.gz")
+        df = pd.read_json(url, encoding="latin-1", compression="gzip")
         df["path_json"] = df.path.map(json.dumps)
         df["polyline"] = df.path.map(polyline.encode)
         del df["path"]
diff --git a/superset/examples/birth_names.py b/superset/examples/birth_names.py
index 44ee32675a6e..f8b8a8ecf7ca 100644
--- a/superset/examples/birth_names.py
+++ b/superset/examples/birth_names.py
@@ -33,7 +33,7 @@
 
 from ..utils.database import get_example_database
 from .helpers import (
-    get_example_data,
+    get_example_url,
     get_slice_json,
     get_table_connector_registry,
     merge_slice,
@@ -66,7 +66,8 @@ def gen_filter(
 
 
 def load_data(tbl_name: str, database: Database, sample: bool = False) -> None:
-    pdf = pd.read_json(get_example_data("birth_names2.json.gz").decode("utf-8"))
+    url = get_example_url("birth_names2.json.gz")
+    pdf = pd.read_json(url, compression="gzip")
     # TODO(bkyryliuk): move load examples data into the pytest fixture
     if database.backend == "presto":
         pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
diff --git a/superset/examples/country_map.py b/superset/examples/country_map.py
index c959a92085fc..302b55180ea8 100644
--- a/superset/examples/country_map.py
+++ b/superset/examples/country_map.py
@@ -27,7 +27,7 @@
 from superset.utils.core import DatasourceType
 
 from .helpers import (
-    get_example_data,
+    get_example_url,
     get_slice_json,
     get_table_connector_registry,
     merge_slice,
@@ -44,10 +44,8 @@ def load_country_map_data(only_metadata: bool = False, force: bool = False) -> N
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        csv_bytes = get_example_data(
-            "birth_france_data_for_country_map.csv", is_gzip=False, make_bytes=True
-        )
-        data = pd.read_csv(csv_bytes, encoding="utf-8")
+        url = get_example_url("birth_france_data_for_country_map.csv")
+        data = pd.read_csv(url, encoding="utf-8")
         data["dttm"] = datetime.datetime.now().date()
         data.to_sql(
             tbl_name,
diff --git a/superset/examples/energy.py b/superset/examples/energy.py
index 78f194e96617..72b22525f276 100644
--- a/superset/examples/energy.py
+++ b/superset/examples/energy.py
@@ -28,7 +28,7 @@
 from superset.utils.core import DatasourceType
 
 from .helpers import (
-    get_example_data,
+    get_example_url,
     get_table_connector_registry,
     merge_slice,
     misc_dash_slices,
@@ -46,8 +46,8 @@ def load_energy(
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        data = get_example_data("energy.json.gz").decode("utf-8")
-        pdf = pd.read_json(data)
+        url = get_example_url("energy.json.gz")
+        pdf = pd.read_json(url, compression="gzip")
         pdf = pdf.head(100) if sample else pdf
         pdf.to_sql(
             tbl_name,
diff --git a/superset/examples/flights.py b/superset/examples/flights.py
index 46fdc5c1d07a..1389c65c9a90 100644
--- a/superset/examples/flights.py
+++ b/superset/examples/flights.py
@@ -20,7 +20,7 @@
 import superset.utils.database as database_utils
 from superset import db
 
-from .helpers import get_example_data, get_table_connector_registry
+from .helpers import get_example_url, get_table_connector_registry
 
 
 def load_flights(only_metadata: bool = False, force: bool = False) -> None:
@@ -32,12 +32,12 @@ def load_flights(only_metadata: bool = False, force: bool = False) -> None:
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        data = get_example_data("flight_data.csv.gz", make_bytes=True)
-        pdf = pd.read_csv(data, encoding="latin-1")
+        flight_data_url = get_example_url("flight_data.csv.gz")
+        pdf = pd.read_csv(flight_data_url, encoding="latin-1", compression="gzip")
 
         # Loading airports info to join and get lat/long
-        airports_bytes = get_example_data("airports.csv.gz", make_bytes=True)
-        airports = pd.read_csv(airports_bytes, encoding="latin-1")
+        airports_url = get_example_url("airports.csv.gz")
+        airports = pd.read_csv(airports_url, encoding="latin-1", compression="gzip")
         airports = airports.set_index("IATA_CODE")
 
         pdf[  # pylint: disable=unsupported-assignment-operation,useless-suppression
diff --git a/superset/examples/helpers.py b/superset/examples/helpers.py
index 2183a8d512f1..e26e05e49739 100644
--- a/superset/examples/helpers.py
+++ b/superset/examples/helpers.py
@@ -17,10 +17,7 @@
 """Loads datasets, dashboards and slices in a new superset instance"""
 import json
 import os
-import zlib
-from io import BytesIO
-from typing import Union, Any, Dict, List, Set
-from urllib import request
+from typing import Any, Dict, List, Set
 
 from superset import app, db
 from superset.connectors.sqla.models import SqlaTable
@@ -73,14 +70,5 @@ def get_slice_json(defaults: Dict[Any, Any], **kwargs: Any) -> str:
     return json.dumps(defaults_copy, indent=4, sort_keys=True)
 
 
-def get_example_data(
-    filepath: str, is_gzip: bool = True, make_bytes: bool = False
-) -> Union[bytes, BytesIO]:
-    content = request.urlopen(  # pylint: disable=consider-using-with
-        f"{BASE_URL}{filepath}?raw=true"
-    ).read()
-    if is_gzip:
-        content = zlib.decompress(content, zlib.MAX_WBITS | 16)
-    if make_bytes:
-        content = BytesIO(content)
-    return content
+def get_example_url(filepath: str) -> str:
+    return f"{BASE_URL}{filepath}?raw=true"
diff --git a/superset/examples/long_lat.py b/superset/examples/long_lat.py
index ba9824bb43fe..76f51a615951 100644
--- a/superset/examples/long_lat.py
+++ b/superset/examples/long_lat.py
@@ -27,7 +27,7 @@
 from superset.utils.core import DatasourceType
 
 from .helpers import (
-    get_example_data,
+    get_example_url,
     get_slice_json,
     get_table_connector_registry,
     merge_slice,
@@ -44,8 +44,8 @@ def load_long_lat_data(only_metadata: bool = False, force: bool = False) -> None
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        data = get_example_data("san_francisco.csv.gz", make_bytes=True)
-        pdf = pd.read_csv(data, encoding="utf-8")
+        url = get_example_url("san_francisco.csv.gz")
+        pdf = pd.read_csv(url, encoding="utf-8", compression="gzip")
         start = datetime.datetime.now().replace(
             hour=0, minute=0, second=0, microsecond=0
         )
diff --git a/superset/examples/multiformat_time_series.py b/superset/examples/multiformat_time_series.py
index 1209ff184941..62e16d2cb088 100644
--- a/superset/examples/multiformat_time_series.py
+++ b/superset/examples/multiformat_time_series.py
@@ -25,7 +25,7 @@
 
 from ..utils.database import get_example_database
 from .helpers import (
-    get_example_data,
+    get_example_url,
     get_slice_json,
     get_table_connector_registry,
     merge_slice,
@@ -44,8 +44,8 @@ def load_multiformat_time_series(  # pylint: disable=too-many-locals
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        data = get_example_data("multiformat_time_series.json.gz").decode("utf-8")
-        pdf = pd.read_json(data)
+        url = get_example_url("multiformat_time_series.json.gz")
+        pdf = pd.read_json(url, compression="gzip")
         # TODO(bkyryliuk): move load examples data into the pytest fixture
         if database.backend == "presto":
             pdf.ds = pd.to_datetime(pdf.ds, unit="s")
diff --git a/superset/examples/paris.py b/superset/examples/paris.py
index 9fa2fedb5e46..c32300702852 100644
--- a/superset/examples/paris.py
+++ b/superset/examples/paris.py
@@ -22,7 +22,7 @@
 import superset.utils.database as database_utils
 from superset import db
 
-from .helpers import get_example_data, get_table_connector_registry
+from .helpers import get_example_url, get_table_connector_registry
 
 
 def load_paris_iris_geojson(only_metadata: bool = False, force: bool = False) -> None:
@@ -33,8 +33,8 @@ def load_paris_iris_geojson(only_metadata: bool = False, force: bool = False) ->
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        data = get_example_data("paris_iris.json.gz").decode("utf-8")
-        df = pd.read_json(data)
+        url = get_example_url("paris_iris.json.gz")
+        df = pd.read_json(url, compression="gzip")
         df["features"] = df.features.map(json.dumps)
 
         df.to_sql(
diff --git a/superset/examples/random_time_series.py b/superset/examples/random_time_series.py
index a7972e70d7f4..4a2628df7a07 100644
--- a/superset/examples/random_time_series.py
+++ b/superset/examples/random_time_series.py
@@ -24,7 +24,7 @@
 from superset.utils.core import DatasourceType
 
 from .helpers import (
-    get_example_data,
+    get_example_url,
     get_slice_json,
     get_table_connector_registry,
     merge_slice,
@@ -42,8 +42,8 @@ def load_random_time_series_data(
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        data = get_example_data("random_time_series.json.gz").decode("utf-8")
-        pdf = pd.read_json(data)
+        url = get_example_url("random_time_series.json.gz")
+        pdf = pd.read_json(url, compression="gzip")
         if database.backend == "presto":
             pdf.ds = pd.to_datetime(pdf.ds, unit="s")
             pdf.ds = pdf.ds.dt.strftime("%Y-%m-%d %H:%M%:%S")
diff --git a/superset/examples/sf_population_polygons.py b/superset/examples/sf_population_polygons.py
index c5cdc0707c95..71ba34401af9 100644
--- a/superset/examples/sf_population_polygons.py
+++ b/superset/examples/sf_population_polygons.py
@@ -22,7 +22,7 @@
 import superset.utils.database as database_utils
 from superset import db
 
-from .helpers import get_example_data, get_table_connector_registry
+from .helpers import get_example_url, get_table_connector_registry
 
 
 def load_sf_population_polygons(
@@ -35,8 +35,8 @@ def load_sf_population_polygons(
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        data = get_example_data("sf_population.json.gz").decode("utf-8")
-        df = pd.read_json(data)
+        url = get_example_url("sf_population.json.gz")
+        df = pd.read_json(url, compression="gzip")
         df["contour"] = df.contour.map(json.dumps)
 
         df.to_sql(
diff --git a/superset/examples/world_bank.py b/superset/examples/world_bank.py
index 2531ba34640a..4a18f806eae5 100644
--- a/superset/examples/world_bank.py
+++ b/superset/examples/world_bank.py
@@ -33,7 +33,7 @@
 
 from ..connectors.base.models import BaseDatasource
 from .helpers import (
-    get_example_data,
+    get_example_url,
     get_examples_folder,
     get_slice_json,
     get_table_connector_registry,
@@ -56,8 +56,8 @@ def load_world_bank_health_n_pop(  # pylint: disable=too-many-locals, too-many-s
     table_exists = database.has_table_by_name(tbl_name)
 
    if not only_metadata and (not table_exists or force):
-        data = get_example_data("countries.json.gz").decode("utf-8")
-        pdf = pd.read_json(data)
+        url = get_example_url("countries.json.gz")
+        pdf = pd.read_json(url, compression="gzip")
         pdf.columns = [col.replace(".", "_") for col in pdf.columns]
         if database.backend == "presto":
            pdf.year = pd.to_datetime(pdf.year)
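
Note (not part of the diff): a minimal, hypothetical sketch of the pattern the change relies on, namely that pandas can read a gzipped example file straight from a URL, so no manual urlopen/zlib/BytesIO plumbing is needed. The BASE_URL value below is illustrative only; get_example_url mirrors the new helper from superset/examples/helpers.py.

    import pandas as pd

    # Assumed example-data location; the real value is defined in superset/examples/helpers.py.
    BASE_URL = "https://github.com/apache-superset/examples-data/blob/master/"

    def get_example_url(filepath: str) -> str:
        # Same shape as the new helper: return a URL instead of downloaded bytes.
        return f"{BASE_URL}{filepath}?raw=true"

    # pandas fetches and decompresses the remote file itself; compression="gzip"
    # makes the decompression explicit rather than relying on the ".gz" suffix.
    pdf = pd.read_json(get_example_url("energy.json.gz"), compression="gzip")
    print(pdf.head())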