Skip to content

Commit

Permalink
BUG: Add check_json_dataset_name(): close #190
Browse files Browse the repository at this point in the history
  • Loading branch information
fedarko committed Jul 15, 2019
1 parent 2f6e859 commit b270b37
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 4 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@
([#183](https://github.com/biocore/qurro/issues/183))
Before, input datasets with this column name would have caused either the
loss of this column of data or confusing errors.
- Previously, there was a small (either almost or entirely impossible) chance
that all of the main data within the rank or sample plot JSON could get
overwritten due to a collision in the dataset name. Although this should
basically never happen anyway, Qurro's code now checks for this scenario and
raises an error accordingly.
([#190](https://github.com/biocore/qurro/issues/190))
### Performance enhancements
### Miscellaneous
- A clear error is now raised if Qurro is trying to parse a GNPS feature
Expand Down
22 changes: 22 additions & 0 deletions qurro/_json_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,3 +360,25 @@ def replace_js_json_definitions(
# If this was called on a JS test then this perfectly normal, but if this
# was called during the execution of Qurro then this is a problem.
return 1


def check_json_dataset_names(json_dict, *restricted_names):
"""Checks that certain dataset names aren't present in a Vega-Lite JSON.
This should never really fail, since all of the special dataset names we
use internally start with "qurro_" (and Altair generates hashes of the
data somehow to use as dataset names). However, this function is here
just in the exceedingly rare case that this conflict does actually come
up (if so, we can figure out a way around it).
"""

if "datasets" not in json_dict:
# Should never happen normally but might as well check for this
raise ValueError("Input JSON doesn't have any datasets defined")

intersection = set(json_dict["datasets"].keys()) & set(restricted_names)
if len(intersection) > 0:
raise ValueError(
"Found the following disallowed dataset name(s) in a JSON: "
"{}".format(intersection)
)
13 changes: 9 additions & 4 deletions qurro/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
import pandas as pd
import altair as alt
from qurro._rank_utils import filter_unextreme_features
from qurro._json_utils import replace_js_json_definitions
from qurro._json_utils import (
replace_js_json_definitions,
check_json_dataset_names,
)
from qurro._df_utils import (
replace_nan,
validate_df,
Expand Down Expand Up @@ -290,6 +293,8 @@ def gen_rank_plot(V, ranking_ids, feature_metadata_cols):
rank_chart_json = rank_chart.to_dict()
rank_ordering = "qurro_rank_ordering"
fm_col_ordering = "qurro_feature_metadata_ordering"
check_json_dataset_names(rank_chart_json, rank_ordering, fm_col_ordering)

# Note we don't use rank_data.columns for setting the rank ordering. This
# is because rank_data's columns now include both the ranking IDs and the
# "Feature ID" and "qurro_classification" columns (as well as any feature
Expand Down Expand Up @@ -373,6 +378,8 @@ def gen_sample_plot(metadata):
sample_chart_dict = sample_chart.to_dict()
sample_chart_dict["mark"] = {"type": "circle"}

sm_fields = "qurro_sample_metadata_fields"
check_json_dataset_names(sample_chart_dict, sm_fields)
# Specify an alphabetical ordering for the sample metadata field names.
# This will be used for populating the x-axis / color field selectors in
# Qurro's sample plot controls.
Expand All @@ -388,9 +395,7 @@ def gen_sample_plot(metadata):
# y-axis of the sample plot automatically.)
sorted_md_cols = list(sorted(sample_metadata.columns, key=str.lower))
sorted_md_cols.remove("qurro_balance")
sample_chart_dict["datasets"][
"qurro_sample_metadata_fields"
] = sorted_md_cols
sample_chart_dict["datasets"][sm_fields] = sorted_md_cols
return sample_chart_dict


Expand Down
52 changes: 52 additions & 0 deletions qurro/tests/test_json_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
plot_jsons_equal,
try_to_replace_line_json,
replace_js_json_definitions,
check_json_dataset_names,
)


Expand Down Expand Up @@ -206,6 +207,57 @@ def test_plot_jsons_equal():
assert plot_jsons_equal(a, b)


def test_check_json_dataset_names():
def assert_in_e_info(e_info, *should_be_in):
s_val = str(e_info.value)
for thing in should_be_in:
assert thing in s_val

# Test weird case where no datasets are present
with pytest.raises(ValueError) as exception_info:
check_json_dataset_names({})
assert_in_e_info(exception_info, "JSON doesn't have any datasets defined")

# Should succeed if no conflict between restricted_names and the keys
# within a["datasets"]
a = {"a": "b", "datasets": {"asdf": {1: 2}}}
check_json_dataset_names(a, "abc", "ASDF", "a")

# Should fail if there is a conflict!
with pytest.raises(ValueError) as exception_info:
check_json_dataset_names(a, "lol", "ok", "but", "asdf")
assert_in_e_info(
exception_info,
"Found the following disallowed dataset name(s)",
"asdf",
)

# Also works properly if there are multiple datasets already defined
# (shouldn't ever be the case but might as well check)
a = {"a": "b", "datasets": {"asdf": {1: 2}, "thing2": {3: 4}}}
# (should succeed)
check_json_dataset_names(a, "abc", "ASDF", "a", "THING2")
# (should fail)
with pytest.raises(ValueError) as exception_info:
check_json_dataset_names(a, "lol", "thing2", "asdf")
assert_in_e_info(
exception_info,
"Found the following disallowed dataset name(s)",
"asdf",
"thing2",
)
# (should also fail)
with pytest.raises(ValueError) as exception_info:
check_json_dataset_names(a, "lol", "thing2")
assert_in_e_info(
exception_info,
"Found the following disallowed dataset name(s)",
"thing2",
)
# Only the intersection should be reported
assert "asdf" not in str(exception_info.value)


def test_try_to_replace_line_json():
# Test various cases where we expect a replacement
good_rank_line = " var rankPlotJSON = {};\n"
Expand Down

0 comments on commit b270b37

Please sign in to comment.