feat: separated task preprocessing from simulation

biosimulators · Sep 16, 2021 · d615b24 · d615b24
1 parent c06277b
commit d615b24
Show file tree

Hide file tree

Showing 8 changed files with 1,033 additions and 70 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,7 +1,7 @@
 # Base OS
 FROM python:3.9-slim-buster
 
-ARG VERSION="0.0.6"
+ARG VERSION="0.0.7"
 ARG SIMULATOR_VERSION=3.0.0b
 
 # metadata

diff --git a/biosimulators_ginsim/_version.py b/biosimulators_ginsim/_version.py
@@ -1 +1 @@
-__version__ = '0.0.6'
+__version__ = '0.0.7'
diff --git a/biosimulators_ginsim/core.py b/biosimulators_ginsim/core.py
@@ -6,7 +6,7 @@
 :License: MIT
 """
 
-from .utils import (validate_simulation, get_variable_target_xpath_ids,
+from .utils import (validate_simulation, get_variable_target_xpath_ids, validate_variables,
                     read_model, set_up_simulation, exec_simulation, get_variable_results)
 from biosimulators_utils.combine.exec import exec_sedml_docs_in_archive
 from biosimulators_utils.config import get_config, Config  # noqa: F401
@@ -18,9 +18,12 @@
                                                   SteadyStateSimulation, UniformTimeCourseSimulation,
                                                   Variable, Symbol)
 from biosimulators_utils.sedml.exec import exec_sed_doc as base_exec_sed_doc
+from biosimulators_utils.sedml.utils import apply_changes_to_xml_model
 from biosimulators_utils.utils.core import raise_errors_warnings
+import copy
 import lxml.etree
 import os
+import tempfile
 
 __all__ = ['exec_sedml_docs_in_combine_archive', 'exec_sed_doc', 'exec_sed_task', 'preprocess_sed_task']
 
@@ -129,83 +132,153 @@ def exec_sed_task(task, variables, preprocessed_task=None, log=None, config=None
     model = task.model
     sim = task.simulation
 
+    # get the preprocessed model and variable target map
+    biolqm_model = preprocessed_task['model']
+    variable_target_sbml_id_map = preprocessed_task['variable_target_sbml_id_map']
+
+    # modify model
+    if model.changes:
+        if model.language == ModelLanguage.SBML.value:
+            allowed_change_types = ()
+            # allowed_change_types = (ModelAttributeChange,) # TODO: uncomment when bioLQM recognizes initial state
+            raise_errors_warnings(
+                validation.validate_model_change_types(model.changes, allowed_change_types),
+                error_summary='Changes for model `{}` are not supported.'.format(model.id))
+
+            model_etree = preprocessed_task['model_etree']
+
+            model = copy.deepcopy(model)
+            for change in model.changes:
+                change.new_value = str(int(float(change.new_value)))
+
+            apply_changes_to_xml_model(model, model_etree, sed_doc=None, working_dir=None)
+
+            model_file, model_filename = tempfile.mkstemp(suffix='.xml')
+            os.close(model_file)
+
+            model_etree.write(model_filename,
+                              xml_declaration=True,
+                              encoding="utf-8",
+                              standalone=False,
+                              pretty_print=False)
+
+            biolqm_model = read_model(model_filename, model.language)
+
+            os.remove(model_filename)
+
+        else:
+            raise_errors_warnings(
+                validation.validate_model_change_types(model.changes, ()),
+                error_summary='Changes for model `{}` are not supported.'.format(model.id))
+
+    # validate simulation and get the preprocessed algorithm method and its arguments
+    raise_errors_warnings(
+        *validate_simulation(sim),
+        error_summary='Simulation `{}` is invalid.'.format(sim.id))
+
+    alg_method_name = preprocessed_task['algorithm_method_name']
+    alg_method_args = preprocessed_task['algorithm_method_args'](sim)
+
+    # run simulation
+    raw_results = exec_simulation(alg_method_name, biolqm_model, alg_method_args)
+
+    # transform results
+    variable_results = get_variable_results(variables, model.language, variable_target_sbml_id_map, sim, raw_results)
+
+    # log action
+    if config.LOG:
+        log.algorithm = preprocessed_task['algorithm_kisao_id']
+        log.simulator_details = {
+            'method': alg_method_name,
+            'arguments': alg_method_args,
+        }
+
+    ############################
+    # return the result of each variable and log
+    return variable_results, log
+
+
+def preprocess_sed_task(task, variables, config=None):
+    """ Preprocess a SED task, including its possible model changes and variables. This is useful for avoiding
+    repeatedly initializing tasks on repeated calls of :obj:`exec_sed_task`.
+
+    Args:
+        task (:obj:`Task`): task
+        variables (:obj:`list` of :obj:`Variable`): variables that should be recorded
+        config (:obj:`Config`, optional): BioSimulators common configuration
+
+    Returns:
+        :obj:`dict`: preprocessed model, variable target map, and algorithm settings for the task
+    """
+    config = config or get_config()
+
+    # validate task
+    model = task.model
+    sim = task.simulation
+
     if config.VALIDATE_SEDML:
         raise_errors_warnings(
             validation.validate_task(task),
             error_summary='Task `{}` is invalid.'.format(task.id))
         raise_errors_warnings(
             validation.validate_model_language(
-                task.model.language,
+                model.language,
                 (ModelLanguage.SBML, ModelLanguage.ZGINML)),
             error_summary='Language for model `{}` is not supported.'.format(model.id))
+
+        if model.language == ModelLanguage.SBML.value:
+            allowed_change_types = (ModelAttributeChange,)
+        else:
+            allowed_change_types = ()
         raise_errors_warnings(
-            validation.validate_model_change_types(task.model.changes, ()),
+            validation.validate_model_change_types(model.changes, allowed_change_types),
             error_summary='Changes for model `{}` are not supported.'.format(model.id))
         raise_errors_warnings(
-            *validation.validate_model_changes(task.model),
+            *validation.validate_model_changes(model),
             error_summary='Changes for model `{}` are invalid.'.format(model.id))
         raise_errors_warnings(
-            validation.validate_simulation_type(task.simulation,
+            validation.validate_simulation_type(sim,
                                                 (UniformTimeCourseSimulation, SteadyStateSimulation)),
             error_summary='{} `{}` is not supported.'.format(sim.__class__.__name__, sim.id))
         raise_errors_warnings(
-            *validation.validate_simulation(task.simulation),
+            *validation.validate_simulation(sim),
             error_summary='Simulation `{}` is invalid.'.format(sim.id))
         raise_errors_warnings(
-            *validate_simulation(task.simulation),
+            *validate_simulation(sim),
             error_summary='Simulation `{}` is invalid.'.format(sim.id))
 
-    if not os.path.isfile(task.model.source):
-        raise FileNotFoundError('`{}` is not a file.'.format(task.model.source))
+    if not os.path.isfile(model.source):
+        raise FileNotFoundError('`{}` is not a file.'.format(model.source))
 
     if model.language == ModelLanguage.SBML.value:
-        model_etree = lxml.etree.parse(task.model.source)
-        target_xpath_ids = get_variable_target_xpath_ids(variables, model_etree)
+        model_etree = lxml.etree.parse(model.source)
+        variable_target_sbml_id_map = get_variable_target_xpath_ids(variables, model_etree)
     else:
         model_etree = None
-        target_xpath_ids = None
+        variable_target_sbml_id_map = None
 
     # validate model
     if config.VALIDATE_SEDML_MODELS:
-        raise_errors_warnings(*validation.validate_model(task.model, [], working_dir='.'),
+        raise_errors_warnings(*validation.validate_model(model, [], working_dir='.'),
                               error_summary='Model `{}` is invalid.'.format(model.id),
                               warning_summary='Model `{}` may be invalid.'.format(model.id))
 
     # read model
     biolqm_model = read_model(model.source, model.language)
 
+    # validate variables
+    validate_variables(variables, biolqm_model, model.language, variable_target_sbml_id_map, sim)
+
     # setup simulation
     alg_kisao_id, method_name, method_args = set_up_simulation(sim, config=config)
 
-    # run simulation
-    raw_results = exec_simulation(method_name, biolqm_model, method_args)
-
-    # transform results
-    variable_results = get_variable_results(variables, model.language, target_xpath_ids, sim, raw_results)
-
-    # log action
-    if config.LOG:
-        log.algorithm = alg_kisao_id
-        log.simulator_details = {
-            'method': method_name,
-            'arguments': method_args,
-        }
-
     ############################
-    # return the result of each variable and log
-    return variable_results, log
-
-
-def preprocess_sed_task(task, variables, config=None):
-    """ Preprocess a SED task, including its possible model changes and variables. This is useful for avoiding
-    repeatedly initializing tasks on repeated calls of :obj:`exec_sed_task`.
-
-    Args:
-        task (:obj:`Task`): task
-        variables (:obj:`list` of :obj:`Variable`): variables that should be recorded
-        config (:obj:`Config`, optional): BioSimulators common configuration
-
-    Returns:
-        :obj:`object`: preprocessed information about the task
-    """
-    pass
+    # return preprocessed information
+    return {
+        'model': biolqm_model,
+        'model_etree': model_etree,
+        'variable_target_sbml_id_map': variable_target_sbml_id_map,
+        'algorithm_kisao_id': alg_kisao_id,
+        'algorithm_method_name': method_name,
+        'algorithm_method_args': method_args,
+    }
diff --git a/biosimulators_ginsim/utils.py b/biosimulators_ginsim/utils.py
@@ -24,11 +24,13 @@
 import numpy
 import os
 import py4j.java_gateway  # noqa: F401
+import types  # noqa: F401
 
 __all__ = [
     'validate_simulation',
     'validate_time_course',
     'get_variable_target_xpath_ids',
+    'validate_variables',
     'read_model',
     'set_up_simulation',
     'exec_simulation',
@@ -123,6 +125,59 @@ def get_variable_target_xpath_ids(variables, model_etree):
     )
 
 
+def validate_variables(variables, model, model_language, target_xpath_ids, simulation):
+    """ Check that each SED-ML variable can be recorded, raising :obj:`ValueError` otherwise
+
+    Args:
+        variables (:obj:`list` of :obj:`Variable`): variables
+        model (:obj:`py4j.java_gateway.JavaObject`): bioLQM model
+        model_language (:obj:`str`): model language
+        target_xpath_ids (:obj:`dict`): dictionary that maps XPaths to the SBML qualitative ids
+            of the corresponding objects
+        simulation (:obj:`Simulation`): analysis
+    """
+    component_ids = set(component.getNodeID() for component in model.getComponents())
+    invalid_variables = []
+
+    for variable in variables:
+        if variable.symbol:
+            if not (isinstance(simulation, UniformTimeCourseSimulation) and variable.symbol == Symbol.time.value):
+                invalid_variables.append('{}: symbol: {}'.format(variable.id, variable.symbol))
+        else:
+            if model_language == ModelLanguage.SBML.value:
+                id = target_xpath_ids[variable.target]
+            else:
+                id = variable.target
+
+            if id not in component_ids:
+                invalid_variables.append('{}: target: {}'.format(variable.id, variable.target))
+
+    if invalid_variables:
+        valid_variables = []
+
+        if isinstance(simulation, UniformTimeCourseSimulation):
+            valid_variables.append('symbol: {}'.format(Symbol.time.value))
+
+        for component_id in component_ids:
+            if model_language == ModelLanguage.SBML.value:
+                valid_variables.append(
+                    "target: /sbml:sbml/sbml:model/qual:listOfQualitativeSpecies/qual:qualitativeSpecies[@id='{}']".format(component_id))
+            else:
+                valid_variables.append(
+                    'target: {}'.format(component_id))
+
+        raise ValueError((
+            'The following variables cannot be recorded:\n'
+            '  {}\n'
+            '\n'
+            'Variables with the following symbols and targets can be recorded:\n'
+            '  {}'
+        ).format(
+            '\n  '.join(sorted(invalid_variables)),
+            '\n  '.join(sorted(valid_variables)),
+        ))
+
+
 def read_model(filename, language):
     """ Read a model
 
@@ -160,7 +215,7 @@ def set_up_simulation(simulation, config=None):
 
             * :obj:`str`: KiSAO of algorithm to execute
             * :obj:`str`: name of the :obj:`biolqm` simulation/analysis method
-            * :obj:`list` of :obj:`str`: arguments for simulation method
+            * :obj:`types.LambdaType` of :obj:`Simulation` -> :obj:`list` of :obj:`str`: function that builds the arguments for the simulation method
     """
     # simulation algorithm
     alg_kisao_id = simulation.algorithm.kisao_id
@@ -174,7 +229,7 @@ def set_up_simulation(simulation, config=None):
             simulation.__class__.__name__, exec_kisao_id, alg_props['name']))
 
     method = alg_props['method']
-    method_args = alg_props['method_args'](simulation)
+    method_args = []
 
     # Apply the algorithm parameter changes specified by `simulation.algorithm.parameter_changes`
     if exec_kisao_id == alg_kisao_id:
@@ -211,7 +266,11 @@ def set_up_simulation(simulation, config=None):
             warn('Unsuported algorithm parameter `{}` was ignored.'.format(change.kisao_id), BioSimulatorsWarning)
 
     # return
-    return (exec_kisao_id, method, method_args)
+    return (
+        exec_kisao_id,
+        method,
+        lambda simulation: alg_props['method_args'](simulation) + method_args,
+    )
 
 
 def exec_simulation(method_name, model, args=None):
@@ -253,28 +312,18 @@ def get_variable_results(variables, model_language, target_xpath_ids, simulation
     for variable in variables:
         variable_results[variable.id] = numpy.full((n_states,), numpy.nan)
 
-    invalid_variables = []
     for i_state, state in enumerate(raw_results):
         for variable in variables:
             if variable.symbol:
-                if isinstance(simulation, UniformTimeCourseSimulation) and variable.symbol == Symbol.time.value:
-                    variable_results[variable.id][i_state] = i_state
-                else:
-                    invalid_variables.append('{}: symbol: {}'.format(variable.id, variable.symbol))
+                variable_results[variable.id][i_state] = i_state
 
             else:
                 if model_language == ModelLanguage.SBML.value:
                     id = target_xpath_ids[variable.target]
                 else:
                     id = variable.target
 
-                variable_results[variable.id][i_state] = state.get(id, numpy.nan)
-                if i_state == 0 and numpy.isnan(variable_results[variable.id][i_state]):
-                    invalid_variables.append('{}: target: {}'.format(variable.id, variable.target))
-
-    if invalid_variables:
-        raise ValueError('The following variables could not recorded:\n  {}'.format(
-            '\n  '.join(sorted(invalid_variables))))
+                variable_results[variable.id][i_state] = state[id]
 
     if isinstance(simulation, UniformTimeCourseSimulation):
         for key in variable_results.keys():

diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
-biosimulators_utils[logging,sbml] >= 0.1.116
+biosimulators_utils[logging,sbml] >= 0.1.120
 ginsim >= 0.4.4
 kisao >= 2.28
 lxml