diff --git a/nmdc_schema/migrators/migrator_from_PR176_to_PR104.py b/nmdc_schema/migrators/migrator_from_PR176_to_PR104.py index 74ad3a5d4a..bd8c3e69bb 100644 --- a/nmdc_schema/migrators/migrator_from_PR176_to_PR104.py +++ b/nmdc_schema/migrators/migrator_from_PR176_to_PR104.py @@ -4,8 +4,6 @@ class Migrator(MigratorBase): r""" Migrates a database between two schemas. - - This migrator removes fields from documents of type `nmdc:MassSpectrometry` in the `data_generation_set` collection. """ _from_version = "PR176" @@ -13,7 +11,8 @@ class Migrator(MigratorBase): def upgrade(self) -> None: r""" - Migrates the database from conforming to the original schema, to conforming to the new schema. + Deletes specific fields from documents (whose `type` is `nmdc:MassSpectrometry`) residing in + the `data_generation_set` collection. """ self.adapter.process_each_document( @@ -24,11 +23,11 @@ def upgrade(self) -> None: @staticmethod def delete_obsolete_fields(data_generation: dict) -> dict: r""" - Deletes fields from the specified document of type `nmdc:MassSpectrometry`. + Deletes specific fields from a document representing a `DataGeneration` instance. - If the document is not of type `nmdc:MassSpectrometry`, this function will not modify the document. - If the field exists and is empty, this function will delete the field. - If the field exists and is not empty, this function will raise an exception. + If the document's `type` is not `nmdc:MassSpectrometry`, this function will not modify the document. + If a target field exists and is empty, this function will delete the field. On the other hand, if a target + field exists and is not empty, this function will not delete the field and will, instead, raise an exception. >>> m = Migrator() >>> m.delete_obsolete_fields({'id': 123, 'type': 'nmdc:MassSpectrometry'}) # no obsolete fields exist @@ -52,12 +51,12 @@ def delete_obsolete_fields(data_generation: dict) -> dict: ... 'ncbi_project_name': 'a'}) # single-valued, is not empty Traceback (most recent call last): ... - ValueError: Field "ncbi_project_name" in document "123" is not empty (has value "a"). + ValueError: Field "ncbi_project_name" in document "123" is not empty (contains value "a"). >>> m.delete_obsolete_fields({'id': 123, 'type': 'nmdc:MassSpectrometry', ... 'gold_sequencing_project_identifiers': ['a']}) # multivalued, is not empty Traceback (most recent call last): ... - ValueError: Field "gold_sequencing_project_identifiers" in document "123" is not empty (has value "['a']"). + ValueError: Field "gold_sequencing_project_identifiers" in document "123" is not empty (contains value "['a']"). >>> m.delete_obsolete_fields({'id': 123, 'type': 'nmdc:MassSpectrometry', ... 'ncbi_project_name': None, ... 'target_gene': '', @@ -68,44 +67,45 @@ def delete_obsolete_fields(data_generation: dict) -> dict: {'id': 123, 'type': 'nmdc:MassSpectrometry'} """ - # If this document is not of type "nmdc:MassSpectrometry", return the document as-is (i.e. no changes). + # If this document's `type` is not "nmdc:MassSpectrometry", leave the document as-is. if data_generation["type"] != "nmdc:MassSpectrometry": return data_generation - # Emptiness is represented by: None or "" + # List the names of the single-valued fields we want to delete. + # Note: Their emptiness is represented by `None` or `""`. single_valued_fields_to_delete = [ "ncbi_project_name", "target_gene", "target_subfragment", ] - # Emptiness is represented by: None or [] + # List the names of the multivalued fields we want to delete. + # Note: Their emptiness is represented by `None` or `[]`. multi_valued_fields_to_delete = [ "gold_sequencing_project_identifiers", "insdc_bioproject_identifiers", "insdc_experiment_identifiers", ] - for field_name in ( - single_valued_fields_to_delete + multi_valued_fields_to_delete - ): + # Combine the two lists. + field_names = single_valued_fields_to_delete + multi_valued_fields_to_delete + + for field_name in field_names: # Check whether the field exists. if field_name in data_generation: - document_id = data_generation["id"] value = data_generation[field_name] # Check whether the field is empty (using emptiness criteria appropriate for the field). if ( - field_name in single_valued_fields_to_delete - and value not in [None, ""] + field_name in single_valued_fields_to_delete and value in [None, ""] ) or ( - field_name in multi_valued_fields_to_delete - and value not in [None, []] + field_name in multi_valued_fields_to_delete and value in [None, []] ): + del data_generation[field_name] + else: + document_id = data_generation["id"] raise ValueError( - f'Field "{field_name}" in document "{document_id}" is not empty (has value "{value}").' + f'Field "{field_name}" in document "{document_id}" is not empty (contains value "{value}").' ) - else: - data_generation.pop(field_name) return data_generation