From b7afa0bb4e7ef2e366b81ae42575fc849d27d458 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Wed, 11 Sep 2024 08:58:29 -0700 Subject: [PATCH 1/8] Add draft of protocol_link type migrator --- .../migrator_from_10_2_0_to_11_0_0_part_17.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py diff --git a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py new file mode 100644 index 0000000000..82b81e4fc2 --- /dev/null +++ b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py @@ -0,0 +1,46 @@ +from nmdc_schema.migrators.migrator_base import MigratorBase + + +class Migrator(MigratorBase): + r""" + Migrates a database between two schemas. + """ + + _from_version = "XX" + _to_version = "XX" #TODO KRH: add version number + + + + def upgrade(self) -> None: + r""" + Migrates the database from conforming to the original schema, to conforming to the new schema. + + Should be run after migrator_from_10_2_0_to_11_0_0_part_16.py. + """ + # Add a type slot on the Protocol class within the protocol_link slot on each document + collections_to_update = [ + "material_processing_set", + "data_generation_set", + "workflow_execution_set" + ] #TODOD KRH: check that these encapsulate all the collections that need to be updated + + for collection_name in collections_to_update: + self.adapter.process_each_document(collection_name, [self.add_type_to_protocol_link]) + + def add_type_to_protocol_link(self, document: dict) -> dict: + r""" + Add a type slot on the Protocol class within the protocol_link slot on each document + + >>> m = Migrator() + >>> m.add_type_to_protocol_link({'id': 123}) # no protocol_link field + {'id': 123} + >>> m.add_type_to_protocol_link({'id': 123, 'protocol_link': {'id': 456}}) + {'id': 123, 'protocol_link': {'id': 456, 'type': 'nmdc:Protocol'}} + >>> m.add_type_to_protocol_link({'id': 123, 'protocol_link': {'id': 456, 'type': 'nmdc:Protocol'}}) # test: does not overwrite existing type slot + {'id': 123, 'protocol_link': {'id': 456, 'type': 'nmdc:Protocol'}} + """ + + self.logger.info(f"Starting migration of {document['id']}") + if "protocol_link" in document: + if "type" not in document["protocol_link"]: + document["protocol_link"]["type"] = "nmdc:Protocol" \ No newline at end of file From 5759d5cb72ab3d8c7bbfa814bb4dc3f1e37f9148 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Wed, 11 Sep 2024 09:09:29 -0700 Subject: [PATCH 2/8] Add PR number and return to migrator --- .../migrator_from_10_2_0_to_11_0_0_part_17.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py index 82b81e4fc2..fbc551bd2e 100644 --- a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py +++ b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py @@ -7,9 +7,7 @@ class Migrator(MigratorBase): """ _from_version = "XX" - _to_version = "XX" #TODO KRH: add version number - - + _to_version = "PR250" def upgrade(self) -> None: r""" @@ -43,4 +41,6 @@ def add_type_to_protocol_link(self, document: dict) -> dict: self.logger.info(f"Starting migration of {document['id']}") if "protocol_link" in document: if "type" not in document["protocol_link"]: - document["protocol_link"]["type"] = "nmdc:Protocol" \ No newline at end of file + document["protocol_link"]["type"] = "nmdc:Protocol" + + return document \ No newline at end of file From edd2f24aa0b915ba70aa0d10b9c081afa0841ce5 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Wed, 11 Sep 2024 09:32:24 -0700 Subject: [PATCH 3/8] Expand collections to search for protocol link --- .../migrator_from_10_2_0_to_11_0_0_part_17.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py index fbc551bd2e..771cd7bcfe 100644 --- a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py +++ b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py @@ -17,10 +17,14 @@ def upgrade(self) -> None: """ # Add a type slot on the Protocol class within the protocol_link slot on each document collections_to_update = [ - "material_processing_set", "data_generation_set", - "workflow_execution_set" - ] #TODOD KRH: check that these encapsulate all the collections that need to be updated + "material_processing_set", + "collecting_biosamples_from_site_set", + "protocol_execution_set", + "storage_process_set", + "workflow_execution_set", + "study_set" + ] for collection_name in collections_to_update: self.adapter.process_each_document(collection_name, [self.add_type_to_protocol_link]) From 1e93b11d46c48998c75edf8fc76ec22c0e996dfa Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Wed, 11 Sep 2024 09:45:36 -0700 Subject: [PATCH 4/8] Add valid and invalid examples for protocol_link type --- .../LibraryPreparation-missing-protocol-type.yaml | 12 ++++++++++++ src/data/valid/LibraryPreparation.yaml | 13 +++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 src/data/invalid/LibraryPreparation-missing-protocol-type.yaml create mode 100644 src/data/valid/LibraryPreparation.yaml diff --git a/src/data/invalid/LibraryPreparation-missing-protocol-type.yaml b/src/data/invalid/LibraryPreparation-missing-protocol-type.yaml new file mode 100644 index 0000000000..7e237df741 --- /dev/null +++ b/src/data/invalid/LibraryPreparation-missing-protocol-type.yaml @@ -0,0 +1,12 @@ +# This record is missing the type field in the protocol_link slot +end_date: '2017-12-12' +has_input: +- nmdc:procsm-12-px6qn983 +has_output: +- nmdc:procsm-12-h63qmv56 +id: nmdc:libprp-12-1qp98z14 +processing_institution: Battelle +start_date: 2014-08-05T18:40Z +protocol_link: + name: BMI_metagenomicsSequencingSOP_v1 +type: nmdc:LibraryPreparation diff --git a/src/data/valid/LibraryPreparation.yaml b/src/data/valid/LibraryPreparation.yaml new file mode 100644 index 0000000000..480d592c08 --- /dev/null +++ b/src/data/valid/LibraryPreparation.yaml @@ -0,0 +1,13 @@ +# This record is missing the type field in the protocol_link slot +end_date: '2017-12-12' +has_input: +- nmdc:procsm-12-px6qn983 +has_output: +- nmdc:procsm-12-h63qmv56 +id: nmdc:libprp-12-1qp98z14 +processing_institution: Battelle +start_date: 2014-08-05T18:40Z +protocol_link: + name: BMI_metagenomicsSequencingSOP_v1 + type: nmdc:Protocol +type: nmdc:LibraryPreparation From 26c8bc6f8dac870c8a9366294d20c6a5d0d944fa Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Wed, 11 Sep 2024 09:52:09 -0700 Subject: [PATCH 5/8] Add part 17 to migrators for berkeley database --- .../partials/migrator_from_10_2_0_to_11_0_0/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/__init__.py b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/__init__.py index 4940a3c136..9f435a2b39 100644 --- a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/__init__.py +++ b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/__init__.py @@ -18,6 +18,7 @@ migrator_from_10_2_0_to_11_0_0_part_14, migrator_from_10_2_0_to_11_0_0_part_15, migrator_from_10_2_0_to_11_0_0_part_16, + migrator_from_10_2_0_to_11_0_0_part_17, ) def get_migrator_classes() -> List[Type[MigratorBase]]: @@ -52,4 +53,5 @@ def get_migrator_classes() -> List[Type[MigratorBase]]: migrator_from_10_2_0_to_11_0_0_part_14.Migrator, migrator_from_10_2_0_to_11_0_0_part_15.Migrator, migrator_from_10_2_0_to_11_0_0_part_16.Migrator, + migrator_from_10_2_0_to_11_0_0_part_17.Migrator, ] From 49440645746f9fa76d31b5910a012726aaa45c48 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Wed, 11 Sep 2024 12:31:35 -0700 Subject: [PATCH 6/8] Update nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py Co-authored-by: eecavanna <134325062+eecavanna@users.noreply.github.com> --- .../migrator_from_10_2_0_to_11_0_0_part_17.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py index 771cd7bcfe..2f711cb41d 100644 --- a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py +++ b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py @@ -7,7 +7,7 @@ class Migrator(MigratorBase): """ _from_version = "XX" - _to_version = "PR250" + _to_version = "FIXES_ISSUE_2180" def upgrade(self) -> None: r""" From 06576cb359436151e48bcd41293ac0aa9c01b3eb Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Wed, 11 Sep 2024 12:32:10 -0700 Subject: [PATCH 7/8] Change slot to field in docstring of migrator part 17 Co-authored-by: eecavanna <134325062+eecavanna@users.noreply.github.com> --- .../migrator_from_10_2_0_to_11_0_0_part_17.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py index 2f711cb41d..d5af4e21d8 100644 --- a/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py +++ b/nmdc_schema/migrators/partials/migrator_from_10_2_0_to_11_0_0/migrator_from_10_2_0_to_11_0_0_part_17.py @@ -31,7 +31,7 @@ def upgrade(self) -> None: def add_type_to_protocol_link(self, document: dict) -> dict: r""" - Add a type slot on the Protocol class within the protocol_link slot on each document + Add a `type` field to the `Protocol` instance within the `protocol_link` field on each document. >>> m = Migrator() >>> m.add_type_to_protocol_link({'id': 123}) # no protocol_link field From 4ba9bcc30cd98574028bc34d3578c256e85cb8e8 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Wed, 11 Sep 2024 12:33:34 -0700 Subject: [PATCH 8/8] Update LibraryPreparation.yaml --- src/data/valid/LibraryPreparation.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/src/data/valid/LibraryPreparation.yaml b/src/data/valid/LibraryPreparation.yaml index 480d592c08..1556f90879 100644 --- a/src/data/valid/LibraryPreparation.yaml +++ b/src/data/valid/LibraryPreparation.yaml @@ -1,4 +1,3 @@ -# This record is missing the type field in the protocol_link slot end_date: '2017-12-12' has_input: - nmdc:procsm-12-px6qn983