Skip to content

Commit

Permalink
Merge pull request #250 from microbiomedata/2180_protocol_type_migrator
Browse files Browse the repository at this point in the history
Add migrator to check and add `type` slot to `Protocol` records
  • Loading branch information
aclum authored Sep 11, 2024
2 parents 7473211 + 4ba9bcc commit 36e5db9
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
migrator_from_10_2_0_to_11_0_0_part_14,
migrator_from_10_2_0_to_11_0_0_part_15,
migrator_from_10_2_0_to_11_0_0_part_16,
migrator_from_10_2_0_to_11_0_0_part_17,
)

def get_migrator_classes() -> List[Type[MigratorBase]]:
Expand Down Expand Up @@ -52,4 +53,5 @@ def get_migrator_classes() -> List[Type[MigratorBase]]:
migrator_from_10_2_0_to_11_0_0_part_14.Migrator,
migrator_from_10_2_0_to_11_0_0_part_15.Migrator,
migrator_from_10_2_0_to_11_0_0_part_16.Migrator,
migrator_from_10_2_0_to_11_0_0_part_17.Migrator,
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from nmdc_schema.migrators.migrator_base import MigratorBase


class Migrator(MigratorBase):
    r"""
    Migrates a database between two schemas.

    Ensures that every `Protocol` instance stored in a document's `protocol_link`
    field carries a `type` value of `nmdc:Protocol`.
    """

    _from_version = "XX"
    _to_version = "FIXES_ISSUE_2180"

    def upgrade(self) -> None:
        r"""
        Migrates the database from conforming to the original schema, to conforming to the new schema.
        Should be run after migrator_from_10_2_0_to_11_0_0_part_16.py.
        """
        # Add a type slot on the Protocol class within the protocol_link slot on each document
        collections_to_update = [
            "data_generation_set",
            "material_processing_set",
            "collecting_biosamples_from_site_set",
            "protocol_execution_set",
            "storage_process_set",
            "workflow_execution_set",
            "study_set",
        ]

        # Hand the per-document transformation to the adapter, one collection at a time.
        for collection_name in collections_to_update:
            self.adapter.process_each_document(collection_name, [self.add_type_to_protocol_link])

    def add_type_to_protocol_link(self, document: dict) -> dict:
        r"""
        Add a `type` field to each `Protocol` instance within the `protocol_link` field of the document.

        The `protocol_link` value may be a single `Protocol` (a dict) or a list of them.
        NOTE(review): the list form is presumably what multivalued usages of the slot
        (e.g. on `study_set` documents) look like -- confirm against the schema.
        An existing `type` value is never overwritten, and values that are neither a
        dict nor a list of dicts are left untouched. The document is modified in place
        and also returned.

        >>> m = Migrator()
        >>> m.add_type_to_protocol_link({'id': 123})  # no protocol_link field
        {'id': 123}
        >>> m.add_type_to_protocol_link({'id': 123, 'protocol_link': {'id': 456}})
        {'id': 123, 'protocol_link': {'id': 456, 'type': 'nmdc:Protocol'}}
        >>> m.add_type_to_protocol_link({'id': 123, 'protocol_link': {'id': 456, 'type': 'nmdc:Protocol'}})  # test: does not overwrite existing type slot
        {'id': 123, 'protocol_link': {'id': 456, 'type': 'nmdc:Protocol'}}
        >>> m.add_type_to_protocol_link({'id': 123, 'protocol_link': [{'id': 456}, {'id': 789, 'type': 'nmdc:Protocol'}]})  # test: list of protocols
        {'id': 123, 'protocol_link': [{'id': 456, 'type': 'nmdc:Protocol'}, {'id': 789, 'type': 'nmdc:Protocol'}]}
        """

        self.logger.info(f"Starting migration of {document['id']}")

        protocol_link = document.get("protocol_link")

        # Normalize to a list so single-valued and list-valued fields share one code path.
        protocols = protocol_link if isinstance(protocol_link, list) else [protocol_link]
        for protocol in protocols:
            # Skip anything that is not a mapping (e.g. a missing or null field) instead of raising.
            if isinstance(protocol, dict) and "type" not in protocol:
                protocol["type"] = "nmdc:Protocol"

        return document
12 changes: 12 additions & 0 deletions src/data/invalid/LibraryPreparation-missing-protocol-type.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# This record is missing the type field in the protocol_link slot
end_date: '2017-12-12'
has_input:
- nmdc:procsm-12-px6qn983
has_output:
- nmdc:procsm-12-h63qmv56
id: nmdc:libprp-12-1qp98z14
processing_institution: Battelle
start_date: 2014-08-05T18:40Z
protocol_link:
  name: BMI_metagenomicsSequencingSOP_v1
type: nmdc:LibraryPreparation
12 changes: 12 additions & 0 deletions src/data/valid/LibraryPreparation.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
end_date: '2017-12-12'
has_input:
- nmdc:procsm-12-px6qn983
has_output:
- nmdc:procsm-12-h63qmv56
id: nmdc:libprp-12-1qp98z14
processing_institution: Battelle
start_date: 2014-08-05T18:40Z
protocol_link:
  name: BMI_metagenomicsSequencingSOP_v1
  type: nmdc:Protocol
type: nmdc:LibraryPreparation

0 comments on commit 36e5db9

Please sign in to comment.