From cfc17a9101392c218d60078e1d653bb0e24b2405 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Wed, 5 Jun 2024 16:02:22 +0200 Subject: [PATCH 1/4] fix: integrate changes to worker ingest --- cases/models.py | 33 ++ cases_import/models/executors.py | 171 +++--- .../snapshots/snap_test_models_executor.py | 137 +---- cases_import/tests/test_models_executor.py | 4 +- .../migrations/0003_auto_20240604_1128.py | 545 ++++++++++++++++++ varfish/vueapp/components.d.ts | 2 +- 6 files changed, 685 insertions(+), 207 deletions(-) create mode 100644 cases_qc/migrations/0003_auto_20240604_1128.py diff --git a/cases/models.py b/cases/models.py index bc6891cd1..5bed2d982 100644 --- a/cases/models.py +++ b/cases/models.py @@ -1,3 +1,4 @@ +import json import typing import uuid as uuid_object @@ -171,3 +172,35 @@ def write_pedigree_as_plink(pedigree: Pedigree, outputf: typing.TextIO, family_n "\t".join(row), file=outputf, ) + + +def write_id_mapping_json( + identifier_map: typing.Dict[str, typing.Dict[str, str]], + pedigree: Pedigree, + outputf: typing.TextIO, +): + """Write the per-file sample identifier mapping as a JSON file. + + :param identifier_map: Mapping from file path to dict mapping name in PED to name in VCF. + :param pedigree: The pedigree of the case (currently not used by this function). + :param outputf: The output file. 
+ """ + mappings = [] + for path, ped_to_vcf in identifier_map.items(): + mappings.append( + { + "path": path, + "entries": [ + { + "src": vcf_name, + "dst": ped_name, + } + for ped_name, vcf_name in ped_to_vcf.items() + ], + } + ) + json.dump( + obj={"mappings": mappings}, + fp=outputf, + indent=2, + ) diff --git a/cases_import/models/executors.py b/cases_import/models/executors.py index c68c37698..8895c7491 100644 --- a/cases_import/models/executors.py +++ b/cases_import/models/executors.py @@ -18,7 +18,14 @@ from projectroles.models import Project import pydantic -from cases.models import Disease, Individual, Pedigree, PhenotypicFeature, write_pedigree_as_plink +from cases.models import ( + Disease, + Individual, + Pedigree, + PhenotypicFeature, + write_id_mapping_json, + write_pedigree_as_plink, +) from cases_files.models import ( AbstractFile, IndividualExternalFile, @@ -665,6 +672,7 @@ def _import_ngsbits_qc_mappingqc( class VariantImportExecutorBase(FileImportExecutorBase): """Base class for variant import.""" + max_vcf_files: typing.Optional[int] var_type: str def __init__(self, case: Case, bgjob: CaseImportBackgroundJob): @@ -674,8 +682,9 @@ def __init__(self, case: Case, bgjob: CaseImportBackgroundJob): self.case = case #: The background job, used for logging and getting unique internal paths self.bgjob = bgjob - #: The `FileSystemOptions` for the internal storage. + # Shortcut to storage settings storage_settings = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE + #: The `FileSystemOptions` for the internal storage self.internal_fs_options = FileSystemOptions( protocol="s3", host=storage_settings.host, @@ -698,7 +707,7 @@ def run(self) -> typing.List[PedigreeInternalFile]: else: return [] - def copy_external_internal(self) -> typing.Optional[PedigreeInternalFile]: + def copy_external_internal(self) -> typing.List[PedigreeInternalFile]: """Copy the external VCF file to the internal storage. 
:return: the corresponding `PedigreeInternalFile` object @@ -712,56 +721,71 @@ def copy_external_internal(self) -> typing.Optional[PedigreeInternalFile]: file_attributes__variant_type=self.var_type, mimetype="text/plain+x-bgzip+x-variant-call-format", ) - if extfile_qs.count() > 1: + if self.max_vcf_files is not None and extfile_qs.count() > self.max_vcf_files: raise ValueError( - f"expected at most one {self.var_type} VCF file, found {extfile_qs.count()}" + f"expected at most {self.max_vcf_files} {self.var_type} VCF file(s), found {extfile_qs.count()}" ) - elif extfile_qs.count() == 0: - return None - extfile = extfile_qs.first() - # Copy the file from the external to the internal storage. - bucket = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.bucket - path_int = ( - f"case-data/{uuid_frag(self.case.sodar_uuid)}/{self.bgjob.sodar_uuid}/" - f"{self.var_type}/external-copy.vcf.gz" - ) - path_int_full = f"s3://{bucket}/{path_int}" - with ( - self.external_fs.open(extfile.path, "rb") as inputf, - self.internal_fs.open(path_int_full, "wb") as outputf, - ): - shutil.copyfileobj(inputf, outputf) - - # Create the `PedigreeInternalFile` record after copying is complete. - return PedigreeInternalFile.objects.create( - case=self.case, - path=path_int, - genomebuild=extfile.genomebuild, - mimetype=extfile.mimetype, - file_attributes=extfile.file_attributes, - identifier_map=extfile.identifier_map, - # is copy of the original VCF file - designation=f"variant_calls/{self.var_type}/orig-copy", - # checksum=extfile.checksum, # TODO - pedigree=self.case.pedigree_obj, - ) + result = [] + for idx, extfile in enumerate(extfile_qs): + # Copy the file from the external to the internal storage. 
+ bucket = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.bucket + path_int = ( + f"case-data/{uuid_frag(self.case.sodar_uuid)}/{self.bgjob.sodar_uuid}/" + f"{self.var_type}/external-copy-{idx}.vcf.gz" + ) + path_int_full = f"s3://{bucket}/{path_int}" + path_ext = extfile.path + with ( + self.external_fs.open(path_ext, "rb") as inputf, + self.internal_fs.open(path_int_full, "wb") as outputf, + ): + shutil.copyfileobj(inputf, outputf) + # Create the `PedigreeInternalFile` record after copying is complete. + result.append( + PedigreeInternalFile.objects.create( + case=self.case, + path=path_int, + genomebuild=extfile.genomebuild, + mimetype=extfile.mimetype, + file_attributes=extfile.file_attributes, + identifier_map=extfile.identifier_map, + # is copy of the original VCF file + designation=f"variant_calls/{self.var_type}/orig-copy", + # checksum=extfile.checksum, # TODO + pedigree=self.case.pedigree_obj, + ) + ) + return result - def annotate_outer(self, vcf_on_s3: PedigreeExternalFile) -> typing.List[PedigreeExternalFile]: + def annotate_outer( + self, vcf_on_s3: typing.List[PedigreeExternalFile] + ) -> typing.List[PedigreeExternalFile]: """Annotate the VCF file from the internal storage. Will write temporary PLINK PED file and then call the actual annotation functin. 
""" - with tempfile.NamedTemporaryFile(mode="w+t") as tmpf: - write_pedigree_as_plink(self.case.pedigree_obj, tmpf) - tmpf.flush() - return self.annotate(vcf_on_s3, path_ped=tmpf.name) + with ( + tempfile.NamedTemporaryFile(mode="w+t", suffix=".ped") as tmpf_ped, + tempfile.NamedTemporaryFile(mode="w+t", suffix=".json") as tmpf_id_map, + ): + write_pedigree_as_plink(self.case.pedigree_obj, tmpf_ped) + tmpf_ped.flush() + bucket = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.bucket + write_id_mapping_json( + {f"{bucket}/{entry.path}": entry.identifier_map for entry in vcf_on_s3}, + self.case.pedigree_obj, + tmpf_id_map, + ) + tmpf_id_map.flush() + return self.annotate(vcf_on_s3, path_ped=tmpf_ped.name, path_id_map=tmpf_id_map.name) def annotate( - self, vcf_on_s3: PedigreeExternalFile, path_ped: str + self, vcf_on_s3: typing.List[PedigreeExternalFile], path_ped: str, path_id_map: str ) -> typing.List[PedigreeExternalFile]: _ = vcf_on_s3 _ = path_ped + _ = path_id_map raise NotImplementedError def run_worker(self, args: list[str], env: typing.Dict[str, str] | None = None): @@ -776,6 +800,7 @@ def run_worker(self, args: list[str], env: typing.Dict[str, str] | None = None): class SeqvarsImportExecutor(VariantImportExecutorBase): """Run the import of sequence variant import.""" + max_vcf_files = 1 var_type = "seqvars" def run(self) -> typing.List[PedigreeInternalFile]: @@ -791,10 +816,12 @@ def run(self) -> typing.List[PedigreeInternalFile]: return int_on_s3 def annotate( - self, vcf_on_s3: PedigreeExternalFile, path_ped: str + self, vcf_on_s3: typing.List[PedigreeExternalFile], path_ped: str, path_id_map: str ) -> typing.List[PedigreeExternalFile]: """Implementation of sequence variant annotation.""" - # Path create path of the new fiel. + assert len(vcf_on_s3) == 1, "ensured earlier" + + # Path create path of the new file. 
bucket = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.bucket path_out = ( f"case-data/{uuid_frag(self.case.sodar_uuid)}/{self.bgjob.sodar_uuid}/" @@ -809,16 +836,20 @@ def annotate( "--case-uuid", str(self.case.sodar_uuid), "--genomebuild", - vcf_on_s3.genomebuild, + vcf_on_s3[0].genomebuild, "--path-mehari-db", f"{settings.WORKER_DB_PATH}/mehari", "--path-ped", path_ped, "--path-in", - vcf_on_s3.path, + f"{bucket}/{vcf_on_s3[0].path}", "--path-out", f"{bucket}/{path_out}", + "--id-mapping", + f"@{path_id_map}", ] + # if settings.DEBUG: # XXX remove this + # args += ["--max-var-count", "1000"] # Setup environment so the worker can access the internal S3 storage. endpoint_host = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host endpoint_port = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port @@ -828,7 +859,7 @@ def annotate( "AWS_ACCESS_KEY_ID": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.access_key, "AWS_SECRET_ACCESS_KEY": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.secret_key, "AWS_ENDPOINT_URL": f"http://{endpoint_host}:{endpoint_port}", - # "AWS_REGION": "us-east-1", + "AWS_REGION": "us-east-1", } # Actually execute the worker. self.run_worker(args=args, env=env) @@ -837,9 +868,10 @@ def annotate( PedigreeInternalFile.objects.create( case=self.case, path=f"{path_out}{suffix}", - genomebuild=vcf_on_s3.genomebuild, + genomebuild=vcf_on_s3[0].genomebuild, mimetype=mimetype, - identifier_map=vcf_on_s3.identifier_map, + # NB: no identifier map as ingest fixed it + identifier_map={}, designation=designation, file_attributes={}, # checksum=extfile.checksum, # TODO @@ -863,21 +895,20 @@ def prefilter_seqvars_outer(self, ingested_on_s3: PedigreeInternalFile): """Writes out the prefilter configuration JSON to file and then calls the actual prefiltration. 
""" + bucket = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.bucket with tempfile.NamedTemporaryFile(mode="w+t") as tmpf: configs: list[PrefilterConfig] = settings.VARFISH_CASE_IMPORT_SEQVARS_PREFILTER_CONFIGS out_lst = [] for idx, config in enumerate(configs): dirname = os.path.dirname(ingested_on_s3.path) - prefilter_path = f"{dirname}/prefiltered-{idx}.vcf.gz" - out_lst.append( - PrefilterConfig( - **{ - **config.dict(), - "prefilter_path": prefilter_path, - } - ) + prefilter_path = f"{bucket}/{dirname}/prefiltered-{idx}.vcf.gz" + config = PrefilterConfig( + **{ + **config.dict(), + "prefilter_path": prefilter_path, + } ) - json.dump([obj.dict() for obj in out_lst], tmpf) + print(config.model_dump_json(), file=tmpf) tmpf.flush() self.prefilter_seqvars( ingested_on_s3=ingested_on_s3, configs=out_lst, path_config=tmpf.name @@ -888,13 +919,14 @@ def prefilter_seqvars( ): """Run prefiltration of sequence variants.""" # Create arguments to use. + bucket = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.bucket args = [ "seqvars", "prefilter", "--params", f"@{path_config}", "--path-in", - ingested_on_s3.path, + f"{bucket}/{ingested_on_s3.path}", ] # Setup environment so the worker can access the internal S3 storage. endpoint_host = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host @@ -905,7 +937,7 @@ def prefilter_seqvars( "AWS_ACCESS_KEY_ID": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.access_key, "AWS_SECRET_ACCESS_KEY": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.secret_key, "AWS_ENDPOINT_URL": f"http://{endpoint_host}:{endpoint_port}", - # "AWS_REGION": "us-east-1", + "AWS_REGION": "us-east-1", } # Actually execute the worker. 
self.run_worker(args=args, env=env) @@ -943,12 +975,15 @@ def prefilter_seqvars( class StrucvarsImportExecutor(VariantImportExecutorBase): """Run the import of structural variant import.""" + max_vcf_files = 10 var_type = "strucvars" def annotate( - self, vcf_on_s3: PedigreeExternalFile, path_ped: str + self, vcf_on_s3: typing.List[PedigreeExternalFile], path_ped: str, path_id_map: str ) -> typing.List[PedigreeExternalFile]: """Implementation of structural variant annotation.""" + assert len(vcf_on_s3) > 0, "ensured earlier" + # Path create path of the new fiel. bucket = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.bucket path_out = ( @@ -964,16 +999,13 @@ def annotate( "--case-uuid", str(self.case.sodar_uuid), "--genomebuild", - vcf_on_s3.genomebuild, - "--path-mehari-db", - f"{settings.WORKER_DB_PATH}/mehari", + vcf_on_s3[0].genomebuild, "--path-ped", path_ped, - "--path-in", - vcf_on_s3.path, - "--path-out", - f"{bucket}/{path_out}", ] + for entry in vcf_on_s3: + args += ["--path-in", f"{bucket}/{entry.path}"] + args += ["--path-out", f"{bucket}/{path_out}", "--id-mapping", f"@{path_id_map}"] # Setup environment so the worker can access the internal S3 storage. endpoint_host = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host endpoint_port = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port @@ -983,7 +1015,7 @@ def annotate( "AWS_ACCESS_KEY_ID": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.access_key, "AWS_SECRET_ACCESS_KEY": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.secret_key, "AWS_ENDPOINT_URL": f"http://{endpoint_host}:{endpoint_port}", - # "AWS_REGION": "us-east-1", + "AWS_REGION": "us-east-1", } # Actually execute the worker. 
self.run_worker(args=args, env=env) @@ -992,9 +1024,10 @@ def annotate( PedigreeInternalFile.objects.create( case=self.case, path=f"{path_out}{suffix}", - genomebuild=vcf_on_s3.genomebuild, + genomebuild=vcf_on_s3[0].genomebuild, mimetype=mimetype, - identifier_map=vcf_on_s3.identifier_map, + # NB: no identifier map as ingest fixed it + identifier_map={}, designation=designation, file_attributes={}, # checksum=extfile.checksum, # TODO diff --git a/cases_import/tests/snapshots/snap_test_models_executor.py b/cases_import/tests/snapshots/snap_test_models_executor.py index 1a57e26c0..4a30779f5 100644 --- a/cases_import/tests/snapshots/snap_test_models_executor.py +++ b/cases_import/tests/snapshots/snap_test_models_executor.py @@ -6,137 +6,6 @@ snapshots = Snapshot() -snapshots["BuildLegacyModelTest::test_build_legacy_pedigree legacy pedigree for family.yaml"] = [ - { - "affected": 2, - "father": "father", - "has_gt_entries": True, - "mother": "mother", - "patient": "index", - "sex": 1, - }, - { - "affected": 1, - "father": "0", - "has_gt_entries": True, - "mother": "0", - "patient": "father", - "sex": 1, - }, - { - "affected": 1, - "father": "0", - "has_gt_entries": True, - "mother": "0", - "patient": "mother", - "sex": 2, - }, -] - -snapshots["ImportCreateWithSeqvarsVcfTest::test_run external files"] = [ - { - "available": None, - "designation": "variant_calls", - "file_attributes": { - "checksum": "sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82", - "designation": "variant_calls", - "genomebuild": "grch37", - "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "variant_type": "seqvars", - }, - "identifier_map": {"index": "NA12878-PCRF450-1"}, - "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "path": "file://cases_import/tests/data/sample-brca1.vcf.gz", - }, - { - "available": None, - "designation": "variant_calls", - "file_attributes": { - "checksum": "sha256:6b137335b7803623c3389424e7b64d704fb1c9f3f55792db2916d312e2da27ef", - 
"designation": "variant_calls", - "genomebuild": "grch37", - "mimetype": "application/octet-stream+x-tabix-tbi-index", - "variant_type": "seqvars", - }, - "identifier_map": {"index": "NA12878-PCRF450-1"}, - "mimetype": "application/octet-stream+x-tabix-tbi-index", - "path": "file://cases_import/tests/data/sample-brca1.vcf.gz.tbi", - }, -] - -snapshots["ImportCreateWithSeqvarsVcfTest::test_run internal files"] = [ - { - "checksum": None, - "designation": "variant_calls/seqvars/orig-copy", - "file_attributes": { - "checksum": "sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82", - "designation": "variant_calls", - "genomebuild": "grch37", - "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "variant_type": "seqvars", - }, - "identifier_map": {"index": "NA12878-PCRF450-1"}, - "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/external-copy.vcf.gz", - }, - { - "checksum": None, - "designation": "variant_calls/seqvars/ingested-vcf", - "file_attributes": {}, - "identifier_map": {"index": "NA12878-PCRF450-1"}, - "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/ingested.vcf.gz", - }, - { - "checksum": None, - "designation": "variant_calls/seqvars/ingested-tbi", - "file_attributes": {}, - "identifier_map": {"index": "NA12878-PCRF450-1"}, - "mimetype": "application/octet-stream+x-tabix-tbi-index", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/ingested.vcf.gz.tbi", - }, - { - "checksum": None, - "designation": "variant_calls/seqvars/prefiltered-vcf", - "file_attributes": { - "prefilter_config": '{"max_freq": 0.05, "max_exon_dist": 1000, "prefilter_path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-0.vcf.gz"}' - }, 
- "identifier_map": {"index": "NA12878-PCRF450-1"}, - "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-0.vcf.gz", - }, - { - "checksum": None, - "designation": "variant_calls/seqvars/prefiltered-vcf", - "file_attributes": { - "prefilter_config": '{"max_freq": 0.01, "max_exon_dist": 100, "prefilter_path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-1.vcf.gz"}' - }, - "identifier_map": {"index": "NA12878-PCRF450-1"}, - "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-1.vcf.gz", - }, - { - "checksum": None, - "designation": "variant_calls/seqvars/prefiltered-tbi", - "file_attributes": { - "prefilter_config": '{"max_freq": 0.05, "max_exon_dist": 1000, "prefilter_path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-0.vcf.gz"}' - }, - "identifier_map": {"index": "NA12878-PCRF450-1"}, - "mimetype": "application/octet-stream+x-tabix-tbi-index", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-0.vcf.gz.tbi", - }, - { - "checksum": None, - "designation": "variant_calls/seqvars/prefiltered-tbi", - "file_attributes": { - "prefilter_config": '{"max_freq": 0.01, "max_exon_dist": 100, "prefilter_path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-1.vcf.gz"}' - }, - "identifier_map": {"index": "NA12878-PCRF450-1"}, - "mimetype": "application/octet-stream+x-tabix-tbi-index", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-1.vcf.gz.tbi", - }, -] - snapshots["ImportCreateWithStrucvarsVcfTest::test_run external files"] = [ { 
"available": None, @@ -181,13 +50,13 @@ }, "identifier_map": {"index": "NA12878-PCRF450-1"}, "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/external-copy.vcf.gz", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/external-copy-0.vcf.gz", }, { "checksum": None, "designation": "variant_calls/strucvars/ingested-vcf", "file_attributes": {}, - "identifier_map": {"index": "NA12878-PCRF450-1"}, + "identifier_map": {}, "mimetype": "text/plain+x-bgzip+x-variant-call-format", "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/ingested.vcf.gz", }, @@ -195,7 +64,7 @@ "checksum": None, "designation": "variant_calls/strucvars/ingested-tbi", "file_attributes": {}, - "identifier_map": {"index": "NA12878-PCRF450-1"}, + "identifier_map": {}, "mimetype": "application/octet-stream+x-tabix-tbi-index", "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/ingested.vcf.gz.tbi", }, diff --git a/cases_import/tests/test_models_executor.py b/cases_import/tests/test_models_executor.py index e5b6b47f6..835ac889a 100644 --- a/cases_import/tests/test_models_executor.py +++ b/cases_import/tests/test_models_executor.py @@ -7,8 +7,6 @@ import os from unittest import mock -from django.conf import settings -from freezegun import freeze_time from google.protobuf.json_format import ParseDict from phenopackets import Family from projectroles.app_settings import AppSettingAPI @@ -106,7 +104,7 @@ def test_run(self, mock_seqvarsimprotexecutor_run_worker): self.assertEqual(Case.objects.count(), 1) self.assertEqual(CaseQc.objects.count(), 1) self.assertEqual(PedigreeExternalFile.objects.count(), 2) - self.assertEqual(PedigreeInternalFile.objects.count(), 7) + self.assertEqual(PedigreeInternalFile.objects.count(), 3) call_list = 
mock_seqvarsimprotexecutor_run_worker.call_args_list self.assertEqual(len(call_list), 2) diff --git a/cases_qc/migrations/0003_auto_20240604_1128.py b/cases_qc/migrations/0003_auto_20240604_1128.py new file mode 100644 index 000000000..ce1ba077a --- /dev/null +++ b/cases_qc/migrations/0003_auto_20240604_1128.py @@ -0,0 +1,545 @@ +# Generated by Django 3.2.25 on 2024-06-04 11:28 + +import django.core.serializers.json +from django.db import migrations +import django_pydantic_field.compat.django +import django_pydantic_field.fields + +import cases_qc.models.cramino +import cases_qc.models.dragen +import cases_qc.models.ngsbits +import cases_qc.models.samtools + + +class Migration(migrations.Migration): + + dependencies = [ + ("cases_qc", "0002_auto_20240220_1615"), + ] + + operations = [ + migrations.AlterField( + model_name="bcftoolsstatsmetrics", + name="af", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.BcftoolsStatsAfRecord,) + ), + ), + ), + migrations.AlterField( + model_name="bcftoolsstatsmetrics", + name="dp", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.BcftoolsStatsDpRecord,) + ), + ), + ), + migrations.AlterField( + model_name="bcftoolsstatsmetrics", + name="idd", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.BcftoolsStatsIddRecord,) + ), + ), + ), + migrations.AlterField( + model_name="bcftoolsstatsmetrics", + name="qual", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + 
encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.BcftoolsStatsQualRecord,) + ), + ), + ), + migrations.AlterField( + model_name="bcftoolsstatsmetrics", + name="sis", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.BcftoolsStatsSisRecord,) + ), + ), + ), + migrations.AlterField( + model_name="bcftoolsstatsmetrics", + name="sn", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.BcftoolsStatsSnRecord,) + ), + ), + ), + migrations.AlterField( + model_name="bcftoolsstatsmetrics", + name="st", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.BcftoolsStatsStRecord,) + ), + ), + ), + migrations.AlterField( + model_name="bcftoolsstatsmetrics", + name="tstv", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.BcftoolsStatsTstvRecord,) + ), + ), + ), + migrations.AlterField( + model_name="craminometrics", + name="chrom_counts", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.cramino.CraminoChromNormalizedCountsRecord,) + ), + ), + ), + migrations.AlterField( + model_name="craminometrics", + name="summary", 
+ field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.cramino.CraminoSummaryRecord,) + ), + ), + ), + migrations.AlterField( + model_name="dragencnvmetrics", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + migrations.AlterField( + model_name="dragenmappingmetrics", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + migrations.AlterField( + model_name="dragenploidyestimationmetrics", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + migrations.AlterField( + model_name="dragenregioncoveragemetrics", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + migrations.AlterField( + model_name="dragenregionhist", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + 
migrations.AlterField( + model_name="dragenregionoverallmeancov", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + migrations.AlterField( + model_name="dragenrohmetrics", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + migrations.AlterField( + model_name="dragensvmetrics", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + migrations.AlterField( + model_name="dragentimemetrics", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + migrations.AlterField( + model_name="dragentrimmermetrics", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + migrations.AlterField( + model_name="dragenvchethomratiometrics", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, 
(cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + migrations.AlterField( + model_name="dragenvcmetrics", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + migrations.AlterField( + model_name="dragenwgscontigmeancovmetrics", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleCoverage,) + ), + ), + ), + migrations.AlterField( + model_name="dragenwgscoveragemetrics", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + migrations.AlterField( + model_name="dragenwgsoverallmeancov", + name="metrics", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.dragen.DragenStyleMetric,) + ), + ), + ), + migrations.AlterField( + model_name="ngsbitsmappingqcmetrics", + name="records", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.ngsbits.NgsbitsMappingqcRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsflagstatmetrics", + name="qc_fail", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, 
+ schema=cases_qc.models.samtools.SamtoolsFlagstatRecord, + ), + ), + migrations.AlterField( + model_name="samtoolsflagstatmetrics", + name="qc_pass", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=cases_qc.models.samtools.SamtoolsFlagstatRecord, + ), + ), + migrations.AlterField( + model_name="samtoolsidxstatsmetrics", + name="records", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsIdxstatsRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatsmainmetrics", + name="chk", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsChkRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatsmainmetrics", + name="cov", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsHistoRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatsmainmetrics", + name="fbc", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsBasePercentagesRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatsmainmetrics", + name="ffq", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + 
schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsFqRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatsmainmetrics", + name="frl", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsHistoRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatsmainmetrics", + name="gcd", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsGcdRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatsmainmetrics", + name="idd", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsIdRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatsmainmetrics", + name="isize", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsIsRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatsmainmetrics", + name="lbc", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsBasePercentagesRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatsmainmetrics", + name="lfq", + 
field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsFqRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatsmainmetrics", + name="lrl", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsHistoRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatsmainmetrics", + name="sn", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsSnRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatssupplementarymetrics", + name="gcc", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsBasePercentagesRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatssupplementarymetrics", + name="gcf", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsGcRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatssupplementarymetrics", + name="gcl", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, 
(cases_qc.models.samtools.SamtoolsStatsGcRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatssupplementarymetrics", + name="gct", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsBasePercentagesRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatssupplementarymetrics", + name="ic", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsIcRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatssupplementarymetrics", + name="mapq", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsHistoRecord,) + ), + ), + ), + migrations.AlterField( + model_name="samtoolsstatssupplementarymetrics", + name="rl", + field=django_pydantic_field.fields.PydanticSchemaField( + config=None, + encoder=django.core.serializers.json.DjangoJSONEncoder, + schema=django_pydantic_field.compat.django.GenericContainer( + list, (cases_qc.models.samtools.SamtoolsStatsHistoRecord,) + ), + ), + ), + ] diff --git a/varfish/vueapp/components.d.ts b/varfish/vueapp/components.d.ts index 0336e3e5f..1d7a8a735 100644 --- a/varfish/vueapp/components.d.ts +++ b/varfish/vueapp/components.d.ts @@ -1,10 +1,10 @@ /* eslint-disable */ -/* prettier-ignore */ // @ts-nocheck // Generated by unplugin-vue-components // Read more: https://github.com/vuejs/core/pull/3399 export {} +/* prettier-ignore */ declare module 'vue' { export interface GlobalComponents { IBiExclamationCircle: typeof 
import('~icons/bi/exclamation-circle')['default'] From 98b3c42038b9df4d65163bb5b29dcfbf17e53de0 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Thu, 6 Jun 2024 15:27:47 +0200 Subject: [PATCH 2/4] wip --- cases_import/tests/test_models_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cases_import/tests/test_models_executor.py b/cases_import/tests/test_models_executor.py index 835ac889a..b38713b26 100644 --- a/cases_import/tests/test_models_executor.py +++ b/cases_import/tests/test_models_executor.py @@ -110,7 +110,7 @@ def test_run(self, mock_seqvarsimprotexecutor_run_worker): self.assertEqual(len(call_list), 2) call_1_args = call_list[0].kwargs["args"] self.assertEqual(call_1_args[0:3], ["seqvars", "ingest", "--file-date"]) - self.assertEqual(len(call_1_args), 16) + self.assertEqual(len(call_1_args), 18) call_2_args = call_list[1].kwargs["args"] self.assertEqual(call_2_args[0:3], ["seqvars", "prefilter", "--params"]) self.assertEqual(len(call_2_args), 6) From 98f0920dbcce68e1db6bdd0442e8cbd948df9920 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 10 Jun 2024 06:42:29 +0200 Subject: [PATCH 3/4] wip --- .../snapshots/snap_test_models_executor.py | 212 +++++++++++++----- 1 file changed, 162 insertions(+), 50 deletions(-) diff --git a/cases_import/tests/snapshots/snap_test_models_executor.py b/cases_import/tests/snapshots/snap_test_models_executor.py index 4a30779f5..34a2c85f2 100644 --- a/cases_import/tests/snapshots/snap_test_models_executor.py +++ b/cases_import/tests/snapshots/snap_test_models_executor.py @@ -4,68 +4,180 @@ from snapshottest import Snapshot + snapshots = Snapshot() -snapshots["ImportCreateWithStrucvarsVcfTest::test_run external files"] = [ +snapshots['BuildLegacyModelTest::test_build_legacy_pedigree legacy pedigree for family.yaml'] = [ + { + 'affected': 2, + 'father': 'father', + 'has_gt_entries': True, + 'mother': 'mother', + 'patient': 'index', + 'sex': 1 + }, + { + 'affected': 1, + 'father': '0', + 
'has_gt_entries': True, + 'mother': '0', + 'patient': 'father', + 'sex': 1 + }, + { + 'affected': 1, + 'father': '0', + 'has_gt_entries': True, + 'mother': '0', + 'patient': 'mother', + 'sex': 2 + } +] + +snapshots['ImportCreateWithSeqvarsVcfTest::test_run external files'] = [ { - "available": None, - "designation": "variant_calls", - "file_attributes": { - "checksum": "sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82", - "designation": "variant_calls", - "genomebuild": "grch37", - "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "variant_type": "strucvars", - }, - "identifier_map": {"index": "NA12878-PCRF450-1"}, - "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "path": "file://cases_import/tests/data/sample-brca1.vcf.gz", + 'available': None, + 'designation': 'variant_calls', + 'file_attributes': { + 'checksum': 'sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82', + 'designation': 'variant_calls', + 'genomebuild': 'grch37', + 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', + 'variant_type': 'seqvars' + }, + 'identifier_map': { + 'index': 'NA12878-PCRF450-1' + }, + 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', + 'path': 'file://cases_import/tests/data/sample-brca1.vcf.gz' + }, + { + 'available': None, + 'designation': 'variant_calls', + 'file_attributes': { + 'checksum': 'sha256:6b137335b7803623c3389424e7b64d704fb1c9f3f55792db2916d312e2da27ef', + 'designation': 'variant_calls', + 'genomebuild': 'grch37', + 'mimetype': 'application/octet-stream+x-tabix-tbi-index', + 'variant_type': 'seqvars' + }, + 'identifier_map': { + 'index': 'NA12878-PCRF450-1' + }, + 'mimetype': 'application/octet-stream+x-tabix-tbi-index', + 'path': 'file://cases_import/tests/data/sample-brca1.vcf.gz.tbi' + } +] + +snapshots['ImportCreateWithSeqvarsVcfTest::test_run internal files'] = [ + { + 'checksum': None, + 'designation': 'variant_calls/seqvars/orig-copy', + 'file_attributes': { + 'checksum': 
'sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82', + 'designation': 'variant_calls', + 'genomebuild': 'grch37', + 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', + 'variant_type': 'seqvars' + }, + 'identifier_map': { + 'index': 'NA12878-PCRF450-1' + }, + 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', + 'path': 'case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/external-copy-0.vcf.gz' }, { - "available": None, - "designation": "variant_calls", - "file_attributes": { - "checksum": "sha256:6b137335b7803623c3389424e7b64d704fb1c9f3f55792db2916d312e2da27ef", - "designation": "variant_calls", - "genomebuild": "grch37", - "mimetype": "application/octet-stream+x-tabix-tbi-index", - "variant_type": "strucvars", - }, - "identifier_map": {"index": "NA12878-PCRF450-1"}, - "mimetype": "application/octet-stream+x-tabix-tbi-index", - "path": "file://cases_import/tests/data/sample-brca1.vcf.gz.tbi", + 'checksum': None, + 'designation': 'variant_calls/seqvars/ingested-vcf', + 'file_attributes': { + }, + 'identifier_map': { + }, + 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', + 'path': 'case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/ingested.vcf.gz' }, + { + 'checksum': None, + 'designation': 'variant_calls/seqvars/ingested-tbi', + 'file_attributes': { + }, + 'identifier_map': { + }, + 'mimetype': 'application/octet-stream+x-tabix-tbi-index', + 'path': 'case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/ingested.vcf.gz.tbi' + } ] -snapshots["ImportCreateWithStrucvarsVcfTest::test_run internal files"] = [ +snapshots['ImportCreateWithStrucvarsVcfTest::test_run external files'] = [ { - "checksum": None, - "designation": "variant_calls/strucvars/orig-copy", - "file_attributes": { - "checksum": "sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82", - "designation": "variant_calls", - 
"genomebuild": "grch37", - "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "variant_type": "strucvars", - }, - "identifier_map": {"index": "NA12878-PCRF450-1"}, - "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/external-copy-0.vcf.gz", + 'available': None, + 'designation': 'variant_calls', + 'file_attributes': { + 'checksum': 'sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82', + 'designation': 'variant_calls', + 'genomebuild': 'grch37', + 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', + 'variant_type': 'strucvars' + }, + 'identifier_map': { + 'index': 'NA12878-PCRF450-1' + }, + 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', + 'path': 'file://cases_import/tests/data/sample-brca1.vcf.gz' }, { - "checksum": None, - "designation": "variant_calls/strucvars/ingested-vcf", - "file_attributes": {}, - "identifier_map": {}, - "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/ingested.vcf.gz", + 'available': None, + 'designation': 'variant_calls', + 'file_attributes': { + 'checksum': 'sha256:6b137335b7803623c3389424e7b64d704fb1c9f3f55792db2916d312e2da27ef', + 'designation': 'variant_calls', + 'genomebuild': 'grch37', + 'mimetype': 'application/octet-stream+x-tabix-tbi-index', + 'variant_type': 'strucvars' + }, + 'identifier_map': { + 'index': 'NA12878-PCRF450-1' + }, + 'mimetype': 'application/octet-stream+x-tabix-tbi-index', + 'path': 'file://cases_import/tests/data/sample-brca1.vcf.gz.tbi' + } +] + +snapshots['ImportCreateWithStrucvarsVcfTest::test_run internal files'] = [ + { + 'checksum': None, + 'designation': 'variant_calls/strucvars/orig-copy', + 'file_attributes': { + 'checksum': 'sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82', + 'designation': 'variant_calls', + 
'genomebuild': 'grch37', + 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', + 'variant_type': 'strucvars' + }, + 'identifier_map': { + 'index': 'NA12878-PCRF450-1' + }, + 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', + 'path': 'case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/external-copy-0.vcf.gz' }, { - "checksum": None, - "designation": "variant_calls/strucvars/ingested-tbi", - "file_attributes": {}, - "identifier_map": {}, - "mimetype": "application/octet-stream+x-tabix-tbi-index", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/ingested.vcf.gz.tbi", + 'checksum': None, + 'designation': 'variant_calls/strucvars/ingested-vcf', + 'file_attributes': { + }, + 'identifier_map': { + }, + 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', + 'path': 'case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/ingested.vcf.gz' }, + { + 'checksum': None, + 'designation': 'variant_calls/strucvars/ingested-tbi', + 'file_attributes': { + }, + 'identifier_map': { + }, + 'mimetype': 'application/octet-stream+x-tabix-tbi-index', + 'path': 'case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/ingested.vcf.gz.tbi' + } ] From fb31726154e4fc2fcd5e9acbd97e8add2299195e Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 10 Jun 2024 00:35:57 +0200 Subject: [PATCH 4/4] wip --- .../snapshots/snap_test_models_executor.py | 269 ++++++++---------- 1 file changed, 124 insertions(+), 145 deletions(-) diff --git a/cases_import/tests/snapshots/snap_test_models_executor.py b/cases_import/tests/snapshots/snap_test_models_executor.py index 34a2c85f2..ece81c52a 100644 --- a/cases_import/tests/snapshots/snap_test_models_executor.py +++ b/cases_import/tests/snapshots/snap_test_models_executor.py @@ -4,180 +4,159 @@ from snapshottest import Snapshot - snapshots = Snapshot() 
-snapshots['BuildLegacyModelTest::test_build_legacy_pedigree legacy pedigree for family.yaml'] = [ +snapshots["BuildLegacyModelTest::test_build_legacy_pedigree legacy pedigree for family.yaml"] = [ { - 'affected': 2, - 'father': 'father', - 'has_gt_entries': True, - 'mother': 'mother', - 'patient': 'index', - 'sex': 1 + "affected": 2, + "father": "father", + "has_gt_entries": True, + "mother": "mother", + "patient": "index", + "sex": 1, }, { - 'affected': 1, - 'father': '0', - 'has_gt_entries': True, - 'mother': '0', - 'patient': 'father', - 'sex': 1 + "affected": 1, + "father": "0", + "has_gt_entries": True, + "mother": "0", + "patient": "father", + "sex": 1, }, { - 'affected': 1, - 'father': '0', - 'has_gt_entries': True, - 'mother': '0', - 'patient': 'mother', - 'sex': 2 - } + "affected": 1, + "father": "0", + "has_gt_entries": True, + "mother": "0", + "patient": "mother", + "sex": 2, + }, ] -snapshots['ImportCreateWithSeqvarsVcfTest::test_run external files'] = [ +snapshots["ImportCreateWithSeqvarsVcfTest::test_run external files"] = [ { - 'available': None, - 'designation': 'variant_calls', - 'file_attributes': { - 'checksum': 'sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82', - 'designation': 'variant_calls', - 'genomebuild': 'grch37', - 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', - 'variant_type': 'seqvars' - }, - 'identifier_map': { - 'index': 'NA12878-PCRF450-1' - }, - 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', - 'path': 'file://cases_import/tests/data/sample-brca1.vcf.gz' + "available": None, + "designation": "variant_calls", + "file_attributes": { + "checksum": "sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82", + "designation": "variant_calls", + "genomebuild": "grch37", + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "variant_type": "seqvars", + }, + "identifier_map": {"index": "NA12878-PCRF450-1"}, + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "path": 
"file://cases_import/tests/data/sample-brca1.vcf.gz", }, { - 'available': None, - 'designation': 'variant_calls', - 'file_attributes': { - 'checksum': 'sha256:6b137335b7803623c3389424e7b64d704fb1c9f3f55792db2916d312e2da27ef', - 'designation': 'variant_calls', - 'genomebuild': 'grch37', - 'mimetype': 'application/octet-stream+x-tabix-tbi-index', - 'variant_type': 'seqvars' - }, - 'identifier_map': { - 'index': 'NA12878-PCRF450-1' - }, - 'mimetype': 'application/octet-stream+x-tabix-tbi-index', - 'path': 'file://cases_import/tests/data/sample-brca1.vcf.gz.tbi' - } + "available": None, + "designation": "variant_calls", + "file_attributes": { + "checksum": "sha256:6b137335b7803623c3389424e7b64d704fb1c9f3f55792db2916d312e2da27ef", + "designation": "variant_calls", + "genomebuild": "grch37", + "mimetype": "application/octet-stream+x-tabix-tbi-index", + "variant_type": "seqvars", + }, + "identifier_map": {"index": "NA12878-PCRF450-1"}, + "mimetype": "application/octet-stream+x-tabix-tbi-index", + "path": "file://cases_import/tests/data/sample-brca1.vcf.gz.tbi", + }, ] -snapshots['ImportCreateWithSeqvarsVcfTest::test_run internal files'] = [ +snapshots["ImportCreateWithSeqvarsVcfTest::test_run internal files"] = [ { - 'checksum': None, - 'designation': 'variant_calls/seqvars/orig-copy', - 'file_attributes': { - 'checksum': 'sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82', - 'designation': 'variant_calls', - 'genomebuild': 'grch37', - 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', - 'variant_type': 'seqvars' - }, - 'identifier_map': { - 'index': 'NA12878-PCRF450-1' - }, - 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', - 'path': 'case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/external-copy-0.vcf.gz' + "checksum": None, + "designation": "variant_calls/seqvars/orig-copy", + "file_attributes": { + "checksum": "sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82", + 
"designation": "variant_calls", + "genomebuild": "grch37", + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "variant_type": "seqvars", + }, + "identifier_map": {"index": "NA12878-PCRF450-1"}, + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/external-copy-0.vcf.gz", }, { - 'checksum': None, - 'designation': 'variant_calls/seqvars/ingested-vcf', - 'file_attributes': { - }, - 'identifier_map': { - }, - 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', - 'path': 'case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/ingested.vcf.gz' + "checksum": None, + "designation": "variant_calls/seqvars/ingested-vcf", + "file_attributes": {}, + "identifier_map": {}, + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/ingested.vcf.gz", }, { - 'checksum': None, - 'designation': 'variant_calls/seqvars/ingested-tbi', - 'file_attributes': { - }, - 'identifier_map': { - }, - 'mimetype': 'application/octet-stream+x-tabix-tbi-index', - 'path': 'case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/ingested.vcf.gz.tbi' - } + "checksum": None, + "designation": "variant_calls/seqvars/ingested-tbi", + "file_attributes": {}, + "identifier_map": {}, + "mimetype": "application/octet-stream+x-tabix-tbi-index", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/ingested.vcf.gz.tbi", + }, ] -snapshots['ImportCreateWithStrucvarsVcfTest::test_run external files'] = [ +snapshots["ImportCreateWithStrucvarsVcfTest::test_run external files"] = [ { - 'available': None, - 'designation': 'variant_calls', - 'file_attributes': { - 'checksum': 'sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82', - 'designation': 
'variant_calls', - 'genomebuild': 'grch37', - 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', - 'variant_type': 'strucvars' - }, - 'identifier_map': { - 'index': 'NA12878-PCRF450-1' - }, - 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', - 'path': 'file://cases_import/tests/data/sample-brca1.vcf.gz' + "available": None, + "designation": "variant_calls", + "file_attributes": { + "checksum": "sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82", + "designation": "variant_calls", + "genomebuild": "grch37", + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "variant_type": "strucvars", + }, + "identifier_map": {"index": "NA12878-PCRF450-1"}, + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "path": "file://cases_import/tests/data/sample-brca1.vcf.gz", }, { - 'available': None, - 'designation': 'variant_calls', - 'file_attributes': { - 'checksum': 'sha256:6b137335b7803623c3389424e7b64d704fb1c9f3f55792db2916d312e2da27ef', - 'designation': 'variant_calls', - 'genomebuild': 'grch37', - 'mimetype': 'application/octet-stream+x-tabix-tbi-index', - 'variant_type': 'strucvars' - }, - 'identifier_map': { - 'index': 'NA12878-PCRF450-1' - }, - 'mimetype': 'application/octet-stream+x-tabix-tbi-index', - 'path': 'file://cases_import/tests/data/sample-brca1.vcf.gz.tbi' - } + "available": None, + "designation": "variant_calls", + "file_attributes": { + "checksum": "sha256:6b137335b7803623c3389424e7b64d704fb1c9f3f55792db2916d312e2da27ef", + "designation": "variant_calls", + "genomebuild": "grch37", + "mimetype": "application/octet-stream+x-tabix-tbi-index", + "variant_type": "strucvars", + }, + "identifier_map": {"index": "NA12878-PCRF450-1"}, + "mimetype": "application/octet-stream+x-tabix-tbi-index", + "path": "file://cases_import/tests/data/sample-brca1.vcf.gz.tbi", + }, ] -snapshots['ImportCreateWithStrucvarsVcfTest::test_run internal files'] = [ +snapshots["ImportCreateWithStrucvarsVcfTest::test_run internal files"] = [ { - 
'checksum': None, - 'designation': 'variant_calls/strucvars/orig-copy', - 'file_attributes': { - 'checksum': 'sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82', - 'designation': 'variant_calls', - 'genomebuild': 'grch37', - 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', - 'variant_type': 'strucvars' - }, - 'identifier_map': { - 'index': 'NA12878-PCRF450-1' - }, - 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', - 'path': 'case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/external-copy-0.vcf.gz' + "checksum": None, + "designation": "variant_calls/strucvars/orig-copy", + "file_attributes": { + "checksum": "sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82", + "designation": "variant_calls", + "genomebuild": "grch37", + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "variant_type": "strucvars", + }, + "identifier_map": {"index": "NA12878-PCRF450-1"}, + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/external-copy-0.vcf.gz", }, { - 'checksum': None, - 'designation': 'variant_calls/strucvars/ingested-vcf', - 'file_attributes': { - }, - 'identifier_map': { - }, - 'mimetype': 'text/plain+x-bgzip+x-variant-call-format', - 'path': 'case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/ingested.vcf.gz' + "checksum": None, + "designation": "variant_calls/strucvars/ingested-vcf", + "file_attributes": {}, + "identifier_map": {}, + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/ingested.vcf.gz", }, { - 'checksum': None, - 'designation': 'variant_calls/strucvars/ingested-tbi', - 'file_attributes': { - }, - 'identifier_map': { - }, - 'mimetype': 'application/octet-stream+x-tabix-tbi-index', - 'path': 
'case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/ingested.vcf.gz.tbi' - } + "checksum": None, + "designation": "variant_calls/strucvars/ingested-tbi", + "file_attributes": {}, + "identifier_map": {}, + "mimetype": "application/octet-stream+x-tabix-tbi-index", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/ingested.vcf.gz.tbi", + }, ]