From 79cc5e71b02791c14b8079c810f470de753b6088 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Fri, 27 Oct 2023 10:22:20 +0200 Subject: [PATCH] feat: integration of worker "strucvars ingest" (#1190) --- cases_import/models/executors.py | 188 ++++++++++++++---- .../tests/data/singleton_strucvars.yaml | 3 + .../snapshots/snap_test_models_executor.py | 90 +++++++-- cases_import/tests/test_models_executor.py | 87 +++++++- config/settings/base.py | 4 +- 5 files changed, 315 insertions(+), 57 deletions(-) create mode 100644 cases_import/tests/data/singleton_strucvars.yaml diff --git a/cases_import/models/executors.py b/cases_import/models/executors.py index 8c4808dce..e90bcb834 100644 --- a/cases_import/models/executors.py +++ b/cases_import/models/executors.py @@ -661,8 +661,10 @@ def _import_ngsbits_qc_mappingqc( } -class SeqvarImportExecutor(FileImportExecutorBase): - """Run the import of sequence variant import.""" +class VariantImportExecutorBase(FileImportExecutorBase): + """Base class for variant import.""" + + var_type: str def __init__(self, case: Case, bgjob: CaseImportBackgroundJob): super().__init__(case.project) @@ -684,21 +686,21 @@ def __init__(self, case: Case, bgjob: CaseImportBackgroundJob): #: The `FileSystemWrapper` for the internal storage. self.internal_fs = FileSystemWrapper(self.internal_fs_options) - def run(self): - """Perform the import.""" - ext_vcf_on_s3 = self._copy_external_internal() + def run(self) -> typing.List[PedigreeInternalFile]: + """Perform the import. 
+ + :returns: the `PedigreeInternalFile` objects resulting from the import + """ + ext_vcf_on_s3 = self.copy_external_internal() if ext_vcf_on_s3: - int_on_s3 = self._annotate(ext_vcf_on_s3) - int_vcf_on_s3 = [ - obj for obj in int_on_s3 if obj.designation == "variant_calls/seqvars/ingested-vcf" - ] - if int_vcf_on_s3: - self._prefilter(int_vcf_on_s3[0]) - - def _copy_external_internal(self) -> typing.Optional[PedigreeInternalFile]: + return self.annotate_outer(ext_vcf_on_s3) + else: + return [] + + def copy_external_internal(self) -> typing.Optional[PedigreeInternalFile]: """Copy the external VCF file to the internal storage. - :return: whether a file was copied + :return: the corresponding `PedigreeInternalFile` object :raises ValueError: if more than one file is found """ # Find files with the correct designation, variant_type, and mimetype; ensure that there @@ -706,10 +708,13 @@ def _copy_external_internal(self) -> typing.Optional[PedigreeInternalFile]: extfile_qs = PedigreeExternalFile.objects.filter( pedigree=self.case.pedigree_obj, designation=ExternalFileDesignation.VARIANT_CALLS.value, + file_attributes__variant_type=self.var_type, mimetype="text/plain+x-bgzip+x-variant-call-format", ) if extfile_qs.count() > 1: - raise ValueError(f"expected at most one seqvar VCF file, found {extfile_qs.count()}") + raise ValueError( + f"expected at most one {self.var_type} VCF file, found {extfile_qs.count()}" + ) elif extfile_qs.count() == 0: return None extfile = extfile_qs.first() @@ -718,7 +723,7 @@ def _copy_external_internal(self) -> typing.Optional[PedigreeInternalFile]: bucket = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.bucket path_int = ( f"case-data/{uuid_frag(self.case.sodar_uuid)}/{self.bgjob.sodar_uuid}/" - "seqvar/external-copy.vcf.gz" + f"{self.var_type}/external-copy.vcf.gz" ) path_int_full = f"s3://{bucket}/{path_int}" with ( @@ -735,27 +740,64 @@ def _copy_external_internal(self) -> typing.Optional[PedigreeInternalFile]: mimetype=extfile.mimetype, 
file_attributes=extfile.file_attributes, identifier_map=extfile.identifier_map, - # is copy of the original seqvar VCF file - designation="variant_calls/seqvars/orig-copy", + # is copy of the original VCF file + designation=f"variant_calls/{self.var_type}/orig-copy", # checksum=extfile.checksum, # TODO pedigree=self.case.pedigree_obj, ) - def _annotate(self, vcf_on_s3: PedigreeExternalFile) -> typing.List[PedigreeExternalFile]: - """Annotate the VCF file from the internal storage.""" + def annotate_outer(self, vcf_on_s3: PedigreeExternalFile) -> typing.List[PedigreeExternalFile]: + """Annotate the VCF file from the internal storage. + + Will write a temporary PLINK PED file and then call the actual annotation function. + """ with tempfile.NamedTemporaryFile(mode="w+t") as tmpf: write_pedigree_as_plink(self.case.pedigree_obj, tmpf) tmpf.flush() - return self._annotate_inner(vcf_on_s3, path_ped=tmpf.name) + return self.annotate(vcf_on_s3, path_ped=tmpf.name) + + def annotate( + self, vcf_on_s3: PedigreeExternalFile, path_ped: str + ) -> typing.List[PedigreeExternalFile]: + _ = vcf_on_s3 + _ = path_ped + raise NotImplementedError + + def run_worker(self, args: list[str], env: typing.Dict[str, str] | None = None): + """Run the worker with the given arguments. + + The worker will create a new VCF file and a TBI file. 
+ """ + cmd = [settings.WORKER_EXE_PATH, *args] + subprocess.check_call(cmd, env=env) + + +class SeqvarsImportExecutor(VariantImportExecutorBase): + """Run the import of sequence variant import.""" + + var_type = "seqvars" - def _annotate_inner( + def run(self) -> typing.List[PedigreeInternalFile]: + """Override superclass behaviour to also prefilter the VCF file.""" + int_on_s3 = super().run() + int_vcf_on_s3 = [ + obj + for obj in int_on_s3 + if obj.designation == f"variant_calls/{self.var_type}/ingested-vcf" + ] + if int_vcf_on_s3: + self.prefilter_seqvars_outer(int_vcf_on_s3[0]) + return int_on_s3 + + def annotate( self, vcf_on_s3: PedigreeExternalFile, path_ped: str ) -> typing.List[PedigreeExternalFile]: + """Implementation of sequence variant annotation.""" # Path create path of the new fiel. bucket = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.bucket path_out = ( f"case-data/{uuid_frag(self.case.sodar_uuid)}/{self.bgjob.sodar_uuid}/" - "seqvar/ingested.vcf.gz" + "seqvars/ingested.vcf.gz" ) # Create arguments to use. args = [ @@ -788,7 +830,7 @@ def _annotate_inner( # "AWS_REGION": "us-east-1", } # Actually execute the worker. - self._run_worker(args=args, env=env) + self.run_worker(args=args, env=env) # Create the `PedigreeInternalFile` record after ingest is complete. return [ PedigreeInternalFile.objects.create( @@ -816,9 +858,12 @@ def _annotate_inner( ) ] - def _prefilter(self, ingested_on_s3: PedigreeInternalFile): + def prefilter_seqvars_outer(self, ingested_on_s3: PedigreeInternalFile): + """Writes out the prefilter configuration JSON to file and then calls the actual + prefiltration. 
+ """ with tempfile.NamedTemporaryFile(mode="w+t") as tmpf: - configs: list[PrefilterConfig] = settings.VARFISH_CASE_IMPORT_SEQVAR_PREFILTER_CONFIGS + configs: list[PrefilterConfig] = settings.VARFISH_CASE_IMPORT_SEQVARS_PREFILTER_CONFIGS out_lst = [] for idx, config in enumerate(configs): dirname = os.path.dirname(ingested_on_s3.path) @@ -833,13 +878,14 @@ def _prefilter(self, ingested_on_s3: PedigreeInternalFile): ) json.dump([obj.dict() for obj in out_lst], tmpf) tmpf.flush() - self._prefilter_inner( + self.prefilter_seqvars( ingested_on_s3=ingested_on_s3, configs=out_lst, path_config=tmpf.name ) - def _prefilter_inner( + def prefilter_seqvars( self, ingested_on_s3: PedigreeInternalFile, configs: list[PrefilterConfig], path_config: str ): + """Run prefiltration of sequence variants.""" # Create arguments to use. args = [ "seqvars", @@ -861,7 +907,7 @@ def _prefilter_inner( # "AWS_REGION": "us-east-1", } # Actually execute the worker. - self._run_worker(args=args, env=env) + self.run_worker(args=args, env=env) # Create the `PedigreeInternalFile` records after prefilter is complete. return [ PedigreeInternalFile.objects.create( @@ -892,13 +938,80 @@ def _prefilter_inner( for config in configs ] - def _run_worker(self, args: list[str], env: typing.Dict[str, str] | None = None): - """Run the worker with the given arguments. - The worker will create a new VCF file and a TBI file. - """ - cmd = [settings.WORKER_EXE_PATH, *args] - subprocess.check_call(cmd, env=env) +class StrucvarsImportExecutor(VariantImportExecutorBase): + """Run the import of structural variants.""" + + var_type = "strucvars" + + def annotate( + self, vcf_on_s3: PedigreeExternalFile, path_ped: str + ) -> typing.List[PedigreeExternalFile]: + """Implementation of structural variant annotation.""" + # Create the path of the new file. 
+ bucket = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.bucket + path_out = ( + f"case-data/{uuid_frag(self.case.sodar_uuid)}/{self.bgjob.sodar_uuid}/" + "strucvars/ingested.vcf.gz" + ) + # Create arguments to use. + args = [ + "strucvars", + "ingest", + "--file-date", + timezone.now().strftime("%Y%m%d"), + "--case-uuid", + str(self.case.sodar_uuid), + "--genomebuild", + vcf_on_s3.genomebuild, + "--path-mehari-db", + f"{settings.WORKER_DB_PATH}/mehari", + "--path-ped", + path_ped, + "--path-in", + vcf_on_s3.path, + "--path-out", + f"{bucket}/{path_out}", + ] + # Setup environment so the worker can access the internal S3 storage. + endpoint_host = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host + endpoint_port = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port + env = { + **dict(os.environ.items()), + "LC_ALL": "C", + "AWS_ACCESS_KEY_ID": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.access_key, + "AWS_SECRET_ACCESS_KEY": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.secret_key, + "AWS_ENDPOINT_URL": f"http://{endpoint_host}:{endpoint_port}", + # "AWS_REGION": "us-east-1", + } + # Actually execute the worker. + self.run_worker(args=args, env=env) + # Create the `PedigreeInternalFile` record after ingest is complete. 
+ return [ + PedigreeInternalFile.objects.create( + case=self.case, + path=f"{path_out}{suffix}", + genomebuild=vcf_on_s3.genomebuild, + mimetype=mimetype, + identifier_map=vcf_on_s3.identifier_map, + designation=designation, + file_attributes={}, + # checksum=extfile.checksum, # TODO + pedigree=self.case.pedigree_obj, + ) + for mimetype, designation, suffix in ( + ( + "text/plain+x-bgzip+x-variant-call-format", + "variant_calls/strucvars/ingested-vcf", + "", + ), + ( + "application/octet-stream+x-tabix-tbi-index", + "variant_calls/strucvars/ingested-tbi", + ".tbi", + ), + ) + ] class CaseImportBackgroundJobExecutor: @@ -1246,8 +1359,11 @@ def _run_qc_file_import(self, case: Case): def _run_seqvars_import(self, case: Case): self.caseimportbackgroundjob.add_log_entry("running sequence variant import...") - SeqvarImportExecutor(case, bgjob=self.caseimportbackgroundjob).run() + SeqvarsImportExecutor(case, bgjob=self.caseimportbackgroundjob).run() self.caseimportbackgroundjob.add_log_entry("... done with sequence variant import") + self.caseimportbackgroundjob.add_log_entry("running structural variant import...") + StrucvarsImportExecutor(case, bgjob=self.caseimportbackgroundjob).run() + self.caseimportbackgroundjob.add_log_entry("... 
done with structural variant import") def _run_strucvars_import(self, case: Case): self.caseimportbackgroundjob.add_log_entry("strucvars annotation not implemented yet") diff --git a/cases_import/tests/data/singleton_strucvars.yaml b/cases_import/tests/data/singleton_strucvars.yaml new file mode 100644 index 000000000..6cd393268 --- /dev/null +++ b/cases_import/tests/data/singleton_strucvars.yaml @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33533fd4ab1f33e6cb99fac98988d2473e30c31f877da5996880a6072e1a2d58 +size 2468 diff --git a/cases_import/tests/snapshots/snap_test_models_executor.py b/cases_import/tests/snapshots/snap_test_models_executor.py index 06ba6f352..1a57e26c0 100644 --- a/cases_import/tests/snapshots/snap_test_models_executor.py +++ b/cases_import/tests/snapshots/snap_test_models_executor.py @@ -33,7 +33,7 @@ }, ] -snapshots["ImportCreateWithSeqvarVcfTest::test_run external files"] = [ +snapshots["ImportCreateWithSeqvarsVcfTest::test_run external files"] = [ { "available": None, "designation": "variant_calls", @@ -64,7 +64,7 @@ }, ] -snapshots["ImportCreateWithSeqvarVcfTest::test_run internal files"] = [ +snapshots["ImportCreateWithSeqvarsVcfTest::test_run internal files"] = [ { "checksum": None, "designation": "variant_calls/seqvars/orig-copy", @@ -77,7 +77,7 @@ }, "identifier_map": {"index": "NA12878-PCRF450-1"}, "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvar/external-copy.vcf.gz", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/external-copy.vcf.gz", }, { "checksum": None, @@ -85,7 +85,7 @@ "file_attributes": {}, "identifier_map": {"index": "NA12878-PCRF450-1"}, "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvar/ingested.vcf.gz", + "path": 
"case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/ingested.vcf.gz", }, { "checksum": None, @@ -93,46 +93,110 @@ "file_attributes": {}, "identifier_map": {"index": "NA12878-PCRF450-1"}, "mimetype": "application/octet-stream+x-tabix-tbi-index", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvar/ingested.vcf.gz.tbi", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/ingested.vcf.gz.tbi", }, { "checksum": None, "designation": "variant_calls/seqvars/prefiltered-vcf", "file_attributes": { - "prefilter_config": '{"max_freq": 0.05, "max_exon_dist": 1000, "prefilter_path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvar/prefiltered-0.vcf.gz"}' + "prefilter_config": '{"max_freq": 0.05, "max_exon_dist": 1000, "prefilter_path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-0.vcf.gz"}' }, "identifier_map": {"index": "NA12878-PCRF450-1"}, "mimetype": "text/plain+x-bgzip+x-variant-call-format", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvar/prefiltered-0.vcf.gz", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-0.vcf.gz", }, { "checksum": None, "designation": "variant_calls/seqvars/prefiltered-vcf", "file_attributes": { - "prefilter_config": '{"max_freq": 0.01, "max_exon_dist": 100, "prefilter_path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvar/prefiltered-1.vcf.gz"}' + "prefilter_config": '{"max_freq": 0.01, "max_exon_dist": 100, "prefilter_path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-1.vcf.gz"}' }, "identifier_map": {"index": "NA12878-PCRF450-1"}, "mimetype": 
"text/plain+x-bgzip+x-variant-call-format", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvar/prefiltered-1.vcf.gz", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-1.vcf.gz", }, { "checksum": None, "designation": "variant_calls/seqvars/prefiltered-tbi", "file_attributes": { - "prefilter_config": '{"max_freq": 0.05, "max_exon_dist": 1000, "prefilter_path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvar/prefiltered-0.vcf.gz"}' + "prefilter_config": '{"max_freq": 0.05, "max_exon_dist": 1000, "prefilter_path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-0.vcf.gz"}' }, "identifier_map": {"index": "NA12878-PCRF450-1"}, "mimetype": "application/octet-stream+x-tabix-tbi-index", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvar/prefiltered-0.vcf.gz.tbi", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-0.vcf.gz.tbi", }, { "checksum": None, "designation": "variant_calls/seqvars/prefiltered-tbi", "file_attributes": { - "prefilter_config": '{"max_freq": 0.01, "max_exon_dist": 100, "prefilter_path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvar/prefiltered-1.vcf.gz"}' + "prefilter_config": '{"max_freq": 0.01, "max_exon_dist": 100, "prefilter_path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-1.vcf.gz"}' }, "identifier_map": {"index": "NA12878-PCRF450-1"}, "mimetype": "application/octet-stream+x-tabix-tbi-index", - "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvar/prefiltered-1.vcf.gz.tbi", + "path": 
"case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/seqvars/prefiltered-1.vcf.gz.tbi", + }, +] + +snapshots["ImportCreateWithStrucvarsVcfTest::test_run external files"] = [ + { + "available": None, + "designation": "variant_calls", + "file_attributes": { + "checksum": "sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82", + "designation": "variant_calls", + "genomebuild": "grch37", + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "variant_type": "strucvars", + }, + "identifier_map": {"index": "NA12878-PCRF450-1"}, + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "path": "file://cases_import/tests/data/sample-brca1.vcf.gz", + }, + { + "available": None, + "designation": "variant_calls", + "file_attributes": { + "checksum": "sha256:6b137335b7803623c3389424e7b64d704fb1c9f3f55792db2916d312e2da27ef", + "designation": "variant_calls", + "genomebuild": "grch37", + "mimetype": "application/octet-stream+x-tabix-tbi-index", + "variant_type": "strucvars", + }, + "identifier_map": {"index": "NA12878-PCRF450-1"}, + "mimetype": "application/octet-stream+x-tabix-tbi-index", + "path": "file://cases_import/tests/data/sample-brca1.vcf.gz.tbi", + }, +] + +snapshots["ImportCreateWithStrucvarsVcfTest::test_run internal files"] = [ + { + "checksum": None, + "designation": "variant_calls/strucvars/orig-copy", + "file_attributes": { + "checksum": "sha256:4042c2afa59f24a327b3852bfcd0d8d991499d9c4eb81e7a7efe8d081e66af82", + "designation": "variant_calls", + "genomebuild": "grch37", + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "variant_type": "strucvars", + }, + "identifier_map": {"index": "NA12878-PCRF450-1"}, + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/external-copy.vcf.gz", + }, + { + "checksum": None, + "designation": "variant_calls/strucvars/ingested-vcf", + "file_attributes": 
{}, + "identifier_map": {"index": "NA12878-PCRF450-1"}, + "mimetype": "text/plain+x-bgzip+x-variant-call-format", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/ingested.vcf.gz", + }, + { + "checksum": None, + "designation": "variant_calls/strucvars/ingested-tbi", + "file_attributes": {}, + "identifier_map": {"index": "NA12878-PCRF450-1"}, + "mimetype": "application/octet-stream+x-tabix-tbi-index", + "path": "case-data/7a/1d7b28-2bf8-4340-81f3-5487d86c669f/c28a70a6-1c75-40a1-8d89-216ca16cffca/strucvars/ingested.vcf.gz.tbi", }, ] diff --git a/cases_import/tests/test_models_executor.py b/cases_import/tests/test_models_executor.py index e3c202605..292f526cc 100644 --- a/cases_import/tests/test_models_executor.py +++ b/cases_import/tests/test_models_executor.py @@ -79,7 +79,7 @@ def test_run(self): # Cannot use freeze time here as real S3 access is used here and the server # refuses connection otherwise. # @freeze_time("2012-01-14 12:00:01") -class ImportCreateWithSeqvarVcfTest( +class ImportCreateWithSeqvarsVcfTest( helpers.FixRandomSeedMixin, ExecutorTestMixin, TestCaseSnapshot, TestCase ): """Test the executor with action=create and external files for seqvar VCF.""" @@ -92,9 +92,9 @@ def setUp(self): fac_kwargs={"path_phenopacket_yaml": "cases_import/tests/data/singleton_seqvars.yaml"}, ) - @mock.patch("cases_import.models.executors.SeqvarImportExecutor._run_worker") - def test_run(self, mock_seqvarimprotexecutor_run_worker): - """Test import of a case with a seqvar VCF file.""" + @mock.patch("cases_import.models.executors.VariantImportExecutorBase.run_worker") + def test_run(self, mock_seqvarsimprotexecutor_run_worker): + """Test import of a case with a seqvars VCF file.""" self.assertEqual(Case.objects.count(), 0) self.assertEqual(CaseQc.objects.count(), 0) self.assertEqual(PedigreeExternalFile.objects.count(), 0) @@ -107,7 +107,7 @@ def test_run(self, mock_seqvarimprotexecutor_run_worker): 
self.assertEqual(PedigreeExternalFile.objects.count(), 2) self.assertEqual(PedigreeInternalFile.objects.count(), 7) - call_list = mock_seqvarimprotexecutor_run_worker.call_args_list + call_list = mock_seqvarsimprotexecutor_run_worker.call_args_list self.assertEqual(len(call_list), 2) call_1_args = call_list[0].kwargs["args"] self.assertEqual(call_1_args[0:3], ["seqvars", "ingest", "--file-date"]) @@ -132,6 +132,7 @@ def test_run(self, mock_seqvarimprotexecutor_run_worker): keys_shared, ( "available", + # cannot freeze time # "last_checked", ), ) @@ -145,7 +146,81 @@ def test_run(self, mock_seqvarimprotexecutor_run_worker): keys_int = tuple(itertools.chain(keys_shared, ("checksum",))) dicts_int = [ helpers.extract_from_dict(obj, keys=keys_int) - for obj in PedigreeInternalFile.objects.all() + for obj in PedigreeInternalFile.objects.all().order_by("id") + ] + self.assertMatchSnapshot(dicts_int, "internal files") + + +# Cannot use freeze time here as real S3 access is used here and the server +# refuses connection otherwise. 
+# @freeze_time("2012-01-14 12:00:01") +class ImportCreateWithStrucvarsVcfTest( + helpers.FixRandomSeedMixin, ExecutorTestMixin, TestCaseSnapshot, TestCase +): + """Test the executor with action=create and external files for strucvars VCF.""" + + def setUp(self): + super().setUp() + self.maxDiff = None + self._setUpExecutor( + CaseImportAction.ACTION_CREATE, + fac_kwargs={ + "path_phenopacket_yaml": "cases_import/tests/data/singleton_strucvars.yaml" + }, + ) + + @mock.patch("cases_import.models.executors.VariantImportExecutorBase.run_worker") + def test_run(self, mock_strucvarsimprotexecutor_run_worker): + """Test import of a case with a strucvars VCF file.""" + self.assertEqual(Case.objects.count(), 0) + self.assertEqual(CaseQc.objects.count(), 0) + self.assertEqual(PedigreeExternalFile.objects.count(), 0) + self.assertEqual(PedigreeInternalFile.objects.count(), 0) + + self.executor.run() + + self.assertEqual(Case.objects.count(), 1) + self.assertEqual(CaseQc.objects.count(), 1) + self.assertEqual(PedigreeExternalFile.objects.count(), 2) + self.assertEqual(PedigreeInternalFile.objects.count(), 3) + + call_list = mock_strucvarsimprotexecutor_run_worker.call_args_list + self.assertEqual(len(call_list), 1) + call_1_args = call_list[0].kwargs["args"] + self.assertEqual(call_1_args[0:3], ["strucvars", "ingest", "--file-date"]) + self.assertEqual(len(call_1_args), 16) + + keys_shared = ( + # cannot freeze time + # "date_created", + # "date_modified", + "designation", + "file_attributes", + "genombuild", + "identifier_map", + "mimetype", + "path", + ) + keys_ext = tuple( + itertools.chain( + keys_shared, + ( + "available", + # cannot freeze time + # "last_checked", + ), + ) + ) + dicts_ext = [ + helpers.extract_from_dict(obj, keys=keys_ext) + for obj in PedigreeExternalFile.objects.all() + ] + self.assertMatchSnapshot(dicts_ext, "external files") + + keys_int = tuple(itertools.chain(keys_shared, ("checksum",))) + dicts_int = [ + helpers.extract_from_dict(obj, keys=keys_int) 
+ for obj in PedigreeInternalFile.objects.all().order_by("id") ] self.assertMatchSnapshot(dicts_int, "internal files") diff --git a/config/settings/base.py b/config/settings/base.py index 54667b335..10645d3bb 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -957,10 +957,10 @@ def set_logging(level): ) ) #: Prefilter configurations. -VARFISH_CASE_IMPORT_SEQVAR_PREFILTER_CONFIGS: list[PrefilterConfig] = [ +VARFISH_CASE_IMPORT_SEQVARS_PREFILTER_CONFIGS: list[PrefilterConfig] = [ PrefilterConfig(**vals) for vals in env.json( - "VARFISH_CASE_IMPORT_SEQVAR_PREFILTER_CONFIGS", + "VARFISH_CASE_IMPORT_SEQVARS_PREFILTER_CONFIGS", # default prefilter configuration [ {"max_freq": 0.05, "max_exon_dist": 1000},