From 14d86708c8ae536cbb75c09352b72b0e69ace7e6 Mon Sep 17 00:00:00 2001 From: Till Hartmann Date: Thu, 30 May 2024 18:39:00 +0200 Subject: [PATCH] sv_calling_targeted: remove superfluous validation, fix minimal config, fakefs --- .../workflows/common/gcnv/gcnv_run.py | 43 ------------------- .../workflows/sv_calling_targeted/model.py | 4 +- .../test_workflows_sv_calling_targeted.py | 41 ++++-------------- 3 files changed, 12 insertions(+), 76 deletions(-) diff --git a/snappy_pipeline/workflows/common/gcnv/gcnv_run.py b/snappy_pipeline/workflows/common/gcnv/gcnv_run.py index b7cbebeae..61d6adaae 100644 --- a/snappy_pipeline/workflows/common/gcnv/gcnv_run.py +++ b/snappy_pipeline/workflows/common/gcnv/gcnv_run.py @@ -105,9 +105,6 @@ def validate_request(self): msg_tpl = "Precomputed model paths must be configured (key: 'precomputed_model_paths')." raise InvalidConfiguration(msg_tpl) else: - # Validate configuration - check if only expected keys are present - self.validate_precomputed_model_paths_config(config=path_to_models) - # Check model directories content for model in path_to_models: # Validate ploidy-model @@ -133,46 +130,6 @@ def validate_request(self): ) raise InvalidConfiguration(msg_tpl.format(str(model))) - def validate_precomputed_model_paths_config(self, config): - """Validate precomputed model config. - - Evaluates if provided configuration has the following format: - - precomputed_model_paths: - - library: "Agilent SureSelect Human All Exon V6" - contig_ploidy": /path/to/ploidy-model - model_pattern: /path/to/model_* - - :param config: List of precomputed model configuration dictionary. - :type config: list - - :raises InvalidConfiguration: if configuration not as expected for - ``precomputed_model_paths`` list. - """ - # Initialise variables - expected_keys = ("library", "model_pattern", "contig_ploidy") - expected_format = ( - '{\n "library": "Agilent SureSelect Human All Exon V6"\n' - ' "contig_ploidy": /path/to/ploidy-model\n' - ' "model_pattern": "/path/to/model_*"\n}' - ) - # Test - for model in config: - # Test keys - n_keys_pass = len(model) == 3 - keys_pass = all(key in expected_keys for key in model) - # Test values - values_pass = all(isinstance(value, str) for value in model.values()) - # Validate - if not (n_keys_pass and keys_pass and values_pass): - pretty_model = self._pretty_print_config(config=model) - msg = ( - "Provided configuration not as expected...\n" - f"\nn_keys_pass={n_keys_pass}, keys_pass={keys_pass}, values_pass={values_pass}\n" - f"Expected:\n{expected_format}\nObserved:\n{pretty_model}\n" - ) - raise InvalidConfiguration(msg) - def _pretty_print_config(self, config): """Pretty format configuration. diff --git a/snappy_pipeline/workflows/sv_calling_targeted/model.py b/snappy_pipeline/workflows/sv_calling_targeted/model.py index a2437b2fc..77df90124 100644 --- a/snappy_pipeline/workflows/sv_calling_targeted/model.py +++ b/snappy_pipeline/workflows/sv_calling_targeted/model.py @@ -25,6 +25,8 @@ class Gcnv(SnappyModel): path: "path/to/targets.bed" """ + path_uniquely_mapable_bed: str + precomputed_model_paths: list[PrecomputedModelEntry] = [] """ Path to gCNV model - will execute analysis in CASE MODE. @@ -92,7 +94,7 @@ class SvCallingTargeted(SnappyStepModel, validators.ToolsMixin): gcnv: Gcnv | None = None - dell2: Delly2 | None = None + delly2: Delly2 | None = None manta: Manta | None = None diff --git a/tests/snappy_pipeline/workflows/test_workflows_sv_calling_targeted.py b/tests/snappy_pipeline/workflows/test_workflows_sv_calling_targeted.py index d8829497e..b987f3b01 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_sv_calling_targeted.py +++ b/tests/snappy_pipeline/workflows/test_workflows_sv_calling_targeted.py @@ -40,12 +40,14 @@ def minimal_config(): - delly2 - manta - gcnv + delly2: {} # use defaults + manta: {} # use defaults gcnv: + path_uniquely_mapable_bed: /path/to/uniquely/mappable/variable/GRCh37/file.bed.gz path_target_interval_list_mapping: - pattern: "Agilent SureSelect Human All Exon V6.*" name: "Agilent_SureSelect_Human_All_Exon_V6" path: /path/to/Agilent/SureSelect_Human_All_Exon_V6_r2/GRCh37/Exons.bed - path_uniquely_mapable_bed: /path/to/uniquely/mappable/variable/GRCh37/file.bed.gz precomputed_model_paths: - library: "Agilent SureSelect Human All Exon V6" contig_ploidy: /path/to/ploidy-model @@ -141,6 +143,7 @@ def sv_calling_targeted_workflow_large_cohort( work_dir, config_paths, germline_sheet_fake_fs2, + aligner_indices_fake_fs, mocker, ): """ @@ -149,6 +152,7 @@ def sv_calling_targeted_workflow_large_cohort( """ # Patch out file-system related things in abstract (the crawling link in step is defined there) patch_module_fs("snappy_pipeline.workflows.abstract", germline_sheet_fake_fs2, mocker) + patch_module_fs("snappy_pipeline.workflows.ngs_mapping", aligner_indices_fake_fs, mocker) # Update the "globals" attribute of the mock workflow (snakemake.workflow.Workflow) so we # can obtain paths from the function as if we really had a NGSMappingPipelineStep here dummy_workflow.globals = {"ngs_mapping": lambda x: "NGS_MAPPING/" + x} @@ -170,12 +174,14 @@ def sv_calling_targeted_workflow_large_cohort_background( work_dir, config_paths, germline_sheet_fake_fs2, + aligner_indices_fake_fs, mocker, ): """Return SvCallingTargetedWorkflow object pre-configured with germline sheet - large trio cohort as background.""" # Patch out file-system related things in abstract (the crawling link in step is defined there) patch_module_fs("snappy_pipeline.workflows.abstract", germline_sheet_fake_fs2, mocker) + patch_module_fs("snappy_pipeline.workflows.ngs_mapping", aligner_indices_fake_fs, mocker) # Update the "globals" attribute of the mock workflow (snakemake.workflow.Workflow) so we # can obtain paths from the function as if we really had a NGSMappingPipelineStep here dummy_workflow.globals = {"ngs_mapping": lambda x: "NGS_MAPPING/" + x} @@ -199,6 +205,7 @@ def test_validate_request( work_dir, config_paths, germline_sheet_fake_fs2_gcnv_model, + aligner_indices_fake_fs, mocker, ): """Tests SvCallingTargetedWorkflow.validate_request()""" @@ -206,6 +213,7 @@ def test_validate_request( patch_module_fs( "snappy_pipeline.workflows.abstract", germline_sheet_fake_fs2_gcnv_model, mocker ) + patch_module_fs("snappy_pipeline.workflows.ngs_mapping", aligner_indices_fake_fs, mocker) patch_module_fs( "snappy_pipeline.workflows.common.gcnv.gcnv_run", germline_sheet_fake_fs2_gcnv_model, @@ -418,37 +426,6 @@ def test_gcnv_get_params(sv_calling_targeted_workflow): sv_calling_targeted_workflow.get_params("gcnv", action) -def test_gcnv_validate_precomputed_model_paths_config(sv_calling_targeted_workflow): - """Tests RunGcnvTargetSeqStepPart.validate_model_requirements()""" - # Initialise input - valid_dict = { - "library": "library", - "contig_ploidy": "/path/to/ploidy-model", - "model_pattern": "/path/to/model_*", - } - typo_dict = { - "library_n": "library", - "contig_ploidy": "/path/to/ploidy-model", - "model_pattern": "/path/to/model_*", - } - missing_key_dict = {"model_pattern": "/path/to/model_*"} - - # Sanity check - sv_calling_targeted_workflow.substep_getattr("gcnv", "validate_precomputed_model_paths_config")( - config=[valid_dict] - ) - # Test key typo - with pytest.raises(InvalidConfiguration): - sv_calling_targeted_workflow.substep_getattr( - "gcnv", "validate_precomputed_model_paths_config" - )(config=[valid_dict, typo_dict]) - # Test key missing - with pytest.raises(InvalidConfiguration): - sv_calling_targeted_workflow.substep_getattr( - "gcnv", "validate_precomputed_model_paths_config" - )(config=[valid_dict, missing_key_dict]) - - def test_gcnv_validate_ploidy_model_directory( fake_fs, mocker, sv_calling_targeted_workflow, ploidy_model_files ):