diff --git a/snappy_pipeline/workflows/ngs_mapping/Snakefile b/snappy_pipeline/workflows/ngs_mapping/Snakefile index 1173b0053..e5ba24f8d 100644 --- a/snappy_pipeline/workflows/ngs_mapping/Snakefile +++ b/snappy_pipeline/workflows/ngs_mapping/Snakefile @@ -274,6 +274,6 @@ rule ngs_mapping_ngs_chew_fingerprint: memory=wf.get_resource("ngs_chew", "run", "memory"), partition=wf.get_resource("ngs_chew", "run", "partition"), log: - **wf.get_log_file("ngs_chew", "run"), + **wf.get_log_files("ngs_chew", "fingerprint"), wrapper: wf.wrapper_path("ngs_chew/fingerprint") diff --git a/snappy_pipeline/workflows/ngs_mapping/__init__.py b/snappy_pipeline/workflows/ngs_mapping/__init__.py index b890222a5..61f8fc5b9 100644 --- a/snappy_pipeline/workflows/ngs_mapping/__init__.py +++ b/snappy_pipeline/workflows/ngs_mapping/__init__.py @@ -348,6 +348,9 @@ bam_collect_doc: enabled: false window_length: 1000 + # Compute fingerprints with ngs-chew + ngs_chew_fingerprint: + enabled: true # Configuration for BWA bwa: path_index: REQUIRED # Required if listed in ngs_mapping.tools.dna; otherwise, can be removed. @@ -1189,6 +1192,79 @@ def get_resource_usage(self, action): ) +class NgsChewStepPart(BaseStepPart): + """Analyze BAM File with ``ngs-chew``, e.g., ``fingerprint``""" + + #: Step name + name = "ngs_chew" + + #: Class available actions + actions = ("fingerprint",) + + def __init__(self, parent): + super().__init__(parent) + + def get_input_files(self, action): + """Return the (uncalled) ``_get_input_files_<action>`` method for a supported action""" + self._check_action(action) + return getattr(self, f"_get_input_files_{action}") + + def _check_action(self, action): + if action not in self.actions: + actions_str = ", ".join(self.actions) + error_message = f"Action '{action}' is not supported. 
Valid options: {actions_str}" + raise UnsupportedActionException(error_message) + + @dictify + def _get_input_files_fingerprint(self): + yield "bam", "work/{mapper_lib}/out/{mapper_lib}.bam" + + def get_output_files(self, action): + """Return the result of ``_get_output_files_<action>`` for a supported action""" + self._check_action(action) + return getattr(self, "_get_output_files_{action}".format(action=action))() + + @dictify + def _get_output_files_fingerprint(self): + yield "npz", "work/{mapper_lib}/report/fingerprint/{mapper_lib}.npz" + yield "npz_md5", "work/{mapper_lib}/report/fingerprint/{mapper_lib}.npz.md5" + + def get_log_files(self, action): + self._check_action(action) + return getattr(self, "_get_log_files_{action}".format(action=action))() + + @dictify + def _get_log_files_fingerprint(self): + prefix = "work/{mapper_lib}/log/{mapper_lib}.ngs_chew_fingerprint" + key_ext = ( + ("log", ".log"), + ("conda_info", ".conda_info.txt"), + ("conda_list", ".conda_list.txt"), + ("wrapper", ".wrapper.py"), + ("env_yaml", ".environment.yaml"), + ) + for key, ext in key_ext: + yield key, prefix + ext + yield key + "_md5", prefix + ext + ".md5" + + def get_resource_usage(self, action): + """Get Resource Usage + + :param action: Action (i.e., step) in the workflow, example: 'fingerprint'. + :type action: str + + :return: Returns ResourceUsage for step. + + :raises UnsupportedActionException: if action not in class defined list of valid actions. 
+ """ + self._check_action(action) + return ResourceUsage( + threads=1, + time="04:00:00", + memory="2G", + ) + + class NgsMappingWorkflow(BaseStep): """Perform NGS Mapping""" @@ -1218,6 +1294,7 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir) StarStepPart, TargetCoverageReportStepPart, BamCollectDocStepPart, + NgsChewStepPart, ) ) self.sub_steps["link_out"].disable_patterns = expand("**/*{ext}", ext=EXT_VALUES) @@ -1281,9 +1358,14 @@ def get_result_files(self): yield from self._yield_result_files( os.path.join("output", name_pattern, "out", name_pattern + "{ext}"), ext=EXT_VALUES ) - infixes = ["mapping", "target_cov_report"] + infixes = [ + "mapping", + "target_cov_report", + ] if self.config["bam_collect_doc"]["enabled"]: infixes.append("bam_collect_doc") + if self.config["ngs_chew_fingerprint"]["enabled"]: + infixes.append("ngs_chew_fingerprint") for infix in infixes: yield from self._yield_result_files( os.path.join("output", name_pattern, "log", "{mapper}.{ngs_library.name}.{ext}"), @@ -1305,6 +1387,13 @@ def get_result_files(self): os.path.join("output", name_pattern, "report", "cov", name_pattern + ".cov.{ext}"), ext=("vcf.gz", "vcf.gz.md5", "vcf.gz.tbi", "vcf.gz.tbi.md5", "bw", "bw.md5"), ) + if self.config["ngs_chew_fingerprint"]["enabled"]: + yield from self._yield_result_files( + os.path.join( + "output", name_pattern, "report", "fingerprint", name_pattern + ".{ext}" + ), + ext=("npz", "npz.md5"), + ) yield from self._yield_result_files( os.path.join( "output", name_pattern, "report", "bam_qc", name_pattern + ".bam.{report}.txt" diff --git a/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping.py b/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping.py index ddaf4b6b7..95d94c828 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping.py +++ b/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping.py @@ -793,6 +793,7 @@ def test_ngs_mapping_workflow_steps(ngs_mapping_workflow): 
"link_out", "link_out_bam", "minimap2", + "ngs_chew", "star", "target_coverage_report", ] @@ -811,7 +812,7 @@ def test_ngs_mapping_workflow_files(ngs_mapping_workflow): for i in range(1, 7) for ext in ("bam", "bam.bai", "bam.md5", "bam.bai.md5") ] - for infix in ("bam_collect_doc", "mapping", "target_cov_report"): + for infix in ("bam_collect_doc", "mapping", "target_cov_report", "ngs_chew_fingerprint"): expected += [ "output/bwa.P00{i}-N1-DNA1-WGS1/log/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext) for i in range(1, 7) @@ -850,6 +851,13 @@ def test_ngs_mapping_workflow_files(ngs_mapping_workflow): for ext in ("bw", "bw.md5", "vcf.gz", "vcf.gz.md5", "vcf.gz.tbi", "vcf.gz.tbi.md5") for i in range(1, 7) ] + expected += [ + "output/bwa.P00{i}-N1-DNA1-WGS1/report/fingerprint/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format( + i=i, ext=ext + ) + for ext in ("npz", "npz.md5") + for i in range(1, 7) + ] expected += [ "output/bwa.P00{i}-N1-DNA1-WGS1/report/cov_qc/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format( i=i, ext=ext diff --git a/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping_processed_fastq.py b/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping_processed_fastq.py index 25c197368..7fc765047 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping_processed_fastq.py +++ b/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping_processed_fastq.py @@ -724,6 +724,7 @@ def test_ngs_mapping_workflow_steps(ngs_mapping_workflow): "link_out", "link_out_bam", "minimap2", + "ngs_chew", "star", "target_coverage_report", ] @@ -742,7 +743,7 @@ def test_ngs_mapping_workflow_files(ngs_mapping_workflow): for i in range(1, 7) for ext in ("bam", "bam.bai", "bam.md5", "bam.bai.md5") ] - for infix in ("bam_collect_doc", "mapping", "target_cov_report"): + for infix in ("bam_collect_doc", "mapping", "target_cov_report", "ngs_chew_fingerprint"): expected += [ "output/bwa.P00{i}-N1-DNA1-WGS1/log/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext) for i in range(1, 7) 
@@ -781,6 +782,13 @@ def test_ngs_mapping_workflow_files(ngs_mapping_workflow): for ext in ("bw", "bw.md5", "vcf.gz", "vcf.gz.md5", "vcf.gz.tbi", "vcf.gz.tbi.md5") for i in range(1, 7) ] + expected += [ + "output/bwa.P00{i}-N1-DNA1-WGS1/report/fingerprint/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format( + i=i, ext=ext + ) + for ext in ("npz", "npz.md5") + for i in range(1, 7) + ] expected += [ "output/bwa.P00{i}-N1-DNA1-WGS1/report/cov_qc/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format( i=i, ext=ext