diff --git a/snappy_pipeline/workflows/ngs_mapping/Snakefile b/snappy_pipeline/workflows/ngs_mapping/Snakefile index 929043ea7..25ec76bcc 100644 --- a/snappy_pipeline/workflows/ngs_mapping/Snakefile +++ b/snappy_pipeline/workflows/ngs_mapping/Snakefile @@ -117,7 +117,7 @@ rule ngs_mapping_bwa_run: log: **wf.get_log_file("bwa", "run"), wrapper: - wf.wrapper_path("bwa") # TODO => bwa/run + wf.wrapper_path("bwa") # Run STAR -------------------------------------------------------------------- @@ -138,7 +138,7 @@ rule ngs_mapping_star_run: log: **wf.get_log_file("star", "run"), wrapper: - wf.wrapper_path("star") # TODO => star/run + wf.wrapper_path("star") # GATK-based BAM postprocessing ----------------------------------------------- @@ -158,7 +158,7 @@ if wf.config["postprocessing"] == "gatk_post_bam": log: **wf.get_log_file("gatk_post_bam", "run"), wrapper: - wf.wrapper_path("gatk_post_bam") # TODO => gatk_post_bam/run + wf.wrapper_path("gatk_post_bam") # Run minimap2 --------------------------------------------------------------- @@ -180,48 +180,8 @@ rule ngs_mapping_minimap2_run: wf.wrapper_path("minimap2") -# Run NGMLR ------------------------------------------------------------------- - - -rule ngs_mapping_ngmlr_run: - input: - wf.get_input_files("ngmlr", "run"), - output: - **wf.get_output_files("ngmlr", "run"), - threads: wf.get_resource("ngmlr", "run", "threads") - resources: - time=wf.get_resource("ngmlr", "run", "time"), - memory=wf.get_resource("ngmlr", "run", "memory"), - partition=wf.get_resource("ngmlr", "run", "partition"), - params: - chained=False, - log: - **wf.get_log_file("ngmlr", "run"), - wrapper: - wf.wrapper_path("ngmlr/run") - - # QC / Statistics ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# Generate Picard Hybrid Selection Metrics Report ----------------------------- - - -rule ngs_mapping_picard_hs_metrics_run: - input: - **wf.get_input_files("picard_hs_metrics", "run"), - output: - **wf.get_output_files("picard_hs_metrics", "run"), - threads: wf.get_resource("picard_hs_metrics", "run", "threads") - resources: - time=wf.get_resource("picard_hs_metrics", "run", "time"), - memory=wf.get_resource("picard_hs_metrics", "run", "memory"), - partition=wf.get_resource("picard_hs_metrics", "run", "partition"), - log: - wf.get_log_file("picard_hs_metrics", "run"), - wrapper: - wf.wrapper_path("picard/hs_metrics") - - # Generate target region coverage report -------------------------------------- @@ -238,7 +198,7 @@ rule ngs_mapping_target_coverage_report_run: params: **{"args": wf.get_params("target_coverage_report", "run")}, log: - wf.get_log_file("target_coverage_report", "run"), + **wf.get_log_file("target_coverage_report", "run"), wrapper: wf.wrapper_path("target_cov_report/run") @@ -284,7 +244,7 @@ rule ngs_mapping_genome_coverage_report_run: # Compute depth of coverage files (VCF and bigWig) ---------------------------- -rule ngs_mapping_generate_doc_files: +rule ngs_mapping_bam_collect_doc_run: input: **wf.get_input_files("bam_collect_doc", "run")(), output: @@ -295,6 +255,6 @@ rule ngs_mapping_generate_doc_files: memory=wf.get_resource("bam_collect_doc", "run", "memory"), partition=wf.get_resource("bam_collect_doc", "run", "partition"), log: - wf.get_log_file("bam_collect_doc", "run"), + **wf.get_log_file("bam_collect_doc", "run"), wrapper: wf.wrapper_path("maelstrom/bam_collect_doc") diff --git a/snappy_pipeline/workflows/ngs_mapping/__init__.py b/snappy_pipeline/workflows/ngs_mapping/__init__.py index 7094339d6..b890222a5 100644 --- a/snappy_pipeline/workflows/ngs_mapping/__init__.py +++ b/snappy_pipeline/workflows/ngs_mapping/__init__.py @@ -148,7 +148,6 @@ - (long/PacBio/Nanopore) DNA - ``"minimap2"`` - - ``"ngmlr"`` - ``"external"`` ======= @@ -313,7 +312,7 @@ READ_MAPPERS_RNA = ("star",) #: Available read mappers for (long/PacBio/Nanopoare) DNA-seq data -READ_MAPPERS_DNA_LONG = ("minialign", "ngmlr", "ngmlr_chained") +READ_MAPPERS_DNA_LONG = ("minimap2",) #: Default configuration DEFAULT_CONFIG = r""" @@ -323,7 +322,7 @@ tools: dna: [] # Required if DNA analysis; otherwise, leave empty. Example: 'bwa'. rna: [] # Required if RNA analysis; otherwise, leave empty. Example: 'star'. - dna_long: [] # Required if long-read mapper used; otherwise, leave empty. Example: 'ngmlr'. + dna_long: [] # Required if long-read mapper used; otherwise, leave empty. Example: 'minimap2'. path_link_in: "" # OPTIONAL Override data set configuration search paths for FASTQ files # Whether or not to compute coverage BED file compute_coverage_bed: false @@ -345,13 +344,10 @@ min_cov_warning: 20 # >= 20x for WARNING min_cov_ok: 50 # >= 50x for OK detailed_reporting: false # per-exon details (cannot go into multiqc) + # Depth of coverage collection, mainly useful for genomes. bam_collect_doc: enabled: false window_length: 1000 - # Enable Picard HS metrics by setting both paths - picard_hs_metrics: - path_targets_interval_list: null - path_baits_interval_list: null # Configuration for BWA bwa: path_index: REQUIRED # Required if listed in ngs_mapping.tools.dna; otherwise, can be removed. @@ -391,18 +387,9 @@ out_sam_strand_field: None # or for cufflinks: intronMotif include_unmapped: true quant_mode: '' - # Configuration for Minialign - minialign: - # `path_index`: Required if listed in ngs_mapping.tools.dna_long; otherwise, can be removed. - path_index: REQUIRED - ref_gc_stats: null # Optional + # Configuration for Minimap2 + minimap2: mapping_threads: 16 - num_threads_bam_view: 4 - # Configuration for NGMLR - ngmlr: - # `path_index`: Required if listed in ngs_mapping.tools.dna_long; otherwise, can be removed. - path_index: REQUIRED - ref_gc_stats: null # Optional # Select postprocessing method, only for DNA alignment postprocessing: null # optional, {'gatk_post_bam'} # Configuration for GATK BAM postprocessing @@ -495,13 +482,14 @@ def get_output_files(self, action): def _get_log_file(self, action): """Return dict of log files.""" _ = action - prefix = "work/{mapper}.{{library_name}}/log/{mapper}.{{library_name}}".format( - mapper=self.__class__.name - ) + mapper = self.__class__.name + prefix = f"work/{mapper}.{{library_name}}/log/{mapper}.{{library_name}}.mapping" key_ext = ( ("log", ".log"), ("conda_info", ".conda_info.txt"), ("conda_list", ".conda_list.txt"), + ("wrapper", ".wrapper.py"), + ("env_yaml", ".environment.yaml"), ) for key, ext in key_ext: yield key, prefix + ext @@ -660,9 +648,9 @@ def get_resource_usage(self, action): actions_str = ", ".join(self.actions) error_message = f"Action '{action}' is not supported. Valid options: {actions_str}" raise UnsupportedActionException(error_message) - mem_gb = int(3.5 * self.config["minialign"]["mapping_threads"]) + mem_gb = int(3.5 * self.config["minimap2"]["mapping_threads"]) return ResourceUsage( - threads=self.config["minialign"]["mapping_threads"], + threads=self.config["minimap2"]["mapping_threads"], time="2-00:00:00", # 2 days memory=f"{mem_gb}G", ) @@ -675,50 +663,6 @@ def _get_params_run(self, wildcards): return {"extra_infos": self.parent.ngs_library_to_extra_infos[wildcards.library_name]} -class NgmlrStepPart(ReadMappingStepPart): - """Support for performing PacBio alignment using NGMLR without chaining""" - - #: Step name - name = "ngmlr" - - def check_config(self): - """Check parameters in configuration. - - Method checks that all parameters required to execute BWA are present in the - configuration. If invalid configuration, it raises InvalidConfiguration exception. - """ - # Check if tool is at all included in workflow - if not (set(self.config["tools"]["dna_long"]) & {"ngmlr", "ngmlr_chained"}): - return # NGLMR not run, don't check configuration # pragma: no cover - - # Check required configuration settings present - self.parent.ensure_w_config( - config_keys=("step_config", "ngs_mapping", "ngmlr", "path_index"), - msg="Path to NGMLR index is required", - ) - - def get_resource_usage(self, action): - """Get Resource Usage - - :param action: Action (i.e., step) in the workflow, example: 'run'. - :type action: str - - :return: Returns ResourceUsage for step. - - :raises UnsupportedActionException: if action not in class defined list of valid actions. - """ - if action not in self.actions: - actions_str = ", ".join(self.actions) - error_message = f"Action '{action}' is not supported. Valid options: {actions_str}" - raise UnsupportedActionException(error_message) - # TODO: Add resources to DEFAULT_CONFIG instead of hard-coding. - return ResourceUsage( - threads=16, - time="4-00:00:00", # 4 days - memory="50G", - ) - - class ExternalStepPart(ReadMappingStepPart): """Support for linking in external BAM files""" @@ -846,6 +790,8 @@ def get_log_file(self, action): ("log", ".log"), ("conda_info", ".conda_info.txt"), ("conda_list", ".conda_list.txt"), + ("wrapper", ".wrapper.py"), + ("env_yaml", ".environment.yaml"), ) for key, ext in key_ext: yield key, prefix + ext @@ -959,60 +905,6 @@ def _get_postproc_token(self): }[(do_realignment, do_recalibration)] -class PicardHsMetricsStepPart(BaseStepPart): - """Build target report from Picard HsMetrics""" - - #: Step name - name = "picard_hs_metrics" - - #: Class available actions - actions = ("run",) - - def __init__(self, parent): - super().__init__(parent) - - @staticmethod - def get_input_files(action): - """Return required input files""" - assert action == "run", "Unsupported action" - return { - "bam": "work/{mapper_lib}/out/{mapper_lib}.bam", - "bai": "work/{mapper_lib}/out/{mapper_lib}.bam.bai", - } - - @dictify - def get_output_files(self, action): - """Return output files""" - assert action == "run", "Unsupported action" - yield "txt", "work/{mapper_lib}/report/picard_hs_metrics/{mapper_lib}.txt" - yield "txt_md5", "work/{mapper_lib}/report/picard_hs_metrics/{mapper_lib}.txt.md5" - - @staticmethod - def get_log_file(action): - _ = action - return "work/{mapper_lib}/log/snakemake.picard_hs_metrics.log" - - def get_resource_usage(self, action): - """Get Resource Usage - - :param action: Action (i.e., step) in the workflow, example: 'run'. - :type action: str - - :return: Returns ResourceUsage for step. - - :raises UnsupportedActionException: if action not in class defined list of valid actions. - """ - if action not in self.actions: - actions_str = ", ".join(self.actions) - error_message = f"Action '{action}' is not supported. Valid options: {actions_str}" - raise UnsupportedActionException(error_message) - return ResourceUsage( - threads=2, - time="04:00:00", # 4 hours - memory="20G", - ) - - class TargetCoverageReportStepPart(BaseStepPart): """Build target coverage report""" @@ -1068,12 +960,23 @@ def _get_output_files_collect(self): yield "txt", "work/target_cov_report/out/target_cov_report.txt" yield "txt_md5", "work/target_cov_report/out/target_cov_report.txt.md5" + @dictify def get_log_file(self, action): self._validate_action(action) if action == "run": - return "work/{mapper_lib}/log/snakemake.target_coverage.log" + prefix = "work/{mapper_lib}/log/{mapper_lib}.target_cov_report" + key_ext = ( + ("log", ".log"), + ("conda_info", ".conda_info.txt"), + ("conda_list", ".conda_list.txt"), + ("wrapper", ".wrapper.py"), + ("env_yaml", ".environment.yaml"), + ) + for key, ext in key_ext: + yield key, prefix + ext + yield key + "_md5", prefix + ext + ".md5" else: - return "work/target_cov_report/log/snakemake.target_coverage.log" + yield "log", "work/target_cov_report/log/snakemake.target_coverage.log" def get_params(self, action): assert action == "run", "Parameters only available for action 'run'." @@ -1254,8 +1157,19 @@ def _get_output_files_run(self): yield "bw_md5", "work/{mapper_lib}/report/cov/{mapper_lib}.cov.bw.md5" @staticmethod + @dictify def get_log_file(action): - return "work/{mapper_lib}/log/snakemake.bam_collect_doc.log" + prefix = "work/{mapper_lib}/log/{mapper_lib}.bam_collect_doc" + key_ext = ( + ("log", ".log"), + ("conda_info", ".conda_info.txt"), + ("conda_list", ".conda_list.txt"), + ("wrapper", ".wrapper.py"), + ("env_yaml", ".environment.yaml"), + ) + for key, ext in key_ext: + yield key, prefix + ext + yield key + "_md5", prefix + ext + ".md5" def get_resource_usage(self, action): """Get Resource Usage @@ -1301,8 +1215,6 @@ def __init__(self, workflow, config, config_lookup_paths, config_paths, workdir) LinkOutBamStepPart, LinkOutStepPart, Minimap2StepPart, - NgmlrStepPart, - PicardHsMetricsStepPart, StarStepPart, TargetCoverageReportStepPart, BamCollectDocStepPart, @@ -1369,17 +1281,25 @@ def get_result_files(self): yield from self._yield_result_files( os.path.join("output", name_pattern, "out", name_pattern + "{ext}"), ext=EXT_VALUES ) - yield from self._yield_result_files( - os.path.join("output", name_pattern, "log", "{mapper}.{ngs_library.name}.{ext}"), - ext=( - "log", - "conda_info.txt", - "conda_list.txt", - "log.md5", - "conda_info.txt.md5", - "conda_list.txt.md5", - ), - ) + infixes = ["mapping", "target_cov_report"] + if self.config["bam_collect_doc"]["enabled"]: + infixes.append("bam_collect_doc") + for infix in infixes: + yield from self._yield_result_files( + os.path.join("output", name_pattern, "log", "{mapper}.{ngs_library.name}.{ext}"), + ext=( + f"{infix}.log", + f"{infix}.conda_info.txt", + f"{infix}.conda_list.txt", + f"{infix}.wrapper.py", + f"{infix}.environment.yaml", + f"{infix}.log.md5", + f"{infix}.conda_info.txt.md5", + f"{infix}.conda_list.txt.md5", + f"{infix}.wrapper.py.md5", + f"{infix}.environment.yaml.md5", + ), + ) if self.config["bam_collect_doc"]["enabled"]: yield from self._yield_result_files( os.path.join("output", name_pattern, "report", "cov", name_pattern + ".cov.{ext}"), @@ -1428,20 +1348,6 @@ def get_result_files(self): ) yield "output/target_cov_report/out/target_cov_report.txt" yield "output/target_cov_report/out/target_cov_report.txt.md5" - if ( - self.config["picard_hs_metrics"]["path_targets_interval_list"] - and self.config["picard_hs_metrics"]["path_baits_interval_list"] - ): - yield from self._yield_result_files( - os.path.join( - "output", name_pattern, "report", "picard_hs_metrics", name_pattern + ".txt" - ) - ) - yield from self._yield_result_files( - os.path.join( - "output", name_pattern, "report", "picard_hs_metrics", name_pattern + ".txt.md5" - ) - ) if self.config["compute_coverage_bed"]: yield from self._yield_result_files( os.path.join("output", name_pattern, "report", "coverage", name_pattern + "{ext}"), @@ -1457,7 +1363,7 @@ def _yield_result_files(self, tpl, **kwargs): for sheet in self.shortcut_sheets: for ngs_library in sheet.all_ngs_libraries: extraction_type = ngs_library.test_sample.extra_infos["extractionType"] - if ngs_library.extra_infos["seqPlatform"] in ("ONP", "PacBio"): + if ngs_library.extra_infos["seqPlatform"] in ("ONT", "PacBio"): suffix = "_long" else: suffix = "" diff --git a/snappy_wrappers/wrappers/bwa/__init__.py b/snappy_wrappers/wrappers/bwa/__init__.py deleted file mode 100644 index 1145fb33d..000000000 --- a/snappy_wrappers/wrappers/bwa/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# -*- coding: utf-8 -*- -"""CUBI+Snakemake wrapper for BWA""" diff --git a/snappy_wrappers/wrappers/bwa/environment.yaml b/snappy_wrappers/wrappers/bwa/environment.yaml index d8ca1cb6d..8666d5e3d 100644 --- a/snappy_wrappers/wrappers/bwa/environment.yaml +++ b/snappy_wrappers/wrappers/bwa/environment.yaml @@ -3,12 +3,10 @@ channels: - bioconda dependencies: - bwa ==0.7.17 - - samtools ==1.9 - - samblaster ==0.1.24 + - samtools =1.16 + - samblaster ==0.1.26 - seqtk ==1.3 - trimadap ==r11 - # NB: gnuplot 5.2.7 is incompatible with samtools ==1.9 - # see https://github.com/samtools/samtools/issues/1065 - - gnuplot ==5.2.6 - - libpng ==1.6.37 + - gnuplot ==5.4.5 + - libpng ==1.6.39 - inline-html ==1.0.0 diff --git a/snappy_wrappers/wrappers/bwa/wrapper.py b/snappy_wrappers/wrappers/bwa/wrapper.py index 359cf1097..5a3a5dbac 100644 --- a/snappy_wrappers/wrappers/bwa/wrapper.py +++ b/snappy_wrappers/wrappers/bwa/wrapper.py @@ -13,22 +13,20 @@ input_left = snakemake.params.args["input"]["reads_left"] input_right = snakemake.params.args["input"].get("reads_right", "") -this_file = __file__ - shell( r""" set -x -# TODO: remove this again, is for fail early -# Additional logging for transparency & reproducibility -# Logging: Save a copy this wrapper (with the pickle details in the header) -cp {this_file} $(dirname {snakemake.log.log})/wrapper_bwa.py - -# Write out information about conda installation. +# Write out information about conda and save a copy of the wrapper with picked variables +# as well as the environment.yaml file. conda list >{snakemake.log.conda_list} conda info >{snakemake.log.conda_info} md5sum {snakemake.log.conda_list} >{snakemake.log.conda_list_md5} md5sum {snakemake.log.conda_info} >{snakemake.log.conda_info_md5} +cp {__real_file__} {snakemake.log.wrapper} +md5sum {snakemake.log.wrapper} >{snakemake.log.wrapper_md5} +cp $(dirname {__file__})/environment.yaml {snakemake.log.env_yaml} +md5sum {snakemake.log.env_yaml} >{snakemake.log.env_yaml_md5} # Also pipe stderr to log file if [[ -n "{snakemake.log.log}" ]]; then @@ -269,6 +267,29 @@ {snakemake.output.report_bamstats_txt} \ || true # ignore failure +# Patch inline-html if necessary. +cat >$TMPDIR/inline-html.diff < {snakemake.output.report_bamstats_txt_md5} md5sum {snakemake.output.report_flagstats_txt} >{snakemake.output.report_flagstats_txt_md5} md5sum {snakemake.output.report_idxstats_txt} > {snakemake.output.report_idxstats_txt_md5} - -# Additional logging for transparency & reproducibility -# Logging: Save a copy this wrapper (with the pickle details in the header) -cp {this_file} $(dirname {snakemake.log.log})/wrapper_bwa.py - -# Logging: Save a permanent copy of the environment file used -cp $(dirname {this_file})/environment.yaml $(dirname {snakemake.log.log})/environment_wrapper_bwa.yaml """ ) # Compute MD5 sums of logs. shell( r""" +sleep 1s # try to wait for log file flush md5sum {snakemake.log.log} >{snakemake.log.log_md5} """ ) diff --git a/snappy_wrappers/wrappers/gatk_post_bam/wrapper.py b/snappy_wrappers/wrappers/gatk_post_bam/wrapper.py index 1330d71d4..0c43e8eda 100644 --- a/snappy_wrappers/wrappers/gatk_post_bam/wrapper.py +++ b/snappy_wrappers/wrappers/gatk_post_bam/wrapper.py @@ -22,9 +22,16 @@ r""" set -x -# Write out information about conda installation. +# Write out information about conda and save a copy of the wrapper with picked variables +# as well as the environment.yaml file. conda list >{snakemake.log.conda_list} conda info >{snakemake.log.conda_info} +md5sum {snakemake.log.conda_list} >{snakemake.log.conda_list_md5} +md5sum {snakemake.log.conda_info} >{snakemake.log.conda_info_md5} +cp {__real_file__} {snakemake.log.wrapper} +md5sum {snakemake.log.wrapper} >{snakemake.log.wrapper_md5} +cp $(dirname {__file__})/environment.yaml {snakemake.log.env_yaml} +md5sum {snakemake.log.env_yaml} >{snakemake.log.env_yaml_md5} # Also pipe stderr to log file if [[ -n "{snakemake.log.log}" ]]; then @@ -178,3 +185,11 @@ fi """ ) + +# Compute MD5 sums of logs. +shell( + r""" +sleep 1s # try to wait for log file flush +md5sum {snakemake.log.log} >{snakemake.log.log_md5} +""" +) diff --git a/snappy_wrappers/wrappers/maelstrom/bam_collect_doc/wrapper.py b/snappy_wrappers/wrappers/maelstrom/bam_collect_doc/wrapper.py index 5bf072efe..dac1e31c2 100644 --- a/snappy_wrappers/wrappers/maelstrom/bam_collect_doc/wrapper.py +++ b/snappy_wrappers/wrappers/maelstrom/bam_collect_doc/wrapper.py @@ -6,6 +6,28 @@ r""" set -x +# Write out information about conda and save a copy of the wrapper with picked variables +# as well as the environment.yaml file. +conda list >{snakemake.log.conda_list} +conda info >{snakemake.log.conda_info} +md5sum {snakemake.log.conda_list} >{snakemake.log.conda_list_md5} +md5sum {snakemake.log.conda_info} >{snakemake.log.conda_info_md5} +cp {__real_file__} {snakemake.log.wrapper} +md5sum {snakemake.log.wrapper} >{snakemake.log.wrapper_md5} +cp $(dirname {__file__})/environment.yaml {snakemake.log.env_yaml} +md5sum {snakemake.log.env_yaml} >{snakemake.log.env_yaml_md5} + +# Also pipe stderr to log file +if [[ -n "{snakemake.log.log}" ]]; then + if [[ "$(set +e; tty; set -e)" != "" ]]; then + rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) + exec 2> >(tee -a "{snakemake.log.log}" >&2) + else + rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) + echo "No tty, logging disabled" >"{snakemake.log.log}" + fi +fi + export TMPDIR=$(mktemp -d) trap "rm -rf $TMPDIR" ERR EXIT @@ -47,3 +69,11 @@ md5sum $(basename {snakemake.output.bw}) >$(basename {snakemake.output.bw_md5}) """ ) + +# Compute MD5 sums of logs. +shell( + r""" +sleep 1s # try to wait for log file flush +md5sum {snakemake.log.log} >{snakemake.log.log_md5} +""" +) diff --git a/snappy_wrappers/wrappers/minimap2/wrapper.py b/snappy_wrappers/wrappers/minimap2/wrapper.py index d22fdc924..1372918f4 100644 --- a/snappy_wrappers/wrappers/minimap2/wrapper.py +++ b/snappy_wrappers/wrappers/minimap2/wrapper.py @@ -15,9 +15,16 @@ r""" set -x -# Write out information about conda installation. +# Write out information about conda and save a copy of the wrapper with picked variables +# as well as the environment.yaml file. conda list >{snakemake.log.conda_list} conda info >{snakemake.log.conda_info} +md5sum {snakemake.log.conda_list} >{snakemake.log.conda_list_md5} +md5sum {snakemake.log.conda_info} >{snakemake.log.conda_info_md5} +cp {__real_file__} {snakemake.log.wrapper} +md5sum {snakemake.log.wrapper} >{snakemake.log.wrapper_md5} +cp $(dirname {__file__})/environment.yaml {snakemake.log.env_yaml} +md5sum {snakemake.log.env_yaml} >{snakemake.log.env_yaml_md5} # Also pipe stderr to log file if [[ -n "{snakemake.log.log}" ]]; then @@ -107,6 +114,29 @@ {snakemake.output.report_bamstats_txt} \ || true # ignore failure +# Patch inline-html if necessary. +cat >$TMPDIR/inline-html.diff < {snakemake.output.report_bamstats_txt_md5} md5sum {snakemake.output.report_flagstats_txt} >{snakemake.output.report_flagstats_txt_md5} md5sum {snakemake.output.report_idxstats_txt} > {snakemake.output.report_idxstats_txt_md5} +""" +) -# Additional logging for transparency & reproducibility -# Logging: Save a copy this wrapper (with the pickle details in the header) -cp {this_file} $(dirname {snakemake.log.log})/wrapper_bwa.py - -# Logging: Save a permanent copy of the environment file used -cp $(dirname {this_file})/environment.yaml $(dirname {snakemake.log.log})/environment_wrapper_bwa.yaml +# Compute MD5 sums of logs. +shell( + r""" +sleep 1s # try to wait for log file flush +md5sum {snakemake.log.log} >{snakemake.log.log_md5} """ ) diff --git a/snappy_wrappers/wrappers/ngmlr/run/environment.yaml b/snappy_wrappers/wrappers/ngmlr/run/environment.yaml deleted file mode 100644 index 2feb54c55..000000000 --- a/snappy_wrappers/wrappers/ngmlr/run/environment.yaml +++ /dev/null @@ -1,10 +0,0 @@ -channels: -- conda-forge -- bioconda -- defaults -dependencies: -- ngmlr ==0.2.7 -- samtools ==1.9 -- htslib ==1.9 -- pysam ==0.15.3 -- inline-html ==0.1.2 diff --git a/snappy_wrappers/wrappers/ngmlr/run/meta.yaml b/snappy_wrappers/wrappers/ngmlr/run/meta.yaml deleted file mode 100644 index da144de0f..000000000 --- a/snappy_wrappers/wrappers/ngmlr/run/meta.yaml +++ /dev/null @@ -1,4 +0,0 @@ -name: ngmlr -description: Next Generation Mapper for Long-Read Data -authors: -- Manuel Holtgrewe diff --git a/snappy_wrappers/wrappers/ngmlr/run/wrapper.py b/snappy_wrappers/wrappers/ngmlr/run/wrapper.py deleted file mode 100644 index 7f438a733..000000000 --- a/snappy_wrappers/wrappers/ngmlr/run/wrapper.py +++ /dev/null @@ -1,126 +0,0 @@ -# -*- coding: utf-8 -*- -"""Wrapper for running NGMLR (PacBio/Nanopore aligner) -""" - -from snakemake.shell import shell - -__author__ = "Manuel Holtgrewe" -__email__ = "manuel.holtgrewe@bih-charite.de" - -this_file = __file__ - -# TODO: write out separate read groups - -shell( - r""" -set -x - -# Write out information about conda installation. -conda list >{snakemake.log.conda_list} -conda info >{snakemake.log.conda_info} - -# Also pipe stderr to log file -if [[ -n "{snakemake.log.log}" ]]; then - if [[ "$(set +e; tty; set -e)" != "" ]]; then - rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) - exec 2> >(tee -a "{snakemake.log.log}" >&2) - else - rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) - echo "No tty, logging disabled" >"{snakemake.log.log}" - fi -fi - -BAM_CHAIN=/fast/users/mholtgr/scratch/build/bamChain/bamChain - -export TMPDIR=$(mktemp -d) -trap "rm -rf $TMPDIR" EXIT - -mkdir -p $TMPDIR/out $TMPDIR/chained $TMPDIR/sorted $TMPDIR/sort.tmp - -#if [[ "{snakemake.wildcards.library_name}" == *PacBio* ]]; then -preset=pacbio -#else -# preset=ont -#fi - -i=1 -for fname in $(find $(dirname {snakemake.input}) -name '*.bam' -or -name '*.fast?.gz'); do - basename=$(basename $fname .bam) - - if [[ "$fname" == *.bam ]]; then \ - samtools fastq -F 2048 $fname; \ - else \ - zcat $fname; \ - fi \ - | ngmlr \ - -t 16 \ - -x $preset \ - -r {snakemake.config[step_config][ngs_mapping][ngmlr][path_index]} \ - -q /dev/stdin \ - -o /dev/stdout \ - | samtools addreplacerg \ - -r "@RT\tID:{snakemake.wildcards.library_name}.$i\tSM:{snakemake.wildcards.library_name}\tPL:PACBIO" - \ - | samtools sort -l 9 -n -m 4G -@4 -O BAM \ - >$TMPDIR/out/$i.bam - - if [[ "{snakemake.params[chained]}" == "True" ]]; then - python3 $BAM_CHAIN --minmapq 10 $TMPDIR/out/$i.bam $TMPDIR/chained/$i.bam - else - mv $TMPDIR/out/$i.bam $TMPDIR/chained/$i.bam - fi - - samtools sort -m 4G -@ 3 \ - -o $TMPDIR/sorted/$i.bam \ - -T $TMPDIR/sort.tmp/ \ - $TMPDIR/chained/$i.bam - - let "i=$i+1" -done - -out_bam={snakemake.output.bam} - -samtools merge -@ 8 $out_bam $TMPDIR/sorted/*.bam - -samtools index $out_bam - -# Compute MD5 sums -pushd $(dirname $out_bam) -md5sum $(basename $out_bam) >$(basename $out_bam).md5 -md5sum $(basename $out_bam).bai >$(basename $out_bam).bai.md5 -popd - -# QC Report --------------------------------------------------------------------------------------- - -# gather statistics from BAM file -# TODO: use pipes for only reading once from disk? -samtools stats {snakemake.output.bam} > {snakemake.output.report_bamstats_txt} -samtools flagstat {snakemake.output.bam} > {snakemake.output.report_flagstats_txt} -samtools idxstats {snakemake.output.bam} > {snakemake.output.report_idxstats_txt} - -# call plot-bamstats -mkdir $TMPDIR/bamstats.d -plot-bamstats \ - -p $TMPDIR/bamstats.d/ \ - {snakemake.output.report_bamstats_txt} \ -|| true # ignore failure - -# Convert HTML report into one file. -inline-html \ - --in-file $TMPDIR/bamstats.d/index.html \ - --out-file {snakemake.output.report_bamstats_html} \ -|| touch {snakemake.output.report_bamstats_html} - -# Build MD5 files for the reports -md5sum {snakemake.output.report_bamstats_html} > {snakemake.output.report_bamstats_html_md5} -md5sum {snakemake.output.report_bamstats_txt} > {snakemake.output.report_bamstats_txt_md5} -md5sum {snakemake.output.report_flagstats_txt} >{snakemake.output.report_flagstats_txt_md5} -md5sum {snakemake.output.report_idxstats_txt} > {snakemake.output.report_idxstats_txt_md5} - -# Additional logging for transparency & reproducibility -# Logging: Save a copy this wrapper (with the pickle details in the header) -cp {this_file} $(dirname {snakemake.log.log})/wrapper_bwa.py - -# Logging: Save a permanent copy of the environment file used -cp $(dirname {this_file})/environment.yaml $(dirname {snakemake.log.log})/environment_wrapper_bwa.yaml -""" -) diff --git a/snappy_wrappers/wrappers/picard/hs_metrics/environment.yaml b/snappy_wrappers/wrappers/picard/hs_metrics/environment.yaml deleted file mode 100644 index 725e3a2c0..000000000 --- a/snappy_wrappers/wrappers/picard/hs_metrics/environment.yaml +++ /dev/null @@ -1,6 +0,0 @@ -channels: -- conda-forge -- bioconda -- r -dependencies: -- picard ==2.14 diff --git a/snappy_wrappers/wrappers/picard/hs_metrics/wrapper.py b/snappy_wrappers/wrappers/picard/hs_metrics/wrapper.py deleted file mode 100644 index b22db3a97..000000000 --- a/snappy_wrappers/wrappers/picard/hs_metrics/wrapper.py +++ /dev/null @@ -1,32 +0,0 @@ -from snakemake import shell - -shell( - r""" -set -x - -# Also pipe stderr to log file -if [[ -n "{snakemake.log}" ]]; then - if [[ "$(set +e; tty; set -e)" != "" ]]; then - rm -f "{snakemake.log}" && mkdir -p $(dirname {snakemake.log}) - exec 2> >(tee -a "{snakemake.log}" >&2) - else - rm -f "{snakemake.log}" && mkdir -p $(dirname {snakemake.log}) - echo "No tty, logging disabled" >"{snakemake.log}" - fi -fi - -picard -Xmx6g -Djava.io.tmpdir=$TMPDIR \ - CollectHsMetrics \ - I={snakemake.input.bam} \ - O={snakemake.output.txt} \ - R={snakemake.config[static_data_config][reference][path]} \ - MINIMUM_MAPPING_QUALITY=0 \ - TARGET_INTERVALS={snakemake.config[step_config][ngs_mapping][picard_hs_metrics][path_targets_interval_list]} \ - BAIT_INTERVALS={snakemake.config[step_config][ngs_mapping][picard_hs_metrics][path_baits_interval_list]} \ - VALIDATION_STRINGENCY=SILENT - -pushd $(dirname {snakemake.output.txt}) -md5sum $(basename {snakemake.output.txt}) >$(basename {snakemake.output.txt}).md5 -popd -""" -) diff --git a/snappy_wrappers/wrappers/star/wrapper.py b/snappy_wrappers/wrappers/star/wrapper.py index a3709815a..a90691185 100644 --- a/snappy_wrappers/wrappers/star/wrapper.py +++ b/snappy_wrappers/wrappers/star/wrapper.py @@ -13,17 +13,20 @@ reads_left = snakemake.params.args["input"]["reads_left"] reads_right = snakemake.params.args["input"].get("reads_right", "") -this_file = __file__ - shell( r""" set -x -# Write out information about conda installation. +# Write out information about conda and save a copy of the wrapper with picked variables +# as well as the environment.yaml file. conda list >{snakemake.log.conda_list} conda info >{snakemake.log.conda_info} md5sum {snakemake.log.conda_list} >{snakemake.log.conda_list_md5} md5sum {snakemake.log.conda_info} >{snakemake.log.conda_info_md5} +cp {__real_file__} {snakemake.log.wrapper} +md5sum {snakemake.log.wrapper} >{snakemake.log.wrapper_md5} +cp $(dirname {__file__})/environment.yaml {snakemake.log.env_yaml} +md5sum {snakemake.log.env_yaml} >{snakemake.log.env_yaml_md5} # Also pipe stderr to log file if [[ -n "{snakemake.log.log}" ]]; then @@ -206,6 +209,29 @@ {snakemake.output.report_bamstats_txt} \ || true # ignore failure +# Patch inline-html if necessary. +cat >$TMPDIR/inline-html.diff <{snakemake.log.log_md5} """ ) diff --git a/snappy_wrappers/wrappers/target_cov_report/run/wrapper.py b/snappy_wrappers/wrappers/target_cov_report/run/wrapper.py index 5ed15ad38..254c0d51a 100644 --- a/snappy_wrappers/wrappers/target_cov_report/run/wrapper.py +++ b/snappy_wrappers/wrappers/target_cov_report/run/wrapper.py @@ -8,22 +8,29 @@ shell.executable("/bin/bash") - shell( r""" set -x -# Hack: get back bin directory of base/root environment. -export PATH=$PATH:$(dirname $(dirname $(which conda)))/bin +# Write out information about conda and save a copy of the wrapper with picked variables +# as well as the environment.yaml file. +conda list >{snakemake.log.conda_list} +conda info >{snakemake.log.conda_info} +md5sum {snakemake.log.conda_list} >{snakemake.log.conda_list_md5} +md5sum {snakemake.log.conda_info} >{snakemake.log.conda_info_md5} +cp {__real_file__} {snakemake.log.wrapper} +md5sum {snakemake.log.wrapper} >{snakemake.log.wrapper_md5} +cp $(dirname {__file__})/environment.yaml {snakemake.log.env_yaml} +md5sum {snakemake.log.env_yaml} >{snakemake.log.env_yaml_md5} # Also pipe stderr to log file -if [[ -n "{snakemake.log}" ]]; then +if [[ -n "{snakemake.log.log}" ]]; then if [[ "$(set +e; tty; set -e)" != "" ]]; then - rm -f "{snakemake.log}" && mkdir -p $(dirname {snakemake.log}) - exec 2> >(tee -a "{snakemake.log}" >&2) + rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) + exec 2> >(tee -a "{snakemake.log.log}" >&2) else - rm -f "{snakemake.log}" && mkdir -p $(dirname {snakemake.log}) - echo "No tty, logging disabled" >"{snakemake.log}" + rm -f "{snakemake.log.log}" && mkdir -p $(dirname {snakemake.log.log}) + echo "No tty, logging disabled" >"{snakemake.log.log}" fi fi @@ -60,3 +67,11 @@ md5sum {snakemake.output.txt} > {snakemake.output.txt_md5} """ ) + +# Compute MD5 sums of logs. +shell( + r""" +sleep 1s # try to wait for log file flush +md5sum {snakemake.log.log} >{snakemake.log.log_md5} +""" +) diff --git a/tests/snappy_pipeline/workflows/common.py b/tests/snappy_pipeline/workflows/common.py index 951965542..3f7794ccc 100644 --- a/tests/snappy_pipeline/workflows/common.py +++ b/tests/snappy_pipeline/workflows/common.py @@ -1,25 +1,41 @@ """Shared method used in methods.""" +import typing -def get_expected_log_files_dict(base_out): + +def get_expected_log_files_dict( + *, base_out: str, infix: typing.Optional[str] = None, extended: bool = False +): """ :param base_out: Base path structure for log files. For example, if the expected path for the log is 'work/step.path/log/step.conda_info.txt', the argument should be 'work/step.path/log/step'. - :type base_out: str + :param infix: Optional infix string. + :param extended: Whether to include env_yaml and wrapper in output :return: Returns dictionary with expected path for log files based on the provided input. """ - # Define expected + if infix: + infix_dot = f"{infix}." + else: + infix_dot = "" expected = { - "conda_info": base_out + ".conda_info.txt", - "conda_info_md5": base_out + ".conda_info.txt.md5", - "conda_list": base_out + ".conda_list.txt", - "conda_list_md5": base_out + ".conda_list.txt.md5", - "log": base_out + ".log", - "log_md5": base_out + ".log.md5", + "conda_info": f"{base_out}.{infix_dot}conda_info.txt", + "conda_info_md5": f"{base_out}.{infix_dot}conda_info.txt.md5", + "conda_list": f"{base_out}.{infix_dot}conda_list.txt", + "conda_list_md5": f"{base_out}.{infix_dot}conda_list.txt.md5", + "log": f"{base_out}.{infix_dot}log", + "log_md5": f"{base_out}.{infix_dot}log.md5", } - # Return + if extended: + expected.update( + { + "env_yaml": f"{base_out}.{infix_dot}environment.yaml", + "env_yaml_md5": f"{base_out}.{infix_dot}environment.yaml.md5", + "wrapper": f"{base_out}.{infix_dot}wrapper.py", + "wrapper_md5": f"{base_out}.{infix_dot}wrapper.py.md5", + } + ) return expected @@ -35,10 +51,10 @@ def get_expected_output_vcf_files_dict(base_out): """ # Define expected expected = { - "vcf": base_out + ".vcf.gz", - "vcf_md5": base_out + ".vcf.gz.md5", - "tbi": base_out + ".vcf.gz.tbi", - "tbi_md5": base_out + ".vcf.gz.tbi.md5", + "vcf": f"{base_out}.vcf.gz", + "vcf_md5": f"{base_out}.vcf.gz.md5", + "tbi": f"{base_out}.vcf.gz.tbi", + "tbi_md5": f"{base_out}.vcf.gz.tbi.md5", } # Return return expected @@ -56,10 +72,10 @@ def get_expected_output_bcf_files_dict(base_out): """ # Define expected expected = { - "bcf": base_out + ".bcf", - "bcf_md5": base_out + ".bcf.md5", - "csi": base_out + ".bcf.csi", - "csi_md5": base_out + ".bcf.csi.md5", + "bcf": f"{base_out}.bcf", + "bcf_md5": f"{base_out}.bcf.md5", + "csi": f"{base_out}.bcf.csi", + "csi_md5": f"{base_out}.bcf.csi.md5", } # Return return expected diff --git a/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping.py b/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping.py index 22c5f55cd..ddaf4b6b7 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping.py +++ b/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping.py @@ -269,7 +269,9 @@ def test_bwa_step_part_get_log_file(ngs_mapping_workflow): """Tests BaseStepPart.get_log_file()""" # Define expected expected = get_expected_log_files_dict( - base_out="work/bwa.{library_name}/log/bwa.{library_name}" + base_out="work/bwa.{library_name}/log/bwa.{library_name}", + infix="mapping", + extended=True, ) # Get actual actual = ngs_mapping_workflow.get_log_file("bwa", "run") @@ -332,7 +334,9 @@ def test_star_step_part_get_log_file(ngs_mapping_workflow): """Tests StarStepPart.get_log_file()""" # Define expected expected = get_expected_log_files_dict( - base_out="work/star.{library_name}/log/star.{library_name}" + base_out="work/star.{library_name}/log/star.{library_name}", + infix="mapping", + extended=True, ) # Get actual actual = ngs_mapping_workflow.get_log_file("star", "run") @@ -395,7 +399,9 @@ def test_minimap2_step_part_get_log_file(ngs_mapping_workflow): """Tests Minimap2StepPart.get_log_file()""" # Define expected expected = get_expected_log_files_dict( - base_out="work/minimap2.{library_name}/log/minimap2.{library_name}" + base_out="work/minimap2.{library_name}/log/minimap2.{library_name}", + infix="mapping", + extended=True, ) # Get actual actual = ngs_mapping_workflow.get_log_file("minimap2", "run") @@ -413,69 +419,6 @@ def test_minimap2_step_part_get_resource(ngs_mapping_workflow): assert actual == expected, msg_error -# Tests for NgmlrStepPart ----------------------------------------------------------------------- - - -def test_ngmlr_step_part_get_args(ngs_mapping_workflow): - """Tests NgmlrStepPart.get_args()""" - # Define expected - wildcards = Wildcards(fromdict={"library_name": "P001-N1-DNA1-WGS1"}) - expected = { - "input": { - "reads_left": ["work/input_links/P001-N1-DNA1-WGS1/FCXXXXXX/L001/P001_R1.fastq.gz"], - "reads_right": ["work/input_links/P001-N1-DNA1-WGS1/FCXXXXXX/L001/P001_R2.fastq.gz"], - }, - "platform": "ILLUMINA", - "sample_name": "P001-N1-DNA1-WGS1", - } - # Get actual and assert - actual = ngs_mapping_workflow.get_args("ngmlr", "run")(wildcards) - assert actual == expected - - -def test_ngmlr_step_part_get_input_files(ngs_mapping_workflow): - """Tests NgmlrStepPart.get_input_files()""" - # Define expected - wildcards = Wildcards(fromdict={"library_name": "P001-N1-DNA1-WGS1"}) - expected = "work/input_links/P001-N1-DNA1-WGS1/.done" - # Get actual and assert - actual = ngs_mapping_workflow.get_input_files("ngmlr", "run")(wildcards) - assert actual == expected - - -def test_ngmlr_step_part_get_output_files(ngs_mapping_workflow): - """Tests NgmlrStepPart.get_output_files()""" - # Define expected - bam_base_out = "work/ngmlr.{library_name}/out/ngmlr.{library_name}" - report_base_out = "work/ngmlr.{library_name}/report/bam_qc/ngmlr.{library_name}" - expected = get_expected_output_files_dict(bam_base_out, report_base_out) - # Get actual - actual = ngs_mapping_workflow.get_output_files("ngmlr", "run") - assert actual == expected - - -def test_ngmlr_step_part_get_log_file(ngs_mapping_workflow): - """Tests NgmlrStepPart.get_log_file()""" - # Define expected - expected = get_expected_log_files_dict( - base_out="work/ngmlr.{library_name}/log/ngmlr.{library_name}" - ) - # Get actual - actual = ngs_mapping_workflow.get_log_file("ngmlr", "run") - assert actual == expected - - -def test_ngmlr_step_part_get_resource(ngs_mapping_workflow): - """Tests NgmlrStepPart.get_resource()""" - # Define expected - expected_dict = {"threads": 16, "time": "4-00:00:00", "memory": "50G", "partition": "medium"} - # Evaluate - for resource, expected in expected_dict.items(): - msg_error = f"Assertion error for resource '{resource}'." - actual = ngs_mapping_workflow.get_resource("ngmlr", "run", resource) - assert actual == expected, msg_error - - # Tests for ExternalStepPart ----------------------------------------------------------------------- @@ -554,7 +497,8 @@ def test_gatk_post_bam_step_part_get_output_files(ngs_mapping_workflow): def test_gatk_post_bam_step_part_get_log_file(ngs_mapping_workflow): """Tests GatkPostBamStepPart.get_log_file()""" expected = get_expected_log_files_dict( - base_out="work/{mapper}.{library_name}/log/gatk_post_bam.{library_name}" + base_out="work/{mapper}.{library_name}/log/gatk_post_bam.{library_name}", + extended=True, ) actual = ngs_mapping_workflow.get_log_file("gatk_post_bam", "run") assert actual == expected @@ -621,53 +565,6 @@ def test_link_out_bam_step_part_get_shell_cmd(ngs_mapping_workflow): assert actual == expected -# Tests for PicardHsMetricsStepPart ---------------------------------------------------------------- - - -def test_picard_hs_metrics_step_part_get_input_files(ngs_mapping_workflow): - """Tests PicardHsMetricsStepPart.get_input_files()""" - # Define expected - expected = { - "bam": "work/{mapper_lib}/out/{mapper_lib}.bam", - "bai": "work/{mapper_lib}/out/{mapper_lib}.bam.bai", - } - # Get actual and assert - actual = ngs_mapping_workflow.get_input_files("picard_hs_metrics", "run") - assert actual == expected - - -def test_picard_hs_metrics_step_part_get_output_files(ngs_mapping_workflow): - """Tests PicardHsMetricsStepPart.get_output_files()""" - # Define expected - expected = { - "txt": "work/{mapper_lib}/report/picard_hs_metrics/{mapper_lib}.txt", - "txt_md5": "work/{mapper_lib}/report/picard_hs_metrics/{mapper_lib}.txt.md5", - } - # Get actual - actual = ngs_mapping_workflow.get_output_files("picard_hs_metrics", "run") - assert actual == expected - - -def test_picard_hs_metrics_step_part_get_log_file(ngs_mapping_workflow): - """Tests PicardHsMetricsStepPart.get_log_file()""" - # Define expected - expected = "work/{mapper_lib}/log/snakemake.picard_hs_metrics.log" - # Get actual - actual = ngs_mapping_workflow.get_log_file("picard_hs_metrics", "run") - assert actual == expected - - -def test_picard_hs_metrics_step_part_get_resource(ngs_mapping_workflow): - """Tests PicardHsMetricsStepPart.get_resource()""" - # Define expected - expected_dict = {"threads": 2, "time": "04:00:00", "memory": "20G", "partition": "medium"} - # Evaluate - for resource, expected in expected_dict.items(): - msg_error = f"Assertion error for resource '{resource}'." - actual = ngs_mapping_workflow.get_resource("picard_hs_metrics", "run", resource) - assert actual == expected, msg_error - - # Tests for TargetCoverageReportStepPart ---------------------------------------------------------- @@ -712,13 +609,24 @@ def test_target_coverage_report_step_part_get_output_files(ngs_mapping_workflow) def test_target_coverage_report_step_part_run_get_log_file(ngs_mapping_workflow): """Tests TargetCoverageReportStepPart.get_log_file() - run""" - expected = "work/{mapper_lib}/log/snakemake.target_coverage.log" + expected = { + "log": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.log", + "log_md5": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.log.md5", + "conda_info": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.conda_info.txt", + "conda_info_md5": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.conda_info.txt.md5", + "conda_list": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.conda_list.txt", + "conda_list_md5": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.conda_list.txt.md5", + "wrapper": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.wrapper.py", + "wrapper_md5": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.wrapper.py.md5", + "env_yaml": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.environment.yaml", + "env_yaml_md5": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.environment.yaml.md5", + } assert ngs_mapping_workflow.get_log_file("target_coverage_report", "run") == expected def test_target_coverage_report_step_part_collect_get_log_file(ngs_mapping_workflow): """Tests TargetCoverageReportStepPart.get_log_file() - collect""" - expected = "work/target_cov_report/log/snakemake.target_coverage.log" + expected = {"log": "work/target_cov_report/log/snakemake.target_coverage.log"} assert ngs_mapping_workflow.get_log_file("target_coverage_report", "collect") == expected @@ -788,7 +696,18 @@ def test_generate_doc_files_step_part_get_output_files(ngs_mapping_workflow): def test_generate_doc_files_step_part_run_get_log_file(ngs_mapping_workflow): """Tests BamCollectDocStepPart.get_log_file() - run""" - expected = "work/{mapper_lib}/log/snakemake.bam_collect_doc.log" + expected = { + "log": "work/{mapper_lib}/log/{mapper_lib}.bam_collect_doc.log", + "log_md5": "work/{mapper_lib}/log/{mapper_lib}.bam_collect_doc.log.md5", + "conda_info": "work/{mapper_lib}/log/{mapper_lib}.bam_collect_doc.conda_info.txt", + "conda_info_md5": "work/{mapper_lib}/log/{mapper_lib}.bam_collect_doc.conda_info.txt.md5", + "conda_list": "work/{mapper_lib}/log/{mapper_lib}.bam_collect_doc.conda_list.txt", + "conda_list_md5": "work/{mapper_lib}/log/{mapper_lib}.bam_collect_doc.conda_list.txt.md5", + "wrapper": "work/{mapper_lib}/log/{mapper_lib}.bam_collect_doc.wrapper.py", + "wrapper_md5": "work/{mapper_lib}/log/{mapper_lib}.bam_collect_doc.wrapper.py.md5", + "env_yaml": "work/{mapper_lib}/log/{mapper_lib}.bam_collect_doc.environment.yaml", + "env_yaml_md5": "work/{mapper_lib}/log/{mapper_lib}.bam_collect_doc.environment.yaml.md5", + } assert ngs_mapping_workflow.get_log_file("bam_collect_doc", "run") == expected @@ -874,8 +793,6 @@ def test_ngs_mapping_workflow_steps(ngs_mapping_workflow): "link_out", "link_out_bam", "minimap2", - "ngmlr", - "picard_hs_metrics", "star", "target_coverage_report", ] @@ -886,26 +803,31 @@ def test_ngs_mapping_workflow_steps(ngs_mapping_workflow): def test_ngs_mapping_workflow_files(ngs_mapping_workflow): """Tests simple functionality of the workflow: checks if file structure is created according to the expected results from the tools, namely: bwa, external, gatk_post_bam, - genome_coverage_report, link_in, link_out, link_out_bam, minimap2, ngmlr, picard_hs_metrics, - star, target_coverage_report.""" + genome_coverage_report, link_in, link_out, link_out_bam, minimap2, star, target_coverage_report. + """ # Check result file construction expected = [ "output/bwa.P00{i}-N1-DNA1-WGS1/out/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext) for i in range(1, 7) for ext in ("bam", "bam.bai", "bam.md5", "bam.bai.md5") ] - expected += [ - "output/bwa.P00{i}-N1-DNA1-WGS1/log/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext) - for i in range(1, 7) - for ext in ( - "log", - "conda_info.txt", - "conda_list.txt", - "log.md5", - "conda_info.txt.md5", - "conda_list.txt.md5", - ) - ] + for infix in ("bam_collect_doc", "mapping", "target_cov_report"): + expected += [ + "output/bwa.P00{i}-N1-DNA1-WGS1/log/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext) + for i in range(1, 7) + for ext in ( + f"{infix}.log", + f"{infix}.log.md5", + f"{infix}.conda_info.txt", + f"{infix}.conda_info.txt.md5", + f"{infix}.conda_list.txt", + f"{infix}.conda_list.txt.md5", + f"{infix}.environment.yaml", + f"{infix}.environment.yaml.md5", + f"{infix}.wrapper.py", + f"{infix}.wrapper.py.md5", + ) + ] bam_stats_text_out = ( "output/bwa.P00{i}-N1-DNA1-WGS1/report/bam_qc/bwa.P00{i}-N1-DNA1-WGS1.bam.{stats}.{ext}" ) diff --git a/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping_processed_fastq.py b/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping_processed_fastq.py index 375b95662..25c197368 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping_processed_fastq.py +++ b/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping_processed_fastq.py @@ -271,7 +271,9 @@ def test_bwa_step_part_get_log_file(ngs_mapping_workflow): """Tests BaseStepPart.get_log_file()""" # Define expected expected = get_expected_log_files_dict( - base_out="work/bwa.{library_name}/log/bwa.{library_name}" + base_out="work/bwa.{library_name}/log/bwa.{library_name}", + infix="mapping", + extended=True, ) # Get actual actual = ngs_mapping_workflow.get_log_file("bwa", "run") @@ -336,7 +338,9 @@ def test_star_step_part_get_log_file(ngs_mapping_workflow): """Tests StarStepPart.get_log_file()""" # Define expected expected = get_expected_log_files_dict( - base_out="work/star.{library_name}/log/star.{library_name}" + base_out="work/star.{library_name}/log/star.{library_name}", + infix="mapping", + extended=True, ) # Get actual actual = ngs_mapping_workflow.get_log_file("star", "run") @@ -401,7 +405,9 @@ def test_minimap2_step_part_get_log_file(ngs_mapping_workflow): """Tests Minimap2StepPart.get_log_file()""" # Define expected expected = get_expected_log_files_dict( - base_out="work/minimap2.{library_name}/log/minimap2.{library_name}" + base_out="work/minimap2.{library_name}/log/minimap2.{library_name}", + infix="mapping", + extended=True, ) # Get actual actual = ngs_mapping_workflow.get_log_file("minimap2", "run") @@ -419,71 +425,6 @@ def test_minimap2_step_part_get_resource(ngs_mapping_workflow): assert actual == expected, msg_error -# Tests for NgmlrStepPart ----------------------------------------------------------------------- - - -def test_ngmlr_step_part_get_args(ngs_mapping_workflow): - """Tests NgmlrStepPart.get_args()""" - # Define expected - wildcards = Wildcards(fromdict={"library_name": "P001-N1-DNA1-WGS1"}) - expected = { - "input": { - "reads_left": ["work/input_links/P001-N1-DNA1-WGS1/FCXXXXXX/L001/out/P001_R1.fastq.gz"], - "reads_right": [ - "work/input_links/P001-N1-DNA1-WGS1/FCXXXXXX/L001/out/P001_R2.fastq.gz" - ], - }, - "platform": "ILLUMINA", - "sample_name": "P001-N1-DNA1-WGS1", - } - # Get actual and assert - actual = ngs_mapping_workflow.get_args("ngmlr", "run")(wildcards) - assert actual == expected - - -def test_ngmlr_step_part_get_input_files(ngs_mapping_workflow): - """Tests NgmlrStepPart.get_input_files()""" - # Define expected - wildcards = Wildcards(fromdict={"library_name": "P001-N1-DNA1-WGS1"}) - expected = "work/input_links/P001-N1-DNA1-WGS1/.done" - # Get actual and assert - actual = ngs_mapping_workflow.get_input_files("ngmlr", "run")(wildcards) - assert actual == expected - - -def test_ngmlr_step_part_get_output_files(ngs_mapping_workflow): - """Tests NgmlrStepPart.get_output_files()""" - # Define expected - bam_base_out = "work/ngmlr.{library_name}/out/ngmlr.{library_name}" - report_base_out = "work/ngmlr.{library_name}/report/bam_qc/ngmlr.{library_name}" - expected = get_expected_output_files_dict(bam_base_out, report_base_out) - # Get actual - actual = ngs_mapping_workflow.get_output_files("ngmlr", "run") - assert actual == expected - - -def test_ngmlr_step_part_get_log_file(ngs_mapping_workflow): - """Tests NgmlrStepPart.get_log_file()""" - # Define expected - expected = get_expected_log_files_dict( - base_out="work/ngmlr.{library_name}/log/ngmlr.{library_name}" - ) - # Get actual - actual = ngs_mapping_workflow.get_log_file("ngmlr", "run") - assert actual == expected - - -def test_ngmlr_step_part_get_resource(ngs_mapping_workflow): - """Tests NgmlrStepPart.get_resource()""" - # Define expected - expected_dict = {"threads": 16, "time": "4-00:00:00", "memory": "50G", "partition": "medium"} - # Evaluate - for resource, expected in expected_dict.items(): - msg_error = f"Assertion error for resource '{resource}'." - actual = ngs_mapping_workflow.get_resource("ngmlr", "run", resource) - assert actual == expected, msg_error - - # Tests for ExternalStepPart ----------------------------------------------------------------------- @@ -562,7 +503,8 @@ def test_gatk_post_bam_step_part_get_output_files(ngs_mapping_workflow): def test_gatk_post_bam_step_part_get_log_file(ngs_mapping_workflow): """Tests GatkPostBamStepPart.get_log_file()""" expected = get_expected_log_files_dict( - base_out="work/{mapper}.{library_name}/log/gatk_post_bam.{library_name}" + base_out="work/{mapper}.{library_name}/log/gatk_post_bam.{library_name}", + extended=True, ) actual = ngs_mapping_workflow.get_log_file("gatk_post_bam", "run") assert actual == expected @@ -629,53 +571,6 @@ def test_link_out_bam_step_part_get_shell_cmd(ngs_mapping_workflow): assert actual == expected -# Tests for PicardHsMetricsStepPart ---------------------------------------------------------------- - - -def test_picard_hs_metrics_step_part_get_input_files(ngs_mapping_workflow): - """Tests PicardHsMetricsStepPart.get_input_files()""" - # Define expected - expected = { - "bam": "work/{mapper_lib}/out/{mapper_lib}.bam", - "bai": "work/{mapper_lib}/out/{mapper_lib}.bam.bai", - } - # Get actual and assert - actual = ngs_mapping_workflow.get_input_files("picard_hs_metrics", "run") - assert actual == expected - - -def test_picard_hs_metrics_step_part_get_output_files(ngs_mapping_workflow): - """Tests PicardHsMetricsStepPart.get_output_files()""" - # Define expected - expected = { - "txt": "work/{mapper_lib}/report/picard_hs_metrics/{mapper_lib}.txt", - "txt_md5": "work/{mapper_lib}/report/picard_hs_metrics/{mapper_lib}.txt.md5", - } - # Get actual - actual = ngs_mapping_workflow.get_output_files("picard_hs_metrics", "run") - assert actual == expected - - -def test_picard_hs_metrics_step_part_get_log_file(ngs_mapping_workflow): - """Tests PicardHsMetricsStepPart.get_log_file()""" - # Define expected - expected = "work/{mapper_lib}/log/snakemake.picard_hs_metrics.log" - # Get actual - actual = ngs_mapping_workflow.get_log_file("picard_hs_metrics", "run") - assert actual == expected - - -def test_picard_hs_metrics_step_part_get_resource(ngs_mapping_workflow): - """Tests PicardHsMetricsStepPart.get_resource()""" - # Define expected - expected_dict = {"threads": 2, "time": "04:00:00", "memory": "20G", "partition": "medium"} - # Evaluate - for resource, expected in expected_dict.items(): - msg_error = f"Assertion error for resource '{resource}'." - actual = ngs_mapping_workflow.get_resource("picard_hs_metrics", "run", resource) - assert actual == expected, msg_error - - # Tests for TargetCoverageReportStepPart ---------------------------------------------------------- @@ -721,12 +616,24 @@ def test_target_coverage_report_step_part_get_output_files(ngs_mapping_workflow) def test_target_coverage_report_step_part_run_get_log_file(ngs_mapping_workflow): """Tests TargetCoverageReportStepPart.get_log_file() - run""" expected = "work/{mapper_lib}/log/snakemake.target_coverage.log" + expected = { + "log": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.log", + "log_md5": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.log.md5", + "conda_info": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.conda_info.txt", + "conda_info_md5": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.conda_info.txt.md5", + "conda_list": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.conda_list.txt", + "conda_list_md5": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.conda_list.txt.md5", + "wrapper": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.wrapper.py", + "wrapper_md5": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.wrapper.py.md5", + "env_yaml": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.environment.yaml", + "env_yaml_md5": "work/{mapper_lib}/log/{mapper_lib}.target_cov_report.environment.yaml.md5", + } assert ngs_mapping_workflow.get_log_file("target_coverage_report", "run") == expected def test_target_coverage_report_step_part_collect_get_log_file(ngs_mapping_workflow): """Tests TargetCoverageReportStepPart.get_log_file() - collect""" - expected = "work/target_cov_report/log/snakemake.target_coverage.log" + expected = {"log": "work/target_cov_report/log/snakemake.target_coverage.log"} assert ngs_mapping_workflow.get_log_file("target_coverage_report", "collect") == expected @@ -817,8 +724,6 @@ def test_ngs_mapping_workflow_steps(ngs_mapping_workflow): "link_out", "link_out_bam", "minimap2", - "ngmlr", - "picard_hs_metrics", "star", "target_coverage_report", ] @@ -829,26 +734,31 @@ def test_ngs_mapping_workflow_steps(ngs_mapping_workflow): def test_ngs_mapping_workflow_files(ngs_mapping_workflow): """Tests simple functionality of the workflow: checks if file structure is created according to the expected results from the tools, namely: bwa, external, gatk_post_bam, - genome_coverage_report, link_in, link_out, link_out_bam, minimap2, ngmlr, picard_hs_metrics, - star, target_coverage_report.""" + genome_coverage_report, link_in, link_out, link_out_bam, minimap2, star, target_coverage_report. + """ # Check result file construction expected = [ "output/bwa.P00{i}-N1-DNA1-WGS1/out/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext) for i in range(1, 7) for ext in ("bam", "bam.bai", "bam.md5", "bam.bai.md5") ] - expected += [ - "output/bwa.P00{i}-N1-DNA1-WGS1/log/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext) - for i in range(1, 7) - for ext in ( - "log", - "conda_info.txt", - "conda_list.txt", - "log.md5", - "conda_info.txt.md5", - "conda_list.txt.md5", - ) - ] + for infix in ("bam_collect_doc", "mapping", "target_cov_report"): + expected += [ + "output/bwa.P00{i}-N1-DNA1-WGS1/log/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext) + for i in range(1, 7) + for ext in ( + f"{infix}.log", + f"{infix}.log.md5", + f"{infix}.conda_info.txt", + f"{infix}.conda_info.txt.md5", + f"{infix}.conda_list.txt", + f"{infix}.conda_list.txt.md5", + f"{infix}.environment.yaml", + f"{infix}.environment.yaml.md5", + f"{infix}.wrapper.py", + f"{infix}.wrapper.py.md5", + ) + ] bam_stats_text_out = ( "output/bwa.P00{i}-N1-DNA1-WGS1/report/bam_qc/bwa.P00{i}-N1-DNA1-WGS1.bam.{stats}.{ext}" ) diff --git a/tests/snappy_pipeline/workflows/test_workflows_somatic_hla_loh_calling.py b/tests/snappy_pipeline/workflows/test_workflows_somatic_hla_loh_calling.py index 7bfb24790..f82881af0 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_somatic_hla_loh_calling.py +++ b/tests/snappy_pipeline/workflows/test_workflows_somatic_hla_loh_calling.py @@ -111,7 +111,7 @@ def test_lohhla_step_part_get_log_file(somatic_hla_loh_calling_workflow): "work/{mapper}.{hla_caller}.lohhla.{tumor_library}/log/" "{mapper}.{hla_caller}.lohhla.{tumor_library}" ) - expected = get_expected_log_files_dict(base_name_log) + expected = get_expected_log_files_dict(base_out=base_name_log) actual = somatic_hla_loh_calling_workflow.get_log_file("lohhla", "run") assert actual == expected diff --git a/tests/snappy_pipeline/workflows/test_workflows_variant_export_external.py b/tests/snappy_pipeline/workflows/test_workflows_variant_export_external.py index 3545920b0..a7930569a 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_variant_export_external.py +++ b/tests/snappy_pipeline/workflows/test_workflows_variant_export_external.py @@ -233,7 +233,7 @@ def test_bam_reports_step_part_call_get_log_file_bam_qc( ): """Tests BamReportsExternalStepPart._get_log_file_bam_qc()""" base_out = "work/{mapper_lib}/log/{mapper_lib}.bam_qc" - expected = get_expected_log_files_dict(base_out) + expected = get_expected_log_files_dict(base_out=base_out) actual = variant_export_external_workflow.get_log_file("bam_reports", "bam_qc") assert actual == expected