diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1fdc8e3066..903e7aa141 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,2 +1,3 @@ -* @FriederikeHanssen -* @maxulysse +* @FriederikeHanssen @maxulysse +*.nf.test* @nf-core/nf-test +.github/workflows/ @nf-core/a-team diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 361fca04fe..0535814391 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,34 +11,40 @@ on: - master - dev +env: + NXF_ANSI_LOG: false + NFTEST_VER: "0.8.1" + # Cancel if a newer run is started concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: - changes: - name: Check for changes + pytest-changes: + name: Check for changes (pytest) runs-on: ubuntu-latest outputs: # Expose matched filters as job 'tags' output variable tags: ${{ steps.filter.outputs.changes }} steps: - uses: actions/checkout@v3 - - uses: dorny/paths-filter@v2 + + - uses: frouioui/paths-filter@main id: filter with: - filters: "tests/config/tags.yml" + filters: "tests/config/pytesttags.yml" + token: "" - test: + pytest: name: ${{ matrix.tags }} ${{ matrix.profile }} NF ${{ matrix.NXF_VER }} runs-on: ubuntu-latest - needs: changes - if: needs.changes.outputs.tags != '[]' + needs: pytest-changes + if: needs.pytest-changes.outputs.tags != '[]' strategy: fail-fast: false matrix: - tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] + tags: ["${{ fromJson(needs.pytest-changes.outputs.tags) }}"] profile: ["docker"] # profile: ["docker", "singularity", "conda"] TEST_DATA_BASE: @@ -50,6 +56,7 @@ jobs: NXF_ANSI_LOG: false TEST_DATA_BASE: "${{ github.workspace }}/test-datasets" SENTIEON_LICENSE_BASE64: ${{ secrets.SENTIEON_LICENSE_BASE64 }} + steps: - name: Check out pipeline code uses: actions/checkout@v3 @@ -154,10 +161,135 @@ jobs: !/home/runner/pytest_workflow_*/*/work/conda !/home/runner/pytest_workflow_*/*/work/singularity + nftest-changes: + name: 
Check for changes (nf-test) + runs-on: ubuntu-latest + outputs: + tags: ${{ steps.filter.outputs.changes }} + + steps: + - uses: actions/checkout@v3 + + - name: Combine all tags.yml files + id: get_tags + run: find . -name "tags.yml" -not -path "./.github/*" -exec cat {} + > .github/tags.yml + + - name: debug + run: cat .github/tags.yml + + - uses: frouioui/paths-filter@main + id: filter + with: + filters: ".github/tags.yml" + token: "" + + nftest: + name: ${{ matrix.tags }} ${{ matrix.profile }} NF ${{ matrix.NXF_VER }} + runs-on: ubuntu-latest + needs: nftest-changes + if: needs.nftest-changes.outputs.tags != '[]' + strategy: + fail-fast: false + matrix: + tags: ["${{ fromJson(needs.nftest-changes.outputs.tags) }}"] + profile: ["docker"] + # profile: ["docker", "singularity", "conda"] + TEST_DATA_BASE: + - "test-datasets/data" + NXF_VER: + - "23.04.0" + - "latest-everything" + exclude: + - tags: "bwa/index" + - tags: "bwa/mem" + - tags: "cat/cat" + - tags: "cat/fastq" + - tags: "custom/dumpsoftwareversions" + - tags: "fastp" + - tags: "fastqc" + - tags: "samtools/stats" + env: + NXF_ANSI_LOG: false + TEST_DATA_BASE: "${{ github.workspace }}/test-datasets" + SENTIEON_LICENSE_BASE64: ${{ secrets.SENTIEON_LICENSE_BASE64 }} + + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-java@v3 + with: + distribution: "temurin" + java-version: "17" + - name: Setup Nextflow + uses: nf-core/setup-nextflow@v1 + + - name: Cache nf-test installation + id: cache-software + uses: actions/cache@v3 + with: + path: | + /usr/local/bin/nf-test + /home/runner/.nf-test/nf-test.jar + key: ${{ runner.os }}-${{ env.NFTEST_VER }}-nftest + + - name: Install nf-test + if: steps.cache-software.outputs.cache-hit != 'true' + run: | + wget -qO- https://code.askimed.com/install/nf-test | bash + sudo mv nf-test /usr/local/bin/ + + - name: Setup apptainer + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: matrix.profile == 
'singularity' + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Set up miniconda + uses: conda-incubator/setup-miniconda@v2 + with: + miniconda-version: "latest" + auto-update-conda: true + channels: conda-forge,bioconda,defaults + python-version: ${{ matrix.python-version }} + + - name: Conda setup + run: | + conda clean -a + conda install -n base conda-libmamba-solver + conda config --set solver libmamba + echo $(realpath $CONDA)/condabin >> $GITHUB_PATH + echo $(realpath python) >> $GITHUB_PATH + + # Set up secrets + - name: Set up nextflow secrets + if: env.SENTIEON_LICENSE_BASE64 != null + run: | + nextflow secrets set SENTIEON_LICENSE_BASE64 ${{ secrets.SENTIEON_LICENSE_BASE64 }} + nextflow secrets set SENTIEON_AUTH_MECH_BASE64 ${{ secrets.SENTIEON_AUTH_MECH_BASE64 }} + SENTIEON_ENCRYPTION_KEY=$(echo -n "${{ secrets.ENCRYPTION_KEY_BASE64 }}" | base64 -d) + SENTIEON_LICENSE_MESSAGE=$(echo -n "${{ secrets.LICENSE_MESSAGE_BASE64 }}" | base64 -d) + SENTIEON_AUTH_DATA=$(python3 bin/license_message.py encrypt --key "$SENTIEON_ENCRYPTION_KEY" --message "$SENTIEON_LICENSE_MESSAGE") + SENTIEON_AUTH_DATA_BASE64=$(echo -n "$SENTIEON_AUTH_DATA" | base64 -w 0) + nextflow secrets set SENTIEON_AUTH_DATA_BASE64 $SENTIEON_AUTH_DATA_BASE64 + + # Test the module + - name: Run nf-test + run: | + nf-test test \ + --profile=${{ matrix.profile }} \ + --tag ${{ matrix.tags }} \ + --tap=test.tap \ + --verbose + confirm-pass: runs-on: ubuntu-latest needs: - - test + - pytest + - nftest if: always() steps: - name: All tests ok diff --git a/.gitignore b/.gitignore index e90c12ab54..f3bc0d64f3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,12 @@ *.code-workspace +*.pyc .nextflow* -work/ -data/ -results/ .DS_Store -testing/ +.nf-test* +.nf-test/ +data/ testing* -*.pyc +testing/ test-datasets/ +results/ +work/ diff --git a/.nf-core.yml b/.nf-core.yml index ee325600a3..36189e1fcb 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -5,6 +5,7 
@@ lint: - .github/workflows/awsfulltest.yml - conf/modules.config files_unchanged: + - .gitignore - assets/nf-core-sarek_logo_light.png - docs/images/nf-core-sarek_logo_dark.png - docs/images/nf-core-sarek_logo_light.png diff --git a/CHANGELOG.md b/CHANGELOG.md index bda739b522..023536d87e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,55 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.4.0](https://github.com/nf-core/sarek/releases/tag/3.4.0) - Pårtetjåkko + +Pårtetjåkko is a mountain in the south of the park. + +### Added + +- [#1113](https://github.com/nf-core/sarek/pull/1113) - Adding CNVkit genemetrics module +- [#1193](https://github.com/nf-core/sarek/pull/1193) - Adding support for Sentieon's DnaScope for germline variant-calling including joint-germline +- [#1252](https://github.com/nf-core/sarek/pull/1252) - Added NGSCheckMate tool for checking that samples come from the same individual +- [#1271](https://github.com/nf-core/sarek/pull/1271) - Back to dev +- [#1290](https://github.com/nf-core/sarek/pull/1290) - Add nf-test for whole pipeline. 
+ +### Changed + +- [#1278](https://github.com/nf-core/sarek/pull/1278) - Hide sentieon parameters similar to other variant callers +- [#1280](https://github.com/nf-core/sarek/pull/1280) - Replacing link to `SentieonDNAscopeModel1.1.model` in Sentieon's S3 with link to same file in igenomes' S3 +- [#1303](https://github.com/nf-core/sarek/pull/1303) - Resurrect vep_version params and changed its scope to pipeline to enable usage for vep loftee plugin +- [#1304](https://github.com/nf-core/sarek/pull/1304) - Update modules +- [#1311](https://github.com/nf-core/sarek/pull/1311) - Update local modules with an `environment.yml` file +- [#1317](https://github.com/nf-core/sarek/pull/1317) - Add new tools to subway map + +### Fixed + +- [#1277](https://github.com/nf-core/sarek/pull/1277) - Fix null value issue for Mutect2 joint calling +- [#1287](https://github.com/nf-core/sarek/pull/1287) - Adding label `process_single` to local modules +- [#1298](https://github.com/nf-core/sarek/pull/1298) - Fix annotation cache usage +- [#1301](https://github.com/nf-core/sarek/pull/1301) - Fix nf-prov usage +- [#1315](https://github.com/nf-core/sarek/pull/1315) - Avoid clash of configs of `FILTERVARIANTTRANCHES` in the Sentieon-Haplotyper and GATK-Haplotypecaller subworkflows +- [#1318](https://github.com/nf-core/sarek/pull/1318) - Fix writing of params.json on S3 + +### Removed + +- [#1298](https://github.com/nf-core/sarek/pull/1298) - Remove `--use_annotation_cache_keys` params + +### Dependencies + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| fastqc | 0.11.9 | 0.12.1 | +| multiqc | 1.15 | 1.17 | + +### Modules / Subworkflows + +| script | Old name | New name | +| ----------------------------- | ----------------------------- | ----------------------------- | +| `gatk4spark/applybqsr` | `GATK4_APPLYBQSRSPARK` | `GATK4SPARK_APPLYBQSR` | +| `gatk4spark/baserecalibrator` | `GATK4_BASERECALIBRATORSPARK` | `GATK4SPARK_BASERECALIBRATOR` | +| 
`gatk4spark/markduplicates` | `GATK4_MARKDUPLICATESSPARK` | `GATK4SPARK_MARKDUPLICATES` | + ## [3.3.2](https://github.com/nf-core/sarek/releases/tag/3.3.2) - Ráhpajávvre Ráhpajávvre is the Lule Sámi spelling of Rapaselet. @@ -38,6 +87,7 @@ A lake near the Rapaselet delta. ### Added - [#1231](https://github.com/nf-core/sarek/pull/1231) - Back to dev +- [#1244](https://github.com/nf-core/sarek/pull/1244) - Add bcf annotate module ### Changed @@ -72,6 +122,7 @@ Rapaselet is a delta formed by the Rapaätno river between the Bielloriehppe mas - [#1173](https://github.com/nf-core/sarek/pull/1173) - CI tests for VQSR track with stub runs - [#1122](https://github.com/nf-core/sarek/pull/1122), [#1196](https://github.com/nf-core/sarek/pull/1196) - Add `annotation cache` functionality - [#1184](https://github.com/nf-core/sarek/pull/1184) - Stub-based CI-test of Sentieon joint-germline variant-calling with VQSR +- [#1288](https://github.com/nf-core/sarek/pull/1288) - Add nf-test continuous integration. ### Changed diff --git a/README.md b/README.md index 8c1ef5d130..c128e04955 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ Depending on the options and samples provided, the pipeline can currently perfor - `Sentieon Haplotyper` - `Strelka2` - `TIDDIT` -- Variant filtering and annotation (`SnpEff`, `Ensembl VEP`) +- Variant filtering and annotation (`SnpEff`, `Ensembl VEP`, `BCFtools annotate`) - Summarise and represent QC (`MultiQC`)

@@ -131,6 +131,7 @@ We thank the following people for their extensive assistance in the development - [Francesco Lescai](https://github.com/lescai) - [Gavin Mackenzie](https://github.com/GCJMackenzie) - [Gisela Gabernet](https://github.com/ggabernet) +- [Grant Neilson](https://github.com/grantn5) - [gulfshores](https://github.com/gulfshores) - [Harshil Patel](https://github.com/drpatelh) - [James A. Fellows Yates](https://github.com/jfy133) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 862e4dbafd..e175753b62 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -3,9 +3,9 @@ custom_logo_url: https://github.com/nf-core/sarek/ custom_logo_title: "nf-core/sarek" report_comment: > - This report has been generated by the nf-core/sarek + This report has been generated by the nf-core/sarek analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-sarek-methods-description": order: -1000 diff --git a/conf/base.config b/conf/base.config index db1175874f..a132f3cb6b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -70,11 +70,15 @@ process { cpus = { check_max( 24 * task.attempt, 'cpus' ) } memory = { check_max( 30.GB * task.attempt, 'memory' ) } } - withName: 'GATK4_MARKDUPLICATES|GATK4_MARKDUPLICATESSPARK' { + withName:'CNVKIT_BATCH' { + label = "process_high" + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + } + withName: 'GATK4_MARKDUPLICATES|GATK4SPARK_MARKDUPLICATES' { cpus = { check_max( 6 * task.attempt, 'cpus' ) } memory = { check_max( 30.GB * task.attempt, 'memory' ) } } - withName:'GATK4_APPLYBQSR|GATK4_APPLYBQSR_SPARK|GATK4_BASERECALIBRATOR|GATK4_BASERECALIBRATOR_SPARK|GATK4_GATHERBQSRREPORTS'{ + withName:'GATK4_APPLYBQSR|GATK4SPARK_APPLYBQSR|GATK4_BASERECALIBRATOR|GATK4SPARK_BASERECALIBRATOR|GATK4_GATHERBQSRREPORTS'{ cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 4.GB * task.attempt, 
'memory' ) } } diff --git a/conf/igenomes.config b/conf/igenomes.config index 0e68dfee19..b020ddee5f 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -35,6 +35,7 @@ params { known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz.tbi" known_indels_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_phase1.indels.b37.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.b37.vcf.gz' mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/Control-FREEC/out100m2_hg19.gem" + ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_wChr.bed" snpeff_db = 87 snpeff_genome = 'GRCh37' vep_cache_version = 110 @@ -68,6 +69,7 @@ params { known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" known_indels_vqsr = '--resource:gatk,known=false,training=true,truth=true,prior=10.0 Homo_sapiens_assembly38.known_indels.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.hg38.vcf.gz' mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Control-FREEC/out100m2_hg38.gem" + ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/NGSCheckMate/SNP_GRCh38_hg38_wChr.bed" pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" snpeff_db = 105 @@ -79,6 +81,7 @@ params { 'Ensembl.GRCh37' { bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" fasta = 
"${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_woChr.bed" readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" snpeff_db = 87 snpeff_genome = 'GRCh37' @@ -89,6 +92,7 @@ params { 'NCBI.GRCh38' { bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + ngscheckmate_bed ="${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/NGSCheckMate/SNP_GRCh38_hg38_wChr.bed" snpeff_db = 105 snpeff_genome = 'GRCh38' vep_cache_version = 110 diff --git a/conf/modules/annotate.config b/conf/modules/annotate.config index bf92ecf7b8..87ca891277 100644 --- a/conf/modules/annotate.config +++ b/conf/modules/annotate.config @@ -37,7 +37,7 @@ process { ext.args = { [ (params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', - (params.vep_loftee) ? "--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-${params.vep_version}/share/ensembl-vep-${params.vep_version}-0" : '', + (params.vep_loftee) ? "--plugin LoF,loftee_path://usr/local/share/ensembl-vep-${params.vep_version}" : '', (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? "--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '', (params.vep_spliceregion) ? '--plugin SpliceRegion' : '', (params.vep_out_format) ? 
"--${params.vep_out_format}" : '--vcf', @@ -60,6 +60,19 @@ process { } } + // BCFTOOLS ANNOTATE + if (params.tools && params.tools.split(',').contains('bcfann')) { + withName: 'NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:VCF_ANNOTATE_BCFTOOLS:BCFTOOLS_ANNOTATE' { + ext.args = '--output-type z' + ext.prefix = { input.baseName - ".vcf" + "_BCF.ann" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz}" + ] + } + } + // SNPEFF THEN VEP if (params.tools && params.tools.split(',').contains('merge')) { withName: "NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:VCF_ANNOTATE_MERGE:ENSEMBLVEP_VEP" { @@ -69,7 +82,7 @@ process { } // ALL ANNOTATION TOOLS - if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) { + if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge') || params.tools.split(',').contains('bcfann'))) { withName: "NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:.*:(TABIX_BGZIPTABIX|TABIX_TABIX)" { ext.prefix = { input.name - ".vcf" } publishDir = [ diff --git a/conf/modules/cnvkit.config b/conf/modules/cnvkit.config index f77c6ef446..bf5ff6c6c2 100644 --- a/conf/modules/cnvkit.config +++ b/conf/modules/cnvkit.config @@ -47,4 +47,14 @@ process { pattern: "*{bed,cnn,cnr,cns,pdf,png}" ] } + // CNVKIT + withName: 'CNVKIT_GENEMETRICS' { + ext.prefix = { "${cnr.baseName}.genemetrics" } + ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/cnvkit/${meta.id}/" }, + pattern: "*{tsv}" + ] + } } diff --git a/conf/modules/haplotypecaller.config b/conf/modules/haplotypecaller.config index be159c4152..7f7dfa1027 100644 --- a/conf/modules/haplotypecaller.config +++ b/conf/modules/haplotypecaller.config @@ -43,7 
+43,7 @@ process { ] } - withName: 'FILTERVARIANTTRANCHES' { + withName: '.*:VCF_VARIANT_FILTERING_GATK:FILTERVARIANTTRANCHES' { ext.prefix = {"${meta.id}.haplotypecaller"} ext.args = { "--info-key CNN_1D" } publishDir = [ diff --git a/conf/modules/markduplicates.config b/conf/modules/markduplicates.config index 702b9b721b..ef106423ef 100644 --- a/conf/modules/markduplicates.config +++ b/conf/modules/markduplicates.config @@ -90,7 +90,7 @@ process { ] } - withName: 'GATK4_MARKDUPLICATES_SPARK' { + withName: 'GATK4SPARK_MARKDUPLICATES' { ext.args = '--remove-sequencing-duplicates false -VS LENIENT' ext.prefix = { "${meta.id}.md.cram" } publishDir = [ diff --git a/conf/modules/ngscheckmate.config b/conf/modules/ngscheckmate.config new file mode 100644 index 0000000000..89fa045ed4 --- /dev/null +++ b/conf/modules/ngscheckmate.config @@ -0,0 +1,26 @@ +process { + withName: ".*BAM_NGSCHECKMATE:BCFTOOLS_MPILEUP" { + + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/ngscheckmate/vcfs" }, + pattern: "*{vcf.gz}" + ] + ext.prefix = { "${meta.id}.ngscheckmate" } + ext.when = { params.tools && params.tools.split(',').contains('ngscheckmate') } + ext.args2 = '--no-version --ploidy 1 -c' + ext.args3 = '--no-version' + } + + withName: ".*BAM_NGSCHECKMATE:NGSCHECKMATE_NCM" { + ext.args = '-V' + + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/ngscheckmate/" }, + pattern: "*" + ] + + } + +} diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config index 66eb2041d2..85367196c2 100644 --- a/conf/modules/prepare_genome.config +++ b/conf/modules/prepare_genome.config @@ -76,7 +76,7 @@ process { } withName: 'TABIX_DBSNP' { - ext.when = { !params.dbsnp_tbi && params.dbsnp && ((params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.split(',').contains('controlfreec') || 
params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('mutect2'))) } + ext.when = { !params.dbsnp_tbi && params.dbsnp && ((params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope') || params.tools.split(',').contains('mutect2'))) } publishDir = [ enabled: (params.save_reference || params.build_only_index), mode: params.publish_dir_mode, @@ -96,7 +96,7 @@ process { } withName: 'TABIX_KNOWN_INDELS' { - ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) ) } + ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope'))) ) } publishDir = [ enabled: (params.save_reference || params.build_only_index), mode: params.publish_dir_mode, @@ -106,7 +106,7 @@ process { } withName: 'TABIX_KNOWN_SNPS' { - ext.when = { !params.known_snps_tbi && params.known_snps && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) ) } + ext.when = { !params.known_snps_tbi && params.known_snps && (params.step 
== 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') )) ) } publishDir = [ enabled: (params.save_reference || params.build_only_index), mode: params.publish_dir_mode, diff --git a/conf/modules/prepare_recalibration.config b/conf/modules/prepare_recalibration.config index cb394f0a2f..915075d5b4 100644 --- a/conf/modules/prepare_recalibration.config +++ b/conf/modules/prepare_recalibration.config @@ -15,7 +15,7 @@ process { - withName: 'GATK4_BASERECALIBRATOR|GATK4_BASERECALIBRATOR_SPARK' { + withName: 'GATK4_BASERECALIBRATOR|GATK4SPARK_BASERECALIBRATOR' { ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } publishDir = [ mode: params.publish_dir_mode, diff --git a/conf/modules/recalibrate.config b/conf/modules/recalibrate.config index 9602e33caf..c46a1bb82b 100644 --- a/conf/modules/recalibrate.config +++ b/conf/modules/recalibrate.config @@ -15,7 +15,7 @@ process { - withName: 'GATK4_APPLYBQSR|GATK4_APPLYBQSR_SPARK' { + withName: 'GATK4_APPLYBQSR|GATK4SPARK_APPLYBQSR' { ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } publishDir = [ enabled: !params.save_output_as_bam, diff --git a/conf/modules/sentieon_dnascope.config b/conf/modules/sentieon_dnascope.config new file mode 100644 index 0000000000..0419107ca6 --- /dev/null +++ b/conf/modules/sentieon_dnascope.config @@ -0,0 +1,56 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. 
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// SENTIEON DNASCOPE + +process { + + withName: 'SENTIEON_DNASCOPE' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.dnascope" : "${meta.id}.dnascope.${intervals.simpleName}" } + ext.when = { params.tools && params.tools.split(',').contains('sentieon_dnascope') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? null : "sentieon_dnascope/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_SENTIEON_DNASCOPE_VCFS' { + ext.prefix = { params.joint_germline ? "${meta.id}.dnascope.g" : "${meta.id}.dnascope.unfiltered" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MERGE_SENTIEON_DNASCOPE_GVCFS' { + ext.prefix = { "${meta.id}.dnascope.g" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'SENTIEON_DNAMODELAPPLY' { + ext.prefix = {"${meta.id}.dnascope.filtered"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_dnascope/${meta.id}/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + +} diff --git a/conf/modules/sentieon_dnascope_joint_germline.config b/conf/modules/sentieon_dnascope_joint_germline.config new file mode 100644 index 0000000000..72dd6c3144 --- /dev/null +++ b/conf/modules/sentieon_dnascope_joint_germline.config @@ -0,0 +1,45 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// SENTIEON DNASCOPE JOINT_GERMLINE + +process { + + // TO-DO: duplicate!! 
+ withName: 'SENTIEON_GVCFTYPER' { + ext.args = { "--allow-old-rms-mapping-quality-annotation-data" } + ext.prefix = { meta.intervals_name } + publishDir = [ + enabled: false + ] + } + + if (params.tools && params.tools.contains('sentieon_dnascope') && params.joint_germline) { + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:BCFTOOLS_SORT' { + ext.prefix = { vcf.baseName - ".vcf" + ".sort" } + publishDir = [ + enabled: false + ] + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_JOINT_CALLING_GERMLINE_SENTIEON:MERGE_GENOTYPEGVCFS' { + ext.prefix = "joint_germline" + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/sentieon_dnascope/joint_variant_calling/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + } +} diff --git a/conf/modules/sentieon_haplotyper.config b/conf/modules/sentieon_haplotyper.config index bbcdba8c22..62734c2b07 100644 --- a/conf/modules/sentieon_haplotyper.config +++ b/conf/modules/sentieon_haplotyper.config @@ -45,7 +45,7 @@ process { } if (params.tools && params.tools.contains('sentieon_haplotyper')) { - withName: '.*FILTERVARIANTTRANCHES' { + withName: '.*:SENTIEON_HAPLOTYPER_VCF_VARIANT_FILTERING_GATK:FILTERVARIANTTRANCHES' { ext.prefix = {"${meta.id}.haplotyper"} ext.args = { "--info-key CNN_1D" } publishDir = [ diff --git a/conf/modules/sentieon_joint_germline.config b/conf/modules/sentieon_haplotyper_joint_germline.config similarity index 100% rename from conf/modules/sentieon_joint_germline.config rename to conf/modules/sentieon_haplotyper_joint_germline.config diff --git a/conf/test.config b/conf/test.config index 2403fc63e9..4b9c3c73af 100644 --- a/conf/test.config +++ b/conf/test.config @@ -36,6 +36,10 @@ params { vep_cache_version = 110 vep_genome = 'WBcel235' vep_species = 'caenorhabditis_elegans' + bcftools_annotations = 
"https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/vcf/test2.vcf.gz" + bcftools_annotations_index = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi" + bcftools_header_lines = "${projectDir}/tests/config/bcfann_test_header.txt" + ngscheckmate_bed = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed" // default params split_fastq = 0 // no FASTQ splitting diff --git a/conf/test/cache.config b/conf/test/cache.config index 9f51f72354..95e6fb40f7 100644 --- a/conf/test/cache.config +++ b/conf/test/cache.config @@ -48,6 +48,7 @@ params { vep_cache_version = 110 vep_genome = 'WBcel235' vep_species = 'caenorhabditis_elegans' + ngscheckmate_bed = params.test_data['homo_sapiens']['genome']['ngscheckmate_bed'] // default params split_fastq = 0 // no FASTQ splitting diff --git a/conf/test_full.config b/conf/test_full.config index 664ce37695..8acaae89d8 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -18,9 +18,8 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/HCC1395_WXS_somatic_full_test.csv' // Other params - tools = 'strelka,mutect2,freebayes,ascat,manta,cnvkit,tiddit,controlfreec,vep' - split_fastq = 20000000 - intervals = 's3://ngi-igenomes/test-data/sarek/S07604624_Padded_Agilent_SureSelectXT_allexons_V6_UTR.bed' - wes = true - use_annotation_cache_keys = true + tools = 'strelka,mutect2,freebayes,ascat,manta,cnvkit,tiddit,controlfreec,vep,ngscheckmate' + split_fastq = 20000000 + intervals = 's3://ngi-igenomes/test-data/sarek/S07604624_Padded_Agilent_SureSelectXT_allexons_V6_UTR.bed' + wes = true } diff --git a/conf/test_full_germline.config b/conf/test_full_germline.config index c477f7d6f6..e03541be84 100644 --- a/conf/test_full_germline.config +++ b/conf/test_full_germline.config @@ -19,6 +19,5 @@ 
params { // Other params tools = 'strelka,freebayes,haplotypecaller,deepvariant,manta,tiddit,cnvkit,vep' - split_fastq = 50000000 - use_annotation_cache_keys = true + split_fastq = 50000000 } diff --git a/docs/images/sarek_subway.png b/docs/images/sarek_subway.png index e2a689b1ca..ff73d272d9 100644 Binary files a/docs/images/sarek_subway.png and b/docs/images/sarek_subway.png differ diff --git a/docs/images/sarek_subway.svg b/docs/images/sarek_subway.svg index ad8352b1c7..d6d1653c12 100644 --- a/docs/images/sarek_subway.svg +++ b/docs/images/sarek_subway.svg @@ -2,13 +2,13 @@ mappingsnpeffbcftools, vcftoolsngscheckmateconcatenate(germline)multiqcvariant callingannotationbcftools annotatedeepvariantfreebayeshaplotypecallermantastrelka2tiddittidditmutect2ascatmsisensorprocontrolfreeccnvkitfreebayesmantastrelka2Example analysis pathwaysmpileupmpileupSentieon haplotyperSentieon haplotyperSentieon dnascopeSNPs & IndelsSV & CNVMSI diff --git a/docs/images/sarek_workflow.png b/docs/images/sarek_workflow.png index 048c404447..7fb4cd52c2 100644 Binary files a/docs/images/sarek_workflow.png and b/docs/images/sarek_workflow.png differ diff --git a/docs/images/sarek_workflow.svg b/docs/images/sarek_workflow.svg index e013313517..5f4cbd2ddd 100644 --- a/docs/images/sarek_workflow.svg +++ b/docs/images/sarek_workflow.svg @@ -4,15 +4,15 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - Preprocessing - - - - - - - -Preprocessing snpeff, vep - - -bcftools annotate, snpeff, vep Annotation - - - - - - - -Annotation Reports - - - - - - - - - - -Reports - - - Variant Calling - - -Variant Calling • msisensorpro - - -• msisensorpro Germline -Germline deepvariant, freebayes - deepvariant, freebayes GATK haplotypecaller, - GATK haplotypecaller, mpileup, strelka2, - mpileup, strelka2, Sentieon haplotyper - Sentieon haplotyper • manta, tiddit -• manta, tiddit • cnvkit• cnvkit - - - - - - - - diff --git a/docs/output.md b/docs/output.md index b250ed2817..769ee71fbd 100644 --- 
a/docs/output.md +++ b/docs/output.md @@ -37,8 +37,10 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [GATK Germline Single Sample Variant Calling](#gatk-germline-single-sample-variant-calling) - [GATK Joint Germline Variant Calling](#gatk-joint-germline-variant-calling) - [GATK Mutect2](#gatk-mutect2) + - [Sentieon DNAscope](#sentieon-dnascope) + - [Sentieon DNAscope joint germline variant calling](#sentieon-dnascope-joint-germline-variant-calling) - [Sentieon Haplotyper](#sentieon-haplotyper) - - [Sentieon Joint Germline Variant Calling](#sentieon-joint-germline-variant-calling) + - [Sentieon Haplotyper joint germline variant calling](#sentieon-haplotyper-joint-germline-variant-calling) - [Strelka2](#strelka2) - [Structural Variants](#structural-variants) - [Manta](#manta) @@ -53,11 +55,13 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Variant annotation](#variant-annotation) - [snpEff](#snpeff) - [VEP](#vep) + - [BCFtools annotate](#bcftools-annotate) - [Quality control and reporting](#quality-control-and-reporting) - [Quality control](#quality-control) - [FastQC](#fastqc) - [FastP](#fastp) - [Mosdepth](#mosdepth) + - [NGSCheckMate](#ngscheckmate) - [GATK MarkDuplicates reports](#gatk-markduplicates-reports) - [Sentieon Dedup reports](#sentieon-dedup-reports) - [samtools stats](#samtools-stats) @@ -305,7 +309,7 @@ See the [`input`](usage#input-sample-sheet-configurations) section in the usage ## Variant Calling The results regarding variant calling are collected in `{outdir}/variantcalling/`. -If some results from a variant caller do not appear here, please check out the `--tools` section in the parameter [documentation](https://nf-co.re/sarek/3.0.1/parameters). +If some results from a variant caller do not appear here, please check out the `--tools` section in the parameter [documentation](https://nf-co.re/sarek/latest/parameters). 
(Recalibrated) CRAM files can used as an input to start the variant calling. @@ -442,6 +446,53 @@ Files created: +#### Sentieon DNAscope + +[Sentieon DNAscope](https://support.sentieon.com/appnotes/dnascope_ml/#dnascope-germline-variant-calling-with-a-machine-learning-model) is a variant-caller which aims at outperforming GATK's Haplotypecaller in terms of both speed and accuracy. DNAscope allows you to use a machine learning model to perform variant calling with higher accuracy by improving the candidate detection and filtering. + +

+Unfiltered VCF-files for normal samples + +**Output directory: `{outdir}/variantcalling/sentieon_dnascope//`** + +- `.dnascope.unfiltered.vcf.gz` and `.dnascope.unfiltered.vcf.gz.tbi` + - VCF with tabix index + +
+ +The output from Sentieon's DNAscope can be controlled through the option `--sentieon_dnascope_emit_mode` for Sarek, see [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions). + +Unless `dnascope_filter` is listed under `--skip_tools` in the nextflow command, Sentieon's [DNAModelApply](https://support.sentieon.com/manual/usages/general/#dnamodelapply-algorithm) is applied to the unfiltered VCF-files in order to obtain filtered VCF-files. + +
+Filtered VCF-files for normal samples + +**Output directory: `{outdir}/variantcalling/sentieon_dnascope//`** + +- `.dnascope.filtered.vcf.gz` and `.dnascope.filtered.vcf.gz.tbi` + - VCF with tabix index + +
+ +##### Sentieon DNAscope joint germline variant calling + +In Sentieon's package DNAscope, joint germline variant calling is done by first running Sentieon's DNAscope in emit-mode `gvcf` for each sample and then running Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) on the set of gVCF-files. See [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions) for information on how joint germline variant calling can be done in Sarek using Sentieon's DNAscope. + +
+Output files from joint germline variant calling + +**Output directory: `{outdir}/variantcalling/sentieon_dnascope//`** + +- `.dnascope.g.vcf.gz` and `.dnascope.g.vcf.gz.tbi` + - VCF with tabix index + +**Output directory: `{outdir}/variantcalling/sentieon_dnascope/joint_variant_calling/`** + +- `joint_germline.vcf.gz` and `joint_germline.vcf.gz.tbi` + - VCF with tabix index + +
+ #### Sentieon Haplotyper [Sentieon Haplotyper](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm) is Sention's speedup version of GATK's Haplotypecaller (see above). @@ -456,7 +507,7 @@ Files created: -The output from Sentieon's Haplotyper can be controlled through the option `--sentieon_haplotyper_emit_mode` for Sarek, see [Basic usage of Sentieon functions in Sarek](#basic-usage-of-sentieon-functions-in-sarek). +The output from Sentieon's Haplotyper can be controlled through the option `--sentieon_haplotyper_emit_mode` for Sarek, see [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions). Unless `haplotyper_filter` is listed under `--skip_tools` in the nextflow command, GATK's CNNScoreVariants and FilterVariantTranches (see above) is applied to the unfiltered VCF-files in order to obtain filtered VCF-files. @@ -470,16 +521,16 @@ Unless `haplotyper_filter` is listed under `--skip_tools` in the nextflow comman -##### Sentieon Joint Germline Variant Calling +##### Sentieon Haplotyper joint germline variant calling -In Sentieon's package DNAseq, joint germline variant calling is done by first running Sentieon's Haplotyper in emit-mode `gvcf` for each sample and then running Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) on the set of gVCF-files. See [Basic usage of Sentieon functions in Sarek](#basic-usage-of-sentieon-functions-in-sarek) for information on how joint germline variant calling can be done in Sarek using Sentieon's DNAseq. After joint genotyping, Sentieon's version of VQSR ([VarCal](https://support.sentieon.com/manual/usages/general/#varcal-algorithm) and [ApplyVarCal](https://support.sentieon.com/manual/usages/general/#applyvarcal-algorithm)) is applied for filtering to produce the final multisample callset with the desired balance of precision and sensitivity. 
+In Sentieon's package DNAseq, joint germline variant calling is done by first running Sentieon's Haplotyper in emit-mode `gvcf` for each sample and then running Sentieon's [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) on the set of gVCF-files. See [Basic usage of Sentieon functions](#basic-usage-of-sentieon-functions) for information on how joint germline variant calling can be done in Sarek using Sentieon's DNAseq. After joint genotyping, Sentieon's version of VQSR ([VarCal](https://support.sentieon.com/manual/usages/general/#varcal-algorithm) and [ApplyVarCal](https://support.sentieon.com/manual/usages/general/#applyvarcal-algorithm)) is applied for filtering to produce the final multisample callset with the desired balance of precision and sensitivity.
Output files from joint germline variant calling **Output directory: `{outdir}/variantcalling/sentieon_haplotyper//`** -- `.haplotypecaller.g.vcf.gz` and `.haplotypecaller.g.vcf.gz.tbi` +- `.haplotyper.g.vcf.gz` and `.haplotyper.g.vcf.gz.tbi` - VCF with tabix index **Output directory: `{outdir}/variantcalling/sentieon_haplotyper/joint_variant_calling/`** @@ -668,8 +719,9 @@ The file `.cnvs.txt` contains all segments predicte - file containing copy number segment information - `.call.cns` - file containing copy number segment information - -
+- `.genemetrics.tsv` + - file containing per gene copy number information (if input files are annotated) +
Output files for tumor/normal samples @@ -696,6 +748,8 @@ The file `.cnvs.txt` contains all segments predicte - file containing copy number segment information - `.call.cns` - file containing copy number segment information +- `.genemetrics.tsv` + - file containing per gene copy number information (if input files are annotated)
#### Control-FREEC @@ -834,6 +888,18 @@ plus any additional filed selected via the plugins: [dbNSFP](https://sites.googl +### BCFtools annotate + +[BCFtools annotate](https://samtools.github.io/bcftools/bcftools.html#annotate) is used to add annotations to VCF files. The annotations are added to the INFO column of the VCF file. The annotations are added to the VCF header and the VCF header is updated with the new annotations. For further reading and documentation see the [BCFtools annotate manual](https://samtools.github.io/bcftools/bcftools.html#annotate). + +
+Output files for all samples + +- `{sample,tumorsample_vs_normalsample}._bcf.ann.vcf.gz` and `{sample,tumorsample_vs_normalsample}._bcf.ann.vcf.gz.tbi` + - VCF with tabix index + +
+ ## Quality control and reporting ### Quality control @@ -920,6 +986,25 @@ Plots will show: - CSI index for per-base depth for targeted data, per-window (500bp) depth of WGS +#### NGSCheckMate + +[NGSCheckMate](https://github.com/parklab/NGSCheckMate) is a tool for determining whether samples come from the same genetic individual, using a set of commonly heterozygous SNPs. This enables the detection of sample mislabelling events. The output includes a text file indicating whether samples have matched or not according to the algorithm, as well as a dendrogram visualising these results. + +
+Output files for all samples + +**Output directory: `{outdir}/reports/ngscheckmate/`** + +- `ngscheckmate_all.txt` + - Tab delimited text file listing all the comparisons made, whether they were considered as a match, with the correlation and a normalised depth. +- `ngscheckmate_matched.txt` + - Tab delimited text file listing only the comparison that were considered to match, with the correlation and a normalised depth. +- `ngscheckmate_output_corr_matrix.txt` + - Tab delimited text file containing a matrix of all correlations for all comparisons made. +- `vcfs/.vcf.gz` + - Set of vcf files for each sample. Contains calls for the set of SNP positions used to calculate sample relatedness. +
+ #### GATK MarkDuplicates reports More information in the [GATK MarkDuplicates section](#gatk-markduplicates) @@ -1077,10 +1162,9 @@ Results generated by MultiQC collect pipeline QC from supported tools e.g. FastQ Output files - `pipeline_info/` - - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by Nextflow: `execution_report_.html`, `execution_timeline_.html`, `execution_trace_.txt`, `pipeline_dag_.dot`/`pipeline_dag_.svg` and `manifest_.bco.json`. - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline. - - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. - - Parameters used by the pipeline run: `params.json`. + - Parameters used by the pipeline run: `params_.json`. diff --git a/docs/usage.md b/docs/usage.md index 0895fac23c..451c69ff9d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -18,10 +18,16 @@ Sarek is designed to handle single samples, such as single-normal or single-tumo The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/sarek --input ./samplesheet.csv --outdir ./results --genome GATK.GRCh38 --tools -profile docker +nextflow run nf-core/sarek -r -profile --input ./samplesheet.csv --outdir ./results --genome GATK.GRCh38 --tools ``` -This will launch the pipeline and perform variant calling with the tools specified in `--tools`, see the [parameter section](https://nf-co.re/sarek/3.2.3/parameters#tools) for details on variant calling tools. +`-r ` is optional but strongly recommended for reproducibility and should match the latest version. 
+ +`-profile ` is mandatory and should reflect either your own institutional profile or any pipeline profile specified in the [profile section](#-profile). + +This documentation implies that any `nextflow run nf-core/sarek` command is run with the appropriate `-r` and `-profile` options. + +This will launch the pipeline and perform variant calling with the tools specified in `--tools`, see the [parameter section](https://nf-co.re/sarek/latest/parameters#tools) for details on variant calling tools. In the above example the pipeline runs with the `docker` configuration profile. See below for more information about profiles. Note that the pipeline will create the following files in your working directory: @@ -44,7 +50,7 @@ Do not use `-c ` to specify parameters as this will result in errors. Cust The above pipeline run specified with a params file in yaml format: ```bash -nextflow run nf-core/sarek -profile docker -params-file params.yaml +nextflow run nf-core/sarek -params-file params.yaml ``` with `params.yaml` containing: @@ -312,7 +318,7 @@ test,sample4_vs_sample3,manta,sample4_vs_sample3.somatic_sv.vcf.gz ## Updating the pipeline -When you launch a pipeline from the command-line with `nextflow run nf-core/sarek -profile docker -params-file params.yaml`, Nextflow will automatically pull the pipeline code from GitHub and store it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: +When you launch a pipeline from the command-line with `nextflow run nf-core/sarek -params-file params.yaml`, Nextflow will automatically pull the pipeline code from GitHub and store it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. 
To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: ```bash nextflow pull nf-core/sarek @@ -322,8 +328,8 @@ nextflow pull nf-core/sarek It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/sarek releases page](https://github.com/nf-core/sarek/releases) and find the latest version number - numeric only (eg. `3.1.1`). -Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 3.1.1`. Of course, you can switch to another version by changing the number after the `-r` flag. +First, go to the [nf-core/sarek releases page](https://github.com/nf-core/sarek/releases) and find the latest version number - numeric only (eg. `3.3.2`). +Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 3.3.2`. Of course, you can switch to another version by changing the number after the `-r` flag. This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. @@ -448,7 +454,7 @@ When using default parameters only, sarek runs preprocessing and `Strelka2`. This is reflected in the default test profile: ```bash -nextflow run nf-core/sarek -r 3.2.1 -profile test, --outdir results +nextflow run nf-core/sarek -profile test, --outdir results ``` Expected run output: @@ -525,7 +531,7 @@ For more extensive testing purpose, we have the `test_cache` profile that contai Annotation is generally tested separately from the remaining workflow, since we use references for `C.elegans`, while the remaining tests are run on downsampled human data. 
```bash -nextflow run nf-core/sarek -r 3.2.1 -profile test_cache, --outdir results --tools snpeff --step annotation +nextflow run nf-core/sarek -profile test_cache, --outdir results --tools snpeff --step annotation ``` If you are interested in any of the other tests that are run on every code change or would like to run them yourself, you can take a look at `tests/.yml`. @@ -962,7 +968,7 @@ aws s3 --no-sign-request ls s3://annotation-cache/vep_cache/ Since both Snpeff and VEP are internally figuring the path towards the specific cache version / species, `annotation-cache` is using an extra set of keys to specify the species and genome build. -So if you are using this resource, please either set `--use_annotation_cache_keys` to use the AWS annotation cache, or point towards your own cache folder structure matching the expected structure. +This is handled internally by Sarek. Please refer to the [annotation-cache documentation](https://annotation-cache.github.io) for more details. @@ -988,13 +994,13 @@ Else, it will be downloaded in `cache/` in the specified `--outdir` location. 
This command could be used to download the cache for both tools in the specified `--outdir_cache` location: ```bash -nextflow run nf-core/sarek -r 3.3.0 --outdir results --outdir_cache /path_to/my-own-cache --tools vep,snpeff --download_cache --build_only_index --input false +nextflow run nf-core/sarek --outdir results --outdir_cache /path_to/my-own-cache --tools vep,snpeff --download_cache --build_only_index --input false ``` This command could be used to point to the recently downloaded cache and run SnpEff and VEP: ```bash -nextflow run nf-core/sarek -r 3.3.0 --outdir results --vep_cache /path_to/my-own-cache/vep_cache --snpeff_cache /path_to/my-own-cache/snpeff_cache --tools vep,snpeff --input samplesheet_vcf.csv +nextflow run nf-core/sarek --outdir results --vep_cache /path_to/my-own-cache/vep_cache --snpeff_cache /path_to/my-own-cache/snpeff_cache --tools vep,snpeff --input samplesheet_vcf.csv ``` ### Create containers with pre-downloaded cache @@ -1048,6 +1054,14 @@ Enable with `--vep_spliceregion`. For more details, see [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html#spliceregion) and [here](https://www.ensembl.info/2018/10/26/cool-stuff-the-vep-can-do-splice-site-variant-annotation/)." +### BCFTOOLS Annotate + +It is possible to annotate a VCF file with a custom annotation file using [BCFTOOLS Annotate](https://samtools.github.io/bcftools/bcftools.html#annotate). 
This can be done by adding `bcfann` to the tools list and setting the following parameters: + +- `bcftools_annotations`: path to vcf annotation file +- `bcftools_annotations_index`: path to vcf annotation index file +- `bcftools_header_lines`: path to header lines file + ## MultiQC related issues ### Plots for SnpEff are missing @@ -1083,17 +1097,34 @@ nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat ### Available Sentieon functions -Sarek contains the following Sentieon functions [bwa mem](https://support.sentieon.com/manual/usages/general/#bwa-mem-syntax), [LocusCollector](https://support.sentieon.com/manual/usages/general/#locuscollector-algorithm) + [Dedup](https://support.sentieon.com/manual/usages/general/#dedup-algorithm), [Haplotyper](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm), [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) and [VarCal](https://support.sentieon.com/manual/usages/general/#varcal-algorithm) + [ApplyVarCal](https://support.sentieon.com/manual/usages/general/#applyvarcal-algorithm), so the basic processing of alignment of fastq-files to VCF-files can be done using speedup Sentieon functions. 
+Sarek contains the following Sentieon functions from [DnaSeq](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/) : [bwa mem](https://support.sentieon.com/manual/usages/general/#bwa-mem-syntax), [LocusCollector](https://support.sentieon.com/manual/usages/general/#locuscollector-algorithm) + [Dedup](https://support.sentieon.com/manual/usages/general/#dedup-algorithm), [Haplotyper](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm), [GVCFtyper](https://support.sentieon.com/manual/usages/general/#gvcftyper-algorithm) and [VarCal](https://support.sentieon.com/manual/usages/general/#varcal-algorithm) + [ApplyVarCal](https://support.sentieon.com/manual/usages/general/#applyvarcal-algorithm), so the basic processing of alignment of fastq-files to VCF-files can be done using speedup Sentieon functions. + +Sarek also contains the Sentieon functions [DnaScope](https://support.sentieon.com/manual/usages/general/?highlight=dnamodelapply#dnascope-algorithm) and [DNAModelApply](https://support.sentieon.com/manual/usages/general/?highlight=dnamodelapply#dnamodelapply-algorithm). ### Basic usage of Sentieon functions -To use Sentieon's aligner `bwa mem`, set the aligner option `sentieon-bwamem`. (This can, for example, be done by adding `--aligner sentieon-bwamem` to the nextflow run command.) +To use Sentieon's aligner `bwa mem`, set the aligner option `sentieon-bwamem`. +(This can, for example, be done by adding `--aligner sentieon-bwamem` to the `nextflow run` command.) + +To use Sentieon's function `Dedup`, specify `sentieon_dedup` as one of the tools. +(This can, for example, be done by adding `--tools sentieon_dedup` to the `nextflow run` command.) -To use Sentieon's function `Dedup`, specify `sentieon_dedup` as one of the tools. (This can, for example, be done by adding `--tools sentieon_dedup` to the nextflow run command.) +To use Sentieon's function `DNAscope`, specify `sentieon_dnascope` as one of the tools. 
+This can, for example, be done by adding `--tools sentieon_dnascope` to the `nextflow run` command. +In order to skip Sentieon's variant-filter `DNAModelApply`, one may add `--skip_tools dnascope_filter` to the `nextflow run` command. +Sarek also provides the option `sentieon_dnascope_emit_mode` which can be used to set the [emit-mode](https://support.sentieon.com/manual/usages/general/#dnascope-algorithm) of Sentieon's dnascope. +Sentieon's dnascope can output both a vcf-file and a gvcf-file in the same run; this is achieved by setting `sentieon_dnascope_emit_mode` to `,gvcf`, where `` is `variant`, `confident` or `all`. -To use Sentieon's function `Haplotyper`, specify `sentieon_haplotyper` as one of the tools. This can, for example, be done by adding `--tools sentieon_haplotyper` to the nextflow run command. In order to skip the GATK-based variant-filter, one may add `--skip_tools haplotyper_filter` to the nextflow run command. Sarek also provides the option `sentieon_haplotyper_emit_mode` which can be used to set the [emit-mode](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm) of Sentieon's haplotyper. Sentieon's haplotyper can output both a vcf-file and a gvcf-file in the same run; this is achieved by setting `sentieon_haplotyper_emit_mode` to `,gvcf`, where `` is `variant`, `confident` or `all`. +Sentieon's function `Haplotyper` is used in much the same way as `DNAscope`. +To use Sentieon's function `Haplotyper`, specify `sentieon_haplotyper` as one of the tools. +This can, for example, be done by adding `--tools sentieon_haplotyper` to the `nextflow run` command. +In order to skip the GATK-based variant-filter, one may add `--skip_tools haplotyper_filter` to the `nextflow run` command. +Sarek also provides the option `sentieon_haplotyper_emit_mode` which can be used to set the [emit-mode](https://support.sentieon.com/manual/usages/general/#haplotyper-algorithm) of Sentieon's haplotyper. 
+Sentieon's haplotyper can output both a vcf-file and a gvcf-file in the same run; this is achieved by setting `sentieon_haplotyper_emit_mode` to `,gvcf`, where `` is `variant`, `confident` or `all`. -To use Sentieon's function `GVCFtyper` along with Sention's version of VQSR (`VarCal` and `ApplyVarCal`) for joint-germline genotyping, specify `sentieon_haplotyper` as one of the tools, set the option `sentieon_haplotyper_emit_mode` to `gvcf`, and add the option `joint_germline`. This can, for example, be done by adding `--tools sentieon_haplotyper --joint_germline --sentieon_haplotyper_emit_mode gvcf` to the nextflow run command. +To use Sentieon's function `GVCFtyper` along with Sentieon's version of VQSR (`VarCal` and `ApplyVarCal`) for joint-germline genotyping, specify `sentieon_haplotyper` as one of the tools, set the option `sentieon_haplotyper_emit_mode` to `gvcf`, and add the option `joint_germline`. +This can, for example, be done by adding `--tools sentieon_haplotyper --joint_germline --sentieon_haplotyper_emit_mode gvcf` to the `nextflow run` command. +If `sentieon_dnascope` is chosen instead of `sentieon_haplotyper`, then Sentieon's version of VQSR is skipped, as recommended by Sentieon. 
### Joint germline variant calling diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 2fdc3c29eb..755ee64d44 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -4,6 +4,7 @@ import org.yaml.snakeyaml.Yaml import groovy.json.JsonOutput +import nextflow.extension.FilesEx class NfcoreTemplate { @@ -141,12 +142,14 @@ class NfcoreTemplate { try { if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } [ 'sendmail', '-t' ].execute() << sendmail_html log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" } catch (all) { // Catch failures and try with plaintext def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { + if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { mail_cmd += [ '-A', mqc_report ] } mail_cmd.execute() << email_html @@ -155,14 +158,16 @@ class NfcoreTemplate { } // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") + FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), 
"${params.outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() } // @@ -227,17 +232,17 @@ class NfcoreTemplate { // Dump pipeline parameters in a json file // public static void dump_parameters(workflow, params) { - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def output_pf = new File(output_d, "params_${timestamp}.json") + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") def jsonStr = JsonOutput.toJson(params) - output_pf.text = JsonOutput.prettyPrint(jsonStr) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() } + // // Print pipeline summary on completion // diff --git a/main.nf b/main.nf index 99ea7db553..bc69e55d1b 100644 --- a/main.nf +++ b/main.nf @@ -52,6 +52,7 @@ params.known_indels = WorkflowMain.getGenomeAttribute(params, 'known_in params.known_indels_tbi = WorkflowMain.getGenomeAttribute(params, 'known_indels_tbi') params.known_indels_vqsr = WorkflowMain.getGenomeAttribute(params, 'known_indels_vqsr') params.mappability = WorkflowMain.getGenomeAttribute(params, 'mappability') +params.ngscheckmate_bed = WorkflowMain.getGenomeAttribute(params, 'ngscheckmate_bed') params.pon = WorkflowMain.getGenomeAttribute(params, 'pon') params.pon_tbi = WorkflowMain.getGenomeAttribute(params, 'pon_tbi') params.snpeff_db = WorkflowMain.getGenomeAttribute(params, 'snpeff_db') diff --git a/modules.json b/modules.json index 84b30a76dc..1ba2f99ca0 100644 --- a/modules.json +++ b/modules.json @@ -7,469 +7,500 @@ "nf-core": { "ascat": { "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, + "bcftools/annotate": { + "branch": "master", + 
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/annotate/bcftools-annotate.diff" + }, "bcftools/concat": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "bcftools/mpileup": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "installed_by": ["bam_ngscheckmate", "modules"] }, "bcftools/sort": { "branch": "master", - "git_sha": "d6d112a1af2ee8c97fc1932df008183341e7d8fe", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "bcftools/stats": { "branch": "master", - "git_sha": "e2693a7e2d773b92e0649b25880ee22fe82bb79d", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "bwa/index": { "branch": "master", - "git_sha": "bfed129da5134b4439b1821c917972570d44d39c", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "bwa/mem": { "branch": "master", - "git_sha": "3dc300ddcaa563c1e3503477557c0e0def6df2ce", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "bwamem2/index": { "branch": "master", - "git_sha": "bfed129da5134b4439b1821c917972570d44d39c", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "bwamem2/mem": { "branch": "master", - "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "cat/cat": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "cat/fastq": { "branch": "master", - "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "git_sha": 
"8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "cnvkit/antitarget": { "branch": "master", - "git_sha": "3b63e1df297ef474b0070aa5fabb30d732173671", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "cnvkit/batch": { "branch": "master", - "git_sha": "3b63e1df297ef474b0070aa5fabb30d732173671", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "installed_by": ["modules"] + }, + "cnvkit/genemetrics": { + "branch": "master", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "cnvkit/reference": { "branch": "master", - "git_sha": "3b63e1df297ef474b0070aa5fabb30d732173671", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "controlfreec/assesssignificance": { "branch": "master", - "git_sha": "3fbcafe2543dabcc7b2be0f3b24507002b3e4b0d", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"], "patch": "modules/nf-core/controlfreec/assesssignificance/controlfreec-assesssignificance.diff" }, "controlfreec/freec": { "branch": "master", - "git_sha": "3fbcafe2543dabcc7b2be0f3b24507002b3e4b0d", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "controlfreec/freec2bed": { "branch": "master", - "git_sha": "3fbcafe2543dabcc7b2be0f3b24507002b3e4b0d", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "controlfreec/freec2circos": { "branch": "master", - "git_sha": "3fbcafe2543dabcc7b2be0f3b24507002b3e4b0d", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "controlfreec/makegraph": { "branch": "master", - "git_sha": "3fbcafe2543dabcc7b2be0f3b24507002b3e4b0d", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"], "patch": "modules/nf-core/controlfreec/makegraph/controlfreec-makegraph.diff" }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": 
"05c280924b6c768d484c7c443dad5e605c4ff4b4", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "deepvariant": { "branch": "master", - "git_sha": "ed67f2fadd6d2a155b296f728e6b1f8c92ddc1a6", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", "installed_by": ["modules"] }, "dragmap/align": { "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "dragmap/hashtable": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"], "patch": "modules/nf-core/dragmap/hashtable/dragmap-hashtable.diff" }, "ensemblvep/download": { "branch": "master", - "git_sha": "9f9e1fc31cb35876922070c0e601ae05abae5cae", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "ensemblvep/vep": { "branch": "master", - "git_sha": "9f9e1fc31cb35876922070c0e601ae05abae5cae", - "installed_by": ["modules", "vcf_annotate_ensemblvep"] + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "installed_by": ["vcf_annotate_ensemblvep", "modules"] }, "fastp": { "branch": "master", - "git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "fgbio/callmolecularconsensusreads": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "fgbio/fastqtobam": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "fgbio/groupreadsbyumi": { "branch": 
"master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "freebayes": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/applybqsr": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", - "installed_by": ["modules"] - }, - "gatk4/applybqsrspark": { - "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/applyvqsr": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/baserecalibrator": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", - "installed_by": ["modules"] - }, - "gatk4/baserecalibratorspark": { - "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/calculatecontamination": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/cnnscorevariants": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", "installed_by": ["modules"] }, "gatk4/createsequencedictionary": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/estimatelibrarycomplexity": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, 
"gatk4/filtermutectcalls": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/filtervarianttranches": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/gatherbqsrreports": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/gatherpileupsummaries": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/genomicsdbimport": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/genotypegvcfs": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/getpileupsummaries": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/haplotypecaller": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/intervallisttobed": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/learnreadorientationmodel": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/markduplicates": { "branch": "master", - 
"git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", - "installed_by": ["modules"] - }, - "gatk4/markduplicatesspark": { - "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/mergemutectstats": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/mergevcfs": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/mutect2": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "gatk4/variantrecalibrator": { "branch": "master", - "git_sha": "cf8f9ace77aac01caa5c7cb92af5bbda7adb77bd", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "installed_by": ["modules"] + }, + "gatk4spark/applybqsr": { + "branch": "master", + "git_sha": "c3aaaa6696c4baed36e13210f4ba772bda2e7c1a", + "installed_by": ["modules"] + }, + "gatk4spark/baserecalibrator": { + "branch": "master", + "git_sha": "c3aaaa6696c4baed36e13210f4ba772bda2e7c1a", + "installed_by": ["modules"] + }, + "gatk4spark/markduplicates": { + "branch": "master", + "git_sha": "c3aaaa6696c4baed36e13210f4ba772bda2e7c1a", "installed_by": ["modules"] }, "manta/germline": { "branch": "master", - "git_sha": "80dbd95c558a0ebb2123d95f50c093a7f714a0d7", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "manta/somatic": { "branch": "master", - "git_sha": "b178a8cc4b8d38b9dc2d0a1e6a9b63e6786ce263", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "manta/tumoronly": { "branch": "master", - "git_sha": "b178a8cc4b8d38b9dc2d0a1e6a9b63e6786ce263", + "git_sha": 
"8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "mosdepth": { "branch": "master", - "git_sha": "ebb27711cd5f4de921244bfa81c676504072d31c", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "msisensorpro/msisomatic": { "branch": "master", - "git_sha": "5b5d7fba9e08624def34f40e3f4b9268f2eeabbc", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "msisensorpro/scan": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, + "ngscheckmate/ncm": { + "branch": "master", + "git_sha": "32d6725f584ebf460de39b7c1c53a29d5384d697", + "installed_by": ["bam_ngscheckmate"] + }, "samblaster": { "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "samtools/bam2fq": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "samtools/collatefastq": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "samtools/convert": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", - "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + 
"git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", - "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "git_sha": "e7ce60acc8a33fa17429e966364657a63016e870", "installed_by": ["modules"] }, "samtools/mpileup": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "samtools/stats": { "branch": "master", - "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "samtools/view": { "branch": "master", - "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "sentieon/applyvarcal": { "branch": "master", - "git_sha": "6c9c11ee96796e53a01b4719286acce6af14bc3a", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", "installed_by": ["modules"] }, "sentieon/bwamem": { "branch": "master", - "git_sha": "149b4746c6e16ef84f64db5bb245d5b9495fdc3f", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", "installed_by": ["modules"] }, "sentieon/dedup": { "branch": "master", - "git_sha": "915a0b16ba3e40ef59e7b44843b3118e17a9c906", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules"] + }, + "sentieon/dnamodelapply": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules"] + }, + "sentieon/dnascope": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", "installed_by": ["modules"] }, "sentieon/gvcftyper": { "branch": "master", - "git_sha": "6c9c11ee96796e53a01b4719286acce6af14bc3a", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", "installed_by": ["modules"] }, "sentieon/haplotyper": { "branch": "master", - "git_sha": "b9172e8c26a3db5009f7872654c44587e254f094", + "git_sha": 
"516189e968feb4ebdd9921806988b4c12b4ac2dc", "installed_by": ["modules"] }, "sentieon/varcal": { "branch": "master", - "git_sha": "6c9c11ee96796e53a01b4719286acce6af14bc3a", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", "installed_by": ["modules"] }, "snpeff/download": { "branch": "master", - "git_sha": "4d584d5cf6ed5f7363a51cdb4b3eb25398e9e537", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "snpeff/snpeff": { "branch": "master", - "git_sha": "4d584d5cf6ed5f7363a51cdb4b3eb25398e9e537", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["vcf_annotate_snpeff", "modules"] }, "strelka/germline": { "branch": "master", - "git_sha": "80dbd95c558a0ebb2123d95f50c093a7f714a0d7", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "strelka/somatic": { "branch": "master", - "git_sha": "80dbd95c558a0ebb2123d95f50c093a7f714a0d7", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "svdb/merge": { "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "tabix/bgziptabix": { "branch": "master", - "git_sha": "591b71642820933dcb3c954c934b397bd00d8e5e", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["vcf_annotate_snpeff", "modules"] }, "tabix/tabix": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["vcf_annotate_ensemblvep", "modules"] }, "tiddit/sv": { "branch": "master", - "git_sha": "1c90a501d102b800c27697f5ef39a6e217ab1915", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "untar": { "branch": "master", - "git_sha": "d0b4fc03af52a1cc8c6fb4493b921b57352b1dd8", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "unzip": { 
"branch": "master", - "git_sha": "cf67a6d7d043e2bd6a3099be84c72046fc71508f", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, "vcftools": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] } } }, "subworkflows": { "nf-core": { + "bam_ngscheckmate": { + "branch": "master", + "git_sha": "32d6725f584ebf460de39b7c1c53a29d5384d697", + "installed_by": ["subworkflows"] + }, "vcf_annotate_ensemblvep": { "branch": "master", - "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", "installed_by": ["subworkflows"] }, "vcf_annotate_snpeff": { "branch": "master", - "git_sha": "dedc0e31087f3306101c38835d051bf49789445a", + "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", "installed_by": ["subworkflows"] } } diff --git a/modules/local/add_info_to_vcf/environment.yml b/modules/local/add_info_to_vcf/environment.yml new file mode 100644 index 0000000000..710fbfd120 --- /dev/null +++ b/modules/local/add_info_to_vcf/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - anaconda::gawk=5.1.0 diff --git a/modules/local/add_info_to_vcf/main.nf b/modules/local/add_info_to_vcf/main.nf index 875acdff79..a55cf14b56 100644 --- a/modules/local/add_info_to_vcf/main.nf +++ b/modules/local/add_info_to_vcf/main.nf @@ -1,7 +1,8 @@ process ADD_INFO_TO_VCF { tag "$meta.id" + label 'process_single' - conda "anaconda::gawk=5.1.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : 'biocontainers/gawk:5.1.0' }" diff --git a/modules/local/build_intervals/environment.yml b/modules/local/build_intervals/environment.yml new file mode 100644 index 0000000000..710fbfd120 --- /dev/null +++ b/modules/local/build_intervals/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - anaconda::gawk=5.1.0 diff --git a/modules/local/build_intervals/main.nf b/modules/local/build_intervals/main.nf index 29a6b5afad..9c562f5c17 100644 --- a/modules/local/build_intervals/main.nf +++ b/modules/local/build_intervals/main.nf @@ -1,7 +1,8 @@ process BUILD_INTERVALS { tag "$meta.id" + label 'process_single' - conda "anaconda::gawk=5.1.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : 'biocontainers/gawk:5.1.0' }" diff --git a/modules/local/create_intervals_bed/environment.yml b/modules/local/create_intervals_bed/environment.yml new file mode 100644 index 0000000000..710fbfd120 --- /dev/null +++ b/modules/local/create_intervals_bed/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - anaconda::gawk=5.1.0 diff --git a/modules/local/create_intervals_bed/main.nf b/modules/local/create_intervals_bed/main.nf index 4ade5d8b0c..88160ccbf4 100644 --- a/modules/local/create_intervals_bed/main.nf +++ b/modules/local/create_intervals_bed/main.nf @@ -1,7 +1,8 @@ process CREATE_INTERVALS_BED { tag "$intervals" + label 'process_single' - conda "anaconda::gawk=5.1.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : 'biocontainers/gawk:5.1.0' }" diff --git a/modules/nf-core/ascat/environment.yml b/modules/nf-core/ascat/environment.yml new file mode 100644 index 0000000000..6708ce86a8 --- /dev/null +++ b/modules/nf-core/ascat/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ascat=3.1.1 + - bioconda::cancerit-allelecount=4.3.0 diff --git a/modules/nf-core/ascat/main.nf b/modules/nf-core/ascat/main.nf index 922f8b3cac..8aeb9847b5 100644 --- a/modules/nf-core/ascat/main.nf +++ b/modules/nf-core/ascat/main.nf @@ -2,7 +2,7 @@ process ASCAT { tag "$meta.id" label 'process_medium' - conda "bioconda::ascat=3.1.1 bioconda::cancerit-allelecount=4.3.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:ba3e6d2157eac2d38d22e62ec87675e12adb1010-0': 'biocontainers/mulled-v2-c278c7398beb73294d78639a864352abef2931ce:ba3e6d2157eac2d38d22e62ec87675e12adb1010-0' }" diff --git a/modules/nf-core/ascat/meta.yml b/modules/nf-core/ascat/meta.yml index 104ef0fb33..34ea2e51d9 100644 --- a/modules/nf-core/ascat/meta.yml +++ b/modules/nf-core/ascat/meta.yml @@ -7,12 +7,10 @@ keywords: tools: - ascat: description: ASCAT is a method to derive copy number profiles of tumour cells, accounting for normal cell admixture and tumour aneuploidy. ASCAT infers tumour purity (the fraction of tumour cells) and ploidy (the amount of DNA per tumour cell), expressed as multiples of haploid genomes from SNP array or massively parallel sequencing data, and calculates whole-genome allele-specific copy number profiles (the number of copies of both parental alleles for all SNP loci across the genome). 
- documentation: https://github.com/VanLoo-lab/ascat/tree/master/man tool_dev_url: https://github.com/VanLoo-lab/ascat doi: "10.1093/bioinformatics/btaa538" licence: ["GPL v3"] - input: - args: type: map @@ -38,7 +36,6 @@ input: ] } ``` - - meta: type: map description: | @@ -46,8 +43,7 @@ input: e.g. [ id:'test', single_end:false ] - input_normal: type: file - description: BAM/CRAM file, must adhere to chr1, chr2, ...chrX notation - For modifying chromosome notation in bam files please follow https://josephcckuo.wordpress.com/2016/11/17/modify-chromosome-notation-in-bam-file/. + description: BAM/CRAM file, must adhere to chr1, chr2, ...chrX notation For modifying chromosome notation in bam files please follow https://josephcckuo.wordpress.com/2016/11/17/modify-chromosome-notation-in-bam-file/. pattern: "*.{bam,cram}" - index_normal: type: file @@ -66,10 +62,7 @@ input: description: allele files for ASCAT WGS. Can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS - loci_files: type: file - description: - loci files for ASCAT WGS. Loci files without chromosome notation can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS - Make sure the chromosome notation matches the bam/cram input files. To add the chromosome notation to loci files (hg19/hg38) if necessary, you can run this command - `if [[ $(samtools view | head -n1 | cut -f3)\" == *\"chr\"* ]]; then for i in {1..22} X; do sed -i 's/^/chr/' G1000_loci_hg19_chr_${i}.txt; done; fi` + description: loci files for ASCAT WGS. Loci files without chromosome notation can be downloaded here https://github.com/VanLoo-lab/ascat/tree/master/ReferenceFiles/WGS Make sure the chromosome notation matches the bam/cram input files. 
To add the chromosome notation to loci files (hg19/hg38) if necessary, you can run this command `if [[ $(samtools view | head -n1 | cut -f3)\" == *\"chr\"* ]]; then for i in {1..22} X; do sed -i 's/^/chr/' G1000_loci_hg19_chr_${i}.txt; done; fi` - bed_file: type: file description: Bed file for ASCAT WES (optional, but recommended for WES) @@ -118,3 +111,9 @@ authors: - "@FriederikeHanssen" - "@maxulysse" - "@SusiJo" +maintainers: + - "@aasNGC" + - "@lassefolkersen" + - "@FriederikeHanssen" + - "@maxulysse" + - "@SusiJo" diff --git a/modules/nf-core/bcftools/annotate/bcftools-annotate.diff b/modules/nf-core/bcftools/annotate/bcftools-annotate.diff new file mode 100644 index 0000000000..4376af5172 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/bcftools-annotate.diff @@ -0,0 +1,28 @@ +Changes in module 'nf-core/bcftools/annotate' +--- modules/nf-core/bcftools/annotate/main.nf ++++ modules/nf-core/bcftools/annotate/main.nf +@@ -8,7 +8,10 @@ + 'biocontainers/bcftools:1.17--haef29d1_0' }" + + input: +- tuple val(meta), path(input), path(index), path(annotations), path(annotations_index), path(header_lines) ++ tuple val(meta), path(input) ++ path annotations ++ path annotations_index ++ path header_lines + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf +@@ -29,6 +32,10 @@ + "vcf" + if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ """ ++ bcftools \\ ++ index \\ ++ $input ++ + bcftools \\ + annotate \\ + $args \\ + +************************************************************ diff --git a/modules/nf-core/bcftools/annotate/environment.yml b/modules/nf-core/bcftools/annotate/environment.yml new file mode 100644 index 0000000000..57631f42ea --- /dev/null +++ b/modules/nf-core/bcftools/annotate/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.17 diff --git a/modules/nf-core/bcftools/annotate/main.nf b/modules/nf-core/bcftools/annotate/main.nf new file mode 100644 index 0000000000..f00c5fe2d7 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/main.nf @@ -0,0 +1,70 @@ +process BCFTOOLS_ANNOTATE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': + 'biocontainers/bcftools:1.17--haef29d1_0' }" + + input: + tuple val(meta), path(input) + path annotations + path annotations_index + path header_lines + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def header_file = header_lines ? "--header-lines ${header_lines}" : '' + def annotations_file = annotations ? "--annotations ${annotations}" : '' + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf" + if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ + bcftools \\ + index \\ + $input + + bcftools \\ + annotate \\ + $args \\ + $annotations_file \\ + $header_file \\ + --output ${prefix}.${extension} \\ + --threads $task.cpus \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$( bcftools --version |& sed '1!d; s/^.*bcftools //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/annotate/meta.yml b/modules/nf-core/bcftools/annotate/meta.yml new file mode 100644 index 0000000000..f3aa463bf5 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/meta.yml @@ -0,0 +1,56 @@ +name: bcftools_annotate +description: Add or remove annotations. +keywords: + - bcftools + - annotate + - vcf + - remove + - add +tools: + - annotate: + description: Add or remove annotations. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: https://samtools.github.io/bcftools/bcftools.html#annotate + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: Query VCF or BCF file, can be either uncompressed or compressed + - index: + type: file + description: Index of the query VCF or BCF file + - annotations: + type: file + description: Bgzip-compressed file with annotations + - annotations_index: + type: file + description: Index of the annotations file + - header_lines: + type: file + description: Contains lines to append to the output VCF header +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Annotated VCF or BCF file (compressed or uncompressed) + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" +authors: + - "@projectoriented" + - "@ramprasadn" +maintainers: + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/concat/environment.yml b/modules/nf-core/bcftools/concat/environment.yml new file mode 100644 index 0000000000..57631f42ea --- /dev/null +++ b/modules/nf-core/bcftools/concat/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.17 diff --git a/modules/nf-core/bcftools/concat/main.nf b/modules/nf-core/bcftools/concat/main.nf index 244a42ccf0..2ff690b1be 100644 --- a/modules/nf-core/bcftools/concat/main.nf +++ b/modules/nf-core/bcftools/concat/main.nf @@ -2,7 +2,7 @@ process BCFTOOLS_CONCAT { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': 'biocontainers/bcftools:1.17--haef29d1_0' }" diff --git a/modules/nf-core/bcftools/concat/meta.yml b/modules/nf-core/bcftools/concat/meta.yml index e8c83cd685..8731b17bc8 100644 --- a/modules/nf-core/bcftools/concat/meta.yml +++ b/modules/nf-core/bcftools/concat/meta.yml @@ -5,7 +5,6 @@ keywords: - concat - bcftools - VCF - tools: - concat: description: | @@ -47,3 +46,6 @@ output: authors: - "@abhi18av" - "@nvnieuwk" +maintainers: + - "@abhi18av" + - "@nvnieuwk" diff --git a/modules/nf-core/bcftools/mpileup/environment.yml b/modules/nf-core/bcftools/mpileup/environment.yml new file mode 100644 index 0000000000..57631f42ea --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.17 diff --git a/modules/nf-core/bcftools/mpileup/main.nf b/modules/nf-core/bcftools/mpileup/main.nf index aa9e811fa1..83bec8ef5f 100644 --- a/modules/nf-core/bcftools/mpileup/main.nf +++ b/modules/nf-core/bcftools/mpileup/main.nf @@ -2,14 +2,14 @@ process BCFTOOLS_MPILEUP { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': 'biocontainers/bcftools:1.17--haef29d1_0' }" input: tuple val(meta), path(bam), path(intervals) - path fasta + tuple val(meta2), path(fasta) val save_mpileup output: diff --git a/modules/nf-core/bcftools/mpileup/meta.yml b/modules/nf-core/bcftools/mpileup/meta.yml index 5619a6f511..65410ddd66 100644 --- a/modules/nf-core/bcftools/mpileup/meta.yml +++ b/modules/nf-core/bcftools/mpileup/meta.yml @@ -25,6 +25,10 @@ input: - intervals: type: file description: Input intervals file. 
A file (commonly '.bed') containing regions to subset + - meta2: + type: map + description: | + Groovy Map containing information about the genome fasta, e.g. [ id: 'sarscov2' ] - fasta: type: file description: FASTA reference file @@ -61,3 +65,6 @@ output: authors: - "@joseespinosa" - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/sort/environment.yml b/modules/nf-core/bcftools/sort/environment.yml new file mode 100644 index 0000000000..57631f42ea --- /dev/null +++ b/modules/nf-core/bcftools/sort/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.17 diff --git a/modules/nf-core/bcftools/sort/main.nf b/modules/nf-core/bcftools/sort/main.nf index 702d510f37..c842daf232 100644 --- a/modules/nf-core/bcftools/sort/main.nf +++ b/modules/nf-core/bcftools/sort/main.nf @@ -2,7 +2,7 @@ process BCFTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': 'biocontainers/bcftools:1.17--haef29d1_0' }" diff --git a/modules/nf-core/bcftools/sort/meta.yml b/modules/nf-core/bcftools/sort/meta.yml index 0c244a486e..84747c6d89 100644 --- a/modules/nf-core/bcftools/sort/meta.yml +++ b/modules/nf-core/bcftools/sort/meta.yml @@ -12,7 +12,6 @@ tools: tool_dev_url: https://github.com/samtools/bcftools doi: "10.1093/bioinformatics/btp352" licence: ["MIT"] - input: - meta: type: map @@ -23,7 +22,6 @@ input: type: file description: The VCF/BCF file to be sorted pattern: "*.{vcf.gz,vcf,bcf}" - output: - meta: type: map @@ -38,6 +36,7 @@ output: type: file description: Sorted VCF file pattern: "*.{vcf.gz}" - authors: - "@Gwennid" +maintainers: + - "@Gwennid" diff --git a/modules/nf-core/bcftools/stats/environment.yml b/modules/nf-core/bcftools/stats/environment.yml new file mode 100644 index 0000000000..57631f42ea --- /dev/null +++ b/modules/nf-core/bcftools/stats/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.17 diff --git a/modules/nf-core/bcftools/stats/main.nf b/modules/nf-core/bcftools/stats/main.nf index 7ccb9bf6cb..b3a5f23ba6 100644 --- a/modules/nf-core/bcftools/stats/main.nf +++ b/modules/nf-core/bcftools/stats/main.nf @@ -2,7 +2,7 @@ process BCFTOOLS_STATS { tag "$meta.id" label 'process_single' - conda "bioconda::bcftools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': 'biocontainers/bcftools:1.17--haef29d1_0' }" diff --git a/modules/nf-core/bcftools/stats/meta.yml b/modules/nf-core/bcftools/stats/meta.yml index 5850d25f71..7ea2103e3b 100644 --- a/modules/nf-core/bcftools/stats/meta.yml +++ b/modules/nf-core/bcftools/stats/meta.yml @@ -70,3 +70,8 @@ authors: - "@drpatelh" - "@SusiJo" - "@TCLamnidis" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" diff --git a/modules/nf-core/bwa/index/environment.yml b/modules/nf-core/bwa/index/environment.yml new file mode 100644 index 0000000000..c5b0826008 --- /dev/null +++ b/modules/nf-core/bwa/index/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bwa=0.7.17 diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf index c30d194d05..24b5a2ea99 100644 --- a/modules/nf-core/bwa/index/main.nf +++ b/modules/nf-core/bwa/index/main.nf @@ -2,7 +2,7 @@ process BWA_INDEX { tag "$fasta" label 'process_single' - conda "bioconda::bwa=0.7.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : 'biocontainers/bwa:0.7.17--hed695b0_7' }" diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml index 2c6cfcd795..730628d005 100644 --- a/modules/nf-core/bwa/index/meta.yml +++ b/modules/nf-core/bwa/index/meta.yml @@ -40,3 +40,6 @@ output: authors: - "@drpatelh" - "@maxulysse" +maintainers: + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/bwa/index/tests/main.nf.test b/modules/nf-core/bwa/index/tests/main.nf.test new file mode 100644 index 0000000000..5fc8d49662 --- /dev/null +++ b/modules/nf-core/bwa/index/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process BWA_INDEX" + tag "modules_nfcore" + tag "modules" + tag "bwa" + tag "bwa/index" + script "../main.nf" + process "BWA_INDEX" + + test("BWA index") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bwa/index/tests/main.nf.test.snap b/modules/nf-core/bwa/index/tests/main.nf.test.snap new file mode 100644 index 0000000000..e51ad5bf2b --- /dev/null +++ b/modules/nf-core/bwa/index/tests/main.nf.test.snap @@ -0,0 +1,43 @@ +{ + "BWA index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + "1": [ + "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + 
"genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + "versions": [ + "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f" + ] + } + ], + "timestamp": "2023-10-17T17:20:20.180927714" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwa/index/tests/tags.yml b/modules/nf-core/bwa/index/tests/tags.yml new file mode 100644 index 0000000000..28bb483c4e --- /dev/null +++ b/modules/nf-core/bwa/index/tests/tags.yml @@ -0,0 +1,2 @@ +bwa/index: + - modules/nf-core/bwa/index/** diff --git a/modules/nf-core/bwa/mem/environment.yml b/modules/nf-core/bwa/mem/environment.yml new file mode 100644 index 0000000000..34f50c48b4 --- /dev/null +++ b/modules/nf-core/bwa/mem/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bwa=0.7.17 + - bioconda::samtools=1.16.1 diff --git a/modules/nf-core/bwa/mem/main.nf b/modules/nf-core/bwa/mem/main.nf index 8ba99dfdb7..17e6fbd06c 100644 --- a/modules/nf-core/bwa/mem/main.nf +++ b/modules/nf-core/bwa/mem/main.nf @@ -2,7 +2,7 @@ process BWA_MEM { tag "$meta.id" label 'process_high' - conda "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' : 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }" diff --git a/modules/nf-core/bwa/mem/meta.yml b/modules/nf-core/bwa/mem/meta.yml index 62357bf8d5..440fb1f9cf 100644 --- a/modules/nf-core/bwa/mem/meta.yml +++ b/modules/nf-core/bwa/mem/meta.yml @@ -53,3 +53,6 @@ output: authors: - "@drpatelh" - "@jeremy1805" +maintainers: + - "@drpatelh" + - "@jeremy1805" diff --git a/modules/nf-core/bwa/mem/tests/main.nf.test b/modules/nf-core/bwa/mem/tests/main.nf.test new file mode 100644 index 0000000000..b199bb70f6 --- /dev/null +++ b/modules/nf-core/bwa/mem/tests/main.nf.test @@ -0,0 +1,172 @@ +nextflow_process { + + name "Test Process BWA_MEM" + tag "modules_nfcore" + tag "modules" + tag "bwa" + tag "bwa/mem" + script "../main.nf" + process "BWA_MEM" + + test("Single-End") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Single-End Sort") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) 
+ ] + ] + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Paired-End") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Paired-End Sort") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/bwa/mem/tests/main.nf.test.snap b/modules/nf-core/bwa/mem/tests/main.nf.test.snap new file mode 100644 index 0000000000..ea3bfed4fd --- /dev/null +++ b/modules/nf-core/bwa/mem/tests/main.nf.test.snap @@ -0,0 +1,126 @@ +{ + "Single-End": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,3d43027d4163ada97911b814001511e5" + ] + 
], + "1": [ + "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,3d43027d4163ada97911b814001511e5" + ] + ], + "versions": [ + "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" + ] + } + ], + "timestamp": "2023-10-18T11:02:55.420631681" + }, + "Single-End Sort": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,61eac1213d2bf5e88e225e545010e9b8" + ] + ], + "1": [ + "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,61eac1213d2bf5e88e225e545010e9b8" + ] + ], + "versions": [ + "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" + ] + } + ], + "timestamp": "2023-10-18T11:03:02.646869498" + }, + "Paired-End": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,809ccfe4300fa5005a9d0d4dc09b1a36" + ] + ], + "1": [ + "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,809ccfe4300fa5005a9d0d4dc09b1a36" + ] + ], + "versions": [ + "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" + ] + } + ], + "timestamp": "2023-10-18T11:03:09.793041294" + }, + "Paired-End Sort": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,2622f4380f992c505af7dab8c256313f" + ] + ], + "1": [ + "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,2622f4380f992c505af7dab8c256313f" + ] + ], + "versions": [ + "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" + ] + } + ], + "timestamp": "2023-10-18T11:04:43.662093286" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwa/mem/tests/tags.yml b/modules/nf-core/bwa/mem/tests/tags.yml new file mode 100644 index 0000000000..82992d1f0b --- /dev/null +++ 
b/modules/nf-core/bwa/mem/tests/tags.yml @@ -0,0 +1,3 @@ +bwa/mem: + - modules/nf-core/bwa/index/** + - modules/nf-core/bwa/mem/** diff --git a/modules/nf-core/bwamem2/index/environment.yml b/modules/nf-core/bwamem2/index/environment.yml new file mode 100644 index 0000000000..cef9ec4e78 --- /dev/null +++ b/modules/nf-core/bwamem2/index/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bwa-mem2=2.2.1 diff --git a/modules/nf-core/bwamem2/index/main.nf b/modules/nf-core/bwamem2/index/main.nf index 244e1005a1..b7688285d7 100644 --- a/modules/nf-core/bwamem2/index/main.nf +++ b/modules/nf-core/bwamem2/index/main.nf @@ -2,7 +2,7 @@ process BWAMEM2_INDEX { tag "$fasta" label 'process_single' - conda "bioconda::bwa-mem2=2.2.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bwa-mem2:2.2.1--he513fc3_0' : 'biocontainers/bwa-mem2:2.2.1--he513fc3_0' }" diff --git a/modules/nf-core/bwamem2/index/meta.yml b/modules/nf-core/bwamem2/index/meta.yml index 40c26c3873..c14a109252 100644 --- a/modules/nf-core/bwamem2/index/meta.yml +++ b/modules/nf-core/bwamem2/index/meta.yml @@ -38,3 +38,5 @@ output: pattern: "versions.yml" authors: - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/bwamem2/mem/environment.yml b/modules/nf-core/bwamem2/mem/environment.yml new file mode 100644 index 0000000000..2d75270b2b --- /dev/null +++ b/modules/nf-core/bwamem2/mem/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bwa-mem2=2.2.1 + - bioconda::samtools=1.16.1 diff --git a/modules/nf-core/bwamem2/mem/main.nf b/modules/nf-core/bwamem2/mem/main.nf index d427dea3c4..cbec7eef50 100644 --- a/modules/nf-core/bwamem2/mem/main.nf +++ b/modules/nf-core/bwamem2/mem/main.nf @@ -2,7 +2,7 @@ process BWAMEM2_MEM { tag 
"$meta.id" label 'process_high' - conda "bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.16.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' : 'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' }" diff --git a/modules/nf-core/bwamem2/mem/meta.yml b/modules/nf-core/bwamem2/mem/meta.yml index bc3dfcdd0e..04891b26a9 100644 --- a/modules/nf-core/bwamem2/mem/meta.yml +++ b/modules/nf-core/bwamem2/mem/meta.yml @@ -57,3 +57,5 @@ output: pattern: "versions.yml" authors: - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 0000000000..cdad6baafc --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf index 9f062219bb..4264a92ccc 100644 --- a/modules/nf-core/cat/cat/main.nf +++ b/modules/nf-core/cat/cat/main.nf @@ -2,7 +2,7 @@ process CAT_CAT { tag "$meta.id" label 'process_low' - conda "conda-forge::pigz=2.3.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : 'biocontainers/pigz:2.3.4' }" diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml index 8acc0bfac5..00a8db0bca 100644 --- a/modules/nf-core/cat/cat/meta.yml +++ b/modules/nf-core/cat/cat/meta.yml @@ -7,9 +7,7 @@ keywords: tools: - cat: description: Just concatenation - documentation: https://man7.org/linux/man-pages/man1/cat.1.html - licence: ["GPL-3.0-or-later"] input: - meta: @@ -21,7 +19,6 @@ input: type: file description: List of compressed / uncompressed files pattern: "*" - output: - versions: type: file @@ -31,7 +28,9 @@ output: type: file description: Concatenated file. Will be gzipped if file_out ends with ".gz" pattern: "${file_out}" - authors: - "@erikrikarddaniel" - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 0000000000..5766daafbe --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,153 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + 
file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { 
assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")} + ) + } + } +} + diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 0000000000..423571ba27 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,121 @@ +{ + "test_cat_unzipped_zipped_size": { + "content": [ + 375 + ], + "timestamp": "2023-10-16T14:33:08.049445686" + }, + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped_lines": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal 
frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ] + ], + "timestamp": "2023-10-16T14:32:33.629048645" + }, + "test_cat_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:08.038830506" + }, + "test_cat_one_file_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:21.39642399" + }, + 
"test_cat_zipped_zipped_size": { + "content": [ + 78 + ], + "timestamp": "2023-10-16T14:32:33.641869244" + }, + "test_cat_one_file_unzipped_zipped_size": { + "content": [ + 374 + ], + "timestamp": "2023-10-16T14:33:21.4094373" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 0000000000..ec26b0fdc6 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 0000000000..fbc79783d5 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 0000000000..37b578f523 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml new file mode 100644 index 0000000000..222b301f38 --- /dev/null +++ b/modules/nf-core/cat/fastq/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf index 5021e6fcbb..3d963784cb 100644 --- a/modules/nf-core/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -2,7 +2,7 @@ process CAT_FASTQ { tag "$meta.id" label 'process_single' - conda "conda-forge::sed=4.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml index 8a39e309fb..db4ac3c79a 100644 --- a/modules/nf-core/cat/fastq/meta.yml +++ b/modules/nf-core/cat/fastq/meta.yml @@ -34,7 +34,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@joseespinosa" - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test new file mode 100644 index 0000000000..f5f941825c --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -0,0 +1,143 @@ +nextflow_process { + + name "Test Process CAT_FASTQ" + script "../main.nf" + process "CAT_FASTQ" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/fastq" + + test("test_cat_fastq_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), + 
file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_single_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_paired_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } + + test("test_cat_fastq_single_end_single_file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out.reads).match() }, + { assert path(process.out.versions.get(0)).getText().contains("cat") } + ) + } + } +} diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap new file mode 100644 index 0000000000..ec2342e549 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -0,0 +1,78 @@ +{ + "test_cat_fastq_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d" + ] + ] + ], + "timestamp": "2023-10-17T23:19:12.990284837" + }, + "test_cat_fastq_single_end_same_name": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66" + ] + ] + ], + "timestamp": "2023-10-17T23:19:31.554568147" + }, + "test_cat_fastq_single_end_single_file": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,e325ef7deb4023447a1f074e285761af" + ] + ] + ], + "timestamp": "2023-10-17T23:19:49.629360033" + }, + "test_cat_fastq_paired_end_same_name": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66", + "test_2.merged.fastq.gz:md5,fe9f266f43a6fc3dcab690a18419a56e" + ] + ] + ] + ], + "timestamp": "2023-10-17T23:19:40.711617539" + }, + "test_cat_fastq_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d", + "test_2.merged.fastq.gz:md5,77c8e966e130d8c6b6ec9be52fcb2bda" + ] + ] + ] + ], + "timestamp": "2023-10-18T07:53:20.923560211" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/tests/tags.yml b/modules/nf-core/cat/fastq/tests/tags.yml new file mode 100644 index 0000000000..6ac4361405 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/tags.yml @@ -0,0 +1,2 @@ +cat/fastq: + - 
modules/nf-core/cat/fastq/** diff --git a/modules/nf-core/cnvkit/antitarget/environment.yml b/modules/nf-core/cnvkit/antitarget/environment.yml new file mode 100644 index 0000000000..ef6bec487e --- /dev/null +++ b/modules/nf-core/cnvkit/antitarget/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cnvkit=0.9.10 + - bioconda::samtools=1.17 diff --git a/modules/nf-core/cnvkit/antitarget/main.nf b/modules/nf-core/cnvkit/antitarget/main.nf index cda05c635b..795145451b 100644 --- a/modules/nf-core/cnvkit/antitarget/main.nf +++ b/modules/nf-core/cnvkit/antitarget/main.nf @@ -2,7 +2,7 @@ process CNVKIT_ANTITARGET { tag "$meta.id" label 'process_low' - conda "bioconda::cnvkit=0.9.10 bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.10--pyhdfd78af_0': 'biocontainers/cnvkit:0.9.10--pyhdfd78af_0' }" diff --git a/modules/nf-core/cnvkit/antitarget/meta.yml b/modules/nf-core/cnvkit/antitarget/meta.yml index f22c07b13e..d879092d33 100644 --- a/modules/nf-core/cnvkit/antitarget/meta.yml +++ b/modules/nf-core/cnvkit/antitarget/meta.yml @@ -15,7 +15,6 @@ tools: tool_dev_url: "https://github.com/etal/cnvkit" doi: 10.1371/journal.pcbi.1004873 licence: ["Apache-2.0"] - input: - meta: type: map @@ -26,7 +25,6 @@ input: type: file description: File containing genomic regions pattern: "*.{bed}" - output: - meta: type: map @@ -41,8 +39,11 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@adamrtalbot" - "@priesgo" - "@SusiJo" +maintainers: + - "@adamrtalbot" + - "@priesgo" + - "@SusiJo" diff --git a/modules/nf-core/cnvkit/batch/environment.yml b/modules/nf-core/cnvkit/batch/environment.yml new file mode 100644 index 0000000000..ef6bec487e --- /dev/null +++ 
b/modules/nf-core/cnvkit/batch/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cnvkit=0.9.10 + - bioconda::samtools=1.17 diff --git a/modules/nf-core/cnvkit/batch/main.nf b/modules/nf-core/cnvkit/batch/main.nf index b253d6ab11..3ccc9faa44 100644 --- a/modules/nf-core/cnvkit/batch/main.nf +++ b/modules/nf-core/cnvkit/batch/main.nf @@ -2,17 +2,17 @@ process CNVKIT_BATCH { tag "$meta.id" label 'process_low' - conda "bioconda::cnvkit=0.9.10 bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:c94363856059151a2974dc501fb07a0360cc60a3-0' : 'biocontainers/mulled-v2-780d630a9bb6a0ff2e7b6f730906fd703e40e98f:c94363856059151a2974dc501fb07a0360cc60a3-0' }" input: tuple val(meta), path(tumor), path(normal) - path fasta - path fasta_fai - path targets - path reference + tuple val(meta2), path(fasta) + tuple val(meta3), path(fasta_fai) + tuple val(meta4), path(targets) + tuple val(meta5), path(reference) val panel_of_normals output: diff --git a/modules/nf-core/cnvkit/batch/meta.yml b/modules/nf-core/cnvkit/batch/meta.yml index 950b0c9589..4f88ba3f9b 100644 --- a/modules/nf-core/cnvkit/batch/meta.yml +++ b/modules/nf-core/cnvkit/batch/meta.yml @@ -26,18 +26,38 @@ input: type: file description: | Input normal sample bam file (or cram) + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] - fasta: type: file description: | Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] - fasta_fai: type: file description: | Input reference genome fasta index (optional, but recommended for cram_input) + - meta4: + type: map + description: | + Groovy Map containing information about target file + e.g. [ id:'test' ] - targetfile: type: file description: | Input target bed file + - meta5: + type: map + description: | + Groovy Map containing information about reference file + e.g. [ id:'test' ] - reference: type: file description: | @@ -86,3 +106,13 @@ authors: - "@MaxUlysse" - "@priesgo" - "@SusiJo" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@fbdtemme" + - "@kaurravneet4123" + - "@KevinMenden" + - "@lassefolkersen" + - "@MaxUlysse" + - "@priesgo" + - "@SusiJo" diff --git a/modules/nf-core/cnvkit/genemetrics/environment.yml b/modules/nf-core/cnvkit/genemetrics/environment.yml new file mode 100644 index 0000000000..ef6bec487e --- /dev/null +++ b/modules/nf-core/cnvkit/genemetrics/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cnvkit=0.9.10 + - bioconda::samtools=1.17 diff --git a/modules/nf-core/cnvkit/genemetrics/main.nf b/modules/nf-core/cnvkit/genemetrics/main.nf new file mode 100755 index 0000000000..825b12bdac --- /dev/null +++ b/modules/nf-core/cnvkit/genemetrics/main.nf @@ -0,0 +1,39 @@ +process CNVKIT_GENEMETRICS { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/cnvkit:0.9.10--pyhdfd78af_0': + 'biocontainers/cnvkit:0.9.10--pyhdfd78af_0' }" + + input: + tuple val(meta), path(cnr), path(cns) + + output: + tuple val(meta), path("*.tsv"), emit: tsv + //tuple val(meta), path("*.cnn"), emit: cnn + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def segments = cns ? "--segment ${cns}" : "" + + """ + cnvkit.py \\ + genemetrics \\ + $cnr \\ + $segments \\ + --output ${prefix}.tsv \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/cnvkit/genemetrics/meta.yml b/modules/nf-core/cnvkit/genemetrics/meta.yml new file mode 100755 index 0000000000..4bef28c7d5 --- /dev/null +++ b/modules/nf-core/cnvkit/genemetrics/meta.yml @@ -0,0 +1,50 @@ +name: cnvkit_genemetrics +description: Copy number variant detection from high-throughput sequencing data +keywords: + - cnvkit + - bam + - fasta + - copy number +tools: + - cnvkit: + description: | + CNVkit is a Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data. It is designed for use with hybrid capture, including both whole-exome and custom target panels, and short-read sequencing platforms such as Illumina and Ion Torrent. + homepage: https://cnvkit.readthedocs.io/en/stable/index.html + documentation: https://cnvkit.readthedocs.io/en/stable/index.html + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - cnr: + type: file + description: CNR file + pattern: "*.cnr" + - cns: + type: file + description: CNS file [Optional] + pattern: "*.cns" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tsv: + type: file + description: TSV file containing the genemetrics output + pattern: "*.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@adamrtalbot" + - "@marrip" + - "@priesgo" +maintainers: + - "@adamrtalbot" + - "@marrip" + - "@priesgo" diff --git a/modules/nf-core/cnvkit/reference/environment.yml b/modules/nf-core/cnvkit/reference/environment.yml new file mode 100644 index 0000000000..ef6bec487e --- /dev/null +++ b/modules/nf-core/cnvkit/reference/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cnvkit=0.9.10 + - bioconda::samtools=1.17 diff --git a/modules/nf-core/cnvkit/reference/main.nf b/modules/nf-core/cnvkit/reference/main.nf index 57e1aed4f0..0e0b20a01e 100644 --- a/modules/nf-core/cnvkit/reference/main.nf +++ b/modules/nf-core/cnvkit/reference/main.nf @@ -2,7 +2,7 @@ process CNVKIT_REFERENCE { tag "$fasta" label 'process_low' - conda "bioconda::cnvkit=0.9.10 bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/cnvkit:0.9.10--pyhdfd78af_0': 'biocontainers/cnvkit:0.9.10--pyhdfd78af_0' }" diff --git a/modules/nf-core/cnvkit/reference/meta.yml b/modules/nf-core/cnvkit/reference/meta.yml index eae82c75e7..8c561c9953 100644 --- a/modules/nf-core/cnvkit/reference/meta.yml +++ b/modules/nf-core/cnvkit/reference/meta.yml @@ -15,7 +15,6 @@ tools: tool_dev_url: https://github.com/etal/cnvkit doi: 10.1371/journal.pcbi.1004873 licence: ["Apache-2.0"] - input: - fasta: type: file @@ -29,7 +28,6 @@ input: type: file description: File containing off-target genomic regions pattern: "*.{bed}" - output: - meta: type: map @@ -44,8 +42,11 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@adamrtalbot" - "@priesgo" - "@SusiJo" +maintainers: + - "@adamrtalbot" + - "@priesgo" + - "@SusiJo" diff --git a/modules/nf-core/controlfreec/assesssignificance/controlfreec-assesssignificance.diff b/modules/nf-core/controlfreec/assesssignificance/controlfreec-assesssignificance.diff index a80891d309..c8ace426e4 100644 --- a/modules/nf-core/controlfreec/assesssignificance/controlfreec-assesssignificance.diff +++ b/modules/nf-core/controlfreec/assesssignificance/controlfreec-assesssignificance.diff @@ -1,12 +1,9 @@ Changes in module 'nf-core/controlfreec/assesssignificance' --- modules/nf-core/controlfreec/assesssignificance/main.nf +++ modules/nf-core/controlfreec/assesssignificance/main.nf -@@ -2,10 +2,10 @@ - tag "$meta.id" - label 'process_low' +@@ -4,8 +4,8 @@ -- conda "bioconda::control-freec=11.6b" -+ conda "bioconda::control-freec=11.6" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/control-freec:11.6b--hdbdd923_0' : - 'biocontainers/control-freec:11.6b--hdbdd923_0' }" @@ -16,4 +13,13 @@ Changes in module 'nf-core/controlfreec/assesssignificance' input: tuple val(meta), path(cnvs), path(ratio) +--- modules/nf-core/controlfreec/assesssignificance/environment.yml ++++ modules/nf-core/controlfreec/assesssignificance/environment.yml +@@ -3,4 +3,4 @@ + - bioconda + - defaults + dependencies: +- - bioconda::control-freec=11.6b ++ - bioconda::control-freec=11.6 + ************************************************************ diff --git a/modules/nf-core/controlfreec/assesssignificance/environment.yml b/modules/nf-core/controlfreec/assesssignificance/environment.yml new file mode 100644 index 0000000000..f1047dbbbb --- /dev/null +++ b/modules/nf-core/controlfreec/assesssignificance/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::control-freec=11.6 diff --git a/modules/nf-core/controlfreec/assesssignificance/main.nf b/modules/nf-core/controlfreec/assesssignificance/main.nf index 8c5cd42b41..4be70b176d 100644 --- a/modules/nf-core/controlfreec/assesssignificance/main.nf +++ b/modules/nf-core/controlfreec/assesssignificance/main.nf @@ -2,7 +2,7 @@ process CONTROLFREEC_ASSESSSIGNIFICANCE { tag "$meta.id" label 'process_low' - conda "bioconda::control-freec=11.6" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/control-freec:11.6--h1b792b2_1' : 'biocontainers/control-freec:11.6--h1b792b2_1' }" diff --git a/modules/nf-core/controlfreec/assesssignificance/meta.yml b/modules/nf-core/controlfreec/assesssignificance/meta.yml index 0451cca349..b8cda6dd50 100644 --- a/modules/nf-core/controlfreec/assesssignificance/meta.yml +++ b/modules/nf-core/controlfreec/assesssignificance/meta.yml @@ -14,7 +14,6 @@ tools: tool_dev_url: https://github.com/BoevaLab/FREEC/ doi: "10.1093/bioinformatics/btq635" licence: ["GPL >=2"] - input: # Only when we have meta - meta: @@ -30,7 +29,6 @@ input: type: file description: ratio file generated by FREEC pattern: "*.ratio.txt" - output: - meta: type: map @@ -45,6 +43,7 @@ output: type: file description: CNV file containing p_values for each call pattern: "*.p.value.txt" - authors: - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/controlfreec/freec/environment.yml b/modules/nf-core/controlfreec/freec/environment.yml new file mode 100644 index 0000000000..83094f46ca --- /dev/null +++ b/modules/nf-core/controlfreec/freec/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::control-freec=11.6b diff --git a/modules/nf-core/controlfreec/freec/main.nf b/modules/nf-core/controlfreec/freec/main.nf index 213ef92927..65cae5cba8 100644 --- a/modules/nf-core/controlfreec/freec/main.nf +++ b/modules/nf-core/controlfreec/freec/main.nf @@ -2,7 +2,7 @@ process CONTROLFREEC_FREEC { tag "$meta.id" label 'process_low' - conda "bioconda::control-freec=11.6b" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/control-freec:11.6b--hdbdd923_0' : 'biocontainers/control-freec:11.6b--hdbdd923_0' }" diff --git a/modules/nf-core/controlfreec/freec/meta.yml b/modules/nf-core/controlfreec/freec/meta.yml index bbb152d432..1442bbe44a 100644 --- a/modules/nf-core/controlfreec/freec/meta.yml +++ b/modules/nf-core/controlfreec/freec/meta.yml @@ -14,7 +14,6 @@ tools: tool_dev_url: https://github.com/BoevaLab/FREEC/ doi: "10.1093/bioinformatics/btq635" licence: ["GPL >=2"] - input: - args: type: map @@ -68,7 +67,6 @@ input: ] } ``` - - meta: type: map description: | @@ -130,7 +128,6 @@ input: type: file description: Sorted bed file containing capture regions (optional) pattern: "*.bed" - output: - meta: type: map @@ -177,6 +174,7 @@ output: type: file description: Config file used to run Control-FREEC pattern: "config.txt" - authors: - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/controlfreec/freec2bed/environment.yml b/modules/nf-core/controlfreec/freec2bed/environment.yml new file mode 100644 index 0000000000..83094f46ca --- /dev/null +++ b/modules/nf-core/controlfreec/freec2bed/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::control-freec=11.6b diff --git a/modules/nf-core/controlfreec/freec2bed/main.nf b/modules/nf-core/controlfreec/freec2bed/main.nf index 0d5332a2d7..d2649cf95b 100644 --- a/modules/nf-core/controlfreec/freec2bed/main.nf +++ b/modules/nf-core/controlfreec/freec2bed/main.nf @@ -2,7 +2,7 @@ process CONTROLFREEC_FREEC2BED { tag "$meta.id" label 'process_low' - conda "bioconda::control-freec=11.6b" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/control-freec:11.6b--hdbdd923_0' : 'biocontainers/control-freec:11.6b--hdbdd923_0' }" diff --git a/modules/nf-core/controlfreec/freec2bed/meta.yml b/modules/nf-core/controlfreec/freec2bed/meta.yml index 47fff8ab80..b10c8ab377 100644 --- a/modules/nf-core/controlfreec/freec2bed/meta.yml +++ b/modules/nf-core/controlfreec/freec2bed/meta.yml @@ -14,7 +14,6 @@ tools: tool_dev_url: https://github.com/BoevaLab/FREEC/ doi: "10.1093/bioinformatics/btq635" licence: ["GPL >=2"] - input: - meta: type: map @@ -25,7 +24,6 @@ input: type: file description: ratio file generated by FREEC pattern: "*.ratio.txt" - output: - meta: type: map @@ -40,6 +38,7 @@ output: type: file description: Bed file pattern: "*.bed" - authors: - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/controlfreec/freec2circos/environment.yml b/modules/nf-core/controlfreec/freec2circos/environment.yml new file mode 100644 index 0000000000..83094f46ca --- /dev/null +++ b/modules/nf-core/controlfreec/freec2circos/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::control-freec=11.6b diff --git a/modules/nf-core/controlfreec/freec2circos/main.nf b/modules/nf-core/controlfreec/freec2circos/main.nf index c748af8be7..f80116799c 100644 --- a/modules/nf-core/controlfreec/freec2circos/main.nf +++ b/modules/nf-core/controlfreec/freec2circos/main.nf @@ -2,7 +2,7 @@ process CONTROLFREEC_FREEC2CIRCOS { tag "$meta.id" label 'process_low' - conda "bioconda::control-freec=11.6b" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/control-freec:11.6b--hdbdd923_0' : 'biocontainers/control-freec:11.6b--hdbdd923_0' }" diff --git a/modules/nf-core/controlfreec/freec2circos/meta.yml b/modules/nf-core/controlfreec/freec2circos/meta.yml index ff845a826c..2c6b77d611 100644 --- a/modules/nf-core/controlfreec/freec2circos/meta.yml +++ b/modules/nf-core/controlfreec/freec2circos/meta.yml @@ -14,7 +14,6 @@ tools: tool_dev_url: https://github.com/BoevaLab/FREEC/ doi: "10.1093/bioinformatics/btq635" licence: ["GPL >=2"] - input: - meta: type: map @@ -25,7 +24,6 @@ input: type: file description: ratio file generated by FREEC pattern: "*.ratio.txt" - output: - meta: type: map @@ -40,6 +38,7 @@ output: type: file description: Txt file pattern: "*.circos.txt" - authors: - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/controlfreec/makegraph/environment.yml b/modules/nf-core/controlfreec/makegraph/environment.yml new file mode 100644 index 0000000000..83094f46ca --- /dev/null +++ b/modules/nf-core/controlfreec/makegraph/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::control-freec=11.6b diff --git a/modules/nf-core/controlfreec/makegraph/main.nf b/modules/nf-core/controlfreec/makegraph/main.nf index 533065c6e4..8d489f71f7 100644 --- a/modules/nf-core/controlfreec/makegraph/main.nf +++ b/modules/nf-core/controlfreec/makegraph/main.nf @@ -2,7 +2,7 @@ process CONTROLFREEC_MAKEGRAPH { tag "$meta.id" label 'process_low' - conda "bioconda::control-freec=11.6b" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/control-freec:11.6b--hdbdd923_0' : 'biocontainers/control-freec:11.6b--hdbdd923_0' }" diff --git a/modules/nf-core/controlfreec/makegraph/meta.yml b/modules/nf-core/controlfreec/makegraph/meta.yml index 7b97dcf9bc..6f91db38aa 100644 --- a/modules/nf-core/controlfreec/makegraph/meta.yml +++ b/modules/nf-core/controlfreec/makegraph/meta.yml @@ -14,7 +14,6 @@ tools: tool_dev_url: https://github.com/BoevaLab/FREEC/ doi: "10.1093/bioinformatics/btq635" licence: ["GPL >=2"] - input: # Only when we have meta - meta: @@ -33,7 +32,6 @@ input: - ploidy: type: integer description: Ploidy value for which graph should be created - output: - meta: type: map @@ -56,6 +54,7 @@ output: type: file description: Image of ratio plot pattern: "*_ratio.png" - authors: - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 0000000000..7ca221616b --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.15 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index c9d014b113..e562259e7d 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,7 +2,7 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.15" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index c32657de7a..9414c32d71 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -16,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -30,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 0000000000..eec1db10a2 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 0000000000..8713b92164 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,27 @@ +{ + "Should run without 
failures": { + "content": [ + { + "0": [ + "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" + ], + "1": [ + "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" + ], + "2": [ + "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" + ], + "mqc_yml": [ + "software_versions_mqc.yml:md5,ee4a1d028ad29987f9ac511f4668f17c" + ], + "versions": [ + "versions.yml:md5,f47ebd22aba1dd987b7e5d5247b766c3" + ], + "yml": [ + "software_versions.yml:md5,a027f820f30b8191a20ca16465daaf37" + ] + } + ], + "timestamp": "2023-10-11T17:10:02.930699" + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 0000000000..405aa24ae3 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/deepvariant/environment.yml b/modules/nf-core/deepvariant/environment.yml new file mode 100644 index 0000000000..bcd3a8b1c6 --- /dev/null +++ b/modules/nf-core/deepvariant/environment.yml @@ -0,0 +1,4 @@ +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/deepvariant/meta.yml b/modules/nf-core/deepvariant/meta.yml index c7d11ae3b1..a50dc57d9a 100644 --- a/modules/nf-core/deepvariant/meta.yml +++ b/modules/nf-core/deepvariant/meta.yml @@ -12,7 +12,6 @@ tools: tool_dev_url: https://github.com/google/deepvariant doi: "10.1038/nbt.4235" licence: ["BSD-3-clause"] - input: - meta: type: map @@ -58,7 +57,6 @@ input: type: file description: GZI index of reference fasta file pattern: "*.gzi" - output: - meta: type: map @@ -77,7 +75,9 @@ output: type: file description: File containing software version pattern: "*.{version.txt}" - authors: - "@abhi18av" - "@ramprasadn" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/dragmap/align/environment.yml 
b/modules/nf-core/dragmap/align/environment.yml new file mode 100644 index 0000000000..66482a4f9f --- /dev/null +++ b/modules/nf-core/dragmap/align/environment.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::dragmap=1.2.1 + - bioconda::samtools=1.15.1 + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/dragmap/align/main.nf b/modules/nf-core/dragmap/align/main.nf index 6221fde67f..6eddb2d6c0 100644 --- a/modules/nf-core/dragmap/align/main.nf +++ b/modules/nf-core/dragmap/align/main.nf @@ -2,7 +2,7 @@ process DRAGMAP_ALIGN { tag "$meta.id" label 'process_high' - conda "bioconda::dragmap=1.2.1 bioconda::samtools=1.15.1 conda-forge::pigz=2.3.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:5ebebbc128cd624282eaa37d2c7fe01505a91a69-0': 'biocontainers/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:5ebebbc128cd624282eaa37d2c7fe01505a91a69-0' }" diff --git a/modules/nf-core/dragmap/align/meta.yml b/modules/nf-core/dragmap/align/meta.yml index 763e00525c..f0def75567 100644 --- a/modules/nf-core/dragmap/align/meta.yml +++ b/modules/nf-core/dragmap/align/meta.yml @@ -12,7 +12,6 @@ tools: homepage: https://github.com/Illumina/dragmap documentation: https://github.com/Illumina/dragmap tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage - licence: ["GPL v3"] input: - meta: @@ -45,3 +44,5 @@ output: pattern: "versions.yml" authors: - "@Emiller88" +maintainers: + - "@Emiller88" diff --git a/modules/nf-core/dragmap/hashtable/dragmap-hashtable.diff b/modules/nf-core/dragmap/hashtable/dragmap-hashtable.diff index 069f133e61..aeb9694244 100644 --- a/modules/nf-core/dragmap/hashtable/dragmap-hashtable.diff +++ b/modules/nf-core/dragmap/hashtable/dragmap-hashtable.diff @@ -1,12 +1,9 @@ Changes in module 
'nf-core/dragmap/hashtable' --- modules/nf-core/dragmap/hashtable/main.nf +++ modules/nf-core/dragmap/hashtable/main.nf -@@ -2,10 +2,10 @@ - tag "$fasta" - label 'process_high' +@@ -4,8 +4,8 @@ -- conda "bioconda::dragmap=1.3.0" -+ conda "bioconda::dragmap=1.2.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/dragmap:1.3.0--h72d16da_1': - 'biocontainers/dragmap:1.3.0--h72d16da_1' }" @@ -16,4 +13,13 @@ Changes in module 'nf-core/dragmap/hashtable' input: tuple val(meta), path(fasta) +--- modules/nf-core/dragmap/hashtable/environment.yml ++++ modules/nf-core/dragmap/hashtable/environment.yml +@@ -3,4 +3,4 @@ + - bioconda + - defaults + dependencies: +- - bioconda::dragmap=1.3.0 ++ - bioconda::dragmap=1.2.1 + ************************************************************ diff --git a/modules/nf-core/dragmap/hashtable/environment.yml b/modules/nf-core/dragmap/hashtable/environment.yml new file mode 100644 index 0000000000..c8e351255f --- /dev/null +++ b/modules/nf-core/dragmap/hashtable/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::dragmap=1.2.1 diff --git a/modules/nf-core/dragmap/hashtable/main.nf b/modules/nf-core/dragmap/hashtable/main.nf index 73f6c2785a..604052f33f 100644 --- a/modules/nf-core/dragmap/hashtable/main.nf +++ b/modules/nf-core/dragmap/hashtable/main.nf @@ -2,7 +2,7 @@ process DRAGMAP_HASHTABLE { tag "$fasta" label 'process_high' - conda "bioconda::dragmap=1.2.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/dragmap:1.2.1--h72d16da_1': 'biocontainers/dragmap:1.2.1--h72d16da_1' }" diff --git a/modules/nf-core/dragmap/hashtable/meta.yml b/modules/nf-core/dragmap/hashtable/meta.yml index 133cc9fc67..8daca83273 100644 --- a/modules/nf-core/dragmap/hashtable/meta.yml +++ b/modules/nf-core/dragmap/hashtable/meta.yml @@ -11,7 +11,6 @@ tools: homepage: https://github.com/Illumina/dragmap documentation: https://github.com/Illumina/dragmap tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage - licence: ["GPL v3"] input: - meta: @@ -38,3 +37,5 @@ output: pattern: "versions.yml" authors: - "@Emiller88" +maintainers: + - "@Emiller88" diff --git a/modules/nf-core/ensemblvep/download/environment.yml b/modules/nf-core/ensemblvep/download/environment.yml new file mode 100644 index 0000000000..d3805c0c05 --- /dev/null +++ b/modules/nf-core/ensemblvep/download/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ensembl-vep=110.0 diff --git a/modules/nf-core/ensemblvep/download/main.nf b/modules/nf-core/ensemblvep/download/main.nf index 4873b91d57..a770cbfc6f 100644 --- a/modules/nf-core/ensemblvep/download/main.nf +++ b/modules/nf-core/ensemblvep/download/main.nf @@ -2,7 +2,7 @@ process ENSEMBLVEP_DOWNLOAD { tag "$meta.id" label 'process_medium' - conda "bioconda::ensembl-vep=110.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" diff --git a/modules/nf-core/ensemblvep/download/meta.yml b/modules/nf-core/ensemblvep/download/meta.yml index acb337c399..0183a435b8 100644 --- a/modules/nf-core/ensemblvep/download/meta.yml +++ b/modules/nf-core/ensemblvep/download/meta.yml @@ -41,3 +41,5 @@ output: pattern: "versions.yml" authors: - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/ensemblvep/vep/environment.yml b/modules/nf-core/ensemblvep/vep/environment.yml new file mode 100644 index 0000000000..d3805c0c05 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ensembl-vep=110.0 diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf index da0e36460a..3a2b742348 100644 --- a/modules/nf-core/ensemblvep/vep/main.nf +++ b/modules/nf-core/ensemblvep/vep/main.nf @@ -2,7 +2,7 @@ process ENSEMBLVEP_VEP { tag "$meta.id" label 'process_medium' - conda "bioconda::ensembl-vep=110.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" diff --git a/modules/nf-core/ensemblvep/vep/meta.yml b/modules/nf-core/ensemblvep/vep/meta.yml index 7783847dc4..61d29d3fb1 100644 --- a/modules/nf-core/ensemblvep/vep/meta.yml +++ b/modules/nf-core/ensemblvep/vep/meta.yml @@ -86,3 +86,7 @@ authors: - "@maxulysse" - "@matthdsm" - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml new file mode 100644 index 0000000000..19ccec2564 --- /dev/null +++ b/modules/nf-core/fastp/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastp=0.23.4 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 831b7f128e..c8e815aead 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -2,7 +2,7 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda "bioconda::fastp=0.23.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : 'biocontainers/fastp:0.23.4--h5f740d0_0' }" diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml index 197ea7ca65..c22a16abd9 100644 --- a/modules/nf-core/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -33,7 +33,6 @@ input: - save_merged: type: boolean description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` - output: - meta: type: map @@ -71,3 +70,6 @@ output: authors: - "@drpatelh" - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 0000000000..f610b735e2 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,485 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:true ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)" ] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert 
path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + 
} + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("fastp test_fastp_interleaved") { + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ] + ] + + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "paired end (151 cycles + 151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 198"] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = 
true + save_merged = false + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { failed_read_lines.each { failed_read_line -> + { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + 
} + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { failed_read2_lines.each { failed_read2_line -> + { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = 
save_merged + """ + } + } + + then { + def html_text = [ "
"] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683'] + def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/fastp/adapters.fasta", checkIfExists: true) + save_trimmed_fail = false + save_merged = true + + input[0] = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = 
save_merged + """ + } + } + + then { + def html_text = [ "
"] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"] + def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 0000000000..0fa68c7d71 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,52 @@ +{ + "fastp test_fastp_interleaved_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4" + ] + ] + ], + "timestamp": "2023-10-17T11:04:45.794175881" + }, + "test_fastp_single_end_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" + ] + ] + ], + "timestamp": "2023-10-17T11:04:10.566343705" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "timestamp": "2023-10-17T11:04:10.582076024" + }, + "test_fastp_single_end_trim_fail_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" + ] + ] + ], + "timestamp": "2023-10-17T11:05:00.379878948" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.config new file mode 100644 index 0000000000..0f7849ad96 --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: FASTP { + ext.args = "--interleaved_in" + } +} diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml new file mode 100644 index 0000000000..c1afcce75f --- /dev/null +++ b/modules/nf-core/fastp/tests/tags.yml @@ -0,0 +1,2 @@ +fastp: + - modules/nf-core/fastp/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 0000000000..f52a53a0fa --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 249f90644d..50e59f2b8c 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5a06..ee5507e06b 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 0000000000..6437a144d9 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,41 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("Single-Read") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
<div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, + { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + ) + } + } +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 0000000000..636a32cead --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-10-09T23:40:54+0000" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 0000000000..7834294ba0 --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/environment.yml b/modules/nf-core/fgbio/callmolecularconsensusreads/environment.yml new file mode 100644 index 0000000000..d64c6844b3 --- /dev/null +++ b/modules/nf-core/fgbio/callmolecularconsensusreads/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fgbio=2.0.2 diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/main.nf b/modules/nf-core/fgbio/callmolecularconsensusreads/main.nf index 4515040cc9..e9f209ef16 100644 --- a/modules/nf-core/fgbio/callmolecularconsensusreads/main.nf +++ b/modules/nf-core/fgbio/callmolecularconsensusreads/main.nf @@ -2,7 +2,7 @@ process FGBIO_CALLMOLECULARCONSENSUSREADS { tag "$meta.id" label 'process_medium' - conda "bioconda::fgbio=2.0.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fgbio:2.0.2--hdfd78af_0' : 'biocontainers/fgbio:2.0.2--hdfd78af_0' }" diff --git a/modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml b/modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml index 371825123a..f4a6ab1bb8 100644 --- a/modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml +++ b/modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml @@ -1,6 +1,5 @@ name: fgbio_callmolecularconsensusreads description: Calls consensus sequences from reads with the same unique molecular tag. - keywords: - UMIs - consensus sequence @@ -12,7 +11,6 @@ tools: homepage: https://github.com/fulcrumgenomics/fgbio documentation: http://fulcrumgenomics.github.io/fgbio/ licence: ["MIT"] - input: - meta: type: map @@ -24,7 +22,6 @@ input: description: | The input SAM or BAM file. pattern: "*.{bam,sam}" - output: - meta: type: map @@ -40,6 +37,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@sruthipsuresh" +maintainers: + - "@sruthipsuresh" diff --git a/modules/nf-core/fgbio/fastqtobam/environment.yml b/modules/nf-core/fgbio/fastqtobam/environment.yml new file mode 100644 index 0000000000..d64c6844b3 --- /dev/null +++ b/modules/nf-core/fgbio/fastqtobam/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fgbio=2.0.2 diff --git a/modules/nf-core/fgbio/fastqtobam/main.nf b/modules/nf-core/fgbio/fastqtobam/main.nf index 74016d3c45..f7302171b8 100644 --- a/modules/nf-core/fgbio/fastqtobam/main.nf +++ b/modules/nf-core/fgbio/fastqtobam/main.nf @@ -2,7 +2,7 @@ process FGBIO_FASTQTOBAM { tag "$meta.id" label 'process_low' - conda "bioconda::fgbio=2.0.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/fgbio:2.0.2--hdfd78af_0' : 'biocontainers/fgbio:2.0.2--hdfd78af_0' }" diff --git a/modules/nf-core/fgbio/fastqtobam/meta.yml b/modules/nf-core/fgbio/fastqtobam/meta.yml index f85485cab8..4b37cd530f 100644 --- a/modules/nf-core/fgbio/fastqtobam/meta.yml +++ b/modules/nf-core/fgbio/fastqtobam/meta.yml @@ -10,15 +10,12 @@ tools: homepage: http://fulcrumgenomics.github.io/fgbio/ documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/ tool_dev_url: https://github.com/fulcrumgenomics/fgbio - licence: ["MIT"] - input: - reads: type: file description: pair of reads to be converted into BAM file pattern: "*.{fastq.gz}" - output: - meta: type: map @@ -37,7 +34,9 @@ output: type: file description: Unaligned, unsorted CRAM file pattern: "*.{cram}" - authors: - "@lescai" - "@matthdsm" +maintainers: + - "@lescai" + - "@matthdsm" diff --git a/modules/nf-core/fgbio/groupreadsbyumi/environment.yml b/modules/nf-core/fgbio/groupreadsbyumi/environment.yml new file mode 100644 index 0000000000..d64c6844b3 --- /dev/null +++ b/modules/nf-core/fgbio/groupreadsbyumi/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fgbio=2.0.2 diff --git a/modules/nf-core/fgbio/groupreadsbyumi/main.nf b/modules/nf-core/fgbio/groupreadsbyumi/main.nf index 879e453452..7179290c91 100644 --- a/modules/nf-core/fgbio/groupreadsbyumi/main.nf +++ b/modules/nf-core/fgbio/groupreadsbyumi/main.nf @@ -2,7 +2,7 @@ process FGBIO_GROUPREADSBYUMI { tag "$meta.id" label 'process_low' - conda "bioconda::fgbio=2.0.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/fgbio:2.0.2--hdfd78af_0' : 'biocontainers/fgbio:2.0.2--hdfd78af_0' }" diff --git a/modules/nf-core/fgbio/groupreadsbyumi/meta.yml b/modules/nf-core/fgbio/groupreadsbyumi/meta.yml index f40a5ac7fc..02ca91f19f 100644 --- a/modules/nf-core/fgbio/groupreadsbyumi/meta.yml +++ b/modules/nf-core/fgbio/groupreadsbyumi/meta.yml @@ -16,9 +16,7 @@ tools: homepage: http://fulcrumgenomics.github.io/fgbio/ documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/ tool_dev_url: https://github.com/fulcrumgenomics/fgbio - licence: ["MIT"] - input: - meta: type: map @@ -35,7 +33,6 @@ input: description: | Reguired argument: defines the UMI assignment strategy. Must be chosen among: Identity, Edit, Adjacency, Paired. - output: - meta: type: map @@ -54,6 +51,7 @@ output: type: file description: A text file containing the tag family size counts pattern: "*.txt" - authors: - "@lescai" +maintainers: + - "@lescai" diff --git a/modules/nf-core/freebayes/environment.yml b/modules/nf-core/freebayes/environment.yml new file mode 100644 index 0000000000..b074f213f4 --- /dev/null +++ b/modules/nf-core/freebayes/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::freebayes=1.3.6 diff --git a/modules/nf-core/freebayes/main.nf b/modules/nf-core/freebayes/main.nf index 1466f085e8..8a1c641ded 100644 --- a/modules/nf-core/freebayes/main.nf +++ b/modules/nf-core/freebayes/main.nf @@ -2,7 +2,7 @@ process FREEBAYES { tag "$meta.id" label 'process_single' - conda "bioconda::freebayes=1.3.6" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/freebayes:1.3.6--hbfe0e7f_2' : 'biocontainers/freebayes:1.3.6--hbfe0e7f_2' }" diff --git a/modules/nf-core/freebayes/meta.yml b/modules/nf-core/freebayes/meta.yml index 17d83cba2b..e2cf1a175c 100644 --- a/modules/nf-core/freebayes/meta.yml +++ b/modules/nf-core/freebayes/meta.yml @@ -8,7 +8,6 @@ keywords: - germline variant calling - bacterial variant calling - bayesian - tools: - freebayes: description: Bayesian haplotype-based polymorphism discovery and genotyping @@ -17,7 +16,6 @@ tools: tool_dev_url: https://github.com/freebayes/freebayes doi: "10.48550/arXiv.1207.3907" licence: ["MIT"] - input: - meta: type: map @@ -60,7 +58,6 @@ input: or a region-specific format: seq_name start end sample_name copy_number pattern: "*.bed" - output: - meta: type: map @@ -75,8 +72,11 @@ output: type: file description: Compressed VCF file pattern: "*.vcf.gz" - authors: - "@maxibor" - "@FriederikeHanssen" - "@maxulysse" +maintainers: + - "@maxibor" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/applybqsr/environment.yml b/modules/nf-core/gatk4/applybqsr/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/applybqsr/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/applybqsr/main.nf b/modules/nf-core/gatk4/applybqsr/main.nf index e5e6bf99c7..7e49563739 100644 --- a/modules/nf-core/gatk4/applybqsr/main.nf +++ b/modules/nf-core/gatk4/applybqsr/main.nf @@ -2,7 +2,7 @@ process GATK4_APPLYBQSR { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/applybqsr/meta.yml b/modules/nf-core/gatk4/applybqsr/meta.yml index 2085fa97b2..ab9efea3f4 100644 --- a/modules/nf-core/gatk4/applybqsr/meta.yml +++ b/modules/nf-core/gatk4/applybqsr/meta.yml @@ -16,7 +16,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -49,7 +48,6 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - output: - meta: type: map @@ -68,7 +66,9 @@ output: type: file description: Recalibrated CRAM file pattern: "*.{cram}" - authors: - "@yocra3" - "@FriederikeHanssen" +maintainers: + - "@yocra3" + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/applyvqsr/environment.yml b/modules/nf-core/gatk4/applyvqsr/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/applyvqsr/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/applyvqsr/main.nf b/modules/nf-core/gatk4/applyvqsr/main.nf index 8413f2bb3b..21afe9a528 100644 --- a/modules/nf-core/gatk4/applyvqsr/main.nf +++ b/modules/nf-core/gatk4/applyvqsr/main.nf @@ -2,7 +2,7 @@ process GATK4_APPLYVQSR { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/applyvqsr/meta.yml b/modules/nf-core/gatk4/applyvqsr/meta.yml index e1e121a661..de5d6d067a 100644 --- a/modules/nf-core/gatk4/applyvqsr/meta.yml +++ b/modules/nf-core/gatk4/applyvqsr/meta.yml @@ -19,7 +19,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -58,7 +57,6 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - output: - vcf: type: file @@ -72,6 +70,7 @@ output: type: file description: File containing software versions. pattern: "versions.yml" - authors: - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/baserecalibrator/environment.yml b/modules/nf-core/gatk4/baserecalibrator/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/baserecalibrator/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/baserecalibrator/main.nf b/modules/nf-core/gatk4/baserecalibrator/main.nf index 5375289a16..e893b65036 100644 --- a/modules/nf-core/gatk4/baserecalibrator/main.nf +++ b/modules/nf-core/gatk4/baserecalibrator/main.nf @@ -2,7 +2,7 @@ process GATK4_BASERECALIBRATOR { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/baserecalibrator/meta.yml b/modules/nf-core/gatk4/baserecalibrator/meta.yml index db4fecfc92..8252b8c290 100644 --- a/modules/nf-core/gatk4/baserecalibrator/meta.yml +++ b/modules/nf-core/gatk4/baserecalibrator/meta.yml @@ -16,7 +16,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -54,7 +53,6 @@ input: type: file description: Tabix index of the known_sites (optional) pattern: "*.vcf.gz.tbi" - output: - meta: type: map @@ -69,8 +67,11 @@ output: type: file description: Recalibration table from BaseRecalibrator pattern: "*.{table}" - authors: - "@yocra3" - "@FriederikeHanssen" - "@maxulysse" +maintainers: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/calculatecontamination/environment.yml b/modules/nf-core/gatk4/calculatecontamination/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/calculatecontamination/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/calculatecontamination/main.nf b/modules/nf-core/gatk4/calculatecontamination/main.nf index 9dd961bec0..8d43c4ee6b 100644 --- a/modules/nf-core/gatk4/calculatecontamination/main.nf +++ b/modules/nf-core/gatk4/calculatecontamination/main.nf @@ -2,7 +2,7 @@ process GATK4_CALCULATECONTAMINATION { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/calculatecontamination/meta.yml b/modules/nf-core/gatk4/calculatecontamination/meta.yml index 7767bd0807..b0ffe814c5 100644 --- a/modules/nf-core/gatk4/calculatecontamination/meta.yml +++ b/modules/nf-core/gatk4/calculatecontamination/meta.yml @@ -17,7 +17,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -32,7 +31,6 @@ input: type: file description: File containing the pileups summary table of a normal sample that matches with the tumor sample specified in pileup argument. This is an optional input. pattern: "*.pileups.table" - output: - contamination: type: file @@ -46,7 +44,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" - "@maxulysse" +maintainers: + - "@GCJMackenzie" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/cnnscorevariants/environment.yml b/modules/nf-core/gatk4/cnnscorevariants/environment.yml new file mode 100644 index 0000000000..bcd3a8b1c6 --- /dev/null +++ b/modules/nf-core/gatk4/cnnscorevariants/environment.yml @@ -0,0 +1,4 @@ +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/gatk4/cnnscorevariants/meta.yml b/modules/nf-core/gatk4/cnnscorevariants/meta.yml index a2fe3d47c6..8a9d0f51c2 100644 --- a/modules/nf-core/gatk4/cnnscorevariants/meta.yml +++ b/modules/nf-core/gatk4/cnnscorevariants/meta.yml @@ -14,7 +14,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -56,7 +55,6 @@ input: type: file description: Keras model HD5 file with neural net weights. 
(optional) pattern: "*.hd5" - output: - meta: type: map @@ -75,6 +73,7 @@ output: type: file description: VCF index file pattern: "*.vcf.gz.tbi" - authors: - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/createsequencedictionary/environment.yml b/modules/nf-core/gatk4/createsequencedictionary/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf index 3e4efdd983..b47ad16221 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/main.nf +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -2,7 +2,7 @@ process GATK4_CREATESEQUENCEDICTIONARY { tag "$fasta" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml index 9b8b8c8917..f9d70be098 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/meta.yml +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -15,7 +15,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -38,3 +37,6 @@ output: authors: - "@maxulysse" - "@ramprasadn" +maintainers: + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/estimatelibrarycomplexity/environment.yml b/modules/nf-core/gatk4/estimatelibrarycomplexity/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/estimatelibrarycomplexity/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf b/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf index 81fc83513d..c0eef7b327 100644 --- a/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf +++ b/modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf @@ -2,7 +2,7 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/estimatelibrarycomplexity/meta.yml b/modules/nf-core/gatk4/estimatelibrarycomplexity/meta.yml index 2783152a59..2d5bddf6c9 100644 --- a/modules/nf-core/gatk4/estimatelibrarycomplexity/meta.yml +++ b/modules/nf-core/gatk4/estimatelibrarycomplexity/meta.yml @@ -13,7 +13,6 @@ tools: tool_dev_url: https://github.com/broadinstitute/gatk doi: "10.1158/1538-7445.AM2017-3590" licence: ["Apache-2.0"] - input: - meta: type: map @@ -36,7 +35,6 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - output: - meta: type: map @@ -51,7 +49,9 @@ output: type: file description: File containing metrics on the input files pattern: "*.{metrics}" - authors: - "@FriederikeHanssen" - "@maxulysse" +maintainers: + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/filtermutectcalls/environment.yml b/modules/nf-core/gatk4/filtermutectcalls/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/filtermutectcalls/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/filtermutectcalls/main.nf b/modules/nf-core/gatk4/filtermutectcalls/main.nf index 623b91aece..fa6b46ab3c 100644 --- a/modules/nf-core/gatk4/filtermutectcalls/main.nf +++ b/modules/nf-core/gatk4/filtermutectcalls/main.nf @@ -2,7 +2,7 @@ process GATK4_FILTERMUTECTCALLS { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/filtermutectcalls/meta.yml b/modules/nf-core/gatk4/filtermutectcalls/meta.yml index 095a28e321..736c838625 100644 --- a/modules/nf-core/gatk4/filtermutectcalls/meta.yml +++ b/modules/nf-core/gatk4/filtermutectcalls/meta.yml @@ -16,7 +16,6 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 - input: - meta: type: map @@ -77,7 +76,6 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - output: - vcf: type: file @@ -95,8 +93,11 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" - "@maxulysse" - "@ramprasadn" +maintainers: + - "@GCJMackenzie" + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/filtervarianttranches/environment.yml b/modules/nf-core/gatk4/filtervarianttranches/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/filtervarianttranches/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/filtervarianttranches/main.nf b/modules/nf-core/gatk4/filtervarianttranches/main.nf index 90cbf5f0a6..9da47ab739 100644 --- a/modules/nf-core/gatk4/filtervarianttranches/main.nf +++ b/modules/nf-core/gatk4/filtervarianttranches/main.nf @@ -2,7 +2,7 @@ process GATK4_FILTERVARIANTTRANCHES { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/filtervarianttranches/meta.yml b/modules/nf-core/gatk4/filtervarianttranches/meta.yml index c5325b4891..9346d2b4a4 100644 --- a/modules/nf-core/gatk4/filtervarianttranches/meta.yml +++ b/modules/nf-core/gatk4/filtervarianttranches/meta.yml @@ -14,7 +14,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360051308071-FilterVariantTranches doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -49,7 +48,6 @@ input: type: file description: GATK sequence dictionary pattern: ".dict" - output: - meta: type: map @@ -68,6 +66,7 @@ output: type: file description: VCF index file pattern: "*.vcf.gz.tbi" - authors: - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/gatherbqsrreports/environment.yml b/modules/nf-core/gatk4/gatherbqsrreports/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/gatherbqsrreports/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/gatherbqsrreports/main.nf b/modules/nf-core/gatk4/gatherbqsrreports/main.nf index 3eeca5ad90..e783701017 100644 --- a/modules/nf-core/gatk4/gatherbqsrreports/main.nf +++ b/modules/nf-core/gatk4/gatherbqsrreports/main.nf @@ -2,7 +2,7 @@ process GATK4_GATHERBQSRREPORTS { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/gatherbqsrreports/meta.yml b/modules/nf-core/gatk4/gatherbqsrreports/meta.yml index d9faf09d45..b9f5bf5f8b 100644 --- a/modules/nf-core/gatk4/gatherbqsrreports/meta.yml +++ b/modules/nf-core/gatk4/gatherbqsrreports/meta.yml @@ -13,7 +13,6 @@ tools: tool_dev_url: https://github.com/broadinstitute/gatk doi: "10.1158/1538-7445.AM2017-3590" licence: ["BSD-3-clause"] - input: - meta: type: map @@ -24,7 +23,6 @@ input: type: file description: File(s) containing BQSR table(s) pattern: "*.table" - output: - meta: type: map @@ -39,6 +37,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/gatherpileupsummaries/environment.yml b/modules/nf-core/gatk4/gatherpileupsummaries/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/gatherpileupsummaries/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/gatherpileupsummaries/main.nf b/modules/nf-core/gatk4/gatherpileupsummaries/main.nf index f315e1af3b..1863133d2c 100644 --- a/modules/nf-core/gatk4/gatherpileupsummaries/main.nf +++ b/modules/nf-core/gatk4/gatherpileupsummaries/main.nf @@ -2,7 +2,7 @@ process GATK4_GATHERPILEUPSUMMARIES { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/gatherpileupsummaries/meta.yml b/modules/nf-core/gatk4/gatherpileupsummaries/meta.yml index 6b8569dd5b..35381a3b51 100644 --- a/modules/nf-core/gatk4/gatherpileupsummaries/meta.yml +++ b/modules/nf-core/gatk4/gatherpileupsummaries/meta.yml @@ -12,7 +12,6 @@ tools: tool_dev_url: https://github.com/broadinstitute/gatk doi: "10.1158/1538-7445.AM2017-3590" licence: ["BSD-3-clause"] - input: - meta: type: map @@ -23,7 +22,6 @@ input: type: file description: Pileup files from gatk4/getpileupsummaries pattern: "*.pileups.table" - output: - meta: type: map @@ -38,7 +36,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@FriederikeHanssen" - "@maxulysse" +maintainers: + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/genomicsdbimport/environment.yml b/modules/nf-core/gatk4/genomicsdbimport/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/genomicsdbimport/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/genomicsdbimport/main.nf b/modules/nf-core/gatk4/genomicsdbimport/main.nf index a8725d3f9a..916037ebef 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/main.nf +++ b/modules/nf-core/gatk4/genomicsdbimport/main.nf @@ -2,7 +2,7 @@ process GATK4_GENOMICSDBIMPORT { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/genomicsdbimport/meta.yml b/modules/nf-core/gatk4/genomicsdbimport/meta.yml index ff114d7d20..ca8fe3d076 100644 --- a/modules/nf-core/gatk4/genomicsdbimport/meta.yml +++ b/modules/nf-core/gatk4/genomicsdbimport/meta.yml @@ -15,7 +15,6 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 - input: - meta: type: map @@ -26,42 +25,34 @@ input: type: list description: either a list of vcf files to be used to create or update a genomicsdb, or a file that contains a map to vcf files to be used. pattern: "*.vcf.gz" - - tbi: type: list description: list of tbi files that match with the input vcf files pattern: "*.vcf.gz_tbi" - - wspace: type: file description: path to an existing genomicsdb to be used in update db mode or get intervals mode. This WILL NOT specify name of a new genomicsdb in create db mode. pattern: "/path/to/existing/gendb" - - intervalfile: type: file description: file containing the intervals to be used when creating the genomicsdb pattern: "*.interval_list" - - intervalval: type: string description: if an intervals file has not been spcified, the value enetered here will be used as an interval via the "-L" argument pattern: "example: chr1:1000-10000" - - run_intlist: type: boolean description: Specify whether to run get interval list mode, this option cannot be specified at the same time as run_updatewspace. pattern: "true/false" - - run_updatewspace: type: boolean description: Specify whether to run update genomicsdb mode, this option takes priority over run_intlist. 
pattern: "true/false" - - input_map: type: boolean description: Specify whether the vcf input is providing a list of vcf file(s) or a single file containing a map of paths to vcf files to be used to create or update a genomicsdb. pattern: "*.sample_map" - output: - genomicsdb: type: directory @@ -79,6 +70,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/genotypegvcfs/environment.yml b/modules/nf-core/gatk4/genotypegvcfs/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/genotypegvcfs/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/genotypegvcfs/main.nf b/modules/nf-core/gatk4/genotypegvcfs/main.nf index a3e3129fcf..c6c0ba501d 100644 --- a/modules/nf-core/gatk4/genotypegvcfs/main.nf +++ b/modules/nf-core/gatk4/genotypegvcfs/main.nf @@ -2,7 +2,7 @@ process GATK4_GENOTYPEGVCFS { tag "$meta.id" label 'process_high' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/genotypegvcfs/meta.yml b/modules/nf-core/gatk4/genotypegvcfs/meta.yml index d759270d6a..8f1e377eb9 100644 --- a/modules/nf-core/gatk4/genotypegvcfs/meta.yml +++ b/modules/nf-core/gatk4/genotypegvcfs/meta.yml @@ -14,7 +14,6 @@ tools: tool_dev_url: https://github.com/broadinstitute/gatk doi: "10.1158/1538-7445.AM2017-3590" licence: ["BSD-3-clause"] - input: - meta: type: map @@ -57,7 +56,6 @@ input: type: file description: dbSNP VCF index file pattern: "*.tbi" - output: - meta: type: map @@ -76,7 +74,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@santiagorevale" - "@maxulysse" +maintainers: + - "@santiagorevale" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/getpileupsummaries/environment.yml b/modules/nf-core/gatk4/getpileupsummaries/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/getpileupsummaries/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/getpileupsummaries/main.nf b/modules/nf-core/gatk4/getpileupsummaries/main.nf index f7d0f2942c..d509cdf3bb 100644 --- a/modules/nf-core/gatk4/getpileupsummaries/main.nf +++ b/modules/nf-core/gatk4/getpileupsummaries/main.nf @@ -2,7 +2,7 @@ process GATK4_GETPILEUPSUMMARIES { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/getpileupsummaries/meta.yml b/modules/nf-core/gatk4/getpileupsummaries/meta.yml index 6aaa4b9e7c..fab3c1435e 100644 --- a/modules/nf-core/gatk4/getpileupsummaries/meta.yml +++ b/modules/nf-core/gatk4/getpileupsummaries/meta.yml @@ -16,7 +16,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -70,7 +69,6 @@ input: type: file description: Index file for the germline resource. pattern: "*.vcf.gz.tbi" - output: - pileup: type: file @@ -80,6 +78,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/haplotypecaller/environment.yml b/modules/nf-core/gatk4/haplotypecaller/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/haplotypecaller/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/haplotypecaller/main.nf b/modules/nf-core/gatk4/haplotypecaller/main.nf index 9ac87518e9..fdecf5f830 100644 --- a/modules/nf-core/gatk4/haplotypecaller/main.nf +++ b/modules/nf-core/gatk4/haplotypecaller/main.nf @@ -2,7 +2,7 @@ process GATK4_HAPLOTYPECALLER { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/haplotypecaller/meta.yml b/modules/nf-core/gatk4/haplotypecaller/meta.yml index 03afe29427..f38dc37dd0 100644 --- a/modules/nf-core/gatk4/haplotypecaller/meta.yml +++ b/modules/nf-core/gatk4/haplotypecaller/meta.yml @@ -14,7 +14,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -54,7 +53,6 @@ input: - dbsnp_tbi: type: file description: VCF index of dbsnp (optional) - output: - meta: type: map @@ -77,7 +75,9 @@ output: type: file description: Assembled haplotypes and locally realigned reads pattern: "*.realigned.bam" - authors: - "@suzannejin" - "@FriederikeHanssen" +maintainers: + - "@suzannejin" + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/intervallisttobed/environment.yml b/modules/nf-core/gatk4/intervallisttobed/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttobed/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/intervallisttobed/main.nf b/modules/nf-core/gatk4/intervallisttobed/main.nf index 2537f0aa68..89772081e0 100644 --- a/modules/nf-core/gatk4/intervallisttobed/main.nf +++ b/modules/nf-core/gatk4/intervallisttobed/main.nf @@ -2,7 +2,7 @@ process GATK4_INTERVALLISTTOBED { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/intervallisttobed/meta.yml b/modules/nf-core/gatk4/intervallisttobed/meta.yml index df3705aa15..28d264dfef 100644 --- a/modules/nf-core/gatk4/intervallisttobed/meta.yml +++ b/modules/nf-core/gatk4/intervallisttobed/meta.yml @@ -13,7 +13,6 @@ tools: tool_dev_url: https://github.com/broadinstitute/gatk doi: "10.1158/1538-7445.AM2017-3590" licence: ["BSD-3-clause"] - input: - meta: type: map @@ -24,7 +23,6 @@ input: type: file description: Interval list pattern: "*.{interval,interval_list}" - output: - meta: type: map @@ -39,6 +37,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/learnreadorientationmodel/environment.yml b/modules/nf-core/gatk4/learnreadorientationmodel/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/learnreadorientationmodel/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/learnreadorientationmodel/main.nf b/modules/nf-core/gatk4/learnreadorientationmodel/main.nf index 89a6ae77b6..c4e39db74a 100644 --- a/modules/nf-core/gatk4/learnreadorientationmodel/main.nf +++ b/modules/nf-core/gatk4/learnreadorientationmodel/main.nf @@ -2,7 +2,7 @@ process GATK4_LEARNREADORIENTATIONMODEL { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/learnreadorientationmodel/meta.yml b/modules/nf-core/gatk4/learnreadorientationmodel/meta.yml index e29fee2251..4b73a51adb 100644 --- a/modules/nf-core/gatk4/learnreadorientationmodel/meta.yml +++ b/modules/nf-core/gatk4/learnreadorientationmodel/meta.yml @@ -16,7 +16,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -27,7 +26,6 @@ input: type: list description: list of f1r2 files to be used as input. pattern: "*.f1r2.tar.gz" - output: - artifactprior: type: file @@ -37,6 +35,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" +maintainers: + - "@GCJMackenzie" diff --git a/modules/nf-core/gatk4/markduplicates/environment.yml b/modules/nf-core/gatk4/markduplicates/environment.yml new file mode 100644 index 0000000000..8c0c6bb279 --- /dev/null +++ b/modules/nf-core/gatk4/markduplicates/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 + - bioconda::samtools=1.17 diff --git a/modules/nf-core/gatk4/markduplicates/main.nf b/modules/nf-core/gatk4/markduplicates/main.nf index e4c01f9a2e..564b86d3dd 100644 --- a/modules/nf-core/gatk4/markduplicates/main.nf +++ b/modules/nf-core/gatk4/markduplicates/main.nf @@ -2,7 +2,7 @@ process GATK4_MARKDUPLICATES { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0 bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0': 'biocontainers/mulled-v2-d9e7bad0f7fbc8f4458d5c3ab7ffaaf0235b59fb:f857e2d6cc88d35580d01cf39e0959a68b83c1d9-0' }" @@ -65,4 +65,21 @@ process GATK4_MARKDUPLICATES { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}.bam" + prefix_no_suffix = task.ext.prefix ? prefix.tokenize('.')[0] : "${meta.id}" + """ + touch ${prefix_no_suffix}.bam + touch ${prefix_no_suffix}.cram + touch ${prefix_no_suffix}.cram.crai + touch ${prefix_no_suffix}.bai + touch ${prefix}.metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/gatk4/markduplicates/meta.yml b/modules/nf-core/gatk4/markduplicates/meta.yml index d3e755054a..b0f09d4b84 100644 --- a/modules/nf-core/gatk4/markduplicates/meta.yml +++ b/modules/nf-core/gatk4/markduplicates/meta.yml @@ -7,16 +7,12 @@ keywords: - sort tools: - gatk4: - description: - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. + description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size. 
homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- tool_dev_url: https://github.com/broadinstitute/gatk doi: 10.1158/1538-7445.AM2017-3590 licence: ["MIT"] - input: - meta: type: map @@ -35,7 +31,6 @@ input: type: file description: Fasta index file pattern: "*.{fai}" - output: - meta: type: map @@ -66,8 +61,11 @@ output: type: file description: Duplicate metrics file generated by GATK pattern: "*.{metrics.txt}" - authors: - "@ajodeh-juma" - "@FriederikeHanssen" - "@maxulysse" +maintainers: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4/mergemutectstats/environment.yml b/modules/nf-core/gatk4/mergemutectstats/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/mergemutectstats/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/mergemutectstats/main.nf b/modules/nf-core/gatk4/mergemutectstats/main.nf index 269721cbf9..3a4913220c 100644 --- a/modules/nf-core/gatk4/mergemutectstats/main.nf +++ b/modules/nf-core/gatk4/mergemutectstats/main.nf @@ -2,7 +2,7 @@ process GATK4_MERGEMUTECTSTATS { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/mergemutectstats/meta.yml b/modules/nf-core/gatk4/mergemutectstats/meta.yml index f75833c9bc..1269525657 100644 --- a/modules/nf-core/gatk4/mergemutectstats/meta.yml +++ b/modules/nf-core/gatk4/mergemutectstats/meta.yml @@ -13,7 +13,6 @@ tools: tool_dev_url: https://github.com/broadinstitute/gatk doi: "10.1158/1538-7445.AM2017-3590" licence: ["BSD-3-clause"] - input: - meta: type: map @@ -24,7 +23,6 @@ input: type: file description: Stats file pattern: "*.{stats}" - output: - meta: type: map @@ -39,6 +37,7 @@ output: type: file description: Stats file pattern: "*.vcf.gz.stats" - authors: - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/gatk4/mergevcfs/environment.yml b/modules/nf-core/gatk4/mergevcfs/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/mergevcfs/main.nf b/modules/nf-core/gatk4/mergevcfs/main.nf index 29c08e169c..3362c2bdad 100644 --- a/modules/nf-core/gatk4/mergevcfs/main.nf +++ b/modules/nf-core/gatk4/mergevcfs/main.nf @@ -2,7 +2,7 @@ process GATK4_MERGEVCFS { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/mergevcfs/meta.yml b/modules/nf-core/gatk4/mergevcfs/meta.yml index 4e11311c17..30290a854f 100644 --- a/modules/nf-core/gatk4/mergevcfs/meta.yml +++ b/modules/nf-core/gatk4/mergevcfs/meta.yml @@ -33,7 +33,6 @@ input: type: file description: Optional Sequence Dictionary as input pattern: "*.dict" - output: - vcf: type: file @@ -43,10 +42,11 @@ output: type: file description: index files for the merged vcf files pattern: "*.tbi" - - versions: type: file description: File containing software versions pattern: "versions.yml" authors: - "@kevinmenden" +maintainers: + - "@kevinmenden" diff --git a/modules/nf-core/gatk4/mutect2/environment.yml b/modules/nf-core/gatk4/mutect2/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/mutect2/main.nf b/modules/nf-core/gatk4/mutect2/main.nf index 4e353979db..721e94f3e4 100644 --- a/modules/nf-core/gatk4/mutect2/main.nf +++ b/modules/nf-core/gatk4/mutect2/main.nf @@ -2,7 +2,7 @@ process GATK4_MUTECT2 { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/mutect2/meta.yml b/modules/nf-core/gatk4/mutect2/meta.yml index 693aeb296a..21c928ed96 100644 --- a/modules/nf-core/gatk4/mutect2/meta.yml +++ b/modules/nf-core/gatk4/mutect2/meta.yml @@ -17,7 +17,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -79,7 +78,6 @@ input: type: file description: Index for the panel of normals. pattern: "*.vcf.gz.tbi" - output: - vcf: type: file @@ -101,7 +99,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" - "@ramprasadn" +maintainers: + - "@GCJMackenzie" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/variantrecalibrator/environment.yml b/modules/nf-core/gatk4/variantrecalibrator/environment.yml new file mode 100644 index 0000000000..16241b6d94 --- /dev/null +++ b/modules/nf-core/gatk4/variantrecalibrator/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.4.0.0 diff --git a/modules/nf-core/gatk4/variantrecalibrator/main.nf b/modules/nf-core/gatk4/variantrecalibrator/main.nf index fa262e4a95..f9cd45ac94 100644 --- a/modules/nf-core/gatk4/variantrecalibrator/main.nf +++ b/modules/nf-core/gatk4/variantrecalibrator/main.nf @@ -2,7 +2,7 @@ process GATK4_VARIANTRECALIBRATOR { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" diff --git a/modules/nf-core/gatk4/variantrecalibrator/meta.yml b/modules/nf-core/gatk4/variantrecalibrator/meta.yml index 5ade4d89b1..39a415b61c 100644 --- a/modules/nf-core/gatk4/variantrecalibrator/meta.yml +++ b/modules/nf-core/gatk4/variantrecalibrator/meta.yml @@ -18,7 +18,6 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 - input: - meta: type: map @@ -80,3 +79,6 @@ output: authors: - "@GCJMackenzie" - "@nickhsmith" +maintainers: + - "@GCJMackenzie" + - "@nickhsmith" diff --git a/modules/nf-core/gatk4spark/applybqsr/environment.yml b/modules/nf-core/gatk4spark/applybqsr/environment.yml new file mode 100644 index 0000000000..821728a662 --- /dev/null +++ b/modules/nf-core/gatk4spark/applybqsr/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4-spark=4.4.0.0 diff --git a/modules/nf-core/gatk4/applybqsrspark/main.nf b/modules/nf-core/gatk4spark/applybqsr/main.nf similarity index 81% rename from modules/nf-core/gatk4/applybqsrspark/main.nf rename to modules/nf-core/gatk4spark/applybqsr/main.nf index 7a4c29bbca..170dbeeafd 100644 --- a/modules/nf-core/gatk4/applybqsrspark/main.nf +++ b/modules/nf-core/gatk4spark/applybqsr/main.nf @@ -1,9 +1,11 @@ -process GATK4_APPLYBQSR_SPARK { +process GATK4SPARK_APPLYBQSR { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.3.0.0 conda-forge::openjdk=8.0.312" - container "nf-core/gatk:4.4.0.0" + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4-spark:4.4.0.0--hdfd78af_0': + 'biocontainers/gatk4-spark:4.4.0.0--hdfd78af_0' }" input: tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) diff --git a/modules/nf-core/gatk4/applybqsrspark/meta.yml b/modules/nf-core/gatk4spark/applybqsr/meta.yml similarity index 94% rename from modules/nf-core/gatk4/applybqsrspark/meta.yml rename to modules/nf-core/gatk4spark/applybqsr/meta.yml index b253fc78ce..4904568d2e 100644 --- a/modules/nf-core/gatk4/applybqsrspark/meta.yml +++ b/modules/nf-core/gatk4spark/applybqsr/meta.yml @@ -1,12 +1,11 @@ -name: gatk4_applybqsr_spark +name: gatk4spark_applybqsr description: Apply base quality score recalibration (BQSR) to a bam file keywords: - bam - base quality score recalibration - bqsr - cram - - gatk4 - - spark + - gatk4spark tools: - gatk4: description: | @@ -17,7 +16,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -50,7 +48,6 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - output: - meta: type: map @@ -69,8 +66,11 @@ output: type: file description: Recalibrated CRAM file pattern: "*.{cram}" - authors: - "@yocra3" - "@FriederikeHanssen" - "@maxulysse" +maintainers: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4spark/baserecalibrator/environment.yml b/modules/nf-core/gatk4spark/baserecalibrator/environment.yml new file mode 100644 index 0000000000..821728a662 --- /dev/null +++ b/modules/nf-core/gatk4spark/baserecalibrator/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4-spark=4.4.0.0 diff --git a/modules/nf-core/gatk4/baserecalibratorspark/main.nf b/modules/nf-core/gatk4spark/baserecalibrator/main.nf similarity index 80% rename from 
modules/nf-core/gatk4/baserecalibratorspark/main.nf rename to modules/nf-core/gatk4spark/baserecalibrator/main.nf index 6db088bb75..ee44bf7d66 100644 --- a/modules/nf-core/gatk4/baserecalibratorspark/main.nf +++ b/modules/nf-core/gatk4spark/baserecalibrator/main.nf @@ -1,9 +1,11 @@ -process GATK4_BASERECALIBRATOR_SPARK { +process GATK4SPARK_BASERECALIBRATOR { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0 conda-forge::openjdk=8.0.312" - container "nf-core/gatk:4.4.0.0" + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4-spark:4.4.0.0--hdfd78af_0': + 'biocontainers/gatk4-spark:4.4.0.0--hdfd78af_0' }" input: tuple val(meta), path(input), path(input_index), path(intervals) diff --git a/modules/nf-core/gatk4/baserecalibratorspark/meta.yml b/modules/nf-core/gatk4spark/baserecalibrator/meta.yml similarity index 94% rename from modules/nf-core/gatk4/baserecalibratorspark/meta.yml rename to modules/nf-core/gatk4spark/baserecalibrator/meta.yml index d175ca13a1..dd334a225f 100644 --- a/modules/nf-core/gatk4/baserecalibratorspark/meta.yml +++ b/modules/nf-core/gatk4spark/baserecalibrator/meta.yml @@ -1,12 +1,11 @@ -name: gatk4_baserecalibrator_spark +name: gatk4spark_baserecalibrator description: Generate recalibration table for Base Quality Score Recalibration (BQSR) keywords: - base quality score recalibration - table - bqsr - - gatk4 + - gatk4spark - sort - - spark tools: - gatk4: description: | @@ -17,7 +16,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -55,7 +53,6 @@ input: type: file description: Tabix index of the known_sites (optional) pattern: "*.vcf.gz.tbi" - output: - meta: type: map @@ -70,8 +67,11 @@ output: type: file description: Recalibration table from 
BaseRecalibrator pattern: "*.{table}" - authors: - "@yocra3" - "@FriederikeHanssen" - "@maxulysse" +maintainers: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/gatk4spark/markduplicates/environment.yml b/modules/nf-core/gatk4spark/markduplicates/environment.yml new file mode 100644 index 0000000000..821728a662 --- /dev/null +++ b/modules/nf-core/gatk4spark/markduplicates/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4-spark=4.4.0.0 diff --git a/modules/nf-core/gatk4/markduplicatesspark/main.nf b/modules/nf-core/gatk4spark/markduplicates/main.nf similarity index 81% rename from modules/nf-core/gatk4/markduplicatesspark/main.nf rename to modules/nf-core/gatk4spark/markduplicates/main.nf index f318ed50a6..61e295c839 100644 --- a/modules/nf-core/gatk4/markduplicatesspark/main.nf +++ b/modules/nf-core/gatk4spark/markduplicates/main.nf @@ -1,9 +1,11 @@ -process GATK4_MARKDUPLICATES_SPARK { +process GATK4SPARK_MARKDUPLICATES { tag "$meta.id" label 'process_high' - conda "bioconda::gatk4=4.4.0.0 conda-forge::openjdk=8.0.312" - container "nf-core/gatk:4.4.0.0" + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4-spark:4.4.0.0--hdfd78af_0': + 'biocontainers/gatk4-spark:4.4.0.0--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/gatk4/markduplicatesspark/meta.yml b/modules/nf-core/gatk4spark/markduplicates/meta.yml similarity index 78% rename from modules/nf-core/gatk4/markduplicatesspark/meta.yml rename to modules/nf-core/gatk4spark/markduplicates/meta.yml index c9bb263a96..016a215b25 100644 --- a/modules/nf-core/gatk4/markduplicatesspark/meta.yml +++ b/modules/nf-core/gatk4spark/markduplicates/meta.yml @@ -1,23 +1,18 @@ -name: gatk4_markduplicates_spark +name: gatk4spark_markduplicates description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. keywords: - bam - - gatk4 + - gatk4spark - markduplicates - sort - - spark tools: - gatk4: - description: - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. + description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size. 
homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- tool_dev_url: https://github.com/broadinstitute/gatk doi: 10.1158/1538-7445.AM2017-3590 licence: ["MIT"] - input: - meta: type: map @@ -40,7 +35,6 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - output: - meta: type: map @@ -59,9 +53,13 @@ output: type: file description: Optional BAM index file pattern: "*.bai" - authors: - "@ajodeh-juma" - "@FriederikeHanssen" - "@maxulysse" - "@SusiJo" +maintainers: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" + - "@SusiJo" diff --git a/modules/nf-core/manta/germline/environment.yml b/modules/nf-core/manta/germline/environment.yml new file mode 100644 index 0000000000..235b1ba75d --- /dev/null +++ b/modules/nf-core/manta/germline/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::manta=1.6.0 diff --git a/modules/nf-core/manta/germline/main.nf b/modules/nf-core/manta/germline/main.nf index e052b7c9f0..5d5666c6e5 100644 --- a/modules/nf-core/manta/germline/main.nf +++ b/modules/nf-core/manta/germline/main.nf @@ -3,7 +3,7 @@ process MANTA_GERMLINE { label 'process_medium' label 'error_retry' - conda "bioconda::manta=1.6.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/manta:1.6.0--h9ee0642_1' : 'biocontainers/manta:1.6.0--h9ee0642_1' }" @@ -13,6 +13,7 @@ process MANTA_GERMLINE { tuple val(meta), path(input), path(index), path(target_bed), path(target_bed_tbi) tuple val(meta2), path(fasta) tuple val(meta3), path(fai) + path(config) output: tuple val(meta), path("*candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf @@ -31,27 +32,29 @@ process MANTA_GERMLINE { def prefix = task.ext.prefix ?: "${meta.id}" def input_files = input.collect{"--bam ${it}"}.join(' ') def options_manta = target_bed ? "--callRegions $target_bed" : "" + def config_option = config ? "--config ${config}" : "" """ - configManta.py \ - ${input_files} \ - --reference $fasta \ - --runDir manta \ - $options_manta \ + configManta.py \\ + ${input_files} \\ + ${config_option} \\ + --reference $fasta \\ + --runDir manta \\ + $options_manta \\ $args python manta/runWorkflow.py -m local -j $task.cpus - mv manta/results/variants/candidateSmallIndels.vcf.gz \ + mv manta/results/variants/candidateSmallIndels.vcf.gz \\ ${prefix}.candidate_small_indels.vcf.gz - mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \ + mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \\ ${prefix}.candidate_small_indels.vcf.gz.tbi - mv manta/results/variants/candidateSV.vcf.gz \ + mv manta/results/variants/candidateSV.vcf.gz \\ ${prefix}.candidate_sv.vcf.gz - mv manta/results/variants/candidateSV.vcf.gz.tbi \ + mv manta/results/variants/candidateSV.vcf.gz.tbi \\ ${prefix}.candidate_sv.vcf.gz.tbi - mv manta/results/variants/diploidSV.vcf.gz \ + mv manta/results/variants/diploidSV.vcf.gz \\ ${prefix}.diploid_sv.vcf.gz - mv manta/results/variants/diploidSV.vcf.gz.tbi \ + mv manta/results/variants/diploidSV.vcf.gz.tbi \\ ${prefix}.diploid_sv.vcf.gz.tbi cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/manta/germline/meta.yml b/modules/nf-core/manta/germline/meta.yml index 2eb16ada53..72ed15f8bc 100644 --- 
a/modules/nf-core/manta/germline/meta.yml +++ b/modules/nf-core/manta/germline/meta.yml @@ -16,7 +16,6 @@ tools: tool_dev_url: https://github.com/Illumina/manta doi: "10.1093/bioinformatics/btv710" licence: ["GPL v3"] - input: - meta: type: map @@ -57,7 +56,10 @@ input: type: file description: Genome reference FASTA index file pattern: "*.{fa.fai,fasta.fai}" - + - config: + type: file + description: Manta configuration file + pattern: "*.{ini,conf,config}" output: - meta: type: map @@ -92,7 +94,11 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@maxulysse" - "@ramprasadn" + - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@ramprasadn" + - "@nvnieuwk" diff --git a/modules/nf-core/manta/somatic/environment.yml b/modules/nf-core/manta/somatic/environment.yml new file mode 100644 index 0000000000..235b1ba75d --- /dev/null +++ b/modules/nf-core/manta/somatic/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::manta=1.6.0 diff --git a/modules/nf-core/manta/somatic/main.nf b/modules/nf-core/manta/somatic/main.nf index 8ff8d90a11..07511b2f0b 100644 --- a/modules/nf-core/manta/somatic/main.nf +++ b/modules/nf-core/manta/somatic/main.nf @@ -3,15 +3,16 @@ process MANTA_SOMATIC { label 'process_medium' label 'error_retry' - conda "bioconda::manta=1.6.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/manta:1.6.0--h9ee0642_1' : 'biocontainers/manta:1.6.0--h9ee0642_1' }" input: tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(target_bed), path(target_bed_tbi) - path fasta - path fai + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + path(config) output: tuple val(meta), path("*.candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf @@ -31,26 +32,53 @@ process MANTA_SOMATIC { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def options_manta = target_bed ? "--callRegions $target_bed" : "" - + def config_option = config ? "--config ${config}" : "" """ - configManta.py \ - --tumorBam $input_tumor \ - --normalBam $input_normal \ - --reference $fasta \ - --runDir manta \ - $options_manta \ + configManta.py \\ + --tumorBam $input_tumor \\ + --normalBam $input_normal \\ + --reference $fasta \\ + ${config_option} \\ + --runDir manta \\ + $options_manta \\ $args python manta/runWorkflow.py -m local -j $task.cpus - mv manta/results/variants/candidateSmallIndels.vcf.gz ${prefix}.candidate_small_indels.vcf.gz - mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi ${prefix}.candidate_small_indels.vcf.gz.tbi - mv manta/results/variants/candidateSV.vcf.gz ${prefix}.candidate_sv.vcf.gz - mv manta/results/variants/candidateSV.vcf.gz.tbi ${prefix}.candidate_sv.vcf.gz.tbi - mv manta/results/variants/diploidSV.vcf.gz ${prefix}.diploid_sv.vcf.gz - mv manta/results/variants/diploidSV.vcf.gz.tbi ${prefix}.diploid_sv.vcf.gz.tbi - mv manta/results/variants/somaticSV.vcf.gz ${prefix}.somatic_sv.vcf.gz - mv manta/results/variants/somaticSV.vcf.gz.tbi ${prefix}.somatic_sv.vcf.gz.tbi + mv manta/results/variants/candidateSmallIndels.vcf.gz \\ + ${prefix}.candidate_small_indels.vcf.gz + mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \\ + ${prefix}.candidate_small_indels.vcf.gz.tbi + mv 
manta/results/variants/candidateSV.vcf.gz \\ + ${prefix}.candidate_sv.vcf.gz + mv manta/results/variants/candidateSV.vcf.gz.tbi \\ + ${prefix}.candidate_sv.vcf.gz.tbi + mv manta/results/variants/diploidSV.vcf.gz \\ + ${prefix}.diploid_sv.vcf.gz + mv manta/results/variants/diploidSV.vcf.gz.tbi \\ + ${prefix}.diploid_sv.vcf.gz.tbi + mv manta/results/variants/somaticSV.vcf.gz \\ + ${prefix}.somatic_sv.vcf.gz + mv manta/results/variants/somaticSV.vcf.gz.tbi \\ + ${prefix}.somatic_sv.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + manta: \$( configManta.py --version ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.candidate_small_indels.vcf.gz + touch ${prefix}.candidate_small_indels.vcf.gz.tbi + touch ${prefix}.candidate_sv.vcf.gz + touch ${prefix}.candidate_sv.vcf.gz.tbi + touch ${prefix}.diploid_sv.vcf.gz + touch ${prefix}.diploid_sv.vcf.gz.tbi + touch ${prefix}.somatic_sv.vcf.gz + touch ${prefix}.somatic_sv.vcf.gz.tbi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/manta/somatic/meta.yml b/modules/nf-core/manta/somatic/meta.yml index 457d66a5fd..e658edaaa4 100644 --- a/modules/nf-core/manta/somatic/meta.yml +++ b/modules/nf-core/manta/somatic/meta.yml @@ -16,7 +16,6 @@ tools: tool_dev_url: https://github.com/Illumina/manta doi: "10.1093/bioinformatics/btv710" licence: ["GPL v3"] - input: - meta: type: map @@ -47,15 +46,28 @@ input: type: file description: Index for BED file containing target regions for variant calling pattern: "*.{bed.tbi}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: Genome reference FASTA file pattern: "*.{fa,fasta}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] - fai: type: file description: Genome reference FASTA index file pattern: "*.{fa.fai,fasta.fai}" - + - config: + type: file + description: Manta configuration file + pattern: "*.{ini,conf,config}" output: - meta: type: map @@ -98,6 +110,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@FriederikeHanssen" + - "@nvnieuwk" +maintainers: + - "@FriederikeHanssen" + - "@nvnieuwk" diff --git a/modules/nf-core/manta/tumoronly/environment.yml b/modules/nf-core/manta/tumoronly/environment.yml new file mode 100644 index 0000000000..235b1ba75d --- /dev/null +++ b/modules/nf-core/manta/tumoronly/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::manta=1.6.0 diff --git a/modules/nf-core/manta/tumoronly/main.nf b/modules/nf-core/manta/tumoronly/main.nf index e3d6ca1b14..b047299571 100644 --- a/modules/nf-core/manta/tumoronly/main.nf +++ b/modules/nf-core/manta/tumoronly/main.nf @@ -3,15 +3,16 @@ process MANTA_TUMORONLY { label 'process_medium' label 'error_retry' - conda "bioconda::manta=1.6.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/manta:1.6.0--h9ee0642_1' : 'biocontainers/manta:1.6.0--h9ee0642_1' }" input: tuple val(meta), path(input), path(input_index), path(target_bed), path(target_bed_tbi) - path fasta - path fai + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + path(config) output: tuple val(meta), path("*candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf @@ -29,27 +30,29 @@ process MANTA_TUMORONLY { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def options_manta = target_bed ? "--callRegions $target_bed" : "" + def config_option = config ? 
"--config ${config}" : "" """ - configManta.py \ - --tumorBam $input \ - --reference $fasta \ - --runDir manta \ - $options_manta \ + configManta.py \\ + --tumorBam $input \\ + --reference $fasta \\ + ${config_option} \\ + --runDir manta \\ + $options_manta \\ $args python manta/runWorkflow.py -m local -j $task.cpus - mv manta/results/variants/candidateSmallIndels.vcf.gz \ + mv manta/results/variants/candidateSmallIndels.vcf.gz \\ ${prefix}.candidate_small_indels.vcf.gz - mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \ + mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \\ ${prefix}.candidate_small_indels.vcf.gz.tbi - mv manta/results/variants/candidateSV.vcf.gz \ + mv manta/results/variants/candidateSV.vcf.gz \\ ${prefix}.candidate_sv.vcf.gz - mv manta/results/variants/candidateSV.vcf.gz.tbi \ + mv manta/results/variants/candidateSV.vcf.gz.tbi \\ ${prefix}.candidate_sv.vcf.gz.tbi - mv manta/results/variants/tumorSV.vcf.gz \ + mv manta/results/variants/tumorSV.vcf.gz \\ ${prefix}.tumor_sv.vcf.gz - mv manta/results/variants/tumorSV.vcf.gz.tbi \ + mv manta/results/variants/tumorSV.vcf.gz.tbi \\ ${prefix}.tumor_sv.vcf.gz.tbi cat <<-END_VERSIONS > versions.yml @@ -57,4 +60,20 @@ process MANTA_TUMORONLY { manta: \$( configManta.py --version ) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.candidate_small_indels.vcf.gz + touch ${prefix}.candidate_small_indels.vcf.gz.tbi + touch ${prefix}.candidate_sv.vcf.gz + touch ${prefix}.candidate_sv.vcf.gz.tbi + touch ${prefix}.tumor_sv.vcf.gz + touch ${prefix}.tumor_sv.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + manta: \$( configManta.py --version ) + END_VERSIONS + """ } diff --git a/modules/nf-core/manta/tumoronly/meta.yml b/modules/nf-core/manta/tumoronly/meta.yml index 398d684365..63556c59b4 100644 --- a/modules/nf-core/manta/tumoronly/meta.yml +++ b/modules/nf-core/manta/tumoronly/meta.yml @@ -16,7 +16,6 @@ tools: tool_dev_url: 
https://github.com/Illumina/manta doi: "10.1093/bioinformatics/btv710" licence: ["GPL v3"] - input: - meta: type: map @@ -39,15 +38,28 @@ input: type: file description: Index for BED file containing target regions for variant calling pattern: "*.{bed.tbi}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: Genome reference FASTA file pattern: "*.{fa,fasta}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: Genome reference FASTA index file pattern: "*.{fa.fai,fasta.fai}" - + - config: + type: file + description: Manta configuration file + pattern: "*.{ini,conf,config}" output: - meta: type: map @@ -82,6 +94,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@maxulysse" + - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@nvnieuwk" diff --git a/modules/nf-core/mosdepth/environment.yml b/modules/nf-core/mosdepth/environment.yml new file mode 100644 index 0000000000..f1521e08a2 --- /dev/null +++ b/modules/nf-core/mosdepth/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::mosdepth=0.3.3 diff --git a/modules/nf-core/mosdepth/main.nf b/modules/nf-core/mosdepth/main.nf index 74db3a274b..7dd13ffb51 100644 --- a/modules/nf-core/mosdepth/main.nf +++ b/modules/nf-core/mosdepth/main.nf @@ -2,7 +2,7 @@ process MOSDEPTH { tag "$meta.id" label 'process_medium' - conda "bioconda::mosdepth=0.3.3" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mosdepth:0.3.3--hdfd78af_1' : 'biocontainers/mosdepth:0.3.3--hdfd78af_1'}" diff --git a/modules/nf-core/mosdepth/meta.yml b/modules/nf-core/mosdepth/meta.yml index adf3893f3e..76263b5af9 100644 --- a/modules/nf-core/mosdepth/meta.yml +++ b/modules/nf-core/mosdepth/meta.yml @@ -107,3 +107,8 @@ authors: - "@drpatelh" - "@ramprasadn" - "@matthdsm" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@ramprasadn" + - "@matthdsm" diff --git a/modules/nf-core/msisensorpro/msisomatic/environment.yml b/modules/nf-core/msisensorpro/msisomatic/environment.yml new file mode 100644 index 0000000000..6d85f22481 --- /dev/null +++ b/modules/nf-core/msisensorpro/msisomatic/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::msisensor-pro=1.2.0 diff --git a/modules/nf-core/msisensorpro/msisomatic/main.nf b/modules/nf-core/msisensorpro/msisomatic/main.nf index 50287cb3cd..9b0084d949 100644 --- a/modules/nf-core/msisensorpro/msisomatic/main.nf +++ b/modules/nf-core/msisensorpro/msisomatic/main.nf @@ -2,7 +2,7 @@ process MSISENSORPRO_MSISOMATIC { tag "$meta.id" label 'process_low' - conda "bioconda::msisensor-pro=1.2.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/msisensor-pro:1.2.0--hfc31af2_0' : 'biocontainers/msisensor-pro:1.2.0--hfc31af2_0' }" diff --git a/modules/nf-core/msisensorpro/msisomatic/meta.yml b/modules/nf-core/msisensorpro/msisomatic/meta.yml index bcd95dc72b..a6dda66ff2 100644 --- a/modules/nf-core/msisensorpro/msisomatic/meta.yml +++ b/modules/nf-core/msisensorpro/msisomatic/meta.yml @@ -13,7 +13,6 @@ tools: tool_dev_url: https://github.com/xjtu-omics/msisensor-pro doi: "10.1016/j.gpb.2020.02.001" licence: ["Custom Licence"] - input: - meta: type: map @@ -48,7 +47,6 @@ input: type: file description: Output from msisensor-pro/scan, conaining list of msi regions pattern: "*.list" - output: - meta: type: map @@ -75,6 +73,7 @@ output: type: file description: File containing microsatellite list pattern: "*.{list}" - authors: - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/msisensorpro/scan/environment.yml b/modules/nf-core/msisensorpro/scan/environment.yml new file mode 100644 index 0000000000..6d85f22481 --- /dev/null +++ b/modules/nf-core/msisensorpro/scan/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::msisensor-pro=1.2.0 diff --git a/modules/nf-core/msisensorpro/scan/main.nf b/modules/nf-core/msisensorpro/scan/main.nf index 760d51aa22..9c7dce2596 100644 --- a/modules/nf-core/msisensorpro/scan/main.nf +++ b/modules/nf-core/msisensorpro/scan/main.nf @@ -2,7 +2,7 @@ process MSISENSORPRO_SCAN { tag "$meta.id" label 'process_low' - conda "bioconda::msisensor-pro=1.2.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/msisensor-pro:1.2.0--hfc31af2_0' : 'biocontainers/msisensor-pro:1.2.0--hfc31af2_0' }" diff --git a/modules/nf-core/msisensorpro/scan/meta.yml b/modules/nf-core/msisensorpro/scan/meta.yml index 47f4d3529f..aec743ede5 100644 --- a/modules/nf-core/msisensorpro/scan/meta.yml +++ b/modules/nf-core/msisensorpro/scan/meta.yml @@ -12,7 +12,6 @@ tools: tool_dev_url: https://github.com/xjtu-omics/msisensor-pro doi: "10.1016/j.gpb.2020.02.001" licence: ["Custom Licence"] - input: - meta: type: map @@ -23,7 +22,6 @@ input: type: file description: Reference genome pattern: "*.{fasta}" - output: - meta: type: map @@ -38,6 +36,7 @@ output: type: file description: File containing microsatellite list pattern: "*.{list}" - authors: - "@FriederikeHanssen" +maintainers: + - "@FriederikeHanssen" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 0000000000..9d0e6b2090 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.17 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 65d7dd0de1..2bbc3983fa 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.15" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : - 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : + 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee519..a61223ed7a 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -13,7 +13,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +30,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +52,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/ngscheckmate/ncm/main.nf b/modules/nf-core/ngscheckmate/ncm/main.nf new file mode 100644 index 0000000000..28fab8f096 --- /dev/null +++ b/modules/nf-core/ngscheckmate/ncm/main.nf @@ -0,0 +1,64 @@ +process NGSCHECKMATE_NCM { + label 'process_low' + + conda "bioconda::ngscheckmate=1.0.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ngscheckmate:1.0.1--py27pl5321r40hdfd78af_1': + 'biocontainers/ngscheckmate:1.0.1--py27pl5321r40hdfd78af_1' }" + + input: + tuple val(meta) , path(files) + tuple val(meta2), path(snp_bed) + tuple val(meta3), path(fasta) + + output: + tuple val(meta), path("*_corr_matrix.txt"), emit: corr_matrix + tuple val(meta), path("*_matched.txt") , emit: matched + tuple val(meta), path("*_all.txt") , emit: all + tuple val(meta), path("*.pdf") , emit: pdf, optional: true + tuple val(meta), path("*.vcf") , emit: vcf, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "$meta.id" + def unzip = files.any { it.toString().endsWith(".vcf.gz") } + """ + if $unzip + then + for VCFGZ in *.vcf.gz; do + gunzip -cdf \$VCFGZ > \$( basename \$VCFGZ .gz ); + done + fi + + NCM_REF="./"${fasta} ncm.py -d . -bed ${snp_bed} -O . -N ${prefix} $args + + if $unzip + then + rm -f *.vcf # clean up decompressed vcfs + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ngscheckmate: \$(ncm.py --help | sed "7!d;s/ *Ensuring Sample Identity v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "$meta.id" + """ + touch ${prefix}_output_corr_matrix.txt + touch ${prefix}_matched.txt + touch ${prefix}_all.txt + touch ${prefix}.pdf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ngscheckmate: \$(ncm.py --help | sed "7!d;s/ *Ensuring Sample Identity v//g") + END_VERSIONS + """ + +} diff --git a/modules/nf-core/ngscheckmate/ncm/meta.yml b/modules/nf-core/ngscheckmate/ncm/meta.yml new file mode 100644 index 0000000000..024f031485 --- /dev/null +++ b/modules/nf-core/ngscheckmate/ncm/meta.yml @@ -0,0 +1,77 @@ +name: ngscheckmate_ncm +description: Determining whether sequencing data comes from the same individual by using SNP matching. Designed for humans on vcf or bam files. 
+keywords: + - ngscheckmate + - matching + - snp +tools: + - ngscheckmate: + description: NGSCheckMate is a software package for identifying next generation sequencing (NGS) data files from the same individual, including matching between DNA and RNA. + homepage: https://github.com/parklab/NGSCheckMate + documentation: https://github.com/parklab/NGSCheckMate + tool_dev_url: https://github.com/parklab/NGSCheckMate + doi: "10.1093/nar/gkx193" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - files: + type: file + description: VCF or BAM files for each sample, in a merged channel (possibly gzipped). BAM files require an index too. + pattern: "*.{vcf,vcf.gz,bam,bai}" + - meta2: + type: map + description: | + Groovy Map containing SNP information + e.g. [ id:'test' ] + - snp_bed: + type: file + description: BED file containing the SNPs to analyse + pattern: "*.{bed}" + - meta3: + type: map + description: | + Groovy Map containing reference fasta index information + e.g. 
[ id:'test' ] + - fasta: + type: file + description: fasta file for the genome, only used in the bam mode + pattern: "*.{bed}" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + + - pdf: + type: file + description: A pdf containing a dendrogram showing how the samples match up + pattern: "*.{pdf}" + + - corr_matrix: + type: file + description: A text file containing the correlation matrix between each sample + pattern: "*corr_matrix.txt" + + - matched: + type: file + description: A txt file containing only the samples that match with each other + pattern: "*matched.txt" + + - all: + type: file + description: A txt file containing all the sample comparisons, whether they match or not + pattern: "*all.txt" + + - vcf: + type: file + description: If ran in bam mode, vcf files for each sample giving the SNP calls used + pattern: "*.vcf" + +authors: + - "@sppearce" diff --git a/modules/nf-core/samblaster/environment.yml b/modules/nf-core/samblaster/environment.yml new file mode 100644 index 0000000000..3a79aaf8ea --- /dev/null +++ b/modules/nf-core/samblaster/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samblaster=0.1.26 + - bioconda::samtools=1.16.1 diff --git a/modules/nf-core/samblaster/main.nf b/modules/nf-core/samblaster/main.nf index af1f71d6e5..4622d3691d 100644 --- a/modules/nf-core/samblaster/main.nf +++ b/modules/nf-core/samblaster/main.nf @@ -2,7 +2,7 @@ process SAMBLASTER { tag "$meta.id" label 'process_low' - conda "bioconda::samblaster=0.1.26 bioconda::samtools=1.16.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:cee56b506ceb753d4bbef7e05b81e1bfc25d937f-0' : 'biocontainers/mulled-v2-19fa9f1a5c3966b63a24166365e81da35738c5ab:cee56b506ceb753d4bbef7e05b81e1bfc25d937f-0' }" diff --git a/modules/nf-core/samblaster/meta.yml b/modules/nf-core/samblaster/meta.yml index f090c89816..ccb48320c1 100644 --- a/modules/nf-core/samblaster/meta.yml +++ b/modules/nf-core/samblaster/meta.yml @@ -19,12 +19,10 @@ tools: It can also optionally output discordant read pairs and/or split read mappings to separate SAM files, and/or unmapped/clipped reads to a separate FASTQ file. By default, samblaster reads SAM input from stdin and writes SAM to stdout. - documentation: https://github.com/GregoryFaust/samblaster tool_dev_url: https://github.com/GregoryFaust/samblaster doi: "10.1093/bioinformatics/btu314" licence: ["MIT"] - input: - meta: type: map @@ -35,7 +33,6 @@ input: type: file description: BAM file pattern: "*.bam" - output: - meta: type: map @@ -50,6 +47,7 @@ output: type: file description: Tagged or filtered BAM file pattern: "*.bam" - authors: - "@lescai" +maintainers: + - "@lescai" diff --git a/modules/nf-core/samtools/bam2fq/environment.yml b/modules/nf-core/samtools/bam2fq/environment.yml new file mode 100644 index 0000000000..04c82f1423 --- /dev/null +++ b/modules/nf-core/samtools/bam2fq/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/bam2fq/main.nf b/modules/nf-core/samtools/bam2fq/main.nf index 858f2ae3d7..016d91d992 100644 --- a/modules/nf-core/samtools/bam2fq/main.nf +++ b/modules/nf-core/samtools/bam2fq/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_BAM2FQ { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" diff --git a/modules/nf-core/samtools/bam2fq/meta.yml b/modules/nf-core/samtools/bam2fq/meta.yml index c7ad3ba834..7769046b54 100644 --- a/modules/nf-core/samtools/bam2fq/meta.yml +++ b/modules/nf-core/samtools/bam2fq/meta.yml @@ -9,11 +9,8 @@ keywords: tools: - samtools: description: Tools for dealing with SAM, BAM and CRAM files - documentation: http://www.htslib.org/doc/1.1/samtools.html - licence: ["MIT"] - input: - meta: type: map @@ -32,7 +29,6 @@ input: Note: choosing TRUE will generate 4 different files. Choosing FALSE will produce a single file, which will be interleaved in case the input contains paired reads. - output: - meta: type: map @@ -49,6 +45,7 @@ output: FASTQ files, which will be either a group of 4 files (read_1, read_2, other and singleton) or a single interleaved .fq.gz file if the user chooses not to split the reads. pattern: "*.fq.gz" - authors: - "@lescai" +maintainers: + - "@lescai" diff --git a/modules/nf-core/samtools/collatefastq/environment.yml b/modules/nf-core/samtools/collatefastq/environment.yml new file mode 100644 index 0000000000..04c82f1423 --- /dev/null +++ b/modules/nf-core/samtools/collatefastq/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/collatefastq/main.nf b/modules/nf-core/samtools/collatefastq/main.nf index 4469fafcca..537b88cca4 100644 --- a/modules/nf-core/samtools/collatefastq/main.nf +++ b/modules/nf-core/samtools/collatefastq/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_COLLATEFASTQ { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" diff --git a/modules/nf-core/samtools/collatefastq/meta.yml b/modules/nf-core/samtools/collatefastq/meta.yml index b647cba454..898cdbdad7 100644 --- a/modules/nf-core/samtools/collatefastq/meta.yml +++ b/modules/nf-core/samtools/collatefastq/meta.yml @@ -9,11 +9,8 @@ keywords: tools: - samtools: description: Tools for dealing with SAM, BAM and CRAM files - documentation: http://www.htslib.org/doc/1.1/samtools.html - licence: ["MIT"] - input: - meta: type: map @@ -69,8 +66,11 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@lescai" - "@maxulysse" - "@matthdsm" +maintainers: + - "@lescai" + - "@maxulysse" + - "@matthdsm" diff --git a/modules/nf-core/samtools/convert/environment.yml b/modules/nf-core/samtools/convert/environment.yml new file mode 100644 index 0000000000..04c82f1423 --- /dev/null +++ b/modules/nf-core/samtools/convert/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/convert/main.nf b/modules/nf-core/samtools/convert/main.nf index 29722ba787..ddf17d2dee 100644 --- a/modules/nf-core/samtools/convert/main.nf +++ b/modules/nf-core/samtools/convert/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_CONVERT { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" diff --git a/modules/nf-core/samtools/convert/meta.yml b/modules/nf-core/samtools/convert/meta.yml index 866c228fef..f8a1612fc2 100644 --- a/modules/nf-core/samtools/convert/meta.yml +++ b/modules/nf-core/samtools/convert/meta.yml @@ -50,3 +50,6 @@ output: authors: - "@FriederikeHanssen" - "@maxulysse" +maintainers: + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 0000000000..04c82f1423 --- /dev/null +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 59ed308876..3aa988224e 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_FAIDX { tag "$fasta" label 'process_single' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index 957b25e52b..e189af28fd 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -55,3 +55,7 @@ authors: - "@drpatelh" - "@ewels" - "@phue" +maintainers: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 0000000000..04c82f1423 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 0b20aa4bb8..256bd7c469 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index 8bd2fa6fb4..01a4ee03eb 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -51,3 +51,7 @@ authors: - "@drpatelh" - "@ewels" - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml new file mode 100644 index 0000000000..04c82f1423 --- /dev/null +++ b/modules/nf-core/samtools/merge/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf index b73b7cb2a9..21f785cfdc 100644 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_MERGE { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" @@ -16,6 +16,7 @@ process SAMTOOLS_MERGE { tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai") , optional:true, emit: crai path "versions.yml" , emit: versions diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml index 3a815f74b9..2e8f3dbbb5 100644 --- a/modules/nf-core/samtools/merge/meta.yml +++ b/modules/nf-core/samtools/merge/meta.yml @@ -65,9 +65,19 @@ output: type: file description: BAM index file (optional) pattern: "*.csi" + - crai: + type: file + description: CRAM index file (optional) + pattern: "*.crai" authors: - "@drpatelh" - "@yuukiiwa " - "@maxulysse" - "@FriederikeHanssen" - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/mpileup/environment.yml b/modules/nf-core/samtools/mpileup/environment.yml new file mode 100644 index 0000000000..04c82f1423 --- /dev/null +++ b/modules/nf-core/samtools/mpileup/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/mpileup/main.nf b/modules/nf-core/samtools/mpileup/main.nf index d772498415..ed102582c4 100644 --- a/modules/nf-core/samtools/mpileup/main.nf +++ b/modules/nf-core/samtools/mpileup/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_MPILEUP { tag "$meta.id" label 'process_single' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" diff --git a/modules/nf-core/samtools/mpileup/meta.yml b/modules/nf-core/samtools/mpileup/meta.yml index 7597ef41ab..13038fbc9b 100644 --- a/modules/nf-core/samtools/mpileup/meta.yml +++ b/modules/nf-core/samtools/mpileup/meta.yml @@ -50,3 +50,6 @@ output: authors: - "@drpatelh" - "@joseespinosa" +maintainers: + - "@drpatelh" + - "@joseespinosa" diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 0000000000..04c82f1423 --- /dev/null +++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index 4a2607ded0..07286ef410 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_STATS { tag "$meta.id" label 'process_single' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml index 90e6345f53..735ff8122a 100644 --- a/modules/nf-core/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -57,3 +57,7 @@ authors: - "@drpatelh" - "@FriederikeHanssen" - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test new file mode 100644 index 0000000000..e037132ca2 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,78 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + tag "modules" + tag "modules/nf-core" + tag "samtools" + tag "samtools/stats" + + test("SAMTOOLS STATS Should run without failures") { + + when { + params { + + outdir = "$outputDir" + } + process { + """ + // define inputs of the process here. 
+ input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + + ] + input[1] = [[],[]] + """ + + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + + } + + test("SAMTOOLS CRAM Should run without failures") { + + when { + params { + + outdir = "$outputDir" + } + process { + """ + // define inputs of the process here + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true) + + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + + + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + + } + + +} diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file mode 100644 index 0000000000..516b2b0192 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,64 @@ +{ + "SAMTOOLS STATS Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,6e768486d5df0257351c5419a79f9c9b" + ] + ], + "1": [ + "versions.yml:md5,08035f3409d934d47a416150884bb0df" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,6e768486d5df0257351c5419a79f9c9b" + ] + ], + "versions": [ + "versions.yml:md5,08035f3409d934d47a416150884bb0df" + ] + } + ], + "timestamp": "2023-10-18T12:12:42.998746" + }, + "SAMTOOLS CRAM Should run without failures": { + "content": [ 
+ { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,7c9ee5747793cceb9d6f4d733345641a" + ] + ], + "1": [ + "versions.yml:md5,08035f3409d934d47a416150884bb0df" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,7c9ee5747793cceb9d6f4d733345641a" + ] + ], + "versions": [ + "versions.yml:md5,08035f3409d934d47a416150884bb0df" + ] + } + ], + "timestamp": "2023-10-18T12:13:30.747222" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/tests/tags.yml b/modules/nf-core/samtools/stats/tests/tags.yml new file mode 100644 index 0000000000..7c28e30f3f --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/stats: + - modules/nf-core/samtools/stats/** diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 0000000000..04c82f1423 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.17 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index cb91facf8c..ddf3f88ae5 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -2,7 +2,7 @@ process SAMTOOLS_VIEW { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : 'biocontainers/samtools:1.17--h00cdaf9_0' }" diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index 3b05450b2e..3dadafae75 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -82,3 +82,8 @@ authors: - "@joseespinosa" - "@FriederikeHanssen" - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/sentieon/applyvarcal/environment.yml b/modules/nf-core/sentieon/applyvarcal/environment.yml new file mode 100644 index 0000000000..bcd3a8b1c6 --- /dev/null +++ b/modules/nf-core/sentieon/applyvarcal/environment.yml @@ -0,0 +1,4 @@ +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/sentieon/applyvarcal/meta.yml b/modules/nf-core/sentieon/applyvarcal/meta.yml index bbe23ea375..da92ce3436 100644 --- a/modules/nf-core/sentieon/applyvarcal/meta.yml +++ b/modules/nf-core/sentieon/applyvarcal/meta.yml @@ -17,7 +17,6 @@ tools: Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. 
homepage: https://www.sentieon.com/ documentation: https://www.sentieon.com/ - input: - meta: type: map @@ -82,3 +81,5 @@ output: pattern: "versions.yml" authors: - "@assp8200" +maintainers: + - "@assp8200" diff --git a/modules/nf-core/sentieon/bwamem/environment.yml b/modules/nf-core/sentieon/bwamem/environment.yml new file mode 100644 index 0000000000..bcd3a8b1c6 --- /dev/null +++ b/modules/nf-core/sentieon/bwamem/environment.yml @@ -0,0 +1,4 @@ +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/sentieon/bwamem/meta.yml b/modules/nf-core/sentieon/bwamem/meta.yml index 9987e8c900..0859a923ca 100644 --- a/modules/nf-core/sentieon/bwamem/meta.yml +++ b/modules/nf-core/sentieon/bwamem/meta.yml @@ -71,3 +71,5 @@ output: pattern: "versions.yml" authors: - "@asp8200" +maintainers: + - "@asp8200" diff --git a/modules/nf-core/sentieon/dedup/environment.yml b/modules/nf-core/sentieon/dedup/environment.yml new file mode 100644 index 0000000000..bcd3a8b1c6 --- /dev/null +++ b/modules/nf-core/sentieon/dedup/environment.yml @@ -0,0 +1,4 @@ +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/sentieon/dedup/meta.yml b/modules/nf-core/sentieon/dedup/meta.yml index ec0565d923..0efbb96c22 100644 --- a/modules/nf-core/sentieon/dedup/meta.yml +++ b/modules/nf-core/sentieon/dedup/meta.yml @@ -86,3 +86,5 @@ output: pattern: "versions.yml" authors: - "@asp8200" +maintainers: + - "@asp8200" diff --git a/modules/nf-core/sentieon/dnamodelapply/environment.yml b/modules/nf-core/sentieon/dnamodelapply/environment.yml new file mode 100644 index 0000000000..bcd3a8b1c6 --- /dev/null +++ b/modules/nf-core/sentieon/dnamodelapply/environment.yml @@ -0,0 +1,4 @@ +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/sentieon/dnamodelapply/main.nf b/modules/nf-core/sentieon/dnamodelapply/main.nf new file mode 100644 index 0000000000..3fe9a28f19 --- /dev/null +++ b/modules/nf-core/sentieon/dnamodelapply/main.nf @@ 
-0,0 +1,81 @@ +process SENTIEON_DNAMODELAPPLY { + tag "$meta.id" + label 'process_high' + label 'sentieon' + + secret 'SENTIEON_LICENSE_BASE64' + + container 'nf-core/sentieon:202112.06' + + input: + tuple val(meta), path(vcf), path(idx) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(ml_model) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: '' + def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: '' + + """ + if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url. + export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d) + else # Localhost license file + # The license file is stored as a nextflow variable like, for instance, this: + # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat | base64 -w 0) + export SENTIEON_LICENSE=\$(mktemp) + echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE + fi + + if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then + # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is mostly likely being run with some test-license. 
+ export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d) + export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d) + echo "Decoded and exported Sentieon test-license system environment variables" + fi + + sentieon driver \\ + -t $task.cpus \\ + -r $fasta \\ + $args \\ + --algo DNAModelApply \\ + --model $ml_model \\ + -v $vcf \\ + ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead." + } + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/dnamodelapply/meta.yml b/modules/nf-core/sentieon/dnamodelapply/meta.yml new file mode 100644 index 0000000000..bc7d323ed5 --- /dev/null +++ b/modules/nf-core/sentieon/dnamodelapply/meta.yml @@ -0,0 +1,77 @@ +name: "sentieon_dnamodelapply" +description: modifies the input VCF file by adding the MLrejected FILTER to the variants +keywords: + - dnamodelapply + - vcf + - filter + - sentieon +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. 
+ homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - vcf: + type: file + description: INPUT VCF file + pattern: "*.{vcf,vcf.gz}" + - idx: + type: file + description: Index of the input VCF file + pattern: "*.{tbi}" + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + - fai: + type: file + description: Index of the genome fasta file + pattern: "*.fai" + - ml_model: + type: file + description: machine learning model file + pattern: "*.model" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: INPUT VCF file + pattern: "*.{vcf,vcf.gz}" + - index: + type: file + description: Index of the input VCF file + pattern: "*.{tbi}" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/sentieon/dnascope/environment.yml b/modules/nf-core/sentieon/dnascope/environment.yml new file mode 100644 index 0000000000..bcd3a8b1c6 --- /dev/null +++ b/modules/nf-core/sentieon/dnascope/environment.yml @@ -0,0 +1,4 @@ +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/sentieon/dnascope/main.nf b/modules/nf-core/sentieon/dnascope/main.nf new file mode 100644 index 0000000000..6be42a1728 --- /dev/null +++ b/modules/nf-core/sentieon/dnascope/main.nf @@ -0,0 +1,100 @@ +process SENTIEON_DNASCOPE { + tag "$meta.id" + label 'process_high' + label 'sentieon' + + secret 'SENTIEON_LICENSE_BASE64' + + container 'nf-core/sentieon:202112.06' + + input: + tuple val(meta), path(bam), path(bai), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dbsnp) + tuple val(meta5), path(dbsnp_tbi) + tuple val(meta6), path(ml_model) + val(pcr_indel_model) + val(emit_vcf) + val(emit_gvcf) + + output: + tuple val(meta), path("*.unfiltered.vcf.gz") , optional:true, emit: vcf // added the substring ".unfiltered" in the filename of the vcf-files since without that the g.vcf.gz-files were ending up in the vcf-channel + tuple val(meta), path("*.unfiltered.vcf.gz.tbi"), optional:true, emit: vcf_tbi + tuple val(meta), path("*.g.vcf.gz") , optional:true, emit: gvcf // these output-files have to have the extension ".vcf.gz", otherwise the subsequent GATK-MergeVCFs will fail. 
+ tuple val(meta), path("*.g.vcf.gz.tbi") , optional:true, emit: gvcf_tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' // options for the driver + def args2 = task.ext.args2 ?: '' // options for the vcf generation + def args3 = task.ext.args3 ?: '' // options for the gvcf generation + def interval = intervals ? "--interval ${intervals}" : '' + def dbsnp_cmd = dbsnp ? "-d ${dbsnp}" : '' + def model_cmd = ml_model ? " --model ${ml_model}" : '' + def pcr_indel_model_cmd = pcr_indel_model ? " --pcr_indel_model ${pcr_indel_model}" : '' + def prefix = task.ext.prefix ?: "${meta.id}" + def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: '' + def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: '' + def vcf_cmd = "" + def gvcf_cmd = "" + def base_cmd = '--algo DNAscope ' + dbsnp_cmd + ' ' + + if (emit_vcf) { // emit_vcf can be the empty string, 'variant', 'confident' or 'all' but NOT 'gvcf' + vcf_cmd = base_cmd + args2 + ' ' + model_cmd + pcr_indel_model_cmd + ' --emit_mode ' + emit_vcf + ' ' + prefix + '.unfiltered.vcf.gz' + } + + if (emit_gvcf) { // emit_gvcf can be either true or false + gvcf_cmd = base_cmd + args3 + ' ' + model_cmd + pcr_indel_model_cmd + ' --emit_mode gvcf ' + prefix + '.g.vcf.gz' + } + + """ + if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url. 
+ export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d) + else # Localhost license file + # The license file is stored as a nextflow variable like, for instance, this: + # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat | base64 -w 0) + export SENTIEON_LICENSE=\$(mktemp) + echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE + fi + + if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then + # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is mostly likely being run with some test-license. + export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d) + export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d) + echo "Decoded and exported Sentieon test-license system environment variables" + fi + + sentieon driver $args -r $fasta -t $task.cpus -i $bam $interval $vcf_cmd $gvcf_cmd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + """ + touch ${prefix}.unfiltered.vcf.gz + touch ${prefix}.unfiltered.vcf.gz.tbi + touch ${prefix}.g.vcf.gz + touch ${prefix}.g.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/dnascope/meta.yml b/modules/nf-core/sentieon/dnascope/meta.yml new file mode 100644 index 0000000000..6b61cee828 --- /dev/null +++ b/modules/nf-core/sentieon/dnascope/meta.yml @@ -0,0 +1,120 @@ +name: sentieon_dnascope +description: DNAscope algorithm performs an improved version of Haplotype variant calling. +keywords: + - dnascope + - sentieon + - variant_calling +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ +input: + - meta: + type: map + description: | + Groovy Map containing sample information. + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file. + pattern: "*.bam" + - bai: + type: file + description: BAI file + pattern: "*.bai" + - intervals: + type: file + description: bed or interval_list file containing interval in the reference that will be used in the analysis + pattern: "*.{bed,interval_list}" + - meta2: + type: map + description: | + Groovy Map containing meta information for fasta. + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + - meta3: + type: map + description: | + Groovy Map containing meta information for fasta index. 
+ - fai: + type: file + description: Index of the genome fasta file + pattern: "*.fai" + - meta4: + type: map + description: | + Groovy Map containing meta information for dbsnp. + - dbsnp: + type: file + description: Single Nucleotide Polymorphism database (dbSNP) file + pattern: "*.vcf.gz" + - meta5: + type: map + description: | + Groovy Map containing meta information for dbsnp_tbi. + - dbsnp_tbi: + type: file + description: Index of the Single Nucleotide Polymorphism database (dbSNP) file + pattern: "*.vcf.gz.tbi" + - meta6: + type: map + description: | + Groovy Map containing meta information for machine learning model for Dnascope. + - ml_model: + type: file + description: machine learning model file + pattern: "*.model" + - ml_model: + type: file + description: machine learning model file + pattern: "*.model" + - pcr_indel_model: + type: string + description: | + Controls the option pcr_indel_model for Dnascope. + The possible options are "NONE" (used for PCR free samples), and "HOSTILE", "AGGRESSIVE" and "CONSERVATIVE". + See Sentieons documentation for further explanation. + - emit_vcf: + type: string + description: | + Controls the vcf output from Dnascope. + Possible options are "all", "confident" and "variant". + See Sentieons documentation for further explanation. + - emit_gvcf: + type: boolean + description: If true, the haplotyper will output a gvcf +output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: Compressed VCF file + pattern: "*.unfiltered.vcf.gz" + - vcf_tbi: + type: file + description: Index of VCF file + pattern: "*.unfiltered.vcf.gz.tbi" + - gvcf: + type: file + description: Compressed GVCF file + pattern: "*.g.vcf.gz" + - gvcf_tbi: + type: file + description: Index of GVCF file + pattern: "*.g.vcf.gz.tbi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/sentieon/gvcftyper/environment.yml b/modules/nf-core/sentieon/gvcftyper/environment.yml new file mode 100644 index 0000000000..bcd3a8b1c6 --- /dev/null +++ b/modules/nf-core/sentieon/gvcftyper/environment.yml @@ -0,0 +1,4 @@ +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/sentieon/gvcftyper/meta.yml b/modules/nf-core/sentieon/gvcftyper/meta.yml index c373b2c5cc..ceef173e3e 100644 --- a/modules/nf-core/sentieon/gvcftyper/meta.yml +++ b/modules/nf-core/sentieon/gvcftyper/meta.yml @@ -47,7 +47,6 @@ input: type: file description: dbSNP VCF index file pattern: "*.tbi" - output: - meta: type: map @@ -68,3 +67,5 @@ output: pattern: "versions.yml" authors: - "@asp8200" +maintainers: + - "@asp8200" diff --git a/modules/nf-core/sentieon/haplotyper/environment.yml b/modules/nf-core/sentieon/haplotyper/environment.yml new file mode 100644 index 0000000000..bcd3a8b1c6 --- /dev/null +++ b/modules/nf-core/sentieon/haplotyper/environment.yml @@ -0,0 +1,4 @@ +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/sentieon/haplotyper/meta.yml b/modules/nf-core/sentieon/haplotyper/meta.yml index 33217808f8..c248db3fca 100644 --- a/modules/nf-core/sentieon/haplotyper/meta.yml +++ b/modules/nf-core/sentieon/haplotyper/meta.yml @@ -80,3 +80,5 @@ output: pattern: "versions.yml" authors: - "@asp8200" +maintainers: + - "@asp8200" diff --git 
a/modules/nf-core/sentieon/varcal/environment.yml b/modules/nf-core/sentieon/varcal/environment.yml new file mode 100644 index 0000000000..bcd3a8b1c6 --- /dev/null +++ b/modules/nf-core/sentieon/varcal/environment.yml @@ -0,0 +1,4 @@ +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/sentieon/varcal/meta.yml b/modules/nf-core/sentieon/varcal/meta.yml index 5de37f6ad1..cad7ee106f 100644 --- a/modules/nf-core/sentieon/varcal/meta.yml +++ b/modules/nf-core/sentieon/varcal/meta.yml @@ -70,3 +70,5 @@ output: pattern: "*.versions.yml" authors: - "@asp8200" +maintainers: + - "@asp8200" diff --git a/modules/nf-core/snpeff/download/environment.yml b/modules/nf-core/snpeff/download/environment.yml new file mode 100644 index 0000000000..7d54aa5bdb --- /dev/null +++ b/modules/nf-core/snpeff/download/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::snpeff=5.1 diff --git a/modules/nf-core/snpeff/download/main.nf b/modules/nf-core/snpeff/download/main.nf index 9a3a0d3167..f1fc4cc395 100644 --- a/modules/nf-core/snpeff/download/main.nf +++ b/modules/nf-core/snpeff/download/main.nf @@ -2,7 +2,7 @@ process SNPEFF_DOWNLOAD { tag "$meta.id" label 'process_medium' - conda "bioconda::snpeff=5.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_2' : 'biocontainers/snpeff:5.1--hdfd78af_2' }" diff --git a/modules/nf-core/snpeff/download/meta.yml b/modules/nf-core/snpeff/download/meta.yml index 3c03c2f602..26e728f71a 100644 --- a/modules/nf-core/snpeff/download/meta.yml +++ b/modules/nf-core/snpeff/download/meta.yml @@ -39,3 +39,5 @@ output: pattern: "versions.yml" authors: - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/snpeff/snpeff/environment.yml b/modules/nf-core/snpeff/snpeff/environment.yml new file mode 100644 index 0000000000..7d54aa5bdb --- /dev/null +++ b/modules/nf-core/snpeff/snpeff/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::snpeff=5.1 diff --git a/modules/nf-core/snpeff/snpeff/main.nf b/modules/nf-core/snpeff/snpeff/main.nf index e92c1597e7..cc4f2ccb36 100644 --- a/modules/nf-core/snpeff/snpeff/main.nf +++ b/modules/nf-core/snpeff/snpeff/main.nf @@ -2,7 +2,7 @@ process SNPEFF_SNPEFF { tag "$meta.id" label 'process_medium' - conda "bioconda::snpeff=5.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_2' : 'biocontainers/snpeff:5.1--hdfd78af_2' }" diff --git a/modules/nf-core/snpeff/snpeff/meta.yml b/modules/nf-core/snpeff/snpeff/meta.yml index 44bada2303..0b771447ff 100644 --- a/modules/nf-core/snpeff/snpeff/meta.yml +++ b/modules/nf-core/snpeff/snpeff/meta.yml @@ -56,3 +56,5 @@ output: pattern: "versions.yml" authors: - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/strelka/germline/environment.yml b/modules/nf-core/strelka/germline/environment.yml new file mode 100644 index 0000000000..5c9c1cf81a --- /dev/null +++ b/modules/nf-core/strelka/germline/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::strelka=2.9.10 diff --git a/modules/nf-core/strelka/germline/main.nf b/modules/nf-core/strelka/germline/main.nf index 74712df16b..8f93356160 100644 --- a/modules/nf-core/strelka/germline/main.nf +++ b/modules/nf-core/strelka/germline/main.nf @@ -3,7 +3,7 @@ process STRELKA_GERMLINE { label 'process_medium' label 'error_retry' - conda "bioconda::strelka=2.9.10" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/strelka:2.9.10--h9ee0642_1' : 'biocontainers/strelka:2.9.10--h9ee0642_1' }" diff --git a/modules/nf-core/strelka/germline/meta.yml b/modules/nf-core/strelka/germline/meta.yml index c119532d16..6ee656683e 100644 --- a/modules/nf-core/strelka/germline/meta.yml +++ b/modules/nf-core/strelka/germline/meta.yml @@ -14,7 +14,6 @@ tools: tool_dev_url: https://github.com/Illumina/strelka doi: 10.1038/s41592-018-0051-x licence: ["GPL v3"] - input: - meta: type: map @@ -61,3 +60,5 @@ output: pattern: "versions.yml" authors: - "@arontommi" +maintainers: + - "@arontommi" diff --git a/modules/nf-core/strelka/somatic/environment.yml b/modules/nf-core/strelka/somatic/environment.yml new file mode 100644 index 0000000000..5c9c1cf81a --- /dev/null +++ b/modules/nf-core/strelka/somatic/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::strelka=2.9.10 diff --git a/modules/nf-core/strelka/somatic/main.nf b/modules/nf-core/strelka/somatic/main.nf index 17d700c136..dd975bd563 100644 --- a/modules/nf-core/strelka/somatic/main.nf +++ b/modules/nf-core/strelka/somatic/main.nf @@ -3,7 +3,7 @@ process STRELKA_SOMATIC { label 'process_medium' label 'error_retry' - conda "bioconda::strelka=2.9.10" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/strelka:2.9.10--h9ee0642_1' : 'biocontainers/strelka:2.9.10--h9ee0642_1' }" diff --git a/modules/nf-core/strelka/somatic/meta.yml b/modules/nf-core/strelka/somatic/meta.yml index b2a2550462..6032cd6d5f 100644 --- a/modules/nf-core/strelka/somatic/meta.yml +++ b/modules/nf-core/strelka/somatic/meta.yml @@ -14,7 +14,6 @@ tools: tool_dev_url: https://github.com/Illumina/strelka doi: 10.1038/s41592-018-0051-x licence: ["GPL v3"] - input: - meta: type: map @@ -61,7 +60,6 @@ input: type: file description: Index for BED file containing target regions for variant calling pattern: "*.{bed.tbi}" - output: - meta: type: map @@ -88,6 +86,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/svdb/merge/environment.yml b/modules/nf-core/svdb/merge/environment.yml new file mode 100644 index 0000000000..e31e204e3d --- /dev/null +++ b/modules/nf-core/svdb/merge/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::svdb=2.8.1 + - bioconda::samtools=1.16.1 diff --git a/modules/nf-core/svdb/merge/main.nf b/modules/nf-core/svdb/merge/main.nf index 0bd94499ab..0d9967dda9 100644 --- a/modules/nf-core/svdb/merge/main.nf +++ b/modules/nf-core/svdb/merge/main.nf @@ -1,7 +1,7 @@ process SVDB_MERGE { tag "$meta.id" label 'process_medium' - conda "bioconda::svdb=2.8.1 bioconda::samtools=1.16.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-c8daa8f9d69d3c5a1a4ff08283a166c18edb0000:af6f8534cd538a85ff43a2eae1b52b143e7abd05-0': 'biocontainers/mulled-v2-c8daa8f9d69d3c5a1a4ff08283a166c18edb0000:af6f8534cd538a85ff43a2eae1b52b143e7abd05-0' }" diff --git a/modules/nf-core/svdb/merge/meta.yml b/modules/nf-core/svdb/merge/meta.yml index 92a5a128ea..84265acb84 100644 --- a/modules/nf-core/svdb/merge/meta.yml +++ b/modules/nf-core/svdb/merge/meta.yml @@ -39,3 +39,5 @@ output: pattern: "*_sv_merge.vcf.gz" authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/tabix/bgziptabix/environment.yml b/modules/nf-core/tabix/bgziptabix/environment.yml new file mode 100644 index 0000000000..fdd84057b8 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tabix=1.11 diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf index d6c5a760f3..f9482690ba 100644 --- a/modules/nf-core/tabix/bgziptabix/main.nf +++ b/modules/nf-core/tabix/bgziptabix/main.nf @@ -2,7 +2,7 @@ process TABIX_BGZIPTABIX { tag "$meta.id" label 'process_single' - conda "bioconda::tabix=1.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : 'biocontainers/tabix:1.11--hdfd78af_0' }" diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml index 2761e27183..438aba4d18 100644 --- a/modules/nf-core/tabix/bgziptabix/meta.yml +++ b/modules/nf-core/tabix/bgziptabix/meta.yml @@ -48,3 +48,6 @@ output: authors: - "@maxulysse" - "@DLBPointon" +maintainers: + - "@maxulysse" + - "@DLBPointon" diff --git a/modules/nf-core/tabix/tabix/environment.yml b/modules/nf-core/tabix/tabix/environment.yml new file mode 100644 index 0000000000..fdd84057b8 --- /dev/null +++ b/modules/nf-core/tabix/tabix/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tabix=1.11 diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index 5bf332ef84..c304a8a34b 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -2,7 +2,7 @@ process TABIX_TABIX { tag "$meta.id" label 'process_single' - conda "bioconda::tabix=1.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : 'biocontainers/tabix:1.11--hdfd78af_0' }" diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml index fcc6e52463..ae5b4f439f 100644 --- a/modules/nf-core/tabix/tabix/meta.yml +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -43,3 +43,7 @@ authors: - "@joseespinosa" - "@drpatelh" - "@maxulysse" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/tiddit/sv/environment.yml b/modules/nf-core/tiddit/sv/environment.yml new file mode 100644 index 0000000000..69963f7c4b --- /dev/null +++ b/modules/nf-core/tiddit/sv/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tiddit=3.6.1 diff --git a/modules/nf-core/tiddit/sv/main.nf b/modules/nf-core/tiddit/sv/main.nf index 67a0670dbc..0f4bc7cb52 100644 --- a/modules/nf-core/tiddit/sv/main.nf +++ b/modules/nf-core/tiddit/sv/main.nf @@ -2,7 +2,7 @@ process TIDDIT_SV { tag "$meta.id" label 'process_medium' - conda "bioconda::tiddit=3.6.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/tiddit:3.6.1--py38h24c8ff8_0' : 'biocontainers/tiddit:3.6.1--py38h24c8ff8_0' }" diff --git a/modules/nf-core/tiddit/sv/meta.yml b/modules/nf-core/tiddit/sv/meta.yml index 8b41c69cf5..b13ae5cdcb 100644 --- a/modules/nf-core/tiddit/sv/meta.yml +++ b/modules/nf-core/tiddit/sv/meta.yml @@ -53,3 +53,5 @@ output: pattern: "versions.yml" authors: - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 0000000000..2d52ce64ac --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 + - conda-forge::grep=3.11 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 61461c3917..8a75bb957d 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -2,7 +2,7 @@ process UNTAR { tag "$archive" label 'process_single' - conda "conda-forge::sed=4.7 conda-forge::grep=3.11 conda-forge::tar=1.34" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml index db241a6e5e..a9a2110f55 100644 --- a/modules/nf-core/untar/meta.yml +++ b/modules/nf-core/untar/meta.yml @@ -39,3 +39,8 @@ authors: - "@drpatelh" - "@matthdsm" - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/unzip/environment.yml b/modules/nf-core/unzip/environment.yml new file mode 100644 index 0000000000..157fe95c01 --- /dev/null +++ b/modules/nf-core/unzip/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::p7zip=16.02 diff --git a/modules/nf-core/unzip/main.nf b/modules/nf-core/unzip/main.nf index cf977f1dcb..08cfc3c406 100644 --- a/modules/nf-core/unzip/main.nf +++ b/modules/nf-core/unzip/main.nf @@ -2,7 +2,7 @@ process UNZIP { tag "$archive" label 'process_single' - conda "conda-forge::p7zip=16.02" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/p7zip:16.02' : 'biocontainers/p7zip:16.02' }" diff --git a/modules/nf-core/unzip/meta.yml b/modules/nf-core/unzip/meta.yml index 2910e0fd3f..e8e377e2af 100644 --- a/modules/nf-core/unzip/meta.yml +++ b/modules/nf-core/unzip/meta.yml @@ -12,7 +12,6 @@ tools: documentation: https://sourceforge.net/projects/p7zip/ tool_dev_url: https://sourceforge.net/projects/p7zip" licence: ["LGPL-2.1-or-later"] - input: - meta: type: map @@ -23,7 +22,6 @@ input: type: file description: ZIP file pattern: "*.zip" - output: - meta: type: map @@ -38,6 +36,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/vcftools/environment.yml b/modules/nf-core/vcftools/environment.yml new file mode 100644 index 0000000000..875817e21d --- /dev/null +++ b/modules/nf-core/vcftools/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::vcftools=0.1.16 diff --git a/modules/nf-core/vcftools/main.nf b/modules/nf-core/vcftools/main.nf index cf6d296281..0153a60891 100644 --- a/modules/nf-core/vcftools/main.nf +++ b/modules/nf-core/vcftools/main.nf @@ -2,7 +2,7 @@ process VCFTOOLS { tag "$meta.id" label 'process_single' - conda "bioconda::vcftools=0.1.16" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/vcftools:0.1.16--he513fc3_4' : 'biocontainers/vcftools:0.1.16--he513fc3_4' }" diff --git a/modules/nf-core/vcftools/meta.yml b/modules/nf-core/vcftools/meta.yml index 04b786f0e6..f361db4a8f 100644 --- a/modules/nf-core/vcftools/meta.yml +++ b/modules/nf-core/vcftools/meta.yml @@ -9,7 +9,6 @@ tools: homepage: http://vcftools.sourceforge.net/ documentation: http://vcftools.sourceforge.net/man_latest.html licence: ["LGPL"] - input: - meta: type: map @@ -25,7 +24,6 @@ input: - diff_variant_file: type: file description: secondary variant file which can be used with the 'diff' suite of tools (optional) - output: - meta: type: map @@ -288,6 +286,7 @@ output: type: file description: Switch errors found between sites (optional) pattern: "*.diff.switch" - authors: - "@Mark-S-Hill" +maintainers: + - "@Mark-S-Hill" diff --git a/nextflow.config b/nextflow.config index 123a58ae90..f77b1dea0e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,7 +15,7 @@ params { // References genome = 'GATK.GRCh38' - igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false save_reference = false // Built references not saved build_only_index = false // Only build the reference indexes @@ -51,26 +51,29 @@ params { seq_platform = 'ILLUMINA' // Default platform written in read group PL field by aligner // Variant Calling - only_paired_variant_calling = false // if true, skips germline variant calling for normal-paired samples - ascat_ploidy = null // default value for ASCAT - ascat_min_base_qual = 20 // default value for ASCAT - ascat_min_counts = 10 // default value for ASCAT - ascat_min_map_qual = 35 // default value for ASCAT - ascat_purity = null // default value for ASCAT - cf_ploidy = "2" // default value for Control-FREEC - cf_coeff = 0.05 // default value for Control-FREEC - cf_contamination = 0 // default value for Control-FREEC - cf_contamination_adjustment = false // by default 
we are not using this in Control-FREEC - cf_mincov = 0 // ControlFreec default values - cf_minqual = 0 // ControlFreec default values - cf_window = null // by default we are not using this in Control-FREEC - cnvkit_reference = null // by default the reference is build from the fasta file - concatenate_vcfs = false // by default we don't concatenate the germline-vcf-files - ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2 - wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers - joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected - joint_mutect2 = false // if true, enables patient-wise multi-sample somatic variant calling - sentieon_haplotyper_emit_mode = "variant" // default value for Sentieon haplotyper + ascat_ploidy = null // default value for ASCAT + ascat_min_base_qual = 20 // default value for ASCAT + ascat_min_counts = 10 // default value for ASCAT + ascat_min_map_qual = 35 // default value for ASCAT + ascat_purity = null // default value for ASCAT + cf_ploidy = "2" // default value for Control-FREEC + cf_coeff = 0.05 // default value for Control-FREEC + cf_contamination = 0 // default value for Control-FREEC + cf_contamination_adjustment = false // by default we are not using this in Control-FREEC + cf_mincov = 0 // ControlFreec default values + cf_minqual = 0 // ControlFreec default values + cf_window = null // by default we are not using this in Control-FREEC + cnvkit_reference = null // by default the reference is build from the fasta file + concatenate_vcfs = false // by default we don't concatenate the germline-vcf-files + ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2 + joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected + joint_mutect2 = false // if true, enables patient-wise multi-sample 
somatic variant calling + only_paired_variant_calling = false // if true, skips germline variant calling for normal-paired samples + sentieon_dnascope_emit_mode = "variant" // default value for Sentieon dnascope + sentieon_dnascope_model = "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38/Annotation/Sentieon/SentieonDNAscopeModel1.1.model" + sentieon_dnascope_pcr_indel_model = "CONSERVATIVE" + sentieon_haplotyper_emit_mode = "variant" // default value for Sentieon haplotyper + wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers // Annotation dbnsfp = null // No dbnsfp processed file @@ -83,7 +86,6 @@ params { spliceai_indel_tbi = null // No spliceai_indel file index spliceai_snv = null // No spliceai_snv file spliceai_snv_tbi = null // No spliceai_snv file index - use_annotation_cache_keys = false vep_cache = 's3://annotation-cache/vep_cache/' vep_custom_args = "--everything --filter_common --per_gene --total_length --offline --format vcf" // Default arguments for VEP vep_dbnsfp = null // dbnsfp plugin disabled within VEP @@ -92,6 +94,10 @@ params { vep_out_format = "vcf" vep_spliceai = null // spliceai plugin disabled within VEP vep_spliceregion = null // spliceregion plugin disabled within VEP + vep_version = "110.0-0" // Should be updated when we update VEP, needs this to get full path to some plugins + bcftools_annotations = null // No extra annotation file + bcftools_annotations_index = null // No extra annotation file index + bcftools_header_lines = null // No header lines to be added to the VCF file // MultiQC options multiqc_config = null @@ -331,7 +337,11 @@ dag { } prov { enabled = true - file = "${params.outdir}/pipeline_info/manifest_${trace_timestamp}.bco.json" + formats { + bco { + file = "${params.outdir}/pipeline_info/manifest_${trace_timestamp}.bco.json" + } + } } manifest { @@ -341,7 +351,7 @@ manifest { description = """An open-source analysis pipeline to detect germline
or somatic variants from whole genome or targeted sequencing""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '3.3.2' + version = '3.4.0' doi = '10.12688/f1000research.16665.2, 10.1101/2023.07.19.549462, 10.5281/zenodo.3476425' } @@ -365,6 +375,9 @@ includeConfig 'conf/modules/recalibrate.config' includeConfig 'conf/modules/trimming.config' includeConfig 'conf/modules/umi.config' +//ngscheckmate +includeConfig 'conf/modules/ngscheckmate.config' + // variant calling includeConfig 'conf/modules/ascat.config' includeConfig 'conf/modules/cnvkit.config' @@ -377,11 +390,12 @@ includeConfig 'conf/modules/manta.config' includeConfig 'conf/modules/mpileup.config' includeConfig 'conf/modules/msisensorpro.config' includeConfig 'conf/modules/mutect2.config' +includeConfig 'conf/modules/sentieon_dnascope.config' +includeConfig 'conf/modules/sentieon_dnascope_joint_germline.config' includeConfig 'conf/modules/sentieon_haplotyper.config' -includeConfig 'conf/modules/sentieon_joint_germline.config' +includeConfig 'conf/modules/sentieon_haplotyper_joint_germline.config' includeConfig 'conf/modules/strelka.config' includeConfig 'conf/modules/tiddit.config' - includeConfig 'conf/modules/post_variant_calling.config' //annotate diff --git a/nextflow_schema.json b/nextflow_schema.json index 51850b6fa9..c3ff2d759e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -100,14 +100,15 @@ "fa_icon": "fas fa-toolbox", "description": "Tools to use for duplicate marking, variant calling and/or for annotation.", "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, 
mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", - "pattern": "^((ascat|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? **NB** `--skip_tools baserecalibrator_report` is actually just not saving the reports.\n> **NB** `--skip_tools markduplicates_report` does not skip `MarkDuplicates` but prevent the collection of duplicate metrics that slows down performance.", - "pattern": "^((baserecalibrator|baserecalibrator_report|bcftools|documentation|fastqc|haplotypecaller_filter|haplotyper_filter|markduplicates|markduplicates_report|mosdepth|multiqc|samtools|vcftools|versions)?,?)*(? 
[ meta + [ num_intervals:num_intervals ], cram, crai, recal, intervals ] } // RUN APPLYBQSR SPARK - GATK4_APPLYBQSR_SPARK(cram_intervals, fasta, fasta_fai, dict.map{ meta, it -> [ it ] }) + GATK4SPARK_APPLYBQSR(cram_intervals, fasta, fasta_fai, dict.map{ meta, it -> [ it ] }) // Gather the recalibrated cram files - cram_to_merge = GATK4_APPLYBQSR_SPARK.out.cram.map{ meta, cram -> [ groupKey(meta, meta.num_intervals), cram ] }.groupTuple() + cram_to_merge = GATK4SPARK_APPLYBQSR.out.cram.map{ meta, cram -> [ groupKey(meta, meta.num_intervals), cram ] }.groupTuple() // Merge and index the recalibrated cram files CRAM_MERGE_INDEX_SAMTOOLS(cram_to_merge, fasta, fasta_fai) @@ -37,7 +37,7 @@ workflow BAM_APPLYBQSR_SPARK { .map{ meta, cram, crai -> [ meta - meta.subMap('num_intervals'), cram, crai ] } // Gather versions of all tools used - versions = versions.mix(GATK4_APPLYBQSR_SPARK.out.versions) + versions = versions.mix(GATK4SPARK_APPLYBQSR.out.versions) versions = versions.mix(CRAM_MERGE_INDEX_SAMTOOLS.out.versions) emit: diff --git a/subworkflows/local/bam_baserecalibrator_spark/main.nf b/subworkflows/local/bam_baserecalibrator_spark/main.nf index 04bb491b4c..d6e12c39e0 100644 --- a/subworkflows/local/bam_baserecalibrator_spark/main.nf +++ b/subworkflows/local/bam_baserecalibrator_spark/main.nf @@ -4,8 +4,8 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { GATK4_BASERECALIBRATOR_SPARK } from '../../../modules/nf-core/gatk4/baserecalibratorspark/main' -include { GATK4_GATHERBQSRREPORTS } from '../../../modules/nf-core/gatk4/gatherbqsrreports/main' +include { GATK4SPARK_BASERECALIBRATOR } from '../../../modules/nf-core/gatk4spark/baserecalibrator/main' +include { GATK4_GATHERBQSRREPORTS } from '../../../modules/nf-core/gatk4/gatherbqsrreports/main' workflow BAM_BASERECALIBRATOR_SPARK { take: @@ -26,10 +26,10 @@ workflow BAM_BASERECALIBRATOR_SPARK { .map{ meta, cram, crai, 
intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], cram, crai, intervals ] } // RUN BASERECALIBRATOR SPARK - GATK4_BASERECALIBRATOR_SPARK(cram_intervals, fasta, fasta_fai, dict.map{ meta, it -> [ it ] }, known_sites, known_sites_tbi) + GATK4SPARK_BASERECALIBRATOR(cram_intervals, fasta, fasta_fai, dict.map{ meta, it -> [ it ] }, known_sites, known_sites_tbi) // Figuring out if there is one or more table(s) from the same sample - table_to_merge = GATK4_BASERECALIBRATOR_SPARK.out.table.map{ meta, table -> [ groupKey(meta, meta.num_intervals), table ] }.groupTuple().branch{ + table_to_merge = GATK4SPARK_BASERECALIBRATOR.out.table.map{ meta, table -> [ groupKey(meta, meta.num_intervals), table ] }.groupTuple().branch{ // Use meta.num_intervals to asses number of intervals single: it[0].num_intervals <= 1 multiple: it[0].num_intervals > 1 @@ -44,7 +44,7 @@ workflow BAM_BASERECALIBRATOR_SPARK { .map{ meta, table -> [ meta - meta.subMap('num_intervals'), table ] } // Gather versions of all tools used - versions = versions.mix(GATK4_BASERECALIBRATOR_SPARK.out.versions) + versions = versions.mix(GATK4SPARK_BASERECALIBRATOR.out.versions) versions = versions.mix(GATK4_GATHERBQSRREPORTS.out.versions) emit: diff --git a/subworkflows/local/bam_joint_calling_germline_sentieon/main.nf b/subworkflows/local/bam_joint_calling_germline_sentieon/main.nf index da8b0e60be..3f19b33d52 100644 --- a/subworkflows/local/bam_joint_calling_germline_sentieon/main.nf +++ b/subworkflows/local/bam_joint_calling_germline_sentieon/main.nf @@ -27,6 +27,7 @@ workflow BAM_JOINT_CALLING_GERMLINE_SENTIEON { resource_snps_vcf resource_snps_tbi known_snps_vqsr + variant_caller main: versions = Channel.empty() @@ -39,96 +40,107 @@ workflow BAM_JOINT_CALLING_GERMLINE_SENTIEON { BCFTOOLS_SORT(SENTIEON_GVCFTYPER.out.vcf_gz) - gvcf_to_merge = BCFTOOLS_SORT.out.vcf.map{ meta, vcf -> [ meta.subMap('num_intervals') + [ id:'joint_variant_calling', patient:'all_samples', 
variantcaller:'sentieon_haplotyper' ], vcf ]}.groupTuple() + gvcf_to_merge = BCFTOOLS_SORT.out.vcf.map{ meta, vcf -> [ meta.subMap('num_intervals') + [ id:'joint_variant_calling', patient:'all_samples', variantcaller:variant_caller ], vcf ]}.groupTuple() // Merge scatter/gather vcfs & index // Rework meta for variantscalled.csv and annotation tools MERGE_GENOTYPEGVCFS(gvcf_to_merge, dict) - vqsr_input = MERGE_GENOTYPEGVCFS.out.vcf.join(MERGE_GENOTYPEGVCFS.out.tbi, failOnDuplicate: true) - indels_resource_label = known_indels_vqsr.mix(dbsnp_vqsr).collect() - snps_resource_label = known_snps_vqsr.mix(dbsnp_vqsr).collect() - - // Recalibrate INDELs and SNPs separately - SENTIEON_VARCAL_INDEL( - vqsr_input, - resource_indels_vcf, - resource_indels_tbi, - indels_resource_label, - fasta, - fai) - - SENTIEON_VARCAL_SNP( - vqsr_input, - resource_snps_vcf, - resource_snps_tbi, - snps_resource_label, - fasta, - fai) - - //Prepare SNPs and INDELs for Sentieon's applyvarcal - // Step 1. : applyvarcal to SNPs - // Step 2. : Use SENTIEON_APPLYVARCAL_SNP output and run ApplyVQSR_INDEL. 
This avoids duplicate entries in the vcf as described here: https://hpc.nih.gov/training/gatk_tutorial/vqsr.html - - // Join results of variant recalibration into a single channel tuple - // Rework meta for variantscalled.csv and annotation tools - vqsr_input_snp = vqsr_input.join(SENTIEON_VARCAL_SNP.out.recal, failOnDuplicate: true) - .join(SENTIEON_VARCAL_SNP.out.idx, failOnDuplicate: true) - .join(SENTIEON_VARCAL_SNP.out.tranches, failOnDuplicate: true) - .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } - - SENTIEON_APPLYVARCAL_SNP( - vqsr_input_snp, - fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] }, - fai.map{ fai -> [ [ id:fai.baseName ], fai ] }) - - // Join results of SENTIEON_APPLYVARCAL_SNP and use as input for SENTIEON_APPLYVARCAL_INDEL to avoid duplicate entries in the result - // Rework meta for variantscalled.csv and annotation tools - vqsr_input_indel = SENTIEON_APPLYVARCAL_SNP.out.vcf.join(SENTIEON_APPLYVARCAL_SNP.out.tbi).map{ meta, vcf, tbi -> [ meta + [ id:'joint_variant_calling' ], vcf, tbi ]} - .join(SENTIEON_VARCAL_INDEL.out.recal, failOnDuplicate: true) - .join(SENTIEON_VARCAL_INDEL.out.idx, failOnDuplicate: true) - .join(SENTIEON_VARCAL_INDEL.out.tranches, failOnDuplicate: true) - .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } - - SENTIEON_APPLYVARCAL_INDEL( - vqsr_input_indel, - fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] }, - fai.map{ fai -> [ [ id:fai.baseName ], fai ] }) - - // The following is an ugly monster to achieve the following: - // When MERGE_GENOTYPEGVCFS and SENTIEON_APPLYVARCAL are run, then use output from SENTIEON_APPLYVARCAL - // When MERGE_GENOTYPEGVCFS and NOT SENTIEON_APPLYVARCAL, then use the output from MERGE_GENOTYPEGVCFS - - merge_vcf_for_join = MERGE_GENOTYPEGVCFS.out.vcf.map{meta, vcf -> [[id: 'joint_variant_calling'] , 
vcf]} - merge_tbi_for_join = MERGE_GENOTYPEGVCFS.out.tbi.map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]} - - // Remap for both to have the same key, if ApplyBQSR is not run, the channel is empty --> populate with empty elements - vqsr_vcf_for_join = SENTIEON_APPLYVARCAL_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]} - vqsr_tbi_for_join = SENTIEON_APPLYVARCAL_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]} - - // Join on metamap - // If both --> meta, vcf_merged, vcf_bqsr - // If not VQSR --> meta, vcf_merged, [] - // if the second is empty, use the first - genotype_vcf = merge_vcf_for_join.join(vqsr_vcf_for_join, remainder: true).map{ - meta, joint_vcf, recal_vcf -> - - vcf_out = recal_vcf ?: joint_vcf - - [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"sentieon_haplotyper"], vcf_out] - } - - genotype_index = merge_tbi_for_join.join(vqsr_tbi_for_join, remainder: true).map{ - meta, joint_tbi, recal_tbi -> - - tbi_out = recal_tbi ?: joint_tbi - [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"sentieon_haplotyper"], tbi_out] + merged_vcf = MERGE_GENOTYPEGVCFS.out.vcf.map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]} + merged_tbi = MERGE_GENOTYPEGVCFS.out.tbi.map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]} + + if (variant_caller == 'sentieon_dnascope') { + // As advised by Don Freed (Sentieon), VQSR is skipped for DnaScope + genotype_vcf = merged_vcf.map{ + meta, vcf -> [ meta + [ patient:"all_samples", variantcaller:'sentieon_dnascope'], vcf ] + } + genotype_index = merged_tbi.map{ + meta, tbi -> [ meta + [ patient:"all_samples", variantcaller:'sentieon_dnascope'], tbi ] + } + } else { + vqsr_input = MERGE_GENOTYPEGVCFS.out.vcf.join(MERGE_GENOTYPEGVCFS.out.tbi, failOnDuplicate: true) + indels_resource_label = known_indels_vqsr.mix(dbsnp_vqsr).collect() + snps_resource_label = known_snps_vqsr.mix(dbsnp_vqsr).collect() + + // 
Recalibrate INDELs and SNPs separately + SENTIEON_VARCAL_INDEL( + vqsr_input, + resource_indels_vcf, + resource_indels_tbi, + indels_resource_label, + fasta, + fai) + + SENTIEON_VARCAL_SNP( + vqsr_input, + resource_snps_vcf, + resource_snps_tbi, + snps_resource_label, + fasta, + fai) + + //Prepare SNPs and INDELs for Sentieon's applyvarcal + // Step 1. : applyvarcal to SNPs + // Step 2. : Use SENTIEON_APPLYVARCAL_SNP output and run ApplyVQSR_INDEL. This avoids duplicate entries in the vcf as described here: https://hpc.nih.gov/training/gatk_tutorial/vqsr.html + + // Join results of variant recalibration into a single channel tuple + // Rework meta for variantscalled.csv and annotation tools + vqsr_input_snp = vqsr_input.join(SENTIEON_VARCAL_SNP.out.recal, failOnDuplicate: true) + .join(SENTIEON_VARCAL_SNP.out.idx, failOnDuplicate: true) + .join(SENTIEON_VARCAL_SNP.out.tranches, failOnDuplicate: true) + .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } + + SENTIEON_APPLYVARCAL_SNP( + vqsr_input_snp, + fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] }, + fai.map{ fai -> [ [ id:fai.baseName ], fai ] }) + + // Join results of SENTIEON_APPLYVARCAL_SNP and use as input for SENTIEON_APPLYVARCAL_INDEL to avoid duplicate entries in the result + // Rework meta for variantscalled.csv and annotation tools + vqsr_input_indel = SENTIEON_APPLYVARCAL_SNP.out.vcf.join(SENTIEON_APPLYVARCAL_SNP.out.tbi).map{ meta, vcf, tbi -> [ meta + [ id:'joint_variant_calling' ], vcf, tbi ]} + .join(SENTIEON_VARCAL_INDEL.out.recal, failOnDuplicate: true) + .join(SENTIEON_VARCAL_INDEL.out.idx, failOnDuplicate: true) + .join(SENTIEON_VARCAL_INDEL.out.tranches, failOnDuplicate: true) + .map{ meta, vcf, tbi, recal, index, tranche -> [ meta + [ id:'recalibrated_joint_variant_calling' ], vcf, tbi, recal, index, tranche ] } + + SENTIEON_APPLYVARCAL_INDEL( + vqsr_input_indel, + fasta.map{ fasta -> [ [ 
id:fasta.baseName ], fasta ] }, + fai.map{ fai -> [ [ id:fai.baseName ], fai ] }) + + // The following is an ugly monster to achieve the following: + // When MERGE_GENOTYPEGVCFS and SENTIEON_APPLYVARCAL are run, then use output from SENTIEON_APPLYVARCAL + // When MERGE_GENOTYPEGVCFS and NOT SENTIEON_APPLYVARCAL, then use the output from MERGE_GENOTYPEGVCFS + + // Remap for both to have the same key, if ApplyBQSR is not run, the channel is empty --> populate with empty elements + vqsr_vcf_for_join = SENTIEON_APPLYVARCAL_INDEL.out.vcf.ifEmpty([[:], []]).map{meta, vcf -> [[id: 'joint_variant_calling'] , vcf]} + vqsr_tbi_for_join = SENTIEON_APPLYVARCAL_INDEL.out.tbi.ifEmpty([[:], []]).map{meta, tbi -> [[id: 'joint_variant_calling'] , tbi]} + + // Join on metamap + // If both --> meta, vcf_merged, vcf_bqsr + // If not VQSR --> meta, vcf_merged, [] + // if the second is empty, use the first + genotype_vcf = merged_vcf.join(vqsr_vcf_for_join, remainder: true).map{ + meta, joint_vcf, recal_vcf -> + + vcf_out = recal_vcf ?: joint_vcf + + [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"sentieon_haplotyper"], vcf_out] + } + + genotype_index = merged_tbi.join(vqsr_tbi_for_join, remainder: true).map{ + meta, joint_tbi, recal_tbi -> + + tbi_out = recal_tbi ?: joint_tbi + [[id:"joint_variant_calling", patient:"all_samples", variantcaller:"sentieon_haplotyper"], tbi_out] + } + + versions = versions.mix(SENTIEON_VARCAL_SNP.out.versions) + versions = versions.mix(SENTIEON_VARCAL_INDEL.out.versions) + versions = versions.mix(SENTIEON_APPLYVARCAL_INDEL.out.versions) } versions = versions.mix(SENTIEON_GVCFTYPER.out.versions) - versions = versions.mix(SENTIEON_VARCAL_SNP.out.versions) - versions = versions.mix(SENTIEON_VARCAL_INDEL.out.versions) - versions = versions.mix(SENTIEON_APPLYVARCAL_INDEL.out.versions) emit: genotype_index // channel: [ val(meta), [ tbi ] ] diff --git a/subworkflows/local/bam_markduplicates_spark/main.nf 
b/subworkflows/local/bam_markduplicates_spark/main.nf index e978bf5a38..8e7d0ee023 100644 --- a/subworkflows/local/bam_markduplicates_spark/main.nf +++ b/subworkflows/local/bam_markduplicates_spark/main.nf @@ -4,10 +4,10 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { CRAM_QC_MOSDEPTH_SAMTOOLS } from '../cram_qc_mosdepth_samtools/main' -include { GATK4_ESTIMATELIBRARYCOMPLEXITY } from '../../../modules/nf-core/gatk4/estimatelibrarycomplexity/main' -include { GATK4_MARKDUPLICATES_SPARK } from '../../../modules/nf-core/gatk4/markduplicatesspark/main' -include { SAMTOOLS_INDEX as INDEX_MARKDUPLICATES } from '../../../modules/nf-core/samtools/index/main' +include { CRAM_QC_MOSDEPTH_SAMTOOLS } from '../cram_qc_mosdepth_samtools/main' +include { GATK4_ESTIMATELIBRARYCOMPLEXITY } from '../../../modules/nf-core/gatk4/estimatelibrarycomplexity/main' +include { GATK4SPARK_MARKDUPLICATES } from '../../../modules/nf-core/gatk4spark/markduplicates/main' +include { SAMTOOLS_INDEX as INDEX_MARKDUPLICATES } from '../../../modules/nf-core/samtools/index/main' workflow BAM_MARKDUPLICATES_SPARK { take: @@ -22,13 +22,13 @@ workflow BAM_MARKDUPLICATES_SPARK { reports = Channel.empty() // RUN MARKUPDUPLICATES SPARK - GATK4_MARKDUPLICATES_SPARK(bam, fasta, fasta_fai, dict) + GATK4SPARK_MARKDUPLICATES(bam, fasta, fasta_fai, dict) // Index cram - INDEX_MARKDUPLICATES(GATK4_MARKDUPLICATES_SPARK.out.output) + INDEX_MARKDUPLICATES(GATK4SPARK_MARKDUPLICATES.out.output) // Join with the crai file - cram = GATK4_MARKDUPLICATES_SPARK.out.output.join(INDEX_MARKDUPLICATES.out.crai, failOnDuplicate: true, failOnMismatch: true) + cram = GATK4SPARK_MARKDUPLICATES.out.output.join(INDEX_MARKDUPLICATES.out.crai, failOnDuplicate: true, failOnMismatch: true) // QC on CRAM CRAM_QC_MOSDEPTH_SAMTOOLS(cram, fasta, intervals_bed_combined) @@ -42,7 +42,7 @@ workflow BAM_MARKDUPLICATES_SPARK { // Gather versions of 
all tools used versions = versions.mix(GATK4_ESTIMATELIBRARYCOMPLEXITY.out.versions) - versions = versions.mix(GATK4_MARKDUPLICATES_SPARK.out.versions) + versions = versions.mix(GATK4SPARK_MARKDUPLICATES.out.versions) versions = versions.mix(INDEX_MARKDUPLICATES.out.versions) versions = versions.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.versions) diff --git a/subworkflows/local/bam_variant_calling_cnvkit/main.nf b/subworkflows/local/bam_variant_calling_cnvkit/main.nf index f90a2d6233..161c2dc21a 100644 --- a/subworkflows/local/bam_variant_calling_cnvkit/main.nf +++ b/subworkflows/local/bam_variant_calling_cnvkit/main.nf @@ -5,21 +5,27 @@ // A when clause condition is defined in the conf/modules.config to determine if the module should be run include { CNVKIT_BATCH } from '../../../modules/nf-core/cnvkit/batch/main' +include { CNVKIT_GENEMETRICS } from '../../../modules/nf-core/cnvkit/genemetrics/main' workflow BAM_VARIANT_CALLING_CNVKIT { take: - cram // channel: [mandatory] cram - fasta // channel: [mandatory] fasta - fasta_fai // channel: [optional] fasta_fai - targets // channel: [mandatory] bed - reference // channel: [] cnn + cram // channel: [mandatory] meta, cram + fasta // channel: [mandatory] meta, fasta + fasta_fai // channel: [optional] meta, fasta_fai + targets // channel: [mandatory] meta, bed + reference // channel: [optional] meta, cnn main: + versions = Channel.empty() generate_pon = false CNVKIT_BATCH(cram, fasta, fasta_fai, targets, reference, generate_pon) - versions = CNVKIT_BATCH.out.versions + ch_genemetrics = CNVKIT_BATCH.out.cnr.join(CNVKIT_BATCH.out.cns).map{ meta, cnr, cns -> [meta, cnr, cns[2]]} + CNVKIT_GENEMETRICS(ch_genemetrics) + + versions = versions.mix(CNVKIT_BATCH.out.versions) + versions = versions.mix(CNVKIT_GENEMETRICS.out.versions) emit: versions // channel: [ versions.yml ] diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index 
666c7c7b6b..1f751c2263 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -2,18 +2,23 @@ // GERMLINE VARIANT CALLING // -include { BAM_JOINT_CALLING_GERMLINE_GATK } from '../bam_joint_calling_germline_gatk/main' -include { BAM_JOINT_CALLING_GERMLINE_SENTIEON } from '../bam_joint_calling_germline_sentieon/main' -include { BAM_VARIANT_CALLING_CNVKIT } from '../bam_variant_calling_cnvkit/main' -include { BAM_VARIANT_CALLING_DEEPVARIANT } from '../bam_variant_calling_deepvariant/main' -include { BAM_VARIANT_CALLING_FREEBAYES } from '../bam_variant_calling_freebayes/main' -include { BAM_VARIANT_CALLING_GERMLINE_MANTA } from '../bam_variant_calling_germline_manta/main' -include { BAM_VARIANT_CALLING_HAPLOTYPECALLER } from '../bam_variant_calling_haplotypecaller/main' -include { BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER } from '../bam_variant_calling_sentieon_haplotyper/main' -include { BAM_VARIANT_CALLING_MPILEUP } from '../bam_variant_calling_mpileup/main' -include { BAM_VARIANT_CALLING_SINGLE_STRELKA } from '../bam_variant_calling_single_strelka/main' -include { BAM_VARIANT_CALLING_SINGLE_TIDDIT } from '../bam_variant_calling_single_tiddit/main' -include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main' +include { BAM_JOINT_CALLING_GERMLINE_GATK } from '../bam_joint_calling_germline_gatk/main' +include { BAM_JOINT_CALLING_GERMLINE_SENTIEON } from '../bam_joint_calling_germline_sentieon/main' +include { BAM_VARIANT_CALLING_CNVKIT } from '../bam_variant_calling_cnvkit/main' +include { BAM_VARIANT_CALLING_DEEPVARIANT } from '../bam_variant_calling_deepvariant/main' +include { BAM_VARIANT_CALLING_FREEBAYES } from '../bam_variant_calling_freebayes/main' +include { BAM_VARIANT_CALLING_GERMLINE_MANTA } from '../bam_variant_calling_germline_manta/main' +include { BAM_VARIANT_CALLING_HAPLOTYPECALLER } from '../bam_variant_calling_haplotypecaller/main' +include { 
BAM_VARIANT_CALLING_SENTIEON_DNASCOPE } from '../bam_variant_calling_sentieon_dnascope/main' +include { BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER } from '../bam_variant_calling_sentieon_haplotyper/main' +include { BAM_VARIANT_CALLING_MPILEUP } from '../bam_variant_calling_mpileup/main' +include { BAM_VARIANT_CALLING_SINGLE_STRELKA } from '../bam_variant_calling_single_strelka/main' +include { BAM_VARIANT_CALLING_SINGLE_TIDDIT } from '../bam_variant_calling_single_tiddit/main' +include { SENTIEON_DNAMODELAPPLY } from '../../../modules/nf-core/sentieon/dnamodelapply/main' +include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main' +include { VCF_VARIANT_FILTERING_GATK as SENTIEON_HAPLOTYPER_VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main' + + workflow BAM_VARIANT_CALLING_GERMLINE_ALL { take: @@ -41,18 +46,24 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { joint_germline // boolean: [mandatory] [default: false] joint calling of germline variants skip_haplotypecaller_filter // boolean: [mandatory] [default: false] whether to filter haplotypecaller single sample vcfs sentieon_haplotyper_emit_mode // channel: [mandatory] value channel with string + sentieon_dnascope_emit_mode // channel: [mandatory] value channel with string + sentieon_dnascope_pcr_indel_model // channel: [mandatory] value channel with string + sentieon_dnascope_model // channel: [mandatory] value channel with string main: versions = Channel.empty() //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config + gvcf_sentieon_dnascope = Channel.empty() + gvcf_sentieon_haplotyper = Channel.empty() + vcf_deepvariant = Channel.empty() vcf_freebayes = Channel.empty() vcf_haplotypecaller = Channel.empty() vcf_manta = Channel.empty() vcf_mpileup = Channel.empty() + vcf_sentieon_dnascope = Channel.empty() vcf_sentieon_haplotyper = Channel.empty() - gvcf_sentieon_haplotyper = Channel.empty() vcf_strelka = 
Channel.empty() vcf_tiddit = Channel.empty() @@ -73,10 +84,10 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { BAM_VARIANT_CALLING_CNVKIT( // Remap channel to match module/subworkflow cram.map{ meta, cram, crai -> [ meta, [], cram ] }, - fasta, - fasta_fai, - intervals_bed_combined, - [] + fasta.map{ it -> [[id:it[0].baseName], it] }, + fasta_fai.map{ it -> [[id:it[0].baseName], it] }, + intervals_bed_combined.map{ it -> [[id:it[0].baseName], it] }, + [[id:"null"], []] ) versions = versions.mix(BAM_VARIANT_CALLING_CNVKIT.out.versions) } @@ -171,8 +182,8 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { if (tools.split(',').contains('manta')) { BAM_VARIANT_CALLING_GERMLINE_MANTA ( cram, - fasta, - fasta_fai, + fasta.map{ it -> [ [ id:'fasta' ], it ] }, + fasta_fai.map{ it -> [ [ id:'fasta_fai' ], it ] }, intervals_bed_gz_tbi_combined ) @@ -180,6 +191,66 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_MANTA.out.versions) } + // SENTIEON DNASCOPE + if (tools.split(',').contains('sentieon_dnascope')) { + BAM_VARIANT_CALLING_SENTIEON_DNASCOPE( + cram, + fasta, + fasta_fai, + dict, + dbsnp, + dbsnp_tbi, + dbsnp_vqsr, + intervals, + joint_germline, + sentieon_dnascope_emit_mode, + sentieon_dnascope_pcr_indel_model, + sentieon_dnascope_model) + + versions = versions.mix(BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.versions) + + vcf_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.vcf + vcf_tbi_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.vcf_tbi + gvcf_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.gvcf + gvcf_tbi_sentieon_dnascope = BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.gvcf_tbi + + if (joint_germline) { + BAM_JOINT_CALLING_GERMLINE_SENTIEON( + BAM_VARIANT_CALLING_SENTIEON_DNASCOPE.out.genotype_intervals, + fasta, + fasta_fai, + dict, + dbsnp, + dbsnp_tbi, + dbsnp_vqsr, + known_sites_indels, + known_sites_indels_tbi, + known_indels_vqsr, + known_sites_snps, + known_sites_snps_tbi, 
+ known_snps_vqsr, + 'sentieon_dnascope') + + vcf_sentieon_dnascope = BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.genotype_vcf + versions = versions.mix(BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.versions) + } else { + // If single sample track, check if filtering should be done + if (!(skip_tools && skip_tools.split(',').contains('dnascope_filter'))) { + + SENTIEON_DNAMODELAPPLY( + vcf_sentieon_dnascope.join(vcf_tbi_sentieon_dnascope, failOnDuplicate: true, failOnMismatch: true), + fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] }, + fasta_fai.map{ fai -> [ [ id:fai.baseName ], fai ] }, + sentieon_dnascope_model.map{ model -> [ [ id:model.baseName ], model ] }) + + vcf_sentieon_dnascope = SENTIEON_DNAMODELAPPLY.out.vcf + versions = versions.mix(SENTIEON_DNAMODELAPPLY.out.versions) + + } + + } + } + // SENTIEON HAPLOTYPER if (tools.split(',').contains('sentieon_haplotyper')) { BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER( @@ -215,7 +286,8 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { known_indels_vqsr, known_sites_snps, known_sites_snps_tbi, - known_snps_vqsr) + known_snps_vqsr, + 'sentieon_haplotyper') vcf_sentieon_haplotyper = BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.genotype_vcf versions = versions.mix(BAM_JOINT_CALLING_GERMLINE_SENTIEON.out.versions) @@ -224,7 +296,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { // If single sample track, check if filtering should be done if (!(skip_tools && skip_tools.split(',').contains('haplotyper_filter'))) { - VCF_VARIANT_FILTERING_GATK( + SENTIEON_HAPLOTYPER_VCF_VARIANT_FILTERING_GATK( vcf_sentieon_haplotyper.join(vcf_tbi_sentieon_haplotyper, failOnDuplicate: true, failOnMismatch: true), fasta, fasta_fai, @@ -233,9 +305,9 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { known_sites_indels.concat(known_sites_snps).flatten().unique().collect(), known_sites_indels_tbi.concat(known_sites_snps_tbi).flatten().unique().collect()) - vcf_sentieon_haplotyper = VCF_VARIANT_FILTERING_GATK.out.filtered_vcf + vcf_sentieon_haplotyper = 
SENTIEON_HAPLOTYPER_VCF_VARIANT_FILTERING_GATK.out.filtered_vcf - versions = versions.mix(VCF_VARIANT_FILTERING_GATK.out.versions) + versions = versions.mix(SENTIEON_HAPLOTYPER_VCF_VARIANT_FILTERING_GATK.out.versions) } } } @@ -270,6 +342,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { vcf_all = Channel.empty().mix( vcf_deepvariant, vcf_freebayes, + vcf_sentieon_dnascope, vcf_haplotypecaller, vcf_manta, vcf_mpileup, @@ -279,6 +352,8 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) emit: + gvcf_sentieon_dnascope + gvcf_sentieon_haplotyper vcf_all vcf_deepvariant vcf_freebayes @@ -286,8 +361,8 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { vcf_manta vcf_mpileup vcf_strelka + vcf_sentieon_dnascope vcf_sentieon_haplotyper - gvcf_sentieon_haplotyper vcf_tiddit versions diff --git a/subworkflows/local/bam_variant_calling_germline_manta/main.nf b/subworkflows/local/bam_variant_calling_germline_manta/main.nf index 36cb957d6f..d27a999a68 100644 --- a/subworkflows/local/bam_variant_calling_germline_manta/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_manta/main.nf @@ -10,8 +10,8 @@ include { MANTA_GERMLINE } from '../../../modules/nf-core/manta/germline/main' workflow BAM_VARIANT_CALLING_GERMLINE_MANTA { take: cram // channel: [mandatory] [ meta, cram, crai ] - fasta // channel: [mandatory] [ fasta ] - fasta_fai // channel: [mandatory] [ fasta_fai ] + fasta // channel: [mandatory] [ meta, fasta ] + fasta_fai // channel: [mandatory] [ meta, fasta_fai ] intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi] or [ [], []] if no intervals; intervals file contains all intervals main: @@ -25,7 +25,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_MANTA { [it[0], it[1], it[2], bed_gz, bed_tbi] } - MANTA_GERMLINE(cram_intervals, fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] }, fasta_fai.map{ fasta_fai -> [ [ id:fasta_fai.baseName ], fasta_fai ] }) + MANTA_GERMLINE(cram_intervals, fasta, fasta_fai, []) small_indels_vcf = 
MANTA_GERMLINE.out.candidate_small_indels_vcf sv_vcf = MANTA_GERMLINE.out.candidate_sv_vcf diff --git a/subworkflows/local/bam_variant_calling_mpileup/main.nf b/subworkflows/local/bam_variant_calling_mpileup/main.nf index 412ba38619..663ed6a0bc 100644 --- a/subworkflows/local/bam_variant_calling_mpileup/main.nf +++ b/subworkflows/local/bam_variant_calling_mpileup/main.nf @@ -4,10 +4,10 @@ // For all modules here: // A when clause condition is defined in the conf/modules.config to determine if the module should be run -include { CAT_CAT as CAT_MPILEUP } from '../../../modules/nf-core/cat/cat/main' -include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup/main' -include { SAMTOOLS_MPILEUP } from '../../../modules/nf-core/samtools/mpileup/main' -include { GATK4_MERGEVCFS as MERGE_BCFTOOLS_MPILEUP } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup/main' +include { CAT_CAT as CAT_MPILEUP } from '../../../modules/nf-core/cat/cat/main' +include { GATK4_MERGEVCFS as MERGE_BCFTOOLS_MPILEUP } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { SAMTOOLS_MPILEUP } from '../../../modules/nf-core/samtools/mpileup/main' workflow BAM_VARIANT_CALLING_MPILEUP { take: @@ -26,7 +26,7 @@ workflow BAM_VARIANT_CALLING_MPILEUP { // Run, if --tools mpileup keep_bcftools_mpileup = false - BCFTOOLS_MPILEUP(cram_intervals, fasta, keep_bcftools_mpileup) + BCFTOOLS_MPILEUP(cram_intervals, fasta.map{ it -> [[id:it[0].baseName], it] }, keep_bcftools_mpileup) //Only run, if --tools ControlFreec SAMTOOLS_MPILEUP(cram_intervals, fasta) diff --git a/subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf b/subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf new file mode 100644 index 0000000000..9eea9b2d61 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf @@ -0,0 +1,157 @@ +// +// SENTIEON HAPLOTYPER germline variant calling +// +// 
For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run + +include { GATK4_MERGEVCFS as MERGE_SENTIEON_DNASCOPE_GVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MERGEVCFS as MERGE_SENTIEON_DNASCOPE_VCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { SENTIEON_DNASCOPE } from '../../../modules/nf-core/sentieon/dnascope/main' + +workflow BAM_VARIANT_CALLING_SENTIEON_DNASCOPE { + take: + cram // channel: [mandatory] [ meta, cram, crai, interval.bed ] + fasta // channel: [mandatory] + fasta_fai // channel: [mandatory] + dict // channel: [mandatory] + dbsnp // channel: [optional] + dbsnp_tbi // channel: [optional] + dbsnp_vqsr // channel: [optional] + intervals // channel: [mandatory] [ intervals, num_intervals ] or [ [], 0 ] if no intervals + joint_germline // boolean: [mandatory] [default: false] joint calling of germline variants + sentieon_dnascope_emit_mode // string + sentieon_dnascope_pcr_indel_model // string + sentieon_dnascope_model // channel + + main: + versions = Channel.empty() + + gvcf = Channel.empty() + vcf = Channel.empty() + genotype_intervals = Channel.empty() + + // Combine cram and intervals for spread and gather strategy + cram_intervals_for_sentieon = cram.combine(intervals) + // Move num_intervals to meta map + .map{ meta, cram, crai, intervals, num_intervals -> [ + meta + [ + num_intervals:num_intervals, + intervals_name:intervals.simpleName, + variantcaller:'sentieon_dnascope'], + cram, + crai, + intervals + ] + } + + emit_mode_items = sentieon_dnascope_emit_mode.split(',').each{ it -> it.toLowerCase().trim() } + lst = emit_mode_items - 'gvcf' + emit_vcf = lst.size() > 0 ? 
lst[0] : '' + + SENTIEON_DNASCOPE( + cram_intervals_for_sentieon, + fasta.map{it -> [[:], it]}, + fasta_fai.map{it -> [[:], it]}, + dbsnp.map{it -> [[:], it]}, + dbsnp_tbi.map{it -> [[:], it]}, + sentieon_dnascope_model.map{it -> [[:], it]}, + sentieon_dnascope_pcr_indel_model, + emit_vcf, + emit_mode_items.any{ it.equals('gvcf') }) + + if (joint_germline) { + genotype_intervals = SENTIEON_DNASCOPE.out.gvcf + .join(SENTIEON_DNASCOPE.out.gvcf_tbi, failOnMismatch: true) + .join(cram_intervals_for_sentieon, failOnMismatch: true) + .map{ meta, gvcf, tbi, cram, crai, intervals -> [ meta, gvcf, tbi, intervals ] } + } + + // Figure out if using intervals or no_intervals + dnascope_vcf_branch = SENTIEON_DNASCOPE.out.vcf.map{ + meta, vcf -> [ meta - meta.subMap('interval_name'), vcf] + } + .branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + dnascope_vcf_tbi_branch = SENTIEON_DNASCOPE.out.vcf_tbi.map{ + meta, vcf_tbi -> [ meta - meta.subMap('interval_name'), vcf_tbi] + } + .branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + haplotyper_gvcf_branch = SENTIEON_DNASCOPE.out.gvcf.map{ + meta, gvcf -> [ meta - meta.subMap('interval_name'), gvcf] + } + .branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + haplotyper_gvcf_tbi_branch = SENTIEON_DNASCOPE.out.gvcf_tbi.map{ + meta, gvcf_tbi -> [ meta - meta.subMap('interval_name'), gvcf_tbi] + } + .branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } + + vcfs_for_merging = dnascope_vcf_branch.intervals.map{ + meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ]} + + vcfs_for_merging = vcfs_for_merging.map{ + meta, vcf -> [ + meta - meta.subMap('intervals_name'), + vcf]}.groupTuple() + + // VCFs + // Only when using intervals + MERGE_SENTIEON_DNASCOPE_VCFS(vcfs_for_merging, dict) + + dnascope_vcf = Channel.empty().mix( + MERGE_SENTIEON_DNASCOPE_VCFS.out.vcf, + 
dnascope_vcf_branch.no_intervals) + + haplotyper_tbi = Channel.empty().mix( + MERGE_SENTIEON_DNASCOPE_VCFS.out.tbi, + dnascope_vcf_tbi_branch.no_intervals) + + // Remove no longer necessary field: num_intervals + vcf = dnascope_vcf.map{ meta, vcf -> [ meta - meta.subMap('num_intervals'), vcf ] } + vcf_tbi = haplotyper_tbi.map{ meta, tbi -> [ meta - meta.subMap('num_intervals'), tbi ] } + + // GVFs + // Only when using intervals + gvcfs_for_merging = haplotyper_gvcf_branch.intervals.map{ + meta, vcf -> [groupKey(meta, meta.num_intervals), vcf]} + + gvcfs_for_merging = gvcfs_for_merging.map{ + meta, vcf -> [ meta - meta.subMap('intervals_name'), vcf ] + }.groupTuple() + + MERGE_SENTIEON_DNASCOPE_GVCFS(gvcfs_for_merging, dict) + + gvcf = Channel.empty().mix( + MERGE_SENTIEON_DNASCOPE_GVCFS.out.vcf, + haplotyper_gvcf_branch.no_intervals) + + gvcf_tbi = Channel.empty().mix( + MERGE_SENTIEON_DNASCOPE_GVCFS.out.tbi, + haplotyper_gvcf_tbi_branch.no_intervals) + + versions = versions.mix(SENTIEON_DNASCOPE.out.versions) + versions = versions.mix(MERGE_SENTIEON_DNASCOPE_VCFS.out.versions) + versions = versions.mix(MERGE_SENTIEON_DNASCOPE_GVCFS.out.versions) + + emit: + versions + vcf + vcf_tbi + gvcf + gvcf_tbi + genotype_intervals // For joint genotyping + +} diff --git a/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf b/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf index cf9bcf9e21..4b280d271c 100644 --- a/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf +++ b/subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf @@ -42,7 +42,8 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER { ] } - emit_mode_items = sentieon_haplotyper_emit_mode.split(',') + + emit_mode_items = sentieon_haplotyper_emit_mode.split(',').each{ it -> it.toLowerCase().trim() } lst = emit_mode_items - 'gvcf' emit_vcf = lst.size() > 0 ? 
lst[0] : '' @@ -53,7 +54,7 @@ workflow BAM_VARIANT_CALLING_SENTIEON_HAPLOTYPER { dbsnp, dbsnp_tbi, emit_vcf, - emit_mode_items.contains('gvcf')) + emit_mode_items.any{ it.equals('gvcf') }) if (joint_germline) { genotype_intervals = SENTIEON_HAPLOTYPER.out.gvcf diff --git a/subworkflows/local/bam_variant_calling_somatic_all/main.nf b/subworkflows/local/bam_variant_calling_somatic_all/main.nf index af2b8c1e2f..f561ea420c 100644 --- a/subworkflows/local/bam_variant_calling_somatic_all/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_all/main.nf @@ -118,10 +118,10 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { BAM_VARIANT_CALLING_CNVKIT( // Remap channel to match module/subworkflow cram.map{ meta, normal_cram, normal_crai, tumor_cram, tumor_crai -> [ meta, tumor_cram, normal_cram ] }, - fasta, - fasta_fai, - intervals_bed_combined, - [] + fasta.map{ it -> [[id:it[0].baseName], it] }, + fasta_fai.map{ it -> [[id:it[0].baseName], it] }, + intervals_bed_combined.map{ it -> [[id:it[0].baseName], it] }, + [[id:"null"], []] ) versions = versions.mix(BAM_VARIANT_CALLING_CNVKIT.out.versions) @@ -145,8 +145,8 @@ workflow BAM_VARIANT_CALLING_SOMATIC_ALL { if (tools.split(',').contains('manta')) { BAM_VARIANT_CALLING_SOMATIC_MANTA( cram, - fasta, - fasta_fai, + fasta.map{ it -> [ [ id:'fasta' ], it ] }, + fasta_fai.map{ it -> [ [ id:'fasta_fai' ], it ] }, intervals_bed_gz_tbi_combined ) diff --git a/subworkflows/local/bam_variant_calling_somatic_manta/main.nf b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf index 7eb5e6687d..f6720c5406 100644 --- a/subworkflows/local/bam_variant_calling_somatic_manta/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_manta/main.nf @@ -9,8 +9,8 @@ include { MANTA_SOMATIC } from '../../../modules/nf-core/manta/somatic/main' workflow BAM_VARIANT_CALLING_SOMATIC_MANTA { take: cram // channel: [mandatory] [ meta, cram1, crai1, cram2, crai2 ] - fasta // channel: [mandatory] [ fasta ] - fasta_fai // channel: 
[mandatory] [ fasta_fai ] + fasta // channel: [mandatory] [ meta, fasta ] + fasta_fai // channel: [mandatory] [ meta, fasta_fai ] intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi ] or [ [], [] ] if no intervals main: @@ -24,7 +24,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MANTA { [it[0], it[1], it[2], it[3], it[4], bed_gz, bed_tbi] } - MANTA_SOMATIC(cram_intervals, fasta, fasta_fai) + MANTA_SOMATIC(cram_intervals, fasta, fasta_fai, []) candidate_small_indels_vcf = MANTA_SOMATIC.out.candidate_small_indels_vcf candidate_small_indels_vcf_tbi = MANTA_SOMATIC.out.candidate_small_indels_vcf_tbi diff --git a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf index 81881f26aa..3f5b7d53ca 100644 --- a/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf +++ b/subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf @@ -31,7 +31,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 { versions = Channel.empty() //If no germline resource is provided, then create an empty channel to avoid GetPileupsummaries from being run - germline_resource_pileup = germline_resource_tbi ? germline_resource : Channel.empty() + germline_resource_pileup = (germline_resource && germline_resource_tbi) ? 
germline_resource : Channel.empty() germline_resource_pileup_tbi = germline_resource_tbi ?: Channel.empty() // Combine input and intervals for spread and gather strategy @@ -44,8 +44,8 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 { // Separate normal cram files // Extract tumor cram files ch_cram = input.multiMap{ meta, cram, crai -> - normal: [ meta - meta.subMap('normal_id', 'tumor_id') , cram[0], crai[0] ] - tumor: [ meta - meta.subMap('normal_id', 'tumor_id') , cram[1], crai[1] ] + normal: [ meta - meta.subMap('tumor_id') , cram[0], crai[0] ] + tumor: [ meta - meta.subMap('tumor_id') , cram[1], crai[1] ] } // Remove duplicates from normal channel and merge normal and tumor crams by patient @@ -102,10 +102,18 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 { MERGEMUTECTSTATS(stats_to_merge) // Mix intervals and no_intervals channels together and remove no longer necessary field: normal_id, tumor_id, num_intervals - vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> [ meta - meta.subMap('num_intervals'), vcf ]} - tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals).map{ meta, tbi -> [ meta - meta.subMap('num_intervals'), tbi ]} - stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals).map{ meta, stats -> [ meta - meta.subMap('num_intervals'), stats ]} - f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals).map{ meta, f1r2 -> [ meta - meta.subMap('num_intervals'), f1r2 ]} + vcf = Channel.empty().mix(MERGE_MUTECT2.out.vcf, vcf_branch.no_intervals).map{ meta, vcf -> + [ joint_mutect2 ? meta - meta.subMap('normal_id', 'num_intervals') : meta - meta.subMap('num_intervals') , vcf ] + } + tbi = Channel.empty().mix(MERGE_MUTECT2.out.tbi, tbi_branch.no_intervals).map{ meta, tbi-> + [ joint_mutect2 ? 
meta - meta.subMap('normal_id', 'num_intervals') : meta - meta.subMap('num_intervals'), tbi ] + } + stats = Channel.empty().mix(MERGEMUTECTSTATS.out.stats, stats_branch.no_intervals).map{ meta, stats -> + [ joint_mutect2 ? meta - meta.subMap('normal_id', 'num_intervals') : meta - meta.subMap('num_intervals'), stats ] + } + f1r2 = Channel.empty().mix(f1r2_to_merge, f1r2_branch.no_intervals).map{ meta, f1r2-> + [ joint_mutect2 ? meta - meta.subMap('normal_id', 'num_intervals') : meta - meta.subMap('num_intervals') , f1r2 ] + } // Generate artifactpriors using learnreadorientationmodel on the f1r2 output of mutect2 LEARNREADORIENTATIONMODEL(f1r2) diff --git a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf index d92b263b8a..31d968a245 100644 --- a/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_all/main.nf @@ -85,10 +85,10 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { BAM_VARIANT_CALLING_CNVKIT ( // Remap channel to match module/subworkflow cram.map{ meta, cram, crai -> [ meta, cram, [] ] }, - fasta, - fasta_fai, - [], - cnvkit_reference + fasta.map{ it -> [[id:it[0].baseName], it] }, + fasta_fai.map{ it -> [[id:it[0].baseName], it] }, + [[id:"null"], []], + cnvkit_reference.map{ it -> [[id:it[0].baseName], it] } ) versions = versions.mix(BAM_VARIANT_CALLING_CNVKIT.out.versions) @@ -141,9 +141,8 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_ALL { BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA( cram, // Remap channel to match module/subworkflow - dict.map{ it -> [ [ id:'dict' ], it ] }, - fasta, - fasta_fai, + fasta.map{ it -> [ [ id:'fasta' ], it ] }, + fasta_fai.map{ it -> [ [ id:'fasta_fai' ], it ] }, intervals_bed_gz_tbi_combined ) diff --git a/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf b/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf index 10045c7356..38f5d4366c 100644 --- 
a/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf +++ b/subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf @@ -10,9 +10,8 @@ include { MANTA_TUMORONLY } from '../../../modules/nf-core/manta/tumoronly/main' workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA { take: cram // channel: [mandatory] [ meta, cram, crai ] - dict // channel: [optional] [ meta, dict ] - fasta // channel: [mandatory] [ fasta ] - fasta_fai // channel: [mandatory] [ fasta_fai ] + fasta // channel: [mandatory] [ meta, fasta ] + fasta_fai // channel: [mandatory] [ meta, fasta_fai ] intervals // channel: [mandatory] [ interval.bed.gz, interval.bed.gz.tbi ] or [ [], [] ] if no intervals main: @@ -26,7 +25,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MANTA { [it[0], it[1], it[2], bed_gz, bed_tbi] } - MANTA_TUMORONLY(cram_intervals, fasta, fasta_fai) + MANTA_TUMORONLY(cram_intervals, fasta, fasta_fai, []) small_indels_vcf = MANTA_TUMORONLY.out.candidate_small_indels_vcf candidate_sv_vcf = MANTA_TUMORONLY.out.candidate_sv_vcf diff --git a/subworkflows/local/cram_sampleqc/main.nf b/subworkflows/local/cram_sampleqc/main.nf new file mode 100644 index 0000000000..3e3b06f008 --- /dev/null +++ b/subworkflows/local/cram_sampleqc/main.nf @@ -0,0 +1,30 @@ +include { BAM_NGSCHECKMATE } from '../../../subworkflows/nf-core/bam_ngscheckmate/main' + +workflow CRAM_SAMPLEQC { + + take: + ch_cram // channel: [ val(meta), cram, crai ] + ngscheckmate_bed // channel: [ ngscheckmate_bed ] + fasta // channel: [ fasta ] + + main: + + ch_versions = Channel.empty() + + ch_ngscheckmate_bed = ngscheckmate_bed.map{bed -> [[id: "ngscheckmate"], bed]} + + ch_fasta = fasta.map{fasta -> [[id: "genome"], fasta]} + + BAM_NGSCHECKMATE ( ch_cram.map{meta, cram, crai -> [meta, cram]}, ch_ngscheckmate_bed, ch_fasta) + ch_versions = ch_versions.mix(BAM_NGSCHECKMATE.out.versions.first()) + + emit: + corr_matrix = BAM_NGSCHECKMATE.out.corr_matrix // channel: [ meta, corr_matrix ] + matched = 
BAM_NGSCHECKMATE.out.matched // channel: [ meta, matched ] + all = BAM_NGSCHECKMATE.out.all // channel: [ meta, all ] + vcf = BAM_NGSCHECKMATE.out.vcf // channel: [ meta, vcf ] + pdf = BAM_NGSCHECKMATE.out.pdf // channel: [ meta, pdf ] + + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/local/prepare_cache/main.nf b/subworkflows/local/download_cache_snpeff_vep/main.nf similarity index 94% rename from subworkflows/local/prepare_cache/main.nf rename to subworkflows/local/download_cache_snpeff_vep/main.nf index 601a453720..f9f776db7c 100644 --- a/subworkflows/local/prepare_cache/main.nf +++ b/subworkflows/local/download_cache_snpeff_vep/main.nf @@ -1,5 +1,5 @@ // -// PREPARE CACHE +// DOWNLOAD CACHE SNPEFF VEP // // Initialize channels based on params or indices that were just built @@ -11,7 +11,7 @@ include { ENSEMBLVEP_DOWNLOAD } from '../../../modules/nf-core/ensemblvep/download/main' include { SNPEFF_DOWNLOAD } from '../../../modules/nf-core/snpeff/download/main' -workflow PREPARE_CACHE { +workflow DOWNLOAD_CACHE_SNPEFF_VEP { take: ensemblvep_info snpeff_info diff --git a/subworkflows/local/initialize_annotation_cache/main.nf b/subworkflows/local/initialize_annotation_cache/main.nf new file mode 100644 index 0000000000..d2c6fcb7d6 --- /dev/null +++ b/subworkflows/local/initialize_annotation_cache/main.nf @@ -0,0 +1,57 @@ +// +// INITIALIZE ANNOTATION CACHE +// + +// Initialize channels based on params or indices that were just built +// For all modules here: +// A when clause condition is defined in the conf/modules.config to determine if the module should be run +// Condition is based on params.step and params.tools +// If and extra condition exists, it's specified in comments + +workflow INITIALIZE_ANNOTATION_CACHE { + take: + snpeff_enabled + snpeff_cache + snpeff_genome + snpeff_db + vep_enabled + vep_cache + vep_species + vep_cache_version + vep_genome + help_message + + main: + if (snpeff_enabled) { + def 
snpeff_annotation_cache_key = (snpeff_cache == "s3://annotation-cache/snpeff_cache/") ? "${snpeff_genome}.${snpeff_db}/" : "" + def snpeff_cache_dir = "${snpeff_annotation_cache_key}${snpeff_genome}.${snpeff_db}" + def snpeff_cache_path_full = file("$snpeff_cache/$snpeff_cache_dir", type: 'dir') + if ( !snpeff_cache_path_full.exists() || !snpeff_cache_path_full.isDirectory() ) { + if (snpeff_cache == "s3://annotation-cache/snpeff_cache/") { + error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.") + } else { + error("Path provided with SnpEff cache is invalid.\nMake sure there is a directory named ${snpeff_cache_dir} in ${snpeff_cache}.\n${help_message}") + } + } + snpeff_cache = Channel.fromPath(file("${snpeff_cache}/${snpeff_annotation_cache_key}"), checkIfExists: true).collect() + .map{ cache -> [ [ id:"${snpeff_genome}.${snpeff_db}" ], cache ] } + } else snpeff_cache = [] + + if (vep_enabled) { + def vep_annotation_cache_key = (vep_cache == "s3://annotation-cache/vep_cache/") ? 
"${vep_cache_version}_${vep_genome}/" : "" + def vep_cache_dir = "${vep_annotation_cache_key}${vep_species}/${vep_cache_version}_${vep_genome}" + def vep_cache_path_full = file("$vep_cache/$vep_cache_dir", type: 'dir') + if ( !vep_cache_path_full.exists() || !vep_cache_path_full.isDirectory() ) { + if (vep_cache == "s3://annotation-cache/vep_cache/") { + error("This path is not available within annotation-cache.\nPlease check https://annotation-cache.github.io/ to create a request for it.") + } else { + error("Path provided with VEP cache is invalid.\nMake sure there is a directory named ${vep_cache_dir} in ${vep_cache}.\n${help_message}") + } + } + ensemblvep_cache = Channel.fromPath(file("${vep_cache}/${vep_annotation_cache_key}"), checkIfExists: true).collect() + } else ensemblvep_cache = [] + + emit: + ensemblvep_cache // channel: [ meta, cache ] + snpeff_cache // channel: [ meta, cache ] +} diff --git a/subworkflows/local/samplesheet_to_channel/main.nf b/subworkflows/local/samplesheet_to_channel/main.nf new file mode 100644 index 0000000000..0b82285368 --- /dev/null +++ b/subworkflows/local/samplesheet_to_channel/main.nf @@ -0,0 +1,290 @@ +workflow SAMPLESHEET_TO_CHANNEL{ + + take: + ch_from_samplesheet + + main: + ch_from_samplesheet.dump(tag:"ch_from_samplesheet") + input_sample = ch_from_samplesheet.map{ meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller -> + // generate patient_sample key to group lanes together + [ meta.patient + meta.sample, [meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller] ] + }.tap{ ch_with_patient_sample } // save the channel + .groupTuple() //group by patient_sample to get all lanes + .map { patient_sample, ch_items -> + // get number of lanes per sample + [ patient_sample, ch_items.size() ] + }.combine(ch_with_patient_sample, by: 0) // for each entry add numLanes + .map { patient_sample, num_lanes, ch_items -> + (meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller) = 
ch_items + if (meta.lane && fastq_2) { + meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] + def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' + + def flowcell = flowcellLaneFromFastq(fastq_1) + // Don't use a random element for ID, it breaks resuming + def read_group = "\"@RG\\tID:${flowcell}.${meta.sample}.${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" + + meta = meta - meta.subMap('lane') + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'fastq', size: 1] + + if (params.step == 'mapping') return [ meta, [ fastq_1, fastq_2 ] ] + else { + error("Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + + // start from BAM + } else if (meta.lane && bam) { + if (params.step != 'mapping' && !bai) { + error("BAM index (bai) should be provided.") + } + meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] + def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' + def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" + + meta = meta - meta.subMap('lane') + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] + + if (params.step != 'annotate') return [ meta - meta.subMap('lane'), bam, bai ] + else { + error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + + // recalibration + } else if (table && cram) { + meta = meta + [id: meta.sample, data_type: 'cram'] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai, table ] + else { + error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + + // recalibration when skipping MarkDuplicates + } else if (table && bam) { + meta = meta + [id: meta.sample, data_type: 'bam'] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai, table ] + else { + error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + + // prepare_recalibration or variant_calling + } else if (cram) { + meta = meta + [id: meta.sample, data_type: 'cram'] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai ] + else { + error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + + // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates` + } else if (bam) { + meta = meta + [id: meta.sample, data_type: 'bam'] + + if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai ] + else { + error("Samplesheet contains bam files but step is `$params.step`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + + // annotation + } else if (vcf) { + meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: ''] + + if (params.step == 'annotate') return [ meta - meta.subMap('lane'), vcf ] + else { + error("Samplesheet contains vcf files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } else { + error("Missing or unknown field in csv file header. Please check your samplesheet") + } + } + + if (params.step != 'annotate' && params.tools && !params.build_only_index) { + // Two checks for ensuring that the pipeline stops with a meaningful error message if + // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and + // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. 
+ input_sample.filter{ it[0].status == 1 }.ifEmpty{ // In this case, the sample-sheet contains no tumor-samples + if (!params.build_only_index) { + def tools_tumor = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] + def tools_tumor_asked = [] + tools_tumor.each{ tool -> + if (params.tools.split(',').contains(tool)) tools_tumor_asked.add(tool) + } + if (!tools_tumor_asked.isEmpty()) { + error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", ")) + } + } + } + input_sample.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples + def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro'] + def requested_tools_requiring_normal_samples = [] + tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> + if (params.tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) + } + if (!requested_tools_requiring_normal_samples.isEmpty()) { + error('The sample-sheet only contains tumor-samples, but the following tools, which were requested by the option "tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", ")) + } + } + } + + // Fails when wrongfull extension for intervals file + if (params.wes && !params.step == 'annotate') { + if (params.intervals && !params.intervals.endsWith("bed")) error("Target file specified with `--intervals` must be in BED format for targeted data") + else log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.") + } else if (params.intervals && !params.intervals.endsWith("bed") && !params.intervals.endsWith("list")) error("Intervals file must end with .bed, .list, or .interval_list") + + if (params.step == 'mapping' && 
params.aligner.contains("dragmap") && !(params.skip_tools && params.skip_tools.split(',').contains("baserecalibrator"))) { + log.warn("DragMap was specified as aligner. Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap\nhttps://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode") + } + + if (params.step == 'mapping' && params.aligner.contains("sentieon-bwamem") && params.umi_read_structure) { + error("Sentieon BWA is currently not compatible with FGBio UMI handeling. Please choose a different aligner.") + } + + if (params.tools && params.tools.split(',').contains("sentieon_haplotyper") && params.joint_germline && (!params.sentieon_haplotyper_emit_mode || !(params.sentieon_haplotyper_emit_mode.contains('gvcf')))) { + error("When setting the option `--joint_germline` and including `sentieon_haplotyper` among the requested tools, please set `--sentieon_haplotyper_emit_mode` to include `gvcf`.") + } + + // Fails or warns when missing files or params for ascat + if (params.tools && params.tools.split(',').contains('ascat')) { + if (!params.ascat_alleles) { + error("No allele files were provided for running ASCAT. Please provide a zip folder with allele files.") + } + if (!params.ascat_loci) { + error("No loci files were provided for running ASCAT. Please provide a zip folder with loci files.") + } + if (!params.ascat_loci_gc && !params.ascat_loci_rt) { + log.warn("No LogRCorrection performed in ASCAT. For LogRCorrection to run, please provide either loci gc files or both loci gc files and loci rt files.") + } + if (params.wes) { + log.warn("Default reference files not suited for running ASCAT on WES data. 
It's recommended to use the reference files provided here: https://github.com/Wedge-lab/battenberg#required-reference-files") + } + } + + // Warns when missing files or params for mutect2 + if (params.tools && params.tools.split(',').contains('mutect2')) { + if (!params.pon) { + log.warn("No Panel-of-normal was specified for Mutect2.\nIt is highly recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2\nFor more information on how to create one: https://gatk.broadinstitute.org/hc/en-us/articles/5358921041947-CreateSomaticPanelOfNormals-BETA-") + } + if (!params.germline_resource) { + log.warn("If Mutect2 is specified without a germline resource, no filtering will be done.\nIt is recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2") + } + if (params.pon && params.pon.contains("/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz")) { + log.warn("The default Panel-of-Normals provided by GATK is used for Mutect2.\nIt is highly recommended to generate one from normal samples that are technical similar to the tumor ones.\nFor more information: https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-") + } + } + + // Fails when missing resources for baserecalibrator + // Warns when missing resources for haplotypecaller + if (!params.dbsnp && !params.known_indels) { + if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate'] && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('baserecalibrator')))) { + error("Base quality score recalibration requires at least one resource file. 
Please provide at least one of `--dbsnp` or `--known_indels`\nYou can skip this step in the workflow by adding `--skip_tools baserecalibrator` to the command.") + } + if (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope'))) { + log.warn "If GATK's Haplotypecaller, Sentieon's Dnascpe or Sentieon's Haplotyper is specified, without `--dbsnp` or `--known_indels no filtering will be done. For filtering, please provide at least one of `--dbsnp` or `--known_indels`.\nFor more information see FilterVariantTranches (single-sample, default): https://gatk.broadinstitute.org/hc/en-us/articles/5358928898971-FilterVariantTranches\nFor more information see VariantRecalibration (--joint_germline): https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator\nFor more information on GATK Best practice germline variant calling: https://gatk.broadinstitute.org/hc/en-us/articles/360035535932-Germline-short-variant-discovery-SNPs-Indels-" + } + } + if (params.joint_germline && (!params.tools || !(params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper') || params.tools.split(',').contains('sentieon_dnascope')))) { + error("The GATK's Haplotypecaller, Sentieon's Dnascope or Sentieon's Haplotyper should be specified as one of the tools when doing joint germline variant calling.) 
") + } + + if ( + params.tools && + ( + params.tools.split(',').contains('haplotypecaller') || + params.tools.split(',').contains('sentieon_haplotyper') || + params.tools.split(',').contains('sentieon_dnascope') + ) && + params.joint_germline && + ( + !params.dbsnp || + !params.known_indels || + !params.known_snps || + params.no_intervals + ) + ) { + log.warn("""If GATK's Haplotypecaller, Sentieon's Dnascope and/or Sentieon's Haplotyper is specified, \ + but without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), \ + no variant recalibration will be done. For recalibration you must provide all of these resources.\nFor more information \ + see VariantRecalibration: https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator \n\ + Joint germline variant calling also requires intervals in order to genotype the samples. \ + As a result, if `--no_intervals` is set to `true` the joint germline variant calling will not be performed.""") + } + + if (params.tools && + params.tools.split(',').contains('sentieon_dnascope') && + params.joint_germline && + ( + !params.sentieon_dnascope_emit_mode || + !params.sentieon_dnascope_emit_mode.split(',').contains('gvcf') + ) + ) { + error("When using Sentieon Dnascope for joint-germline variant-calling the option `--sentieon_dnascope_emit_mode` has to include `gvcf`.") + } + + if (params.tools && + params.tools.split(',').contains('sentieon_haplotyper') && + params.joint_germline && + ( + !params.sentieon_haplotyper_emit_mode || + !params.sentieon_haplotyper_emit_mode.split(',').contains('gvcf') + ) + ) { + error("When using Sentieon Haplotyper for joint-germline variant-calling the option `--sentieon_haplotyper_emit_mode` has to include `gvcf`.") + } + + + // Fails when --joint_mutect2 is used without enabling mutect2 + if (params.joint_mutect2 && (!params.tools || !params.tools.split(',').contains('mutect2'))) { + error("The mutect2 should be specified as 
one of the tools when doing joint somatic variant calling with Mutect2. (The mutect2 could be specified by adding `--tools mutect2` to the nextflow command.)") + } + + // Fails when missing tools for variant_calling or annotate + if ((params.step == 'variant_calling' || params.step == 'annotate') && !params.tools) { + error("Please specify at least one tool when using `--step ${params.step}`.\nhttps://nf-co.re/sarek/parameters#tools") + } + + // Fails when missing sex information for CNV tools + if (params.tools && (params.tools.split(',').contains('ascat') || params.tools.split(',').contains('controlfreec'))) { + input_sample.map{ + if (it[0].sex == 'NA' ) { + error("Please specify sex information for each sample in your samplesheet when using '--tools' with 'ascat' or 'controlfreec'.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") + } + } + } + + emit: + input_sample + } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// Parse first line of a FASTQ file, return the flowcell id and lane number. +def flowcellLaneFromFastq(path) { + // expected format: + // xx:yy:FLOWCELLID:LANE:... (seven fields) + // or + // FLOWCELLID:LANE:xx:... 
(five fields) + def line + path.withInputStream { + InputStream gzipStream = new java.util.zip.GZIPInputStream(it) + Reader decoder = new InputStreamReader(gzipStream, 'ASCII') + BufferedReader buffered = new BufferedReader(decoder) + line = buffered.readLine() + } + assert line.startsWith('@') + line = line.substring(1) + def fields = line.split(':') + String fcid + + if (fields.size() >= 7) { + // CASAVA 1.8+ format, from https://support.illumina.com/help/BaseSpace_OLH_009008/Content/Source/Informatics/BS/FileFormat_FASTQ-files_swBS.htm + // "@<instrument>:<run number>:<flowcell ID>:<lane>:<tile>:<x-pos>:<y-pos>:<UMI> <read>:<is filtered>:<control number>:<index>" + fcid = fields[2] + } else if (fields.size() == 5) { + fcid = fields[0] + } + return fcid +} + + diff --git a/subworkflows/local/samplesheet_to_channel/main.nf.test b/subworkflows/local/samplesheet_to_channel/main.nf.test new file mode 100644 index 0000000000..49eeb2a132 --- /dev/null +++ b/subworkflows/local/samplesheet_to_channel/main.nf.test @@ -0,0 +1,34 @@ +nextflow_workflow { + + name "Test Workflow SAMPLESHEET_TO_CHANNEL" + script "subworkflows/local/samplesheet_to_channel/main.nf" + workflow "SAMPLESHEET_TO_CHANNEL" + + test("Should run without failures") { + + when { + params { + // define parameters here. Example: + skip_tools = 'baserecalibrator' + + } + workflow { + """ + // define inputs of the workflow here. 
Example: + input[0] = Channel.of([['patient':'test', 'sample':'test', + 'sex':'XX', 'status':0, 'lane':'test_L1'], + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), + [], [], [], [], [], [], []]) + """ + } + } + + then { + assert workflow.success + assert snapshot(workflow.out).match() + } + + } + +} diff --git a/subworkflows/local/samplesheet_to_channel/main.nf.test.snap b/subworkflows/local/samplesheet_to_channel/main.nf.test.snap new file mode 100644 index 0000000000..fa440f539b --- /dev/null +++ b/subworkflows/local/samplesheet_to_channel/main.nf.test.snap @@ -0,0 +1,47 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "patient": "test", + "sample": "test", + "sex": "XX", + "status": 0, + "id": "test-test_L1", + "num_lanes": 1, + "read_group": "\"@RG\\tID:null.test.test_L1\\tPU:test_L1\\tSM:test_test\\tLB:test\\tDS:null\\tPL:ILLUMINA\"", + "data_type": "fastq", + "size": 1 + }, + [ + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz" + ] + ] + ], + "input_sample": [ + [ + { + "patient": "test", + "sample": "test", + "sex": "XX", + "status": 0, + "id": "test-test_L1", + "num_lanes": 1, + "read_group": "\"@RG\\tID:null.test.test_L1\\tPU:test_L1\\tSM:test_test\\tLB:test\\tDS:null\\tPL:ILLUMINA\"", + "data_type": "fastq", + "size": 1 + }, + [ + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", + "/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz" + ] + ] + ] + } + ], + "timestamp": "2023-10-16T14:12:54.640503" + } +} \ No newline at end of file diff --git a/subworkflows/local/vcf_annotate_all/main.nf b/subworkflows/local/vcf_annotate_all/main.nf index 5b759d8818..199aae3b5f 100644 --- 
a/subworkflows/local/vcf_annotate_all/main.nf +++ b/subworkflows/local/vcf_annotate_all/main.nf @@ -5,6 +5,7 @@ include { VCF_ANNOTATE_ENSEMBLVEP } from '../../nf-core/vcf_annotate_ensemblvep/main' include { VCF_ANNOTATE_ENSEMBLVEP as VCF_ANNOTATE_MERGE } from '../../nf-core/vcf_annotate_ensemblvep/main' include { VCF_ANNOTATE_SNPEFF } from '../../nf-core/vcf_annotate_snpeff/main' +include { VCF_ANNOTATE_BCFTOOLS } from '../../nf-core/vcf_annotate_bcftools/main' workflow VCF_ANNOTATE_ALL { take: @@ -18,6 +19,9 @@ workflow VCF_ANNOTATE_ALL { vep_cache_version vep_cache vep_extra_files + bcftools_annotations + bcftools_annotations_index + bcftools_header_lines main: reports = Channel.empty() @@ -26,6 +30,14 @@ workflow VCF_ANNOTATE_ALL { json_ann = Channel.empty() versions = Channel.empty() + if (tools.split(',').contains('bcfann')) { + VCF_ANNOTATE_BCFTOOLS(vcf, bcftools_annotations, bcftools_annotations_index, bcftools_header_lines) + + vcf_ann = vcf_ann.mix(VCF_ANNOTATE_BCFTOOLS.out.vcf_tbi) + versions = versions.mix(VCF_ANNOTATE_BCFTOOLS.out.versions) + } + + if (tools.split(',').contains('merge') || tools.split(',').contains('snpeff')) { VCF_ANNOTATE_SNPEFF(vcf, snpeff_db, snpeff_cache) diff --git a/subworkflows/nf-core/bam_ngscheckmate/main.nf b/subworkflows/nf-core/bam_ngscheckmate/main.nf new file mode 100644 index 0000000000..2da41fede3 --- /dev/null +++ b/subworkflows/nf-core/bam_ngscheckmate/main.nf @@ -0,0 +1,49 @@ +include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup/main' +include { NGSCHECKMATE_NCM } from '../../../modules/nf-core/ngscheckmate/ncm/main' + +workflow BAM_NGSCHECKMATE { + + take: + ch_input // channel: [ val(meta1), bam/cram ] + ch_snp_bed // channel: [ val(meta2), bed ] + ch_fasta // channel: [ val(meta3), fasta ] + + main: + + ch_versions = Channel.empty() + + ch_input_bed = ch_input.combine(ch_snp_bed.collect()) + // do something to combine the metas? 
+ .map{ input_meta, input_file, bed_meta, bed_file -> + [input_meta, input_file, bed_file] + } + + BCFTOOLS_MPILEUP (ch_input_bed, ch_fasta, false) + ch_versions = ch_versions.mix(BCFTOOLS_MPILEUP.out.versions) + + BCFTOOLS_MPILEUP + .out + .vcf + .map{meta, vcf -> vcf} // discard individual metas + .collect() // group into one channel + .map{files -> [files]} // make the channel into [vcf1, vcf2, ...] + .set {ch_collected_vcfs} + + ch_snp_bed + .map{meta, bed -> meta} // use the snp_bed file meta as the meta for the merged channel + .combine(ch_collected_vcfs) // add the vcf files after the meta, now looks like [meta, [vcf1, vcf2, ... ] ] + .set {ch_vcfs} + + NGSCHECKMATE_NCM (ch_vcfs, ch_snp_bed, ch_fasta) + ch_versions = ch_versions.mix(NGSCHECKMATE_NCM.out.versions) + + emit: + corr_matrix = NGSCHECKMATE_NCM.out.corr_matrix // channel: [ meta, corr_matrix ] + matched = NGSCHECKMATE_NCM.out.matched // channel: [ meta, matched ] + all = NGSCHECKMATE_NCM.out.all // channel: [ meta, all ] + vcf = BCFTOOLS_MPILEUP.out.vcf // channel: [ meta, vcf ] + pdf = NGSCHECKMATE_NCM.out.pdf // channel: [ meta, pdf ] + versions = ch_versions // channel: [ versions.yml ] + +} + diff --git a/subworkflows/nf-core/bam_ngscheckmate/meta.yml b/subworkflows/nf-core/bam_ngscheckmate/meta.yml new file mode 100644 index 0000000000..a3a1ab6e4a --- /dev/null +++ b/subworkflows/nf-core/bam_ngscheckmate/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bam_ngscheckmate" +description: Take a set of bam files and run NGSCheckMate to determine whether samples match with each other, using a set of SNPs. +keywords: + - ngscheckmate + - qc + - bam + - snp +components: + - bcftools/mpileup + - ngscheckmate/ncm +input: + - meta1: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - bam: + type: file + description: BAM files for each sample + pattern: "*.{bam}" + - meta2: + type: map + description: | + Groovy Map containing bed file information + e.g. [ id:'sarscov2' ] + - snp_bed: + type: file + description: BED file containing the SNPs to analyse. NGSCheckMate provides some default ones for hg19/hg38. + pattern: "*.{bed}" + - meta3: + type: map + description: | + Groovy Map containing reference genome meta information + e.g. [ id:'sarscov2' ] + - fasta: + type: file + description: fasta file for the genome + pattern: "*.{fasta}" + +output: + - pdf: + type: file + description: A pdf containing a dendrogram showing how the samples match up + pattern: "*.{pdf}" + - corr_matrix: + type: file + description: A text file containing the correlation matrix between each sample + pattern: "*corr_matrix.txt" + - matched: + type: file + description: A txt file containing only the samples that match with each other + pattern: "*matched.txt" + - all: + type: file + description: A txt file containing all the sample comparisons, whether they match or not + pattern: "*all.txt" + - vcf: + type: file + description: vcf files for each sample giving the SNP calls + pattern: "*.vcf" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@SPPearce" diff --git a/subworkflows/nf-core/vcf_annotate_bcftools/main.nf b/subworkflows/nf-core/vcf_annotate_bcftools/main.nf new file mode 100644 index 0000000000..e54c52aa7c --- /dev/null +++ b/subworkflows/nf-core/vcf_annotate_bcftools/main.nf @@ -0,0 +1,33 @@ + +// +// Run BCFtools to annotate VCF files +// + +include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate/main' +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' + +workflow VCF_ANNOTATE_BCFTOOLS { + take: + vcf // channel: [ val(meta), vcf ] + annotations // + annotations_index // + header_lines // + + + main: + ch_versions = Channel.empty() + 
+ BCFTOOLS_ANNOTATE(vcf, annotations, annotations_index, header_lines) + TABIX_TABIX(BCFTOOLS_ANNOTATE.out.vcf) + + ch_vcf_tbi = BCFTOOLS_ANNOTATE.out.vcf.join(TABIX_TABIX.out.tbi, failOnDuplicate: true, failOnMismatch: true) + + + // Gather versions of all tools used + ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions) + ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) + + emit: + vcf_tbi = ch_vcf_tbi // channel: [ val(meta), vcf.gz, vcf.gz.tbi ] + versions = ch_versions // path: versions.yml +} diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml b/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml index 7a9fd10c4e..15d42da23f 100644 --- a/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml +++ b/subworkflows/nf-core/vcf_annotate_ensemblvep/meta.yml @@ -59,3 +59,7 @@ authors: - "@maxulysse" - "@matthdsm" - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" diff --git a/subworkflows/nf-core/vcf_annotate_snpeff/main.nf b/subworkflows/nf-core/vcf_annotate_snpeff/main.nf index 2a875939de..3570a5b7c0 100644 --- a/subworkflows/nf-core/vcf_annotate_snpeff/main.nf +++ b/subworkflows/nf-core/vcf_annotate_snpeff/main.nf @@ -13,6 +13,7 @@ workflow VCF_ANNOTATE_SNPEFF { main: ch_versions = Channel.empty() + SNPEFF_SNPEFF(ch_vcf, val_snpeff_db, ch_snpeff_cache) TABIX_BGZIPTABIX(SNPEFF_SNPEFF.out.vcf) diff --git a/subworkflows/nf-core/vcf_annotate_snpeff/meta.yml b/subworkflows/nf-core/vcf_annotate_snpeff/meta.yml index b3f10cae12..c8d5a635ee 100644 --- a/subworkflows/nf-core/vcf_annotate_snpeff/meta.yml +++ b/subworkflows/nf-core/vcf_annotate_snpeff/meta.yml @@ -36,3 +36,5 @@ output: Structure: [ path(versions.yml) ] authors: - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/tests/config/bcfann_test_header.txt b/tests/config/bcfann_test_header.txt new file mode 100644 index 0000000000..443dd3ea4a --- /dev/null +++ b/tests/config/bcfann_test_header.txt @@ -0,0 +1 @@ +##INFO= diff --git 
a/tests/config/tags.yml b/tests/config/pytesttags.yml similarity index 88% rename from tests/config/tags.yml rename to tests/config/pytesttags.yml index 362bc525b9..8a550ee057 100644 --- a/tests/config/tags.yml +++ b/tests/config/pytesttags.yml @@ -206,20 +206,20 @@ intervals: - tests/csv/3.0/fastq_single.csv - tests/test_intervals.yml -## gatk4_spark -gatk4_spark: +## gatk4spark +gatk4spark: - conf/modules/markduplicates.config - conf/modules/prepare_recalibration.config - conf/modules/recalibrate.config - - modules/nf-core/gatk4/applybqsrspark/main.nf - - modules/nf-core/gatk4/baserecalibratorspark/main.nf + - modules/nf-core/gatk4spark/applybqsr/main.nf + - modules/nf-core/gatk4spark/baserecalibrator/main.nf - modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf - - modules/nf-core/gatk4/markduplicatesspark/main.nf + - modules/nf-core/gatk4spark/markduplicates/main.nf - subworkflows/local/bam_applybqsr_spark/main.nf - subworkflows/local/bam_baserecalibrator_spark/main.nf - subworkflows/local/bam_markduplicates_spark/main.nf - tests/csv/3.0/fastq_single.csv - - tests/test_gatk4_spark.yml + - tests/test_gatk4spark.yml # variant calling @@ -314,6 +314,31 @@ haplotypecaller_skip_filter: - tests/csv/3.0/mapped_single_bam.csv - tests/test_haplotypecaller_skip_filter.yml +## sentieon/dnascope +sentieon/dnascope: + - conf/modules/sentieon_dnascope.config + - modules/nf-core/sentieon/dnascope/main.nf + - modules/nf-core/gatk4/mergevcfs/main.nf + - modules/nf-core/samtools/index/main.nf + - modules/nf-core/samtools/merge/main.nf + - subworkflows/local/bam_merge_index_samtools/main.nf + - subworkflows/local/bam_variant_calling_germline_all/main.nf + - subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf + - tests/csv/3.0/mapped_single_bam.csv + - tests/test_sentieon_dnascope.yml + +sentieon_dnascope_skip_filter: + - conf/modules/sentieon_dnascope.config + - modules/nf-core/sentieon/dnascope/main.nf + - modules/nf-core/gatk4/mergevcfs/main.nf + - 
modules/nf-core/samtools/index/main.nf + - modules/nf-core/samtools/merge/main.nf + - subworkflows/local/bam_merge_index_samtools/main.nf + - subworkflows/local/bam_variant_calling_germline_all/main.nf + - subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf + - tests/csv/3.0/mapped_single_bam.csv + - tests/test_sentieon_dnascope_skip_filter.yml + ## sentieon/haplotyper sentieon/haplotyper: - conf/modules/sentieon_haplotyper.config @@ -364,16 +389,27 @@ joint_germline: - tests/csv/3.0/mapped_joint_bam.csv - tests/test_joint_germline.yml -## sentieon_joint_germline -sentieon_joint_germline: +## sentieon_dnascope_joint_germline +sentieon_dnascope_joint_germline: + - conf/modules/prepare_genome.config + - conf/modules/sentieon_dnascope.config + - conf/modules/sentieon_dnascope_joint_germline.config + - modules/nf-core/sentieon/dnascope/main.nf + - subworkflows/local/bam_variant_calling_germline_all/main.nf + - subworkflows/local/bam_variant_calling_sentieon_dnascope/main.nf + - tests/csv/3.0/mapped_joint_bam.csv + - tests/test_sentieon_dnascope_joint_germline.yml + +## sentieon_haplotyper_joint_germline +sentieon_haplotyper_joint_germline: - conf/modules/prepare_genome.config - conf/modules/sentieon_haplotyper.config - - conf/modules/sentieon_joint_germline.config + - conf/modules/sentieon_haplotyper_joint_germline.config - modules/nf-core/sentieon/haplotyper/main.nf - subworkflows/local/bam_variant_calling_germline_all/main.nf - subworkflows/local/bam_variant_calling_sentieon_haplotyper/main.nf - tests/csv/3.0/mapped_joint_bam.csv - - tests/test_sentieon_joint_germline.yml + - tests/test_sentieon_haplotyper_joint_germline.yml ## manta manta: @@ -521,6 +557,15 @@ vep: - tests/csv/3.0/vcf_single.csv - tests/test_annotation_vep.yml +## bcfann +bcfann: + - conf/modules/annotate.config + - modules/nf-core/bcftools/annotate/main.nf + - modules/nf-core/tabix/bgziptabix/main.nf + - subworkflows/nf-core/vcf_annotate_bcftools/main.nf + - tests/csv/3.0/vcf_single.csv
+ - tests/test_annotation_bcfann.yml + # postprocessing ## concatenate germline vcfs @@ -553,3 +598,14 @@ concatenate_vcfs: - subworkflows/local/vcf_concatenate_germline/main.nf - tests/csv/3.0/mapped_joint_bam.csv - tests/test_concat_germline_vcfs.yml + +# sampleqc + +## ngscheckmate +ngscheckmate: + - conf/modules/ngscheckmate.config + - modules/nf-core/bcftools/mpileup/main.nf + - modules/nf-core/ngscheckmate/ncm/main.nf + - subworkflows/local/cram_sampleqc/main.nf + - subworkflows/nf-core/bam_ngscheckmate/main.nf + - tests/test_ngscheckmate.yml diff --git a/tests/main.nf.test b/tests/main.nf.test new file mode 100644 index 0000000000..ad9209ab88 --- /dev/null +++ b/tests/main.nf.test @@ -0,0 +1,29 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "pipeline" + tag "pipeline_sarek" + + test("Run with profile test") { + + when { + params { + outdir = "results" + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + input = "$projectDir/tests/csv/3.0/fastq_pair.csv" + validationSchemaIgnoreParams = 'test_data_base,test_data,genomes' + use_gatk_spark = false + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + +} diff --git a/tests/tags.yml b/tests/tags.yml new file mode 100644 index 0000000000..0b52c379ca --- /dev/null +++ b/tests/tags.yml @@ -0,0 +1,5 @@ +pipeline_sarek: + - "**.nf" + - "**.config" + - "**.nf.test" + - "**.json" diff --git a/tests/test_annotation_bcfann.yml b/tests/test_annotation_bcfann.yml new file mode 100644 index 0000000000..93fa668c3d --- /dev/null +++ b/tests/test_annotation_bcfann.yml @@ -0,0 +1,10 @@ +- name: Run bcfann + command: nextflow run main.nf -profile test,test_cache,annotation --tools bcfann --outdir results + tags: + - annotation + - bcfann + files: + - path: results/annotation/test/test_BCF.ann.vcf.gz + # binary changes md5sums on reruns + - path: results/annotation/test/test_BCF.ann.vcf.gz.tbi + # binary changes md5sums on reruns diff --git a/tests/test_gatk4_spark.yml 
b/tests/test_gatk4spark.yml similarity index 97% rename from tests/test_gatk4_spark.yml rename to tests/test_gatk4spark.yml index 32082446ec..7b6b42a3a5 100644 --- a/tests/test_gatk4_spark.yml +++ b/tests/test_gatk4spark.yml @@ -1,7 +1,7 @@ -- name: Run default pipeline with gatk4_spark +- name: Run default pipeline with gatk4spark command: nextflow run main.nf -profile test_cache,use_gatk_spark --outdir results tags: - - gatk4_spark + - gatk4spark - preprocessing files: - path: results/csv/markduplicates.csv @@ -48,10 +48,10 @@ # conda changes md5sums for test - path: results/preprocessing/mapped/ should_exist: false -- name: Run default pipeline with gatk4_spark & skipping all QC steps +- name: Run default pipeline with gatk4spark and skipping all QC steps command: nextflow run main.nf -profile test_cache,use_gatk_spark --skip_tools fastqc,markduplicates_report,mosdepth,multiqc,samtools --outdir results tags: - - gatk4_spark + - gatk4spark - preprocessing - skip_qc files: @@ -84,7 +84,7 @@ - name: Run save_output_as_bam with gatk4 spark command: nextflow run main.nf -profile test_cache,use_gatk_spark --save_output_as_bam --outdir results tags: - - gatk4_spark + - gatk4spark - preprocessing - save_output_as_bam files: diff --git a/tests/test_ngscheckmate.yml b/tests/test_ngscheckmate.yml new file mode 100644 index 0000000000..8923ddc9ec --- /dev/null +++ b/tests/test_ngscheckmate.yml @@ -0,0 +1,15 @@ +- name: Check ngscheckmate is working + command: nextflow run main.nf -profile test_cache,tools --tools ngscheckmate --outdir results + tags: + - ngscheckmate + - tools + files: + - path: results/multiqc + - path: results/reports/ngscheckmate/ngscheckmate_all.txt + - path: results/reports/ngscheckmate/ngscheckmate_matched.txt + - path: results/reports/ngscheckmate/ngscheckmate_output_corr_matrix.txt + - path: results/reports/ngscheckmate/ngscheckmate.pdf + - path: results/reports/ngscheckmate/vcfs/sample1.ngscheckmate.vcf.gz + - path: 
results/reports/ngscheckmate/vcfs/sample2.ngscheckmate.vcf.gz + - path: results/reports/ngscheckmate/vcfs/sample3.ngscheckmate.vcf.gz + - path: results/reports/ngscheckmate/vcfs/sample4.ngscheckmate.vcf.gz diff --git a/tests/test_sentieon_dnascope.yml b/tests/test_sentieon_dnascope.yml new file mode 100644 index 0000000000..f51e0bca72 --- /dev/null +++ b/tests/test_sentieon_dnascope.yml @@ -0,0 +1,147 @@ +- name: Run variant calling on germline sample with sentieons dnascope + command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_dnascope --step variant_calling --outdir results + tags: + - germline + - sentieon/dnascope + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: b2144d21a0ebfd807a8646f1751d0ddc + - path: results/multiqc + - path: results/preprocessing/converted/test/test.converted.cram + # binary changes md5sums on reruns + - path: results/preprocessing/converted/test/test.converted.cram.crai + # binary changes md5sums on reruns + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/bcftools/sentieon_dnascope/test/test.dnascope.filtered.bcftools_stats.txt + md5sum: 912c7d5b31784c50e0a75b4fcfa4997b + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.FILTER.summary + md5sum: e67b24d296810a075378e5864bcea0fa + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.count + md5sum: b77c120ee5cc0423267200c67d60c663 + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.qual + # changes md5sum on reruns. This is somewhat unexpected, but might be due to tiny variation in very small numbers in the qual-files.
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz + # binary changes md5sums on reruns + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz.tbi + # binary changes md5sums on reruns + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz + # binary changes md5sums on reruns + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi + # binary changes md5sums on reruns + - path: results/dnascope + should_exist: false +- name: Run variant calling on germline sample with sentieons dnascope without intervals + command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_dnascope --step variant_calling --no_intervals --outdir results + tags: + - germline + - sentieon/dnascope + - no_intervals + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: b2144d21a0ebfd807a8646f1751d0ddc + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/preprocessing/converted/test/test.converted.cram + # binary changes md5sums on reruns + - path: results/preprocessing/converted/test/test.converted.cram.crai + # binary changes md5sums on reruns + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/bcftools/sentieon_dnascope/test/test.dnascope.filtered.bcftools_stats.txt + md5sum: 912c7d5b31784c50e0a75b4fcfa4997b + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.FILTER.summary + md5sum: e67b24d296810a075378e5864bcea0fa + - 
path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.count + md5sum: b77c120ee5cc0423267200c67d60c663 + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.qual + # changes md5sum on reruns. This is somewhat unexpected, but might be due to tiny variation in very small numbers in the qual-files. + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz + # binary changes md5sums on reruns + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz.tbi + # binary changes md5sums on reruns + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz + # binary changes md5sums on reruns + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi + # binary changes md5sums on reruns + - path: results/sentieon_dnascope + should_exist: false +- name: Run variant calling on germline sample with sentieons dnascope output gvcf + command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_dnascope --step variant_calling --outdir results --sentieon_dnascope_emit_mode gvcf + tags: + - germline + - sentieon/dnascope + - variant_calling + files: + - path: results/csv/variantcalled.csv + should_exist: false + - path: results/multiqc + - path: results/preprocessing/converted/test/test.converted.cram + # binary changes md5sums on reruns + - path: results/preprocessing/converted/test/test.converted.cram.crai + # binary changes md5sums on reruns + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/bcftools/sentieon_dnascope/test/test.dnascope.filtered.bcftools_stats.txt + should_exist: false + - path:
results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.FILTER.summary + should_exist: false + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.count + should_exist: false + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.qual + should_exist: false + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.g.vcf.gz + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.g.vcf.gz.tbi + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz + should_exist: false + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz.tbi + should_exist: false + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz + should_exist: false + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi + should_exist: false + - path: results/dnascope + should_exist: false +- name: Run variant calling on germline sample with sentieons dnascope output both gvcf and vcf + command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_dnascope --step variant_calling --outdir results --sentieon_dnascope_emit_mode variant,gvcf + tags: + - germline + - sentieon/dnascope + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: b2144d21a0ebfd807a8646f1751d0ddc + - path: results/multiqc + - path: results/preprocessing/converted/test/test.converted.cram + # binary changes md5sums on reruns + - path: results/preprocessing/converted/test/test.converted.cram.crai + # binary changes md5sums on reruns + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: 
results/reports/bcftools/sentieon_dnascope/test/test.dnascope.filtered.bcftools_stats.txt + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.FILTER.summary + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.count + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.filtered.TsTv.qual + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.g.vcf.gz + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.g.vcf.gz.tbi + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz.tbi + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi + - path: results/dnascope + should_exist: false diff --git a/tests/test_sentieon_dnascope_joint_germline.yml b/tests/test_sentieon_dnascope_joint_germline.yml new file mode 100644 index 0000000000..e905b9cd53 --- /dev/null +++ b/tests/test_sentieon_dnascope_joint_germline.yml @@ -0,0 +1,68 @@ +- name: Run joint germline variant calling with sentieon dnascope + command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_dnascope --step variant_calling --joint_germline --outdir results --sentieon_dnascope_emit_mode gvcf + tags: + - germline + - sentieon_dnascope_joint_germline + - variant_calling + - sentieon/dnascope + files: + - path: results/csv/variantcalled.csv + md5sum: 62d70060aad96337254efe2d7a1df170 + - path: results/multiqc + - path: results/reports/bcftools/sentieon_dnascope/joint_variant_calling/joint_germline.bcftools_stats.txt + - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.FILTER.summary + - path: 
results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.TsTv.count + - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.TsTv.qual + - path: results/variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline.vcf.gz + # binary changes md5sums on reruns + - path: results/variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline.vcf.gz.tbi + # binary changes md5sums on reruns + - path: results/variant_calling/sentieon_dnascope/testN/testN.dnascope.g.vcf.gz + - path: results/variant_calling/sentieon_dnascope/testN/testN.dnascope.g.vcf.gz.tbi + - path: results/variant_calling/sentieon_dnascope/testT/testT.dnascope.g.vcf.gz + - path: results/variant_calling/sentieon_dnascope/testT/testT.dnascope.g.vcf.gz.tbi + - path: results/dnascope + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/bcftools/sentieon_dnascope/joint_variant_calling/joint_germline_recalibrated.bcftools_stats.txt + should_exist: false + - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline_recalibrated.FILTER.summary + should_exist: false + - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline_recalibrated.TsTv.count + should_exist: false + - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline_recalibrated.TsTv.qual + should_exist: false + - path: results/variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline_recalibrated.vcf.gz + should_exist: false + - path: results/variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline_recalibrated.vcf.gz.tbi + should_exist: false +- name: Run joint germline variant calling with sentieon dnascope all intervals at once + command: nextflow run main.nf -profile 
test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_dnascope --step variant_calling --joint_germline --outdir results --sentieon_dnascope_emit_mode gvcf --nucleotides_per_second 100 + tags: + - germline + - sentieon_dnascope_joint_germline + - variant_calling + - sentieon/dnascope + files: + - path: results/csv/variantcalled.csv + md5sum: 62d70060aad96337254efe2d7a1df170 + - path: results/multiqc + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/bcftools/sentieon_dnascope/joint_variant_calling/joint_germline.bcftools_stats.txt + - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.FILTER.summary + - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.TsTv.count + - path: results/reports/vcftools/sentieon_dnascope/joint_variant_calling/joint_germline.TsTv.qual + - path: results/variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline.vcf.gz + - path: results/variant_calling/sentieon_dnascope/joint_variant_calling/joint_germline.vcf.gz.tbi + - path: results/variant_calling/sentieon_dnascope/testN/testN.dnascope.g.vcf.gz + - path: results/variant_calling/sentieon_dnascope/testN/testN.dnascope.g.vcf.gz.tbi + - path: results/variant_calling/sentieon_dnascope/testT/testT.dnascope.g.vcf.gz + - path: results/variant_calling/sentieon_dnascope/testT/testT.dnascope.g.vcf.gz.tbi + - path: results/dnascope + should_exist: false diff --git a/tests/test_sentieon_dnascope_skip_filter.yml b/tests/test_sentieon_dnascope_skip_filter.yml new file mode 100644 index 0000000000..16bbca9e7c --- /dev/null +++ b/tests/test_sentieon_dnascope_skip_filter.yml @@ -0,0 +1,81 @@ +- name: Run variant calling on germline sample with sentieon dnascope and skip filter + command: nextflow run 
main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_dnascope --step variant_calling --skip_tools dnascope_filter --outdir results + tags: + - germline + - sentieon_dnascope_skip_filter + - variant_calling + - sentieon/dnascope + files: + - path: results/csv/variantcalled.csv + md5sum: 10254414c0679ba1fb25e41b9ff548cc + - path: results/multiqc + - path: results/preprocessing/converted/test/test.converted.cram + # binary changes md5sums on reruns + - path: results/preprocessing/converted/test/test.converted.cram.crai + # binary changes md5sums on reruns + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/bcftools/sentieon_dnascope/test/test.dnascope.unfiltered.bcftools_stats.txt + md5sum: f915fe1591ababb0da5e7b43dfc35092 + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.FILTER.summary + md5sum: 87a84b5f8ac3d3cbeeef7d60afcdbfe7 + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.TsTv.count + md5sum: b77c120ee5cc0423267200c67d60c663 + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.TsTv.qual + # changes md5sum on reruns. This is somewhat unexpected, but might be due to tiny variation in very small numbers in the qual-files.
+ - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz + should_exist: false + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz.tbi + should_exist: false + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz + # binary changes md5sums on reruns + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi + # binary changes md5sums on reruns + - path: results/sentieon_dnascope + should_exist: false +- name: Run variant calling on germline sample with sentieon dnascope without intervals and skip filter + command: nextflow run main.nf -profile test_cache,targeted,software_license --sentieon_extension --input ./tests/csv/3.0/mapped_single_bam.csv --tools sentieon_dnascope --step variant_calling --skip_tools dnascope_filter --no_intervals --outdir results + tags: + - germline + - sentieon_dnascope_skip_filter + - no_intervals + - variant_calling + - sentieon/dnascope + files: + - path: results/csv/variantcalled.csv + md5sum: 10254414c0679ba1fb25e41b9ff548cc + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/preprocessing/converted/test/test.converted.cram + # binary changes md5sums on reruns + - path: results/preprocessing/converted/test/test.converted.cram.crai + # binary changes md5sums on reruns + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/bcftools/sentieon_dnascope/test/test.dnascope.unfiltered.bcftools_stats.txt + md5sum: f915fe1591ababb0da5e7b43dfc35092 + - path: 
results/reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.FILTER.summary + md5sum: 87a84b5f8ac3d3cbeeef7d60afcdbfe7 + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.TsTv.count + md5sum: b77c120ee5cc0423267200c67d60c663 + - path: results/reports/vcftools/sentieon_dnascope/test/test.dnascope.unfiltered.TsTv.qual + # changes md5sum on reruns. This is somewhat unexpected, but might be due to tiny variation in very small numbers in the qual-files. + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz + should_exist: false + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.filtered.vcf.gz.tbi + should_exist: false + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz + # binary changes md5sums on reruns + - path: results/variant_calling/sentieon_dnascope/test/test.dnascope.unfiltered.vcf.gz.tbi + # binary changes md5sums on reruns + - path: results/sentieon_dnascope + should_exist: false diff --git a/tests/test_sentieon_haplotyper.yml b/tests/test_sentieon_haplotyper.yml index b90e538e70..7cac6f34c7 100644 --- a/tests/test_sentieon_haplotyper.yml +++ b/tests/test_sentieon_haplotyper.yml @@ -145,3 +145,47 @@ - path: results/variant_calling/sentieon_haplotyper/test/test.haplotyper.unfiltered.vcf.gz.tbi - path: results/haplotyper should_exist: false +- name: Run variant calling on germline sample with sentieons haplotyper and gatk haplotypecaller + command: nextflow run main.nf -profile test_cache,software_license,targeted --sentieon_extension --input ./tests/csv/3.0/mapped_single_bam.csv --tools haplotypecaller,sentieon_haplotyper --step variant_calling --outdir results + tags: + - germline + - sentieon/haplotyper + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: caa9932235cf993fca208943d2e58041 + - path: results/multiqc + - path: results/preprocessing/converted/test/test.converted.cram + # binary changes md5sums on
reruns + - path: results/preprocessing/converted/test/test.converted.cram.crai + # binary changes md5sums on reruns + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/bcftools/sentieon_haplotyper/test/test.haplotyper.filtered.bcftools_stats.txt + md5sum: 66be03d4e6535175514f54a1a031d49f + - path: results/reports/vcftools/sentieon_haplotyper/test/test.haplotyper.filtered.FILTER.summary + md5sum: d501a93356f3c91c743f51104e24514a + - path: results/reports/vcftools/sentieon_haplotyper/test/test.haplotyper.filtered.TsTv.count + md5sum: 89562fef808b5c3db629682d36fd86fc + - path: results/reports/vcftools/sentieon_haplotyper/test/test.haplotyper.filtered.TsTv.qual + # changes md5sum on reruns. This is somewhat unexpected, but might be due to tiny variation in very small numbers in the qual-files. + - path: results/variant_calling/sentieon_haplotyper/test/test.haplotyper.filtered.vcf.gz + # binary changes md5sums on reruns + - path: results/variant_calling/sentieon_haplotyper/test/test.haplotyper.filtered.vcf.gz.tbi + # binary changes md5sums on reruns + - path: results/variant_calling/sentieon_haplotyper/test/test.haplotyper.unfiltered.vcf.gz + # binary changes md5sums on reruns + - path: results/variant_calling/sentieon_haplotyper/test/test.haplotyper.unfiltered.vcf.gz.tbi + # binary changes md5sums on reruns + - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz + # binary changes md5sums on reruns + - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz.tbi + # binary changes md5sums on reruns + - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz + # binary changes md5sums on reruns + - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz.tbi + # binary changes md5sums on reruns + - path:
results/haplotyper + should_exist: false diff --git a/tests/test_sentieon_joint_germline.yml b/tests/test_sentieon_haplotyper_joint_germline.yml similarity index 97% rename from tests/test_sentieon_joint_germline.yml rename to tests/test_sentieon_haplotyper_joint_germline.yml index 4637571aec..1c12f101db 100644 --- a/tests/test_sentieon_joint_germline.yml +++ b/tests/test_sentieon_haplotyper_joint_germline.yml @@ -2,7 +2,7 @@ command: nextflow run main.nf -profile test_cache,software_license,targeted --sentieon_extension --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_haplotyper --step variant_calling --joint_germline --outdir results --sentieon_haplotyper_emit_mode gvcf tags: - germline - - sentieon_joint_germline + - sentieon_haplotyper_joint_germline - variant_calling - sentieon/haplotyper files: @@ -31,7 +31,7 @@ command: nextflow run main.nf -profile test_cache,software_license,targeted --sentieon_extension --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_haplotyper --step variant_calling --joint_germline --outdir results --sentieon_haplotyper_emit_mode gvcf --nucleotides_per_second 100 tags: - germline - - sentieon_joint_germline + - sentieon_haplotyper_joint_germline - variant_calling - sentieon/haplotyper files: @@ -58,7 +58,7 @@ command: nextflow run main.nf -profile test_cache,software_license,tools_germline --sentieon_extension --input ./tests/csv/3.0/mapped_joint_bam.csv --tools sentieon_haplotyper --step variant_calling --joint_germline --outdir results --sentieon_haplotyper_emit_mode gvcf -stub-run tags: - germline - - sentieon_joint_germline + - sentieon_haplotyper_joint_germline - variant_calling - vqsr files: diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 5ae80fbae1..be673dc249 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -48,12 +48,18 @@ def checkPathParamList = [ params.known_snps_tbi, params.mappability, params.multiqc_config, + params.ngscheckmate_bed, params.pon, params.pon_tbi, + 
params.sentieon_dnascope_model, + params.snpeff_cache, params.spliceai_indel, params.spliceai_indel_tbi, params.spliceai_snv, - params.spliceai_snv_tbi + params.spliceai_snv_tbi, + params.bcftools_annotations, + params.bcftools_annotations_index, + params.bcftools_header_lines ] // only check if we are using the tools @@ -71,225 +77,6 @@ WorkflowSarek.initialise(params, log) for (param in checkPathParamList) if (param) file(param, checkIfExists: true) -// Set input, can either be from --input or from automatic retrieval in WorkflowSarek.groovy - -if (params.input) { - ch_from_samplesheet = params.build_only_index ? Channel.empty() : Channel.fromSamplesheet("input") -} else { - ch_from_samplesheet = params.build_only_index ? Channel.empty() : Channel.fromSamplesheet("input_restart") -} - -input_sample = ch_from_samplesheet - .map{ meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller -> - // generate patient_sample key to group lanes together - [ meta.patient + meta.sample, [meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller] ] - } - .tap{ ch_with_patient_sample } // save the channel - .groupTuple() //group by patient_sample to get all lanes - .map { patient_sample, ch_items -> - // get number of lanes per sample - [ patient_sample, ch_items.size() ] - } - .combine(ch_with_patient_sample, by: 0) // for each entry add numLanes - .map { patient_sample, num_lanes, ch_items -> - - (meta, fastq_1, fastq_2, table, cram, crai, bam, bai, vcf, variantcaller) = ch_items - if (meta.lane && fastq_2) { - meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] - def CN = params.seq_center ? 
"CN:${params.seq_center}\\t" : '' - - def flowcell = flowcellLaneFromFastq(fastq_1) - // Don't use a random element for ID, it breaks resuming - def read_group = "\"@RG\\tID:${flowcell}.${meta.sample}.${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" - - meta = meta - meta.subMap('lane') + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'fastq', size: 1] - - if (params.step == 'mapping') return [ meta, [ fastq_1, fastq_2 ] ] - else { - error("Samplesheet contains fastq files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") - } - - // start from BAM - } else if (meta.lane && bam) { - if (params.step != 'mapping' && !bai) { - error("BAM index (bai) should be provided.") - } - meta = meta + [id: "${meta.sample}-${meta.lane}".toString()] - def CN = params.seq_center ? "CN:${params.seq_center}\\t" : '' - def read_group = "\"@RG\\tID:${meta.sample}_${meta.lane}\\t${CN}PU:${meta.lane}\\tSM:${meta.patient}_${meta.sample}\\tLB:${meta.sample}\\tDS:${params.fasta}\\tPL:${params.seq_platform}\"" - - meta = meta - meta.subMap('lane') + [num_lanes: num_lanes.toInteger(), read_group: read_group.toString(), data_type: 'bam', size: 1] - - if (params.step != 'annotate') return [ meta - meta.subMap('lane'), bam, bai ] - else { - error("Samplesheet contains bam files but step is `annotate`. The pipeline is expecting vcf files for the annotation. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") - } - - // recalibration - } else if (table && cram) { - meta = meta + [id: meta.sample, data_type: 'cram'] - - if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai, table ] - else { - error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") - } - - // recalibration when skipping MarkDuplicates - } else if (table && bam) { - meta = meta + [id: meta.sample, data_type: 'bam'] - - if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai, table ] - else { - error("Samplesheet contains bam files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") - } - - // prepare_recalibration or variant_calling - } else if (cram) { - meta = meta + [id: meta.sample, data_type: 'cram'] - - if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), cram, crai ] - else { - error("Samplesheet contains cram files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") - } - - // prepare_recalibration when skipping MarkDuplicates or `--step markduplicates` - } else if (bam) { - meta = meta + [id: meta.sample, data_type: 'bam'] - - if (!(params.step == 'mapping' || params.step == 'annotate')) return [ meta - meta.subMap('lane'), bam, bai ] - else { - error("Samplesheet contains bam files but step is `$params.step`. 
Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") - } - - // annotation - } else if (vcf) { - meta = meta + [id: meta.sample, data_type: 'vcf', variantcaller: variantcaller ?: ''] - - if (params.step == 'annotate') return [ meta - meta.subMap('lane'), vcf ] - else { - error("Samplesheet contains vcf files but step is `$params.step`. Please check your samplesheet or adjust the step parameter.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") - } - } else { - error("Missing or unknown field in csv file header. Please check your samplesheet") - } - } - -if (params.step != 'annotate' && params.tools && !params.build_only_index) { - // Two checks for ensuring that the pipeline stops with a meaningful error message if - // 1. the sample-sheet only contains normal-samples, but some of the requested tools require tumor-samples, and - // 2. the sample-sheet only contains tumor-samples, but some of the requested tools require normal-samples. 
- input_sample.filter{ it[0].status == 1 }.ifEmpty{ // In this case, the sample-sheet contains no tumor-samples - if (!params.build_only_index) { - def tools_tumor = ['ascat', 'controlfreec', 'mutect2', 'msisensorpro'] - def tools_tumor_asked = [] - tools_tumor.each{ tool -> - if (params.tools.split(',').contains(tool)) tools_tumor_asked.add(tool) - } - if (!tools_tumor_asked.isEmpty()) { - error('The sample-sheet only contains normal-samples, but the following tools, which were requested with "--tools", expect at least one tumor-sample : ' + tools_tumor_asked.join(", ")) - } - } - } - input_sample.filter{ it[0].status == 0 }.ifEmpty{ // In this case, the sample-sheet contains no normal/germline-samples - def tools_requiring_normal_samples = ['ascat', 'deepvariant', 'haplotypecaller', 'msisensorpro'] - def requested_tools_requiring_normal_samples = [] - tools_requiring_normal_samples.each{ tool_requiring_normal_samples -> - if (params.tools.split(',').contains(tool_requiring_normal_samples)) requested_tools_requiring_normal_samples.add(tool_requiring_normal_samples) - } - if (!requested_tools_requiring_normal_samples.isEmpty()) { - error('The sample-sheet only contains tumor-samples, but the following tools, which were requested by the option "tools", expect at least one normal-sample : ' + requested_tools_requiring_normal_samples.join(", ")) - } - } -} - -// Fails when wrongfull extension for intervals file -if (params.wes && !params.step == 'annotate') { - if (params.intervals && !params.intervals.endsWith("bed")) error("Target file specified with `--intervals` must be in BED format for targeted data") - else log.warn("Intervals file was provided without parameter `--wes`: Pipeline will assume this is Whole-Genome-Sequencing data.") -} else if (params.intervals && !params.intervals.endsWith("bed") && !params.intervals.endsWith("list")) error("Intervals file must end with .bed, .list, or .interval_list") - -if (params.step == 'mapping' && 
params.aligner.contains("dragmap") && !(params.skip_tools && params.skip_tools.split(',').contains("baserecalibrator"))) { - log.warn("DragMap was specified as aligner. Base recalibration is not contained in --skip_tools. It is recommended to skip baserecalibration when using DragMap\nhttps://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode") -} - -if (params.step == 'mapping' && params.aligner.contains("sentieon-bwamem") && params.umi_read_structure) { - error("Sentieon BWA is currently not compatible with FGBio UMI handeling. Please choose a different aligner.") -} - -if (params.tools && params.tools.split(',').contains("sentieon_haplotyper") && params.joint_germline && (!params.sentieon_haplotyper_emit_mode || !(params.sentieon_haplotyper_emit_mode.contains('gvcf')))) { - error("When setting the option `--joint_germline` and including `sentieon_haplotyper` among the requested tools, please set `--sentieon_haplotyper_emit_mode` to include `gvcf`.") -} - -// Fails or warns when missing files or params for ascat -if (params.tools && params.tools.split(',').contains('ascat')) { - if (!params.ascat_alleles) { - error("No allele files were provided for running ASCAT. Please provide a zip folder with allele files.") - } - if (!params.ascat_loci) { - error("No loci files were provided for running ASCAT. Please provide a zip folder with loci files.") - } - if (!params.ascat_loci_gc && !params.ascat_loci_rt) { - log.warn("No LogRCorrection performed in ASCAT. For LogRCorrection to run, please provide either loci gc files or both loci gc files and loci rt files.") - } - if (params.wes) { - log.warn("Default reference files not suited for running ASCAT on WES data. 
It's recommended to use the reference files provided here: https://github.com/Wedge-lab/battenberg#required-reference-files") - } -} - -// Warns when missing files or params for mutect2 -if (params.tools && params.tools.split(',').contains('mutect2')) { - if (!params.pon) { - log.warn("No Panel-of-normal was specified for Mutect2.\nIt is highly recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2\nFor more information on how to create one: https://gatk.broadinstitute.org/hc/en-us/articles/5358921041947-CreateSomaticPanelOfNormals-BETA-") - } - if (!params.germline_resource) { - log.warn("If Mutect2 is specified without a germline resource, no filtering will be done.\nIt is recommended to use one: https://gatk.broadinstitute.org/hc/en-us/articles/5358911630107-Mutect2") - } - if (params.pon && params.pon.contains("/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz")) { - log.warn("The default Panel-of-Normals provided by GATK is used for Mutect2.\nIt is highly recommended to generate one from normal samples that are technical similar to the tumor ones.\nFor more information: https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-") - } -} - -// Fails when missing resources for baserecalibrator -// Warns when missing resources for haplotypecaller -if (!params.dbsnp && !params.known_indels) { - if (params.step in ['mapping', 'markduplicates', 'prepare_recalibration', 'recalibrate'] && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('baserecalibrator')))) { - error("Base quality score recalibration requires at least one resource file. 
Please provide at least one of `--dbsnp` or `--known_indels`\nYou can skip this step in the workflow by adding `--skip_tools baserecalibrator` to the command.") - } - if (params.tools && (params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper'))) { - log.warn "If GATK's Haplotypecaller or Sentieon's Haplotyper is specified, without `--dbsnp` or `--known_indels no filtering will be done. For filtering, please provide at least one of `--dbsnp` or `--known_indels`.\nFor more information see FilterVariantTranches (single-sample, default): https://gatk.broadinstitute.org/hc/en-us/articles/5358928898971-FilterVariantTranches\nFor more information see VariantRecalibration (--joint_germline): https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator\nFor more information on GATK Best practice germline variant calling: https://gatk.broadinstitute.org/hc/en-us/articles/360035535932-Germline-short-variant-discovery-SNPs-Indels-" - } -} -if (params.joint_germline && (!params.tools || !(params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('sentieon_haplotyper')))) { - error("The GATK's Haplotypecaller or Sentieon's Haplotyper should be specified as one of the tools when doing joint germline variant calling.) ") -} - -if (params.joint_germline && (!params.dbsnp || !params.known_indels || !params.known_snps || params.no_intervals)) { - log.warn "If GATK's Haplotypecaller or Sentieon's Haplotyper is specified, without `--dbsnp`, `--known_snps`, `--known_indels` or the associated resource labels (ie `known_snps_vqsr`), no variant recalibration will be done. For recalibration you must provide all of these resources.\nFor more information see VariantRecalibration: https://gatk.broadinstitute.org/hc/en-us/articles/5358906115227-VariantRecalibrator \nJoint germline variant calling also requires intervals in order to genotype the samples. 
As a result, if `--no_intervals` is set to `true` the joint germline variant calling will not be performed." -} - -// Fails when --joint_mutect2 is used without enabling mutect2 -if (params.joint_mutect2 && (!params.tools || !params.tools.split(',').contains('mutect2'))) { - error("The mutect2 should be specified as one of the tools when doing joint somatic variant calling with Mutect2. (The mutect2 could be specified by adding `--tools mutect2` to the nextflow command.)") -} - -// Fails when missing tools for variant_calling or annotate -if ((params.step == 'variant_calling' || params.step == 'annotate') && !params.tools) { - error("Please specify at least one tool when using `--step ${params.step}`.\nhttps://nf-co.re/sarek/parameters#tools") -} - -// Fails when missing sex information for CNV tools -if (params.tools && (params.tools.split(',').contains('ascat') || params.tools.split(',').contains('controlfreec'))) { - input_sample.map{ - if (it[0].sex == 'NA' ) { - error("Please specify sex information for each sample in your samplesheet when using '--tools' with 'ascat' or 'controlfreec'.\nhttps://nf-co.re/sarek/usage#input-samplesheet-configurations") - } - } -} - -if ((params.download_cache) && (params.snpeff_cache || params.vep_cache)) { - error("Please specify either `--download_cache` or `--snpeff_cache`, `--vep_cache`.\nhttps://nf-co.re/sarek/usage#how-to-customise-snpeff-and-vep-annotation") -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS @@ -297,74 +84,35 @@ if ((params.download_cache) && (params.snpeff_cache || params.vep_cache)) { */ // Initialize file channels based on params, defined in the params.genomes[params.genome] scope -ascat_alleles = params.ascat_alleles ? Channel.fromPath(params.ascat_alleles).collect() : Channel.empty() -ascat_loci = params.ascat_loci ? Channel.fromPath(params.ascat_loci).collect() : Channel.empty() -ascat_loci_gc = params.ascat_loci_gc ? 
Channel.fromPath(params.ascat_loci_gc).collect() : Channel.value([]) -ascat_loci_rt = params.ascat_loci_rt ? Channel.fromPath(params.ascat_loci_rt).collect() : Channel.value([]) -cf_chrom_len = params.cf_chrom_len ? Channel.fromPath(params.cf_chrom_len).collect() : [] -chr_dir = params.chr_dir ? Channel.fromPath(params.chr_dir).collect() : Channel.value([]) -dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([]) -fasta = params.fasta ? Channel.fromPath(params.fasta).first() : Channel.empty() -fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.empty() -germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource).collect() : Channel.value([]) // Mutect2 does not require a germline resource, so set to optional input -known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([]) -known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([]) -mappability = params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([]) -pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) // PON is optional for Mutect2 (but highly recommended) +ascat_alleles = params.ascat_alleles ? Channel.fromPath(params.ascat_alleles).collect() : Channel.empty() +ascat_loci = params.ascat_loci ? Channel.fromPath(params.ascat_loci).collect() : Channel.empty() +ascat_loci_gc = params.ascat_loci_gc ? Channel.fromPath(params.ascat_loci_gc).collect() : Channel.value([]) +ascat_loci_rt = params.ascat_loci_rt ? Channel.fromPath(params.ascat_loci_rt).collect() : Channel.value([]) +cf_chrom_len = params.cf_chrom_len ? Channel.fromPath(params.cf_chrom_len).collect() : [] +chr_dir = params.chr_dir ? Channel.fromPath(params.chr_dir).collect() : Channel.value([]) +dbsnp = params.dbsnp ? Channel.fromPath(params.dbsnp).collect() : Channel.value([]) +fasta = params.fasta ? 
Channel.fromPath(params.fasta).first() : Channel.empty() +fasta_fai = params.fasta_fai ? Channel.fromPath(params.fasta_fai).collect() : Channel.empty() +germline_resource = params.germline_resource ? Channel.fromPath(params.germline_resource).collect() : Channel.value([]) // Mutect2 does not require a germline resource, so set to optional input +known_indels = params.known_indels ? Channel.fromPath(params.known_indels).collect() : Channel.value([]) +known_snps = params.known_snps ? Channel.fromPath(params.known_snps).collect() : Channel.value([]) +mappability = params.mappability ? Channel.fromPath(params.mappability).collect() : Channel.value([]) +pon = params.pon ? Channel.fromPath(params.pon).collect() : Channel.value([]) // PON is optional for Mutect2 (but highly recommended) +sentieon_dnascope_model = params.sentieon_dnascope_model ? Channel.fromPath(params.sentieon_dnascope_model).collect() : Channel.value([]) // Initialize value channels based on params, defined in the params.genomes[params.genome] scope ascat_genome = params.ascat_genome ?: Channel.empty() dbsnp_vqsr = params.dbsnp_vqsr ? Channel.value(params.dbsnp_vqsr) : Channel.empty() known_indels_vqsr = params.known_indels_vqsr ? Channel.value(params.known_indels_vqsr) : Channel.empty() known_snps_vqsr = params.known_snps_vqsr ? Channel.value(params.known_snps_vqsr) : Channel.empty() +ngscheckmate_bed = params.ngscheckmate_bed ? 
Channel.value(params.ngscheckmate_bed) : Channel.empty() snpeff_db = params.snpeff_db ?: Channel.empty() vep_cache_version = params.vep_cache_version ?: Channel.empty() vep_genome = params.vep_genome ?: Channel.empty() vep_species = params.vep_species ?: Channel.empty() - -// Initialize files channels based on params, not defined within the params.genomes[params.genome] scope -if (params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))) { - def snpeff_annotation_cache_key = '' - if (params.snpeff_cache == "s3://annotation-cache/snpeff_cache") { - snpeff_annotation_cache_key = "${params.snpeff_genome}.${params.snpeff_db}/" - } else { - snpeff_annotation_cache_key = params.use_annotation_cache_keys ? "${params.snpeff_genome}.${params.snpeff_db}/" : "" - } - def snpeff_cache_dir = "${snpeff_annotation_cache_key}${params.snpeff_genome}.${params.snpeff_db}" - def snpeff_cache_path_full = file("$params.snpeff_cache/$snpeff_cache_dir", type: 'dir') - if ( !snpeff_cache_path_full.exists() || !snpeff_cache_path_full.isDirectory() ) { - if (params.snpeff_cache == "s3://annotation-cache/snpeff_cache") { - error("This path is not available within annotation-cache. Please check https://annotation-cache.github.io/ to create a request for it.") - } else { - error("Files within --snpeff_cache invalid. 
Make sure there is a directory named ${snpeff_cache_dir} in ${params.snpeff_cache}.\nhttps://nf-co.re/sarek/usage#how-to-customise-snpeff-and-vep-annotation") - } - } - snpeff_cache = Channel.fromPath(file("${params.snpeff_cache}/${snpeff_annotation_cache_key}"), checkIfExists: true).collect() - .map{ cache -> [ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], cache ] } - } else if (params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge')) && !params.download_cache) { - error("No cache for SnpEff or automatic download of said cache has been detected.\nPlease refer to https://nf-co.re/sarek/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.") - } else snpeff_cache = [] - -if (params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))) { - def vep_annotation_cache_key = '' - if (params.vep_cache == "s3://annotation-cache/vep_cache") { - vep_annotation_cache_key = "${params.vep_cache_version}_${params.vep_genome}/" - } else { - vep_annotation_cache_key = params.use_annotation_cache_keys ? "${params.vep_cache_version}_${params.vep_genome}/" : "" - } - def vep_cache_dir = "${vep_annotation_cache_key}${params.vep_species}/${params.vep_cache_version}_${params.vep_genome}" - def vep_cache_path_full = file("$params.vep_cache/$vep_cache_dir", type: 'dir') - if ( !vep_cache_path_full.exists() || !vep_cache_path_full.isDirectory() ) { - if (params.vep_cache == "s3://annotation-cache/vep_cache") { - error("This path is not available within annotation-cache. Please check https://annotation-cache.github.io/ to create a request for it.") - } else { - error("Files within --vep_cache invalid. 
Make sure there is a directory named ${vep_cache_dir} in ${params.vep_cache}.\nhttps://nf-co.re/sarek/usage#how-to-customise-snpeff-and-vep-annotation") - } - } - vep_cache = Channel.fromPath(file("${params.vep_cache}/${vep_annotation_cache_key}"), checkIfExists: true).collect() - } else if (params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge')) && !params.download_cache) { - error("No cache for VEP or automatic download of said cache has been detected.\nPlease refer to https://nf-co.re/sarek/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.") - } else vep_cache = [] +bcftools_annotations = params.bcftools_annotations ?: Channel.empty() +bcftools_annotations_index = params.bcftools_annotations_index ?: Channel.empty() +bcftools_header_lines = params.bcftools_header_lines ?: Channel.empty() vep_extra_files = [] @@ -387,14 +135,18 @@ if (params.spliceai_snv && params.spliceai_snv_tbi && params.spliceai_indel && p */ // Create samplesheets to restart from different steps +include { SAMPLESHEET_TO_CHANNEL } from '../subworkflows/local/samplesheet_to_channel/main' include { CHANNEL_ALIGN_CREATE_CSV } from '../subworkflows/local/channel_align_create_csv/main' include { CHANNEL_MARKDUPLICATES_CREATE_CSV } from '../subworkflows/local/channel_markduplicates_create_csv/main' include { CHANNEL_BASERECALIBRATOR_CREATE_CSV } from '../subworkflows/local/channel_baserecalibrator_create_csv/main' include { CHANNEL_APPLYBQSR_CREATE_CSV } from '../subworkflows/local/channel_applybqsr_create_csv/main' include { CHANNEL_VARIANT_CALLING_CREATE_CSV } from '../subworkflows/local/channel_variant_calling_create_csv/main' -// Download annotation cache if needed -include { PREPARE_CACHE } from '../subworkflows/local/prepare_cache/main' +// Download cache for SnpEff/VEP if needed +include { DOWNLOAD_CACHE_SNPEFF_VEP } from '../subworkflows/local/download_cache_snpeff_vep/main' + +// Initialize annotation cache +include 
{ INITIALIZE_ANNOTATION_CACHE } from '../subworkflows/local/initialize_annotation_cache/main' // Build indices if needed include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome/main' @@ -464,6 +216,9 @@ include { POST_VARIANTCALLING } from '../subworkflows/lo // QC on VCF files include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main' +// Sample QC on CRAM files +include { CRAM_SAMPLEQC } from '../subworkflows/local/cram_sampleqc/main' + // Annotation include { VCF_ANNOTATE_ALL } from '../subworkflows/local/vcf_annotate_all/main' @@ -481,6 +236,17 @@ include { MULTIQC } from '../modules/nf-core workflow SAREK { + // Parse samplesheet + // Set input, can either be from --input or from automatic retrieval in WorkflowSarek.groovy + if (params.input) { + ch_from_samplesheet = params.build_only_index ? Channel.empty() : Channel.fromSamplesheet("input") + } else { + ch_from_samplesheet = params.build_only_index ? Channel.empty() : Channel.fromSamplesheet("input_restart") + } + SAMPLESHEET_TO_CHANNEL(ch_from_samplesheet) + + input_sample = SAMPLESHEET_TO_CHANNEL.out.input_sample + // MULTIQC ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() @@ -492,17 +258,32 @@ workflow SAREK { // To gather used softwares versions for MultiQC versions = Channel.empty() - // Download cache if needed - // Assuming that if the cache is provided, the user has already downloaded it - ensemblvep_info = params.vep_cache ? [] : Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ]) - snpeff_info = params.snpeff_cache ? 
[] : Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ]) - + // Download cache if (params.download_cache) { - PREPARE_CACHE(ensemblvep_info, snpeff_info) - snpeff_cache = PREPARE_CACHE.out.snpeff_cache - vep_cache = PREPARE_CACHE.out.ensemblvep_cache.map{ meta, cache -> [ cache ] } - - versions = versions.mix(PREPARE_CACHE.out.versions) + // Assuming that even if the cache is provided, if the user specify download_cache, sarek will download the cache + ensemblvep_info = Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ]) + snpeff_info = Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ]) + DOWNLOAD_CACHE_SNPEFF_VEP(ensemblvep_info, snpeff_info) + snpeff_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.snpeff_cache + vep_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.ensemblvep_cache.map{ meta, cache -> [ cache ] } + + versions = versions.mix(DOWNLOAD_CACHE_SNPEFF_VEP.out.versions) + } else { + // Looks for cache information either locally or on the cloud + INITIALIZE_ANNOTATION_CACHE( + (params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))), + params.snpeff_cache, + params.snpeff_genome, + params.snpeff_db, + (params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))), + params.vep_cache, + params.vep_species, + params.vep_cache_version, + params.vep_genome, + "Please refer to https://nf-co.re/sarek/docs/usage/#how-to-customise-snpeff-and-vep-annotation for more information.") + + snpeff_cache = INITIALIZE_ANNOTATION_CACHE.out.snpeff_cache + vep_cache = INITIALIZE_ANNOTATION_CACHE.out.ensemblvep_cache } // Build indices if needed @@ -1078,6 +859,8 @@ workflow SAREK { if (params.step == 'annotate') cram_variant_calling = Channel.empty() + 
CRAM_SAMPLEQC(cram_variant_calling, ngscheckmate_bed, fasta) + // // Logic to separate germline samples, tumor samples with no matched normal, and combine tumor-normal pairs // @@ -1164,7 +947,10 @@ workflow SAREK { known_snps_vqsr, params.joint_germline, params.skip_tools && params.skip_tools.split(',').contains('haplotypecaller_filter'), // true if filtering should be skipped - params.sentieon_haplotyper_emit_mode) + params.sentieon_haplotyper_emit_mode, + params.sentieon_dnascope_emit_mode, + params.sentieon_dnascope_pcr_indel_model, + sentieon_dnascope_model) // TUMOR ONLY VARIANT CALLING BAM_VARIANT_CALLING_TUMOR_ONLY_ALL( @@ -1232,6 +1018,7 @@ workflow SAREK { vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_freebayes) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_haplotypecaller) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_manta) + vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_sentieon_dnascope) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_sentieon_haplotyper) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_strelka) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.vcf_tiddit) @@ -1259,7 +1046,7 @@ workflow SAREK { // ANNOTATE if (params.step == 'annotate') vcf_to_annotate = input_sample - if (params.tools.split(',').contains('merge') || params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('vep')) { + if (params.tools.split(',').contains('merge') || params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('vep')|| params.tools.split(',').contains('bcfann')) { vep_fasta = (params.vep_include_fasta) ? 
fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] } : [[id: 'null'], []] @@ -1273,7 +1060,10 @@ workflow SAREK { vep_species, vep_cache_version, vep_cache, - vep_extra_files) + vep_extra_files, + bcftools_annotations, + bcftools_annotations_index, + bcftools_header_lines) // Gather used softwares versions versions = versions.mix(VCF_ANNOTATE_ALL.out.versions) @@ -1322,38 +1112,6 @@ workflow.onComplete { if (params.hook_url) NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - FUNCTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -// Parse first line of a FASTQ file, return the flowcell id and lane number. -def flowcellLaneFromFastq(path) { - // expected format: - // xx:yy:FLOWCELLID:LANE:... (seven fields) - // or - // FLOWCELLID:LANE:xx:... (five fields) - def line - path.withInputStream { - InputStream gzipStream = new java.util.zip.GZIPInputStream(it) - Reader decoder = new InputStreamReader(gzipStream, 'ASCII') - BufferedReader buffered = new BufferedReader(decoder) - line = buffered.readLine() - } - assert line.startsWith('@') - line = line.substring(1) - def fields = line.split(':') - String fcid - - if (fields.size() >= 7) { - // CASAVA 1.8+ format, from https://support.illumina.com/help/BaseSpace_OLH_009008/Content/Source/Informatics/BS/FileFormat_FASTQ-files_swBS.htm - // "@::::::: :::" - fcid = fields[2] - } else if (fields.size() == 5) { - fcid = fields[0] - } - return fcid -} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~