From cd0a34603a99bba0583591d958c2b83a7783c546 Mon Sep 17 00:00:00 2001
From: Anabella Trigila <18577080+atrigila@users.noreply.github.com>
Date: Tue, 28 Mar 2023 16:31:49 -0300
Subject: [PATCH] new module: indeppairwise (#3150)

* new module: indeppairwise

* fix formatting
---
 modules/nf-core/plink/indeppairwise/main.nf  | 43 ++++++++++++
 modules/nf-core/plink/indeppairwise/meta.yml | 68 +++++++++++++++++++
 tests/config/pytest_modules.yml              |  4 ++
 .../nf-core/plink/indeppairwise/main.nf      | 28 ++++++++
 .../plink/indeppairwise/nextflow.config      |  8 +++
 .../nf-core/plink/indeppairwise/test.yml     | 17 +++++
 6 files changed, 168 insertions(+)
 create mode 100644 modules/nf-core/plink/indeppairwise/main.nf
 create mode 100644 modules/nf-core/plink/indeppairwise/meta.yml
 create mode 100644 tests/modules/nf-core/plink/indeppairwise/main.nf
 create mode 100644 tests/modules/nf-core/plink/indeppairwise/nextflow.config
 create mode 100644 tests/modules/nf-core/plink/indeppairwise/test.yml

diff --git a/modules/nf-core/plink/indeppairwise/main.nf b/modules/nf-core/plink/indeppairwise/main.nf
new file mode 100644
index 00000000000..fa06eb99cdb
--- /dev/null
+++ b/modules/nf-core/plink/indeppairwise/main.nf
@@ -0,0 +1,43 @@
+// Runs `plink --indep-pairwise` on a bed/bim/fam fileset; emits *.prune.in (required) and *.prune.out (optional).
+process PLINK_INDEPPAIRWISE {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "bioconda::plink=1.90b6.21"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/plink:1.90b6.21--h779adbc_1':
+        'quay.io/biocontainers/plink:1.90b6.21--h779adbc_1' }"
+
+    input:
+    tuple val(meta), path(bed), path(bim), path(fam)
+    val(window_size)   // 1st value passed to --indep-pairwise
+    val(variant_count) // 2nd value passed to --indep-pairwise
+    val(r2_threshold)  // 3rd value passed to --indep-pairwise
+
+    output:
+    tuple val(meta), path("*.prune.in")  , emit: prunein
+    tuple val(meta), path("*.prune.out") , optional:true, emit: pruneout
+    path "versions.yml"                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // extra plink options from task.ext.args (empty by default)
+    def prefix = task.ext.prefix ?: "${meta.id}" // basename handed to --out; falls back to meta.id
+
+    """
+    plink \\
+        --bed ${bed} \\
+        --bim ${bim} \\
+        --fam ${fam} \\
+        --threads $task.cpus \\
+        --indep-pairwise ${window_size} ${variant_count} ${r2_threshold} \\
+        $args \\
+        --out $prefix
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        plink: \$(echo \$(plink --version) | sed 's/^PLINK v//;s/64.*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/plink/indeppairwise/meta.yml b/modules/nf-core/plink/indeppairwise/meta.yml
new file mode 100644
index 00000000000..f84748b620c
--- /dev/null
+++ b/modules/nf-core/plink/indeppairwise/meta.yml
@@ -0,0 +1,68 @@
+name: plink_indeppairwise
+description: Produce a pruned subset of markers that are in approximate linkage equilibrium with each other. Pairs of variants in the current window with squared correlation greater than the threshold are noted and variants are greedily pruned from the window until no such pairs remain.
+keywords:
+  - plink
+  - indep pairwise
+  - variant pruning
+  - bim
+  - fam
+tools:
+  - "plink":
+      description: "Whole genome association analysis toolset, designed to perform a range of basic, large-scale analyses in a computationally efficient manner."
+      homepage: "https://www.cog-genomics.org/plink"
+      documentation: "https://www.cog-genomics.org/plink/1.9/ld#indep"
+      tool_dev_url: "https://www.cog-genomics.org/plink/1.9/dev"
+      licence: ["GPL"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bed:
+      type: file
+      description: PLINK binary biallelic genotype table file
+      pattern: "*.{bed}"
+  - bim:
+      type: file
+      description: PLINK extended MAP file
+      pattern: "*.{bim}"
+  - fam:
+      type: file
+      description: PLINK sample information file
+      pattern: "*.{fam}"
+  - window_size:
+      type: number
+      description: Window size in variant count or kilobase (if the 'kb' modifier is present) units.
+      pattern: "*.{}"
+  - variant_count:
+      type: number
+      description: Variant count to shift the window at the end of each step.
+      pattern: "*.{}"
+  - r2_threshold:
+      type: number
+      description: Pairwise r2 threshold. At each step, pairs of variants in the current window with squared correlation greater than the threshold are noted, and variants are greedily pruned from the window until no such pairs remain
+      pattern: "*.{}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - prunein:
+      type: file
+      description: File with IDs of pruned subset of markers that are in approximate linkage equilibrium with each other
+      pattern: "*.{prune.in}"
+  - pruneout:
+      type: file
+      description: File with IDs of excluded variants
+      pattern: "*.{prune.out}"
+
+authors:
+  - "@atrigila"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 1dc45bcf1bf..7a3b9eb176a 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -2531,6 +2531,10 @@ plink/indep:
   - modules/nf-core/plink/indep/**
   - tests/modules/nf-core/plink/indep/**
 
+plink/indeppairwise:
+  - modules/nf-core/plink/indeppairwise/**
+  - tests/modules/nf-core/plink/indeppairwise/**
+
 plink/recode:
   - modules/nf-core/plink/recode/**
   - tests/modules/nf-core/plink/recode/**
diff --git a/tests/modules/nf-core/plink/indeppairwise/main.nf b/tests/modules/nf-core/plink/indeppairwise/main.nf
new file mode 100644
index 00000000000..52f3fb395ce
--- /dev/null
+++ b/tests/modules/nf-core/plink/indeppairwise/main.nf
@@ -0,0 +1,28 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { PLINK_INDEPPAIRWISE } from '../../../../../modules/nf-core/plink/indeppairwise/main.nf'
+include { PLINK_VCF } from '../../../../../modules/nf-core/plink/vcf/main.nf'
+
+workflow test_plink_indeppairwise {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file("https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/genome/vcf/ped/justhusky_minimal.vcf.gz", checkIfExists: true)
+    ]
+
+    PLINK_VCF ( input )
+
+    bed_ch = PLINK_VCF.out.bed
+    bim_ch = PLINK_VCF.out.bim
+    fam_ch = PLINK_VCF.out.fam
+
+    ch_bed_bim_fam = bed_ch.join(bim_ch).join(fam_ch)
+
+    ch_window_size = Channel.value(50)
+    ch_variant_count = Channel.value(5)
+    ch_r2_threshold = Channel.value(0.2)
+
+    PLINK_INDEPPAIRWISE ( ch_bed_bim_fam, ch_window_size, ch_variant_count, ch_r2_threshold )
+}
diff --git a/tests/modules/nf-core/plink/indeppairwise/nextflow.config b/tests/modules/nf-core/plink/indeppairwise/nextflow.config
new file mode 100644
index 00000000000..973d82507e7
--- /dev/null
+++ b/tests/modules/nf-core/plink/indeppairwise/nextflow.config
@@ -0,0 +1,8 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName: PLINK_VCF {
+        ext.args = ' --make-bed --biallelic-only strict --vcf-half-call missing --allow-extra-chr'
+    }
+}
diff --git a/tests/modules/nf-core/plink/indeppairwise/test.yml b/tests/modules/nf-core/plink/indeppairwise/test.yml
new file mode 100644
index 00000000000..20c6c0a5c78
--- /dev/null
+++ b/tests/modules/nf-core/plink/indeppairwise/test.yml
@@ -0,0 +1,17 @@
+- name: plink indeppairwise test_plink_indeppairwise
+  command: nextflow run ./tests/modules/nf-core/plink/indeppairwise -entry test_plink_indeppairwise -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/plink/indeppairwise/nextflow.config
+  tags:
+    - plink
+    - plink/indeppairwise
+  files:
+    - path: output/plink/test.bed
+      md5sum: 0cbb7bb2e4c9e23c75da1f338f9268bf
+    - path: output/plink/test.bim
+      md5sum: fe302f9edc5a217e55f1b6b17f187d2e
+    - path: output/plink/test.fam
+      md5sum: 619946963dc461d0e2ae4359da08eeb2
+    - path: output/plink/test.prune.in
+      md5sum: 50c7ab703f7b5c01d7c7ddc7517aea37
+    - path: output/plink/test.prune.out
+      md5sum: 6104a59d98bda81014870ecd059573fb
+    - path: output/plink/versions.yml