From 30c6bf69efaaae06fc3675a0d650c4e6743eef11 Mon Sep 17 00:00:00 2001 From: DavideBag Bagordo Date: Fri, 22 Mar 2024 15:48:20 +0000 Subject: [PATCH 1/3] adding epistasis module --- .../nf-core/plink/epistasis/environment.yml | 9 + modules/nf-core/plink/epistasis/main.nf | 91 +++++++++ modules/nf-core/plink/epistasis/meta.yml | 97 +++++++++ .../plink/epistasis/tests/main.nf.test | 190 ++++++++++++++++++ .../plink/epistasis/tests/main.nf.test.snap | 170 ++++++++++++++++ .../plink/epistasis/tests/nextflow.config | 7 + .../plink/epistasis/tests/pheno_name.config | 7 + .../nf-core/plink/epistasis/tests/tags.yml | 2 + 8 files changed, 573 insertions(+) create mode 100644 modules/nf-core/plink/epistasis/environment.yml create mode 100644 modules/nf-core/plink/epistasis/main.nf create mode 100644 modules/nf-core/plink/epistasis/meta.yml create mode 100644 modules/nf-core/plink/epistasis/tests/main.nf.test create mode 100644 modules/nf-core/plink/epistasis/tests/main.nf.test.snap create mode 100644 modules/nf-core/plink/epistasis/tests/nextflow.config create mode 100644 modules/nf-core/plink/epistasis/tests/pheno_name.config create mode 100644 modules/nf-core/plink/epistasis/tests/tags.yml diff --git a/modules/nf-core/plink/epistasis/environment.yml b/modules/nf-core/plink/epistasis/environment.yml new file mode 100644 index 00000000000..1e0f3792dee --- /dev/null +++ b/modules/nf-core/plink/epistasis/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "plink_epistasis" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::plink=1.90b6.21" diff --git a/modules/nf-core/plink/epistasis/main.nf b/modules/nf-core/plink/epistasis/main.nf new file mode 100644 index 00000000000..acfe4e0dd09 --- /dev/null +++ b/modules/nf-core/plink/epistasis/main.nf @@ -0,0 +1,91 @@ +process PLINK_EPISTASIS { + tag "$meta.id" + label 'process_low' + + + 
conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/plink:1.90b6.21--h031d066_5': + 'biocontainers/plink:1.90b6.21--h031d066_5' }" + + input: + tuple val(meta), path(bed), path(bim), path(fam) + tuple val(meta2), path(vcf) + tuple val(meta3), path(bcf) + tuple val(meta4), path(phe) + + output: + tuple val(meta), path("*.epi.cc") , emit: epi + tuple val(meta), path("*.epi.cc.summary"), emit: episummary, optional:true + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*.nosex") , emit: nosex, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = "" + // build the plink input flags from whichever input was supplied; + // precedence is bed/bim/fam, then VCF, then BCF (VCF and BCF also pass --pheno) + def input_command = "" + def outmeta = "" + if (bed){ + input_command = "--bed ${bed} --bim ${bim} --fam ${fam}" + prefix = task.ext.prefix ?: "${meta.id}" + } else if (vcf) { + input_command = "--vcf ${vcf} --pheno ${phe}" + prefix = task.ext.prefix ?: "${meta2.id}" + meta = meta2 // presumably so the emitted meta is the VCF one; confirm + } else if (bcf) { + input_command = "--bcf ${bcf} --pheno ${phe}" + prefix = task.ext.prefix ?: "${meta3.id}" + meta = meta3 // presumably so the emitted meta is the BCF one; confirm + } else { + error 'ERROR: the input should be either plink native binary format, VCF or BCF' + } + """ + plink \\ + $input_command \\ + --threads $task.cpus \\ + --epistasis \\ + $args \\ + --out $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + plink: \$(echo \$(plink --version) | sed 's/^PLINK v//;s/64.*//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = "" + // build the plink input flags from whichever input was supplied; + // precedence is bed/bim/fam, then VCF, then BCF + def input_command = "" + def outmeta = "" + if (bed){ + input_command = "--bed ${bed} --bim ${bim} --fam ${fam}" + prefix = task.ext.prefix ?: "${meta.id}" + } else if (vcf) { + 
input_command = "--vcf ${vcf} --pheno ${phe}" + prefix = task.ext.prefix ?: "${meta2.id}" + meta = meta2 // presumably so the emitted meta is the VCF one; confirm + } else if (bcf) { + input_command = "--bcf ${bcf} --pheno ${phe}" + prefix = task.ext.prefix ?: "${meta3.id}" + meta = meta3 // presumably so the emitted meta is the BCF one; confirm + } else { + log.error 'ERROR: the input should be either plink native binary format, VCF or BCF' + } + """ + touch ${prefix}.assoc + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + plink: \$(echo \$(plink --version) | sed 's/^PLINK v//;s/64.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/plink/epistasis/meta.yml b/modules/nf-core/plink/epistasis/meta.yml new file mode 100644 index 00000000000..c88dcdbec0e --- /dev/null +++ b/modules/nf-core/plink/epistasis/meta.yml @@ -0,0 +1,97 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "plink_epistasis" +description: Epistasis in PLINK, analyzing how the effects of one gene depend on the presence of others. +keywords: + - interactions + - variants + - regression +tools: + - "plink": + description: "Whole genome association analysis toolset, designed to perform a range of basic, large-scale analyses in a computationally efficient manner." + homepage: "https://www.cog-genomics.org/plink" + documentation: "https://www.cog-genomics.org/plink/1.9/epistasis" + tool_dev_url: "https://www.cog-genomics.org/plink/1.9/dev" + licence: ["GPL"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + meta is associated to the PLINK native file input + - meta2: + type: map + description: | + Groovy Map containing sample information, + e.g. [ id:'test', single_end:false ] + meta2 is associated to VCF file input + - meta3: + type: map + description: | + Groovy Map containing sample information, + e.g. 
[ id:'test', single_end:false ] + meta3 is associated to BCF file input + - meta4: + type: map + description: | + Groovy Map containing sample information, + e.g. [ id:'test', single_end:false ] + meta4 is associated to phenotype file input + - bed: + type: file + description: PLINK binary biallelic genotype table file + pattern: "*.{bed}" + - bim: + type: file + description: PLINK extended MAP file + pattern: "*.{bim}" + - fam: + type: file + description: PLINK sample information file + pattern: "*.{fam}" + - bcf: + type: file + description: PLINK variant information + sample ID + genotype call binary file + pattern: "*.{bcf,bcf.gz}" + - vcf: + type: file + description: Variant calling file (vcf) + pattern: "*.{vcf,vcf.gz}" + - phe: + type: file + description: PLINK file containing phenotype information. This phenotype information can be read from the third column with the --pheno option or from a specific column with the --pheno-name option. + pattern: "*.{phe}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - epi: + type: file + description: PLINK GWAS association file + pattern: "*.{assoc}" + - episummary: + type: file + description: PLINK GWAS association file + pattern: "*.{assoc}" + - log: + type: file + description: PLINK GWAS association file + pattern: "*.{assoc}" + - nosex: + type: file + description: PLINK GWAS association file + pattern: "*.{assoc}" + +authors: + - "@davidebag" +maintainers: + - "@davidebag" diff --git a/modules/nf-core/plink/epistasis/tests/main.nf.test b/modules/nf-core/plink/epistasis/tests/main.nf.test new file mode 100644 index 00000000000..b178db561ea --- /dev/null +++ b/modules/nf-core/plink/epistasis/tests/main.nf.test @@ -0,0 +1,190 @@ +nextflow_process { + + name "Test Process PLINK_EPISTASIS" + script "../main.nf" + process "PLINK_EPISTASIS" + tag "modules" + tag "modules_nfcore" + tag "plink" + tag "plink/epistasis" + + test("plink - VCF") { // VCF input plus phenotype file; the bed/bim/fam and BCF slots are empty + + config "./nextflow.config" + + when { + params { + outdir = "test" + } + process { + """ + input[0] = [ [id:"null"], [], [], []] + input[1] = [ + [id:"test"], + file(params.test_data['homo_sapiens']['popgen']['plink_case_control_vcf_gz'], checkIfExists: true) + ] + input[2] = [ [id:"null"], []] + input[3] = [ + [id:"test"], + file(params.test_data['homo_sapiens']['popgen']['plink_simulated_phe'], checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match("version") }, // NOTE(review): the process emits 'versions' (plural); 'version' looks like a typo and the recorded "version" snapshot content is null - confirm, fix and regenerate the snapshot + { assert snapshot( + process.out.epi, + process.out.episummary, + process.out.nosex + ).match() }, + { assert process.out.log.get(0).get(1) ==~ ".*/*.log" } // log is checked by file name only; it is not part of the snapshot + ) + } + } + + test("plink - VCF with phenotype name") { // same VCF input, phenotype file selected via pheno_name.config + + config "./pheno_name.config" + + when { + params { + outdir = "test" + } + process { + """ + input[0] = [ [id:"null"], [], [], []] + input[1] = [ + [id:"test"], + 
file(params.test_data['homo_sapiens']['popgen']['plink_case_control_vcf_gz'], checkIfExists: true) + ] + input[2] = [ [id:"null"], []] + input[3] = [ + [id:"test"], + file(params.test_data['homo_sapiens']['popgen']['plink_simulated_phenoname'], checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.epi, + process.out.episummary, + process.out.nosex + ).match() }, + { assert process.out.log.get(0).get(1) ==~ ".*/*.log" } // log is checked by file name only; it is not part of the snapshot + ) + } + } + + test("plink - binary") { // bed/bim/fam input only; the VCF, BCF and phenotype slots are empty + + config "./nextflow.config" + + when { + params { + outdir = "test" + } + process { + """ + input[0] = [ + [id:"test"], + file(params.test_data['homo_sapiens']['popgen']['plink_case_control_bed'], checkIfExists: true), + file(params.test_data['homo_sapiens']['popgen']['plink_case_control_bim'], checkIfExists: true), + file(params.test_data['homo_sapiens']['popgen']['plink_case_control_fam'], checkIfExists: true) + ] + input[1] = [ [id:"null"], []] + input[2] = [ [id:"null"], []] + input[3] = [ [id:"null"], []] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.epi, + process.out.episummary, + process.out.nosex + ).match() }, + { assert process.out.log.get(0).get(1) ==~ ".*/*.log" } // log is checked by file name only; it is not part of the snapshot + ) + } + } + + test("plink - BCF") { // BCF input plus phenotype file; the bed/bim/fam and VCF slots are empty + + config "./nextflow.config" + + when { + params { + outdir = "test" + } + process { + """ + input[0] = [ [id:"null"], [], [], []] + input[1] = [ [id:"null"], []] + input[2] = [ + [id:"test"], + file(params.test_data['homo_sapiens']['popgen']['plink_case_control_bcf_gz'], checkIfExists: true) + ] + input[3] = [ + [id:"test"], + file(params.test_data['homo_sapiens']['popgen']['plink_simulated_phe'], checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.epi, + process.out.episummary, + process.out.nosex + ).match() }, + { assert process.out.log.get(0).get(1) ==~ ".*/*.log" } // log is checked by file name only; it is not part of the snapshot + ) + } + } + + test("plink 
- BCF with phenotype name") { + + config "./pheno_name.config" // selects the phenotype column via --pheno-name + + when { + params { + outdir = "test" + } + process { + """ + input[0] = [ [id:"null"], [], [], []] + input[1] = [ [id:"null"], []] + input[2] = [ + [id:"test"], + file(params.test_data['homo_sapiens']['popgen']['plink_case_control_bcf_gz'], checkIfExists: true) + ] + input[3] = [ + [id:"test"], + file(params.test_data['homo_sapiens']['popgen']['plink_simulated_phenoname'], checkIfExists: true) + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.epi, + process.out.episummary, + process.out.nosex + ).match() }, + { assert process.out.log.get(0).get(1) ==~ ".*/*.log" } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/plink/epistasis/tests/main.nf.test.snap b/modules/nf-core/plink/epistasis/tests/main.nf.test.snap new file mode 100644 index 00000000000..151792673ad --- /dev/null +++ b/modules/nf-core/plink/epistasis/tests/main.nf.test.snap @@ -0,0 +1,170 @@ +{ + "plink - BCF": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.epi.cc:md5,91ac05fbd7b96d5883403eb8ac20dea9" + ] + ], + [ + [ + { + "id": "test" + }, + "test.epi.cc.summary:md5,1853bf0e3b49af64a8fffdc06f3e7f39" + ] + ], + [ + [ + { + "id": "test" + }, + "test.nosex:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-22T15:44:06.676203832" + }, + "plink - BCF with phenotype name": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.epi.cc:md5,91ac05fbd7b96d5883403eb8ac20dea9" + ] + ], + [ + [ + { + "id": "test" + }, + "test.epi.cc.summary:md5,1853bf0e3b49af64a8fffdc06f3e7f39" + ] + ], + [ + [ + { + "id": "test" + }, + "test.nosex:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-22T15:44:36.743772181" + }, + "plink - VCF with phenotype name": { + "content": [ + [ + [ + { + "id": "test" + 
}, + "test.epi.cc:md5,91ac05fbd7b96d5883403eb8ac20dea9" + ] + ], + [ + [ + { + "id": "test" + }, + "test.epi.cc.summary:md5,1853bf0e3b49af64a8fffdc06f3e7f39" + ] + ], + [ + [ + { + "id": "test" + }, + "test.nosex:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-22T15:43:29.573025062" + }, + "plink - binary": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.epi.cc:md5,91ac05fbd7b96d5883403eb8ac20dea9" + ] + ], + [ + [ + { + "id": "test" + }, + "test.epi.cc.summary:md5,1853bf0e3b49af64a8fffdc06f3e7f39" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-22T15:43:51.26229668" + }, + "plink - VCF": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.epi.cc:md5,91ac05fbd7b96d5883403eb8ac20dea9" + ] + ], + [ + [ + { + "id": "test" + }, + "test.epi.cc.summary:md5,1853bf0e3b49af64a8fffdc06f3e7f39" + ] + ], + [ + [ + { + "id": "test" + }, + "test.nosex:md5,4f9aa36c44a417ff6d7caa9841e66ad9" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-22T15:43:14.573129885" + }, + "version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-22T15:43:14.558983227" + } +} \ No newline at end of file diff --git a/modules/nf-core/plink/epistasis/tests/nextflow.config b/modules/nf-core/plink/epistasis/tests/nextflow.config new file mode 100644 index 00000000000..a9955d8c3f3 --- /dev/null +++ b/modules/nf-core/plink/epistasis/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: PLINK_EPISTASIS { + ext.args = '--allow-no-sex' + } + +} diff --git a/modules/nf-core/plink/epistasis/tests/pheno_name.config b/modules/nf-core/plink/epistasis/tests/pheno_name.config new file mode 100644 index 00000000000..7c0fbe93655 --- /dev/null +++ b/modules/nf-core/plink/epistasis/tests/pheno_name.config @@ -0,0 +1,7 @@ +process { + + 
withName: PLINK_EPISTASIS { + ext.args = '--pheno-name Phenotype --allow-no-sex' + } + +} diff --git a/modules/nf-core/plink/epistasis/tests/tags.yml b/modules/nf-core/plink/epistasis/tests/tags.yml new file mode 100644 index 00000000000..51e92aa68d8 --- /dev/null +++ b/modules/nf-core/plink/epistasis/tests/tags.yml @@ -0,0 +1,2 @@ +plink/epistasis: + - "modules/nf-core/plink/epistasis/**" From 91405f1dcb0cda19d99478b3db32f9dc004e3811 Mon Sep 17 00:00:00 2001 From: DavideBag Bagordo Date: Fri, 22 Mar 2024 15:57:20 +0000 Subject: [PATCH 2/3] fixing stub error --- modules/nf-core/plink/epistasis/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/plink/epistasis/main.nf b/modules/nf-core/plink/epistasis/main.nf index acfe4e0dd09..aa05d8ed615 100644 --- a/modules/nf-core/plink/epistasis/main.nf +++ b/modules/nf-core/plink/epistasis/main.nf @@ -81,7 +81,7 @@ process PLINK_EPISTASIS { log.error 'ERROR: the input should be either plink native binary format, VCF or BCF' } """ - touch ${prefix}.assoc + touch ${prefix}.epi.cc cat <<-END_VERSIONS > versions.yml "${task.process}": From 822edc91a2eda390922f1180e28395919187a875 Mon Sep 17 00:00:00 2001 From: DavideBag Bagordo Date: Mon, 25 Mar 2024 10:33:52 +0000 Subject: [PATCH 3/3] module revision --- modules/nf-core/plink/epistasis/main.nf | 3 +++ modules/nf-core/plink/epistasis/meta.yml | 16 ++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/modules/nf-core/plink/epistasis/main.nf b/modules/nf-core/plink/epistasis/main.nf index aa05d8ed615..a407cde1d23 100644 --- a/modules/nf-core/plink/epistasis/main.nf +++ b/modules/nf-core/plink/epistasis/main.nf @@ -82,6 +82,9 @@ process PLINK_EPISTASIS { } """ touch ${prefix}.epi.cc + touch ${prefix}.epi.cc.summary + touch ${prefix}.log + touch ${prefix}.nosex cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/plink/epistasis/meta.yml b/modules/nf-core/plink/epistasis/meta.yml index 
c88dcdbec0e..47581bc64d2 100644 --- a/modules/nf-core/plink/epistasis/meta.yml +++ b/modules/nf-core/plink/epistasis/meta.yml @@ -76,20 +76,20 @@ output: pattern: "versions.yml" - epi: type: file - description: PLINK GWAS association file - pattern: "*.{assoc}" + description: PLINK epistasis file + pattern: "*.{epi.cc}" - episummary: type: file - description: PLINK GWAS association file - pattern: "*.{assoc}" + description: PLINK epistasis summary file + pattern: "*.{epi.cc.summary}" - log: type: file - description: PLINK GWAS association file - pattern: "*.{assoc}" + description: PLINK epistasis log file + pattern: "*.{log}" - nosex: type: file - description: PLINK GWAS association file - pattern: "*.{assoc}" + description: Ambiguous sex ID file + pattern: "*.{nosex}" authors: - "@davidebag"