Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new module: Plink epistasis analysis #5386

Merged
merged 6 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions modules/nf-core/plink/epistasis/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment used by the PLINK_EPISTASIS module.
name: plink_epistasis
# Channels are listed in priority order.
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # Same PLINK version as the container tags referenced in main.nf.
  - bioconda::plink=1.90b6.21
94 changes: 94 additions & 0 deletions modules/nf-core/plink/epistasis/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Runs `plink --epistasis` on exactly one genotype source.
//
// Inputs (unused slots are passed as empty lists):
//   meta,  bed/bim/fam : PLINK native binary fileset
//   meta2, vcf         : VCF genotypes (combined with the phenotype file)
//   meta3, bcf         : BCF genotypes (combined with the phenotype file)
//   meta4, phe         : phenotype file, passed via --pheno for VCF/BCF input
//
// The genotype source is chosen in the order bed > vcf > bcf; the meta map of
// the chosen source is the one emitted on the output channels.
process PLINK_EPISTASIS {
    tag "$meta.id"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/plink:1.90b6.21--h031d066_5':
        'biocontainers/plink:1.90b6.21--h031d066_5' }"

    input:
    tuple val(meta), path(bed), path(bim), path(fam)
    tuple val(meta2), path(vcf)
    tuple val(meta3), path(bcf)
    tuple val(meta4), path(phe)

    output:
    tuple val(meta), path("*.epi.cc")        , emit: epi
    tuple val(meta), path("*.epi.cc.summary"), emit: episummary, optional:true
    tuple val(meta), path("*.log")           , emit: log
    tuple val(meta), path("*.nosex")         , emit: nosex, optional:true
    path "versions.yml"                      , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = ''
    // Build the input arguments from whichever genotype source was provided,
    // in hierarchical order: binary fileset, then VCF, then BCF.
    def input_command = ''
    if (bed) {
        input_command = "--bed ${bed} --bim ${bim} --fam ${fam}"
        prefix = task.ext.prefix ?: "${meta.id}"
    } else if (vcf) {
        input_command = "--vcf ${vcf} --pheno ${phe}"
        prefix = task.ext.prefix ?: "${meta2.id}"
        // Re-point meta so the output tuples carry the VCF sample information.
        meta = meta2
    } else if (bcf) {
        input_command = "--bcf ${bcf} --pheno ${phe}"
        prefix = task.ext.prefix ?: "${meta3.id}"
        // Re-point meta so the output tuples carry the BCF sample information.
        meta = meta3
    } else {
        // Abort instead of only logging: otherwise plink would be launched
        // with no input files and an empty output prefix.
        error 'ERROR: the input should be either plink native binary format, VCF or BCF'
    }
    """
    plink \\
        $input_command \\
        --threads $task.cpus \\
        --epistasis \\
        $args \\
        --out $prefix

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        plink: \$(echo \$(plink --version) | sed 's/^PLINK v//;s/64.*//')
    END_VERSIONS
    """

    stub:
    def prefix = ''
    // Mirror the prefix/meta selection of the real script; the stub does not
    // run plink, so no command-line arguments need to be assembled.
    if (bed) {
        prefix = task.ext.prefix ?: "${meta.id}"
    } else if (vcf) {
        prefix = task.ext.prefix ?: "${meta2.id}"
        meta = meta2
    } else if (bcf) {
        prefix = task.ext.prefix ?: "${meta3.id}"
        meta = meta3
    } else {
        error 'ERROR: the input should be either plink native binary format, VCF or BCF'
    }
    // File names must match the output globs declared above
    // (*.epi.cc, *.epi.cc.summary, *.log, *.nosex).
    """
    touch ${prefix}.epi.cc
    touch ${prefix}.epi.cc.summary
    touch ${prefix}.log
    touch ${prefix}.nosex

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        plink: \$(echo \$(plink --version) | sed 's/^PLINK v//;s/64.*//')
    END_VERSIONS
    """
}
97 changes: 97 additions & 0 deletions modules/nf-core/plink/epistasis/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for PLINK_EPISTASIS: describes the tool, the four input
# tuples (binary fileset, VCF, BCF, phenotype) and the emitted outputs.
name: "plink_epistasis"
description: Epistasis in PLINK, analyzing how the effects of one gene depend on the presence of others.
keywords:
  - interactions
  - variants
  - regression
tools:
  - "plink":
      description: "Whole genome association analysis toolset, designed to perform a range of basic, large-scale analyses in a computationally efficient manner."
      homepage: "https://www.cog-genomics.org/plink"
      # Points at the epistasis page, i.e. the analysis this module runs.
      documentation: "https://www.cog-genomics.org/plink/1.9/epistasis"
      tool_dev_url: "https://www.cog-genomics.org/plink/1.9/dev"
      licence: ["GPL"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
        meta is associated to the PLINK native file input
  - meta2:
      type: map
      description: |
        Groovy Map containing sample information,
        e.g. [ id:'test', single_end:false ]
        meta2 is associated to VCF file input
  - meta3:
      type: map
      description: |
        Groovy Map containing sample information,
        e.g. [ id:'test', single_end:false ]
        meta3 is associated to BCF file input
  - meta4:
      type: map
      description: |
        Groovy Map containing sample information,
        e.g. [ id:'test', single_end:false ]
        meta4 is associated to phenotype file input
  - bed:
      type: file
      description: PLINK binary biallelic genotype table file
      pattern: "*.{bed}"
  - bim:
      type: file
      description: PLINK extended MAP file
      pattern: "*.{bim}"
  - fam:
      type: file
      description: PLINK sample information file
      pattern: "*.{fam}"
  - bcf:
      type: file
      description: Binary variant call format file (bcf), optionally compressed
      pattern: "*.{bcf,bcf.gz}"
  - vcf:
      type: file
      description: Variant calling file (vcf), optionally gzip-compressed
      pattern: "*.{vcf,vcf.gz}"
  - phe:
      type: file
      description: PLINK file containing phenotype information. This phenotype information can be read from the third column with the --pheno option or from a specific column with the --pheno-name option.
      pattern: "*.{phe}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - epi:
      type: file
      description: PLINK epistasis file
      pattern: "*.{epi.cc}"
  - episummary:
      type: file
      description: PLINK epistasis summary file
      pattern: "*.{epi.cc.summary}"
  - log:
      type: file
      description: PLINK epistasis log file
      pattern: "*.{log}"
  - nosex:
      type: file
      description: Ambiguous sex ID file
      pattern: "*.{nosex}"

authors:
  - "@davidebag"
maintainers:
  - "@davidebag"
190 changes: 190 additions & 0 deletions modules/nf-core/plink/epistasis/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
nextflow_process {

name "Test Process PLINK_EPISTASIS"
script "../main.nf"
process "PLINK_EPISTASIS"
tag "modules"
tag "modules_nfcore"
tag "plink"
tag "plink/epistasis"

test("plink - VCF") {

    config "./nextflow.config"

    when {
        params {
            outdir = "test"
        }
        process {
            // Only the VCF slot (input[1]) and the phenotype slot (input[3])
            // are populated; the binary-fileset and BCF slots are left empty.
            """
            input[0] = [ [id:"null"], [], [], []]
            input[1] = [
                [id:"test"],
                file(params.test_data['homo_sapiens']['popgen']['plink_case_control_vcf_gz'], checkIfExists: true)
            ]
            input[2] = [ [id:"null"], []]
            input[3] = [
                [id:"test"],
                file(params.test_data['homo_sapiens']['popgen']['plink_simulated_phe'], checkIfExists: true)
            ]
            """
        }
    }

    then {
        assertAll(
            { assert process.success },
            // The process emits this channel as `versions`; `process.out.version`
            // does not exist, so it snapshotted nothing.
            // NOTE(review): the stored "version" snapshot must be regenerated.
            { assert snapshot(process.out.versions).match("version") },
            { assert snapshot(
                process.out.epi,
                process.out.episummary,
                process.out.nosex
                ).match() },
            // The log is checked by file name only and is kept out of the snapshot.
            { assert process.out.log.get(0).get(1) ==~ ".*/*.log" }
        )
    }
}

test("plink - VCF with phenotype name") {

    // Module options for this scenario come from pheno_name.config.
    config "./pheno_name.config"

    when {
        params {
            outdir = "test"
        }
        process {
            // VCF genotypes in input[1], phenotype file in input[3];
            // the binary-fileset and BCF slots stay empty.
            """
            input[0] = [ [id:"null"], [], [], []]
            input[1] = [
                [id:"test"],
                file(params.test_data['homo_sapiens']['popgen']['plink_case_control_vcf_gz'], checkIfExists: true)
            ]
            input[2] = [ [id:"null"], []]
            input[3] = [
                [id:"test"],
                file(params.test_data['homo_sapiens']['popgen']['plink_simulated_phenoname'], checkIfExists: true)
            ]
            """
        }
    }

    then {
        assertAll(
            { assert process.success },
            { assert snapshot(process.out.epi, process.out.episummary, process.out.nosex).match() },
            // The log is checked by file name only and is kept out of the snapshot.
            { assert process.out.log.get(0).get(1) ==~ ".*/*.log" }
        )
    }
}

test("plink - binary") {

    config "./nextflow.config"

    when {
        params {
            outdir = "test"
        }
        process {
            // Only the PLINK binary fileset (bed/bim/fam) in input[0] is
            // populated; the VCF, BCF and phenotype slots stay empty.
            """
            input[0] = [
                [id:"test"],
                file(params.test_data['homo_sapiens']['popgen']['plink_case_control_bed'], checkIfExists: true),
                file(params.test_data['homo_sapiens']['popgen']['plink_case_control_bim'], checkIfExists: true),
                file(params.test_data['homo_sapiens']['popgen']['plink_case_control_fam'], checkIfExists: true)
            ]
            input[1] = [ [id:"null"], []]
            input[2] = [ [id:"null"], []]
            input[3] = [ [id:"null"], []]
            """
        }
    }

    then {
        assertAll(
            { assert process.success },
            { assert snapshot(process.out.epi, process.out.episummary, process.out.nosex).match() },
            // The log is checked by file name only and is kept out of the snapshot.
            { assert process.out.log.get(0).get(1) ==~ ".*/*.log" }
        )
    }
}

test("plink - BCF") {

    config "./nextflow.config"

    when {
        params {
            outdir = "test"
        }
        process {
            // BCF genotypes in input[2], phenotype file in input[3];
            // the binary-fileset and VCF slots stay empty.
            """
            input[0] = [ [id:"null"], [], [], []]
            input[1] = [ [id:"null"], []]
            input[2] = [
                [id:"test"],
                file(params.test_data['homo_sapiens']['popgen']['plink_case_control_bcf_gz'], checkIfExists: true)
            ]
            input[3] = [
                [id:"test"],
                file(params.test_data['homo_sapiens']['popgen']['plink_simulated_phe'], checkIfExists: true)
            ]
            """
        }
    }

    then {
        assertAll(
            { assert process.success },
            { assert snapshot(process.out.epi, process.out.episummary, process.out.nosex).match() },
            // The log is checked by file name only and is kept out of the snapshot.
            { assert process.out.log.get(0).get(1) ==~ ".*/*.log" }
        )
    }
}

test("plink - BCF with phenotype name") {

    // Module options for this scenario come from pheno_name.config.
    config "./pheno_name.config"

    when {
        params {
            outdir = "test"
        }
        process {
            // Feed the BCF file through the BCF slot (input[2]). This test
            // previously passed the VCF file in input[1], duplicating the
            // "VCF with phenotype name" test and never covering BCF input.
            // NOTE(review): regenerate this test's snapshot if it differs.
            """
            input[0] = [ [id:"null"], [], [], []]
            input[1] = [ [id:"null"], []]
            input[2] = [
                [id:"test"],
                file(params.test_data['homo_sapiens']['popgen']['plink_case_control_bcf_gz'], checkIfExists: true)
            ]
            input[3] = [
                [id:"test"],
                file(params.test_data['homo_sapiens']['popgen']['plink_simulated_phenoname'], checkIfExists: true)
            ]
            """
        }
    }

    then {
        assertAll(
            { assert process.success },
            { assert snapshot(
                process.out.epi,
                process.out.episummary,
                process.out.nosex
                ).match() },
            // The log is checked by file name only and is kept out of the snapshot.
            { assert process.out.log.get(0).get(1) ==~ ".*/*.log" }
        )
    }
}
}
Loading
Loading