Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tcoffee tcs #5288

Merged
merged 11 commits into from
Mar 20, 2024
10 changes: 10 additions & 0 deletions modules/nf-core/tcoffee/tcs/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the TCOFFEE_TCS module.
name: "tcoffee_tcs"
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# Provides the t_coffee executable invoked by the module script.
- "bioconda::t-coffee=13.46.0.919e8c6b"
# Provides pigz/unpigz, used to decompress gzipped alignments before scoring.
- conda-forge::pigz=2.8
75 changes: 75 additions & 0 deletions modules/nf-core/tcoffee/tcs/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Scores a multiple sequence alignment with `t_coffee -evaluate` and emits the
// full TCS report plus a small CSV holding the overall score.
process TCOFFEE_TCS {
tag "$meta.id"
label 'process_low'

// Software is resolved either from the module's conda environment or from a
// mulled container; the Singularity image is used only when the engine is
// singularity and pulling Docker images through Singularity was not requested.
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0':
'biocontainers/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0' }"

input:
// Alignment to evaluate; a name ending in .gz is decompressed by the script.
tuple val(meta), path(msa)
// Optional T-Coffee library; pass [] as the path to run without one.
// meta2 only accompanies the library and is not referenced by the script.
tuple val(meta2), path(lib)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why 2 input channels? It seems meta2 is not even used in the script

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can optionally pass a lib file. If you do, you pass it through the channel produced by the earlier alignment step that created the library, so it will have its own meta anyway. meta2 is not used in the script because it is not needed, but it accompanies the lib file whenever one is passed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, should be fine as is


output:
// Report written by t_coffee through -outfile.
tuple val(meta), path("*.tcs") , emit: tcs
// Two-line CSV: meta keys plus "TCS" as header, then meta values plus the score.
tuple val(meta), path("*.scores"), emit: scores
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
// Extra command-line options supplied through task.ext.args (empty by default).
def args = task.ext.args ?: ''
// Basename for both output files; defaults to the sample id.
def prefix = task.ext.prefix ?: "${meta.id}"
// -lib is only added when a library file was actually provided.
def lib_arg = lib ? "-lib ${lib}" : ""
// Comma-joined meta keys and values, used as CSV header and row prefix.
def header = meta.keySet().join(",")
def values = meta.values().join(",")
// Name of the alignment once '.gz' has been removed from it.
def unzipped_name = msa.toString() - '.gz'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Align equal signs

"""
# Point TEMP at the task directory (presumably used by t_coffee for scratch files)
export TEMP='./'

# Decompress gzipped alignments in place and continue with the unzipped name
filename=${msa}
if [[ \$(basename $msa) == *.gz ]] ; then
    unpigz -f $msa
    filename=${unzipped_name}
fi

# Bad hack to circumvent t_coffee bug
# Issue described already in: https://github.com/cbcrg/tcoffee/issues/3
# Add an A in front of filename if the file begins with A
first_letter_filename=\${filename:0:1}
if [ "\$first_letter_filename" == "A" ]; then input="A"\$filename; else input=\$filename; fi

# Evaluate the alignment; options from task.ext.args are forwarded to t_coffee
t_coffee -infile \$input \
    -evaluate -output=score_ascii \
    ${lib_arg} \
    ${args} \
    -outfile ${prefix}.tcs

# Add metadata info to output file
echo "${header},TCS" > "${prefix}.scores"

# Add values
scores=\$(grep 'SCORE=' ${prefix}.tcs | cut -d '=' -f 2 )
echo "${values},\$scores" >> "${prefix}.scores"

# Record tool versions; each command substitution is closed exactly once so
# no stray parenthesis ends up in versions.yml
cat <<-END_VERSIONS > versions.yml
"${task.process}":
    tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}')
    pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
END_VERSIONS
"""

stub:
// Stub run: create empty placeholder outputs without evaluating an alignment.
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.tcs
touch ${prefix}.scores

# Record tool versions; each command substitution is closed exactly once so
# no stray parenthesis ends up in versions.yml
cat <<-END_VERSIONS > versions.yml
"${task.process}":
    tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}')
    pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
END_VERSIONS
"""
}
29 changes: 29 additions & 0 deletions modules/nf-core/tcoffee/tcs/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// NOTE(review): this looks like the unmodified generated nf-test template. It
// assigns no inputs, while TCOFFEE_TCS declares two input channels, so it is
// presumably superseded by tests/main.nf.test - confirm and remove or fill in.
nextflow_process {

name "Test Process TCOFFEE_TCS"
script "modules/nf-core/tcoffee/tcs/main.nf"
process "TCOFFEE_TCS"

test("Should run without failures") {

when {
params {
// define parameters here. Example:
// outdir = "tests/results"
}
process {
"""
// define inputs of the process here. Example:
// input[0] = file("test-file.txt")
"""
}
}

// Requires a successful run and compares all outputs against the snapshot.
then {
assert process.success
assert snapshot(process.out).match()
}

}

}
57 changes: 57 additions & 0 deletions modules/nf-core/tcoffee/tcs/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "tcoffee_tcs"
description: Compute the TCS score for a MSA or for a MSA plus a library file. Outputs the tcs as it is and a csv with just the total TCS score.
keywords:
  - alignment
  - MSA
  - evaluation
tools:
  - "tcoffee":
      description: "A collection of tools for Multiple Alignments of DNA, RNA, Protein Sequence"
      homepage: "http://www.tcoffee.org/Projects/tcoffee/"
      documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html"
      tool_dev_url: "https://github.com/cbcrg/tcoffee"
      doi: "10.1006/jmbi.2000.4042"
      licence: ["GPL v3"]
  - "pigz":
      description: "Parallel implementation of the gzip algorithm."
      homepage: "https://zlib.net/pigz/"
      documentation: "https://zlib.net/pigz/pigz.pdf"

# Inputs are listed in the order the process declares them:
# (meta, msa) followed by (meta2, lib).
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', ... ]
  - msa:
      type: file
      description: fasta file containing the alignment to be evaluated. May be gzipped or uncompressed.
      pattern: "*.{aln,fa,fasta,fas}{.gz,}"
  - meta2:
      type: map
      description: |
        Groovy Map containing information about the library file
        e.g. [ id:'test', ... ]
  - lib:
      type: file
      description: lib file containing the alignment library of the given msa.
      pattern: "*{.tc_lib,*_lib}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - tcs:
      type: file
      description: The msa represented in tcs format, prepended with TCS scores
      pattern: "*.tcs"
  - scores:
      type: file
      description: a file containing the score of the alignment in csv format
      pattern: "*.scores"

authors:
  - "@alessiovignoli"
5 changes: 5 additions & 0 deletions modules/nf-core/tcoffee/tcs/tests/lib.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Test-only configuration: makes the TCOFFEE_ALIGN setup step output a fasta
// alignment and write a library file (sample_lib1.tc_lib).
process {
withName: "TCOFFEE_ALIGN"{
ext.args = { "-output fasta_aln -out_lib=sample_lib1.tc_lib" }
}
}
114 changes: 114 additions & 0 deletions modules/nf-core/tcoffee/tcs/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
nextflow_process {

name "Test Process TCOFFEE_TCS"
script "../main.nf"
process "TCOFFEE_TCS"
tag "modules"
tag "modules_nfcore"
tag "tcoffee"
tag "tcoffee/tcs"
tag "pigz/compress"
tag "tcoffee/align"

// Uncompressed alignment, no library: the second input carries an empty path.
test("tcs - msa") {

when {
process {
"""
input[0] = [
[ id:'test'],
file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true)
]
input[1] = [
[ id:'test'],
[]
]
"""
}
}

// Requires a successful run and compares all outputs against the snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Gzipped alignment, no library: exercises the module's decompression branch.
test("tcs - msa - compressed") {

// Compress the reference alignment first so the process receives a .gz input.
setup {
run("PIGZ_COMPRESS") {
script "../../../pigz/compress/main.nf"
process {
"""
input[0] = [ [ id:'test' ],
file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true)
]

"""
}
}
}

// The archive emitted by PIGZ_COMPRESS is re-wrapped with a fixed meta map.
when {
process {
"""
input[0] = PIGZ_COMPRESS.out.archive.map { it -> [[ id:'test'], it[1]] }
input[1] = [
[ id:'test'],
[]
]
"""
}
}

// Requires a successful run and compares all outputs against the snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}

// Alignment plus library: the library comes from a TCOFFEE_ALIGN setup run.
test("tcs - msa - lib") {

// lib.config sets the TCOFFEE_ALIGN arguments so that it writes a library file.
config "./lib.config"

setup {
run("TCOFFEE_ALIGN") {
script "../../align/main.nf"
process {
"""
input[0] = [ [ id:'test' ],
file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true)
]
input[1] = [[:],[]]
input[2] = [[:],[],[]]
input[3] = true
"""
}
}
}

// The library emitted by TCOFFEE_ALIGN is re-wrapped with a fixed meta map.
when {
process {
"""
input[0] = [
[ id:'test'],
file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true)
]
input[1] = TCOFFEE_ALIGN.out.lib.map { it -> [[ id:'test' ], it[1]] }
"""
}
}

// Requires a successful run and compares all outputs against the snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}
}
Loading
Loading