Skip to content

Commit

Permalink
Arriba download update (#6745)
Browse files Browse the repository at this point in the history
* add specific files created

* typo

* add from nf-test template

* remove from pytest_modules

* update environment file

* update arriba

* add tests

* use latest conda build

* arriba conda env

* update environment

* update meta

* update tags in test

* move arriba test to stubs as timeout for merge queue
  • Loading branch information
rannick authored Oct 17, 2024
1 parent 3c464e7 commit 7741dfc
Show file tree
Hide file tree
Showing 14 changed files with 281 additions and 242 deletions.
3 changes: 3 additions & 0 deletions modules/nf-core/arriba/arriba/environment.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the arriba/arriba module.
# Channels are listed with conda-forge first, then bioconda.
channels:
- conda-forge
- bioconda

# Arriba is pinned to version 2.4.0 and taken explicitly from the bioconda channel.
dependencies:
- bioconda::arriba=2.4.0
17 changes: 7 additions & 10 deletions modules/nf-core/arriba/arriba/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@ process ARRIBA_ARRIBA {
'biocontainers/arriba:2.4.0--h0033a41_2' }"

input:
tuple val(meta), path(bam)
tuple val(meta), path(bam)
tuple val(meta2), path(fasta)
tuple val(meta3), path(gtf)
tuple val(meta4), path(blacklist)
tuple val(meta5), path(known_fusions)
tuple val(meta6), path(structural_variants)
tuple val(meta7), path(tags)
tuple val(meta8), path(protein_domains)
path(blacklist)
path(known_fusions)
path(cytobands)
path(protein_domains)

output:
tuple val(meta), path("*.fusions.tsv") , emit: fusions
Expand All @@ -30,8 +29,7 @@ process ARRIBA_ARRIBA {
def prefix = task.ext.prefix ?: "${meta.id}"
def blacklist = blacklist ? "-b $blacklist" : "-f blacklist"
def known_fusions = known_fusions ? "-k $known_fusions" : ""
def structural_variants = structural_variants ? "-d $structual_variants" : ""
def tags = tags ? "-t $tags" : ""
def cytobands = cytobands ? "-d $cytobands" : ""
def protein_domains = protein_domains ? "-p $protein_domains" : ""

"""
Expand All @@ -43,8 +41,7 @@ process ARRIBA_ARRIBA {
-O ${prefix}.fusions.discarded.tsv \\
$blacklist \\
$known_fusions \\
$structural_variants \\
$tags \\
$cytobands \\
$protein_domains \\
$args
Expand Down
39 changes: 5 additions & 34 deletions modules/nf-core/arriba/arriba/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,48 +43,19 @@ input:
type: file
description: Annotation GTF file
pattern: "*.{gtf}"
- - meta4:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- blacklist:
- - blacklist:
type: file
description: Blacklist file
pattern: "*.{tsv}"
- - meta5:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- known_fusions:
- - known_fusions:
type: file
description: Known fusions file
pattern: "*.{tsv}"
- - meta6:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- structural_variants:
type: file
description: Structural variants file
pattern: "*.{tsv}"
- - meta7:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- tags:
- - cytobands:
type: file
description: Tags file
description: Cytobands file
pattern: "*.{tsv}"
- - meta8:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- protein_domains:
- - protein_domains:
type: file
description: Protein domains file
pattern: "*.{gff3}"
Expand Down
107 changes: 107 additions & 0 deletions modules/nf-core/arriba/arriba/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@

// nf-test suite for the ARRIBA_ARRIBA process defined in ../main.nf.
nextflow_process {

name "Test Process ARRIBA_ARRIBA"
script "../main.nf"
process "ARRIBA_ARRIBA"

// Tags cover this module as well as the modules that are run in the setup block below.
tag "modules"
tag "modules_nfcore"
tag "arriba"
tag "arriba/arriba"
tag "arriba/download"
tag "star/genomegenerate"
tag "star/align"

// Dependency processes executed before the test: ARRIBA_DOWNLOAD, STAR_GENOMEGENERATE and STAR_ALIGN.
// NOTE(review): the test below feeds a BAM taken directly from the test-data repository and passes
// empty lists for the optional inputs, so none of the outputs produced here appear to be consumed
// by the test - confirm whether this setup block is still required.
setup {
config "./nextflow.config"
options "-stub"
// Requests the GRCh38 reference files.
// NOTE(review): unlike the two STAR runs below, this run has no `options "-stub"` of its own;
// presumably it relies on the setup-level option above - confirm.
run("ARRIBA_DOWNLOAD") {
script "../../../arriba/download/main.nf"
process {
"""
input[0] = 'GRCh38'
"""
}
}
// Runs STAR_GENOMEGENERATE (in stub mode) with the test genome FASTA and GTF as inputs.
run("STAR_GENOMEGENERATE") {
script "../../../star/genomegenerate/main.nf"
options "-stub"
process {
"""
input[0] = Channel.of([
[ id:'test_fasta' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
])
input[1] = Channel.of([
[ id:'test_gtf' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
"""
}
}
// Runs STAR_ALIGN (in stub mode) on the paired-end RNA-seq test reads, using the index emitted by
// STAR_GENOMEGENERATE and the test GTF.
run("STAR_ALIGN") {
script "../../../star/align/main.nf"
options "-stub"
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
[
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
]
])
input[1] = STAR_GENOMEGENERATE.out.index
input[2] = Channel.of([
[ id:'test_gtf' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
input[3] = false
input[4] = 'illumina'
input[5] = false
"""
}
}
}

// Stub-only test: ARRIBA_ARRIBA is executed with "-stub".
test("homo_sapiens - paired_end - stub") {

options "-stub"

// input[0] is a sorted paired-end RNA BAM, input[1] the genome FASTA, input[2] the GTF annotation.
// input[3]..input[6] are left empty; presumably these are the optional blacklist, known_fusions,
// cytobands and protein_domains files, in that order - verify against ../main.nf.
when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.sorted.bam', checkIfExists: true) ]
])
input[1] = Channel.of([
[ id:'test_fasta' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
])
input[2] = Channel.of([
[ id:'test_gtf' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
input[3] = []
input[4] = []
input[5] = []
input[6] = []
"""
}
}

// The process must succeed and all of its output channels must match the stored snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
55 changes: 55 additions & 0 deletions modules/nf-core/arriba/arriba/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"homo_sapiens - paired_end - stub": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260"
]
],
"1": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.discarded.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260"
]
],
"2": [
"versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30"
],
"fusions": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260"
]
],
"fusions_fail": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.discarded.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260"
]
],
"versions": [
"versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-08T15:41:23.945072"
}
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
// Process-scope configuration used by the module tests.
process {

// Publish outputs under params.outdir in a sub-directory named after the process:
// the last ':'-separated segment of task.process is taken, then its first '_'-separated
// token, lower-cased.
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

// ext.args applied to processes named STAR_GENOMEGENERATE.
withName: STAR_GENOMEGENERATE {
ext.args = '--genomeSAindexNbases 11'
}

// ext.args applied to processes named STAR_ALIGN; the value includes the chimeric-read
// options (--chim*) and writes an unsorted BAM.
withName: STAR_ALIGN {
ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50'
}

}
9 changes: 6 additions & 3 deletions modules/nf-core/arriba/download/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@ process ARRIBA_DOWNLOAD {
'biocontainers/arriba:2.4.0--h0033a41_2' }"

input:
val(genome)

output:
path "*" , emit: reference
path "versions.yml" , emit: versions
path "blacklist*${genome}*.tsv.gz" , emit: blacklist
path "cytobands*${genome}*.tsv" , emit: cytobands
path "protein_domains*${genome}*.gff3" , emit: protein_domains
path "known_fusions*${genome}*.tsv.gz" , emit: known_fusions
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand All @@ -36,7 +40,6 @@ process ARRIBA_DOWNLOAD {
touch protein_domains_hg38_GRCh38_v2.4.0.gff3
touch cytobands_hg38_GRCh38_v2.4.0.tsv
touch known_fusions_hg38_GRCh38_v2.4.0.tsv.gz
touch protein_domains_hg38_GRCh38_v2.4.0.gff3
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
32 changes: 27 additions & 5 deletions modules/nf-core/arriba/download/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,34 @@ tools:
licence: ["MIT"]
identifier: biotools:Arriba

input:
- - genome:
type: string
description: hg38, hg19, GRCh38, GRCh37 for humans are accepted
output:
- reference:
- "*":
type: directory
description: Folder with arriba references
pattern: "*"
- blacklist:
- blacklist*${genome}*.tsv.gz:
type: file
description: The blacklist removes recurrent alignment artifacts and transcripts
which are present in healthy tissue
pattern: "*.tsv.gz"
- cytobands:
- cytobands*${genome}*.tsv:
type: file
description: Coordinates of the Giemsa staining bands. This information is used
to draw ideograms
pattern: "*.tsv"
- protein_domains:
- protein_domains*${genome}*.gff3:
type: file
description: Protein domain annotations
pattern: "*.gff3"
- known_fusions:
- known_fusions*${genome}*.tsv.gz:
type: file
description: Arriba is more sensitive to those fusions to improve the detection
rate of expected or highly relevant events, such as recurrent fusions
pattern: "*.tsv.gz"
- versions:
- versions.yml:
type: file
Expand Down
29 changes: 22 additions & 7 deletions modules/nf-core/arriba/download/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,38 @@ nextflow_process {
tag "arriba/download"

test("test-arriba-download") {

when {
process {
"""
input[0] = 'GRCh38'
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.reference[0].collect { file(it).name }.toSorted(),
process.out.versions
).match()
}
{ assert snapshot(process.out).match() }
)
}

}

// Stub-mode test: runs the process with "-stub" for genome 'GRCh38'.
test("download - stub") {

options "-stub"

when {
process {
"""
input[0] = 'GRCh38'
"""
}
}

// Only successful completion is asserted here; outputs are not compared against a snapshot.
then {
assertAll(
{ assert process.success }
)
}
}
Expand Down
Loading

0 comments on commit 7741dfc

Please sign in to comment.