Skip to content

Commit

Permalink
Merge pull request #131 from Plant-Food-Research-Open/fix/short_intron
Browse files Browse the repository at this point in the history
Fixed a crash due to short introns
  • Loading branch information
GallVp authored Dec 20, 2024
2 parents 2a0c7a2 + 3459b40 commit d069633
Show file tree
Hide file tree
Showing 9 changed files with 2,134 additions and 12 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v0.6.0 - [16-Dec-2024]
## v0.6.0 - [20-Dec-2024]

### `Added`

Expand All @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3. Fixed an issue in `genepal_report.Rmd` which caused the pangene matrix plot to fail when the number of clusters exceeded 65536 [#124](https://github.com/Plant-Food-Research-Open/genepal/issues/124)
4. Fixed an issue where `GENEPALREPORT` process failed due to OOM kill signal from SLURM [#123](https://github.com/Plant-Food-Research-Open/genepal/issues/123)
5. Fixed an issue where Gff merge after liftoff failed when one of the Gff files did not contain any genes
6. Fixed an issue where `gxf_fasta_agat_spaddintrons_spextractsequences` crashed due to short introns [#89](https://github.com/Plant-Food-Research-Open/genepal/issues/89)

### `Dependencies`

Expand Down
2 changes: 1 addition & 1 deletion modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
},
"gxf_fasta_agat_spaddintrons_spextractsequences": {
"branch": "main",
"git_sha": "7bf6fbca23edc94490ffa6709f52b2f71c6fb130",
"git_sha": "ed4146008dbdcfd4823252b456de32059e2d07f4",
"installed_by": ["subworkflows"]
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ workflow GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES {
| collectFile
| map { gxf -> [ gxf.baseName.replace('.nointrons', ''), gxf ] }
| join(
ch_gxf.map { meta, gxf -> [ meta.id, meta ] }
ch_gxf.map { meta, _gxf -> [ meta.id, meta ] }
)
| map { id, gxf, meta -> [ meta, gxf ] }
| map { _id, gxf, meta -> [ meta, gxf ] }

// MODULE: AGAT_SPADDINTRONS
AGAT_SPADDINTRONS ( ch_gxf_purged, [] )
Expand All @@ -46,11 +46,11 @@ workflow GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES {
| join(
ch_fasta.map { meta2, fasta -> [ meta2.id, fasta ] }
)
| map { id, meta, gff3, fasta -> [ meta, gff3, fasta ] }
| map { _id, meta, gff3, fasta -> [ meta, gff3, fasta ] }

AGAT_SPEXTRACTSEQUENCES(
ch_gxf_fasta.map { meta, gff3, fasta -> [ meta, gff3 ] },
ch_gxf_fasta.map { meta, gff3, fasta -> fasta },
ch_gxf_fasta.map { meta , gff3 , _fasta -> [ meta, gff3 ] },
ch_gxf_fasta.map { _meta, _gff3 , fasta -> fasta },
[] // config
)

Expand All @@ -61,16 +61,20 @@ workflow GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES {
ch_splice_motifs = ch_intron_sequences
| map { meta, fasta ->
def splice_motifs = fasta.splitFasta ( record: [id: true, seqString: true] )
.collect { el -> [ el.id, "${el.seqString[0..1]}${el.seqString[-2..-1]}" ].join('\t') }
.collect { el ->
el.seqString.size() < 4
? [ el.id, '-' ].join('\t')
: [ el.id, "${el.seqString[0..1]}${el.seqString[-2..-1]}" ].join('\t')
}

[ "${meta.id}.motifs.tsv", splice_motifs.join('\n') ]
}
| collectFile
| map { tsv -> [ tsv.baseName.replace('.motifs', ''), tsv ] }
| join(
ch_gxf_purged.map { meta, gxf -> [ meta.id, meta ] }
ch_gxf_purged.map { meta, _gxf -> [ meta.id, meta ] }
)
| map { id, tsv, meta -> [ meta, tsv ] }
| map { _id, tsv, meta -> [ meta, tsv ] }

// collectFile: Mark gff3
ch_marked_gff3 = ch_introns_gff
Expand Down Expand Up @@ -106,9 +110,9 @@ workflow GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES {
| collectFile
| map { gff3 -> [ gff3.baseName.replace('.marked', ''), gff3 ] }
| join(
ch_gxf_purged.map { meta, gxf -> [ meta.id, meta ] }
ch_gxf_purged.map { meta, _gxf -> [ meta.id, meta ] }
)
| map { id, gff3, meta -> [ meta, gff3 ] }
| map { _id, gff3, meta -> [ meta, gff3 ] }

emit:
motifs_tsv = ch_splice_motifs // channel: [ val(meta), tsv ]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// nf-test specification for the GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES subworkflow.
// It runs the subworkflow once on the scaffold_4 test data and compares every
// emitted output against the stored snapshot.
nextflow_workflow {

name "Test Subworkflow GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES"
// Path to the subworkflow under test, relative to this test file.
script "../../../gallvp/gxf_fasta_agat_spaddintrons_spextractsequences/main.nf"
workflow "GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES"
// Test-specific process configuration, loaded from the file next to this spec.
config './nextflow.config'

// Tags used to select this test when filtering nf-test runs.
// NOTE(review): the gunzip tag looks unrelated to the inputs used below; confirm it is still needed.
tag "subworkflows"
tag "subworkflows_gallvp"
tag "subworkflows/gxf_fasta_agat_spaddintrons_spextractsequences"
tag "modules/nf-core/gunzip"
tag "agat/spextractsequences"
tag "agat/spaddintrons"

// Single scenario: a GFF annotation plus its FASTA, both carrying meta id 'test'.
test("scaffold_4 - fasta - gff3") {

when {
workflow {
// input[0] is the [ meta, gff ] channel and input[1] is the [ meta, fasta ] channel.
// checkIfExists makes the run fail early if either test data file is missing.
"""
input[0] = Channel.of ( [
[ id:'test' ],
file("$baseDir" + '/subworkflows/local/tests/gxf_fasta_agat_spaddintrons_spextractsequences/testdata/scaffold_4.gff', checkIfExists: true)
] )
input[1] = Channel.of ( [
[ id:'test' ],
file("$baseDir" + '/subworkflows/local/tests/gxf_fasta_agat_spaddintrons_spextractsequences/testdata/scaffold_4.fasta', checkIfExists: true)
] )
"""
}
}

// Pass criteria: the workflow completes successfully and all of its outputs
// match the recorded snapshot.
then {
assertAll(
{ assert workflow.success},
{ assert snapshot(workflow.out).match()}
)
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{
"scaffold_4 - fasta - gff3": {
"content": [
{
"0": [
[
{
"id": "test"
},
"test.motifs.tsv:md5,b0a99a7a7bf598bbf1e25ca3690567c6"
]
],
"1": [
[
{
"id": "test"
},
"test.marked.gff3:md5,f464b2e0dc24238e69c3e9e1419c1b3b"
]
],
"2": [
"versions.yml:md5,1befbe41bc7abbf49767b8dc68877bc7",
"versions.yml:md5,65042e008b2466984150cb219a05291c"
],
"marked_gff3": [
[
{
"id": "test"
},
"test.marked.gff3:md5,f464b2e0dc24238e69c3e9e1419c1b3b"
]
],
"motifs_tsv": [
[
{
"id": "test"
},
"test.motifs.tsv:md5,b0a99a7a7bf598bbf1e25ca3690567c6"
]
],
"versions": [
"versions.yml:md5,1befbe41bc7abbf49767b8dc68877bc7",
"versions.yml:md5,65042e008b2466984150cb219a05291c"
]
}
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.3"
},
"timestamp": "2024-12-20T12:00:17.861955"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Process-level configuration used only by the subworkflow test.
process {

// Passes '-t intron' as extra command-line arguments to AGAT_SPEXTRACTSEQUENCES.
// NOTE(review): presumably restricts sequence extraction to intron features; confirm against the AGAT docs.
withName: AGAT_SPEXTRACTSEQUENCES {
ext.args = '-t intron'
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This test data is from an _Actinidia_ genome. To reduce the size of the data, the gene coordinates have been shifted back by 630000. The Gff file contains a single-base-pair (1 bp) intron, which is not flagged by `agat_sp_flag_short_introns.pl` in AGAT version `quay.io/biocontainers/agat:1.4.1--pl5321hdfd78af_0`. See issue <https://github.com/NBISweden/AGAT/issues/516> for further details.
Loading

0 comments on commit d069633

Please sign in to comment.