Skip to content

Commit

Permalink
Merge pull request #38 from PlantandFoodResearch/fix/is_masked
Browse files Browse the repository at this point in the history
Fixed a bug where `is_masked` was ignored by the pipeline
  • Loading branch information
GallVp authored Jun 18, 2024
2 parents 87508dc + 1f957a2 commit 6713761
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 31 deletions.
18 changes: 18 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,24 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## 0.3.3 - [18-Jun-2024]

### `Added`

1. Added a stub test to evaluate the case where an assembly is soft-masked but has no annotations

### `Fixed`

1. Fixed a bug where `is_masked` was ignored by the pipeline
2. Fixed a bug in parameter validation that allowed `braker_hints` to be specified without `braker_gff3`

### `Dependencies`

1. NextFlow !>=23.04.4
2. nf-validation=1.1.3

### `Deprecated`

## 0.3.2 - [13-May-2024]

### `Added`
Expand Down
3 changes: 2 additions & 1 deletion assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@
"maxLength": 0
}
],
"errorMessage": "BRAKER hints GFF/GFF3 file path cannot contain spaces and must have extension '.gff.gz', '.gff3.gz', '.gff' or '.gff3'"
"errorMessage": "BRAKER hints GFF/GFF3 file path cannot contain spaces and must have extension '.gff.gz', '.gff3.gz', '.gff' or '.gff3'",
"dependentRequired": ["braker_gff3"]
}
},
"required": ["tag", "fasta", "is_masked"]
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ manifest {
description = """A NextFlow pipeline for pan-genome annotation"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.4'
version = '0.3.2'
version = '0.3.3'
doi = ''
}

Expand Down
56 changes: 29 additions & 27 deletions subworkflows/local/prepare_assembly.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ workflow PREPARE_ASSEMBLY {
te_library // channel: [ meta, fasta ]
repeat_annotator // val(String), 'repeatmodeler' or 'edta'
exclude_assemblies // channel: val(assembly_x,assembly_y)
ch_is_masked // channel: [ meta, val(true|false) ]

main:
ch_versions = Channel.empty()
Expand Down Expand Up @@ -63,26 +64,35 @@ workflow PREPARE_ASSEMBLY {
ch_versions = ch_versions.mix(GUNZIP_TE_LIBRARY.out.versions.first())

// SUBWORKFLOW: FASTA_EDTA_LAI
ch_annotator_inputs = ch_validated_assembly
ch_unmasked_masked_branch = ch_validated_assembly
| combine( exclude_assemblies )
| map { meta, fasta, ex_assemblies ->
ex_assemblies.tokenize(",").contains( meta.id )
? null
: [ meta, fasta ]
}
| join(
ch_is_masked
)
| branch { meta, fasta, is_masked ->
unmasked: ! is_masked
return [ meta, fasta ]
masked: is_masked
return [ meta, fasta ]
}

ch_annotator_inputs = ch_unmasked_masked_branch.unmasked
| join(
ch_gunzip_te_library, remainder: true
)
| filter { meta, assembly, teLib ->
teLib == null
teLib == null && ( assembly != null )
}
| map { meta, assembly, teLib -> [meta, assembly] }
| map { meta, assembly, teLib -> [ meta, assembly ] }

ch_edta_inputs = repeat_annotator != 'edta'
? Channel.empty()
: ch_annotator_inputs
| combine( exclude_assemblies )
| map { meta, fasta, ex_assemblies ->
def ex_list = ex_assemblies.split(",")

if ( !( ex_list.contains( meta.id ) ) ) {
[ meta, fasta ]
}
}

FASTA_EDTA_LAI(
ch_edta_inputs,
Expand All @@ -96,14 +106,6 @@ workflow PREPARE_ASSEMBLY {
ch_repeatmodeler_inputs = repeat_annotator != 'repeatmodeler'
? Channel.empty()
: ch_annotator_inputs
| combine( exclude_assemblies )
| map { meta, fasta, ex_assemblies ->
def ex_list = ex_assemblies.split(",")

if ( !( ex_list.contains( meta.id ) ) ) {
[ meta, fasta ]
}
}

REPEATMODELER_BUILDDATABASE ( ch_repeatmodeler_inputs )

Expand All @@ -112,7 +114,7 @@ workflow PREPARE_ASSEMBLY {
// MODULE: REPEATMODELER_REPEATMODELER
REPEATMODELER_REPEATMODELER ( REPEATMODELER_BUILDDATABASE.out.db )

ch_assembly_and_te_lib = ch_validated_assembly
ch_assembly_and_te_lib = ch_unmasked_masked_branch.unmasked
| join(
repeat_annotator == 'edta'
? FASTA_EDTA_LAI.out.te_lib_fasta.mix(ch_gunzip_te_library)
Expand All @@ -123,21 +125,21 @@ workflow PREPARE_ASSEMBLY {

// MODULE: REPEATMASKER
REPEATMASKER(
ch_assembly_and_te_lib.map { meta, assembly, teLib -> [meta, assembly] },
ch_assembly_and_te_lib.map { meta, assembly, teLib -> [ meta, assembly ] },
ch_assembly_and_te_lib.map { meta, assembly, teLib -> teLib },
)

ch_masked_assembly = ch_unmasked_masked_branch.masked
| mix(REPEATMASKER.out.fasta_masked)
ch_versions = ch_versions.mix(REPEATMASKER.out.versions.first())

// MODULE: STAR_GENOMEGENERATE
ch_genomegenerate_inputs = ch_validated_assembly
| combine( exclude_assemblies )
| map { meta, fasta, ex_assemblies ->
def ex_list = ex_assemblies.split(",")

if ( !( ex_list.contains( meta.id ) ) ) {
[ meta, fasta ]
}
ex_assemblies.tokenize(",").contains( meta.id )
? null
: [ meta, fasta ]
}


Expand All @@ -151,7 +153,7 @@ workflow PREPARE_ASSEMBLY {

emit:
target_assemby = ch_validated_assembly // channel: [ meta, fasta ]
masked_target_assembly = REPEATMASKER.out.fasta_masked // channel: [ meta, fasta ]
masked_target_assembly = ch_masked_assembly // channel: [ meta, fasta ]
target_assemby_index = ch_assembly_index // channel: [ meta, star_index ]
versions = ch_versions // channel: [ versions.yml ]
}
1 change: 1 addition & 0 deletions tests/stub/assemblysheet.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
tag,fasta,is_masked,te_lib,braker_gff3,braker_hints
red5_v2p1,tests/stub/target/red5_v2p1_chr1.fasta.gz,no,,tests/stub/braker/red5_v2p1.gff3.gz,tests/stub/braker/red5_v2p1.hints.gff.gz
donghong,tests/stub/target/donghong.chr1.fsa.gz,no,tests/stub/te_lib/donghong.TElib.fa.gz,tests/stub/braker/red5_v2p1.gff3.gz,tests/stub/braker/red5_v2p1.hints.gff.gz
red5_v3,tests/stub/target/red5_v3_chr1.fasta,yes
Empty file.
5 changes: 3 additions & 2 deletions workflows/pangene.nf
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ workflow PANGENE {
it.join(",")
}

ch_masked = ch_input
ch_is_masked = ch_input
| map { it ->
def tag = it[0]
def is_masked = it[2]
Expand Down Expand Up @@ -144,7 +144,8 @@ workflow PANGENE {
ch_target_assembly,
ch_te_library,
params.repeat_annotator,
ch_braker_ex_asm_str
ch_braker_ex_asm_str,
ch_is_masked
)

ch_valid_target_assembly = PREPARE_ASSEMBLY.out.target_assemby
Expand Down

0 comments on commit 6713761

Please sign in to comment.