forked from nf-core/modules
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added subworkflow fasta_gxf_busco_plot (nf-core#7051)
* Added subworkflow fasta_gxf_busco_plot * Update subworkflows/nf-core/fasta_gxf_busco_plot/main.nf Co-authored-by: Fernando Duarte <[email protected]> * Update subworkflows/nf-core/fasta_gxf_busco_plot/main.nf Co-authored-by: Fernando Duarte <[email protected]> * Update subworkflows/nf-core/fasta_gxf_busco_plot/main.nf Co-authored-by: Fernando Duarte <[email protected]> * Update subworkflows/nf-core/fasta_gxf_busco_plot/main.nf Co-authored-by: Fernando Duarte <[email protected]> * Fixed linting and updated snapshot * Applied suggestions --------- Co-authored-by: Fernando Duarte <[email protected]>
- Loading branch information
1 parent
b6515a0
commit 3628d82
Showing
5 changed files
with
677 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
include { BUSCO_BUSCO as BUSCO_ASSEMBLY } from '../../../modules/nf-core/busco/busco/main' | ||
include { BUSCO_GENERATEPLOT as PLOT_ASSEMBLY } from '../../../modules/nf-core/busco/generateplot/main' | ||
include { GFFREAD as EXTRACT_PROTEINS } from '../../../modules/nf-core/gffread/main' | ||
include { BUSCO_BUSCO as BUSCO_ANNOTATION } from '../../../modules/nf-core/busco/busco/main' | ||
include { BUSCO_GENERATEPLOT as PLOT_ANNOTATION } from '../../../modules/nf-core/busco/generateplot/main' | ||
|
||
workflow FASTA_GXF_BUSCO_PLOT {

    //
    // Runs BUSCO on every fasta in ch_fasta for each requested lineage and plots the
    // collected short summaries. When val_mode is 'geno' or 'genome', proteins are
    // additionally extracted from the matching annotation with GFFREAD, evaluated by
    // BUSCO in 'proteins' mode and plotted separately.
    //

    take:
    ch_fasta                    // channel: [ val(meta), fasta ]
    ch_gxf                      // channel: [ val(meta2), gxf ]; gxf ~ gff | gff3 | gtf
                                //
                                // meta and meta2 are paired on their id, so both should carry the same id

    val_mode                    // val(mode); BUSCO mode to apply to ch_fasta
                                // - genome, for genome assemblies (DNA)
                                // - transcriptome, for transcriptome assemblies (DNA)
                                // - proteins, for annotated gene sets (protein)
                                //
                                // Annotations from ch_gxf are only evaluated (with mode proteins)
                                // when mode is geno/genome; for any other mode that step is skipped
                                //
    val_lineages                // [ val(lineage) ]
    val_busco_lineages_path     // val(path); Optional; Set to [] if not needed
    val_busco_config            // val(path); Optional; Set to [] if not needed

    main:
    ch_versions = Channel.empty()

    // Optional inputs: emit the checked file when a path was given, otherwise a
    // nested empty list so that combine() appends a single [] element downstream
    ch_db_path = val_busco_lineages_path
        ? Channel.of( file(val_busco_lineages_path, checkIfExists: true) )
        : Channel.of( [ [] ] )
    ch_config_path = val_busco_config
        ? Channel.of( file(val_busco_config, checkIfExists: true) )
        : Channel.of( [ [] ] )

    // MODULE: BUSCO_BUSCO as BUSCO_ASSEMBLY
    // One item per fasta x lineage: [ meta + [ mode, lineage ], fasta, mode, lineage, db, config ]
    ch_busco_assembly_inputs = ch_fasta
        .combine( Channel.of(val_mode) )
        .combine( Channel.fromList(val_lineages) )
        .map { sample_meta, assembly, busco_mode, busco_lineage ->
            [
                sample_meta + [ mode:busco_mode, lineage:busco_lineage ],
                assembly, busco_mode, busco_lineage
            ]
        }
        .combine( ch_db_path )
        .combine( ch_config_path )

    // Each process input is sliced from the same combined channel
    BUSCO_ASSEMBLY(
        ch_busco_assembly_inputs.map { sample_meta, assembly, _m, _l, _d, _c -> [ sample_meta, assembly ] },
        ch_busco_assembly_inputs.map { _s, _a, busco_mode, _l, _d, _c -> busco_mode },
        ch_busco_assembly_inputs.map { _s, _a, _m, busco_lineage, _d, _c -> busco_lineage },
        ch_busco_assembly_inputs.map { _s, _a, _m, _l, db_path, _c -> db_path },
        ch_busco_assembly_inputs.map { _s, _a, _m, _l, _d, config_path -> config_path }
    )

    ch_assembly_batch_summary        = BUSCO_ASSEMBLY.out.batch_summary
    ch_assembly_short_summaries_txt  = BUSCO_ASSEMBLY.out.short_summaries_txt
    ch_assembly_short_summaries_json = BUSCO_ASSEMBLY.out.short_summaries_json
    ch_assembly_full_table           = BUSCO_ASSEMBLY.out.full_table
    ch_versions                      = ch_versions.mix( BUSCO_ASSEMBLY.out.versions.first() )

    // MODULE: BUSCO_GENERATEPLOT as PLOT_ASSEMBLY
    // Re-write every short summary under a name that carries both sample id and lineage
    ch_assembly_plot_summary = ch_assembly_short_summaries_txt
        .map { meta, summary ->
            def lineage_name = meta.lineage - '_odb'
            [
                "short_summary.specific.${meta.lineage}.${meta.id}_${lineage_name}.txt",
                summary.text
            ]
        }
        .collectFile()

    PLOT_ASSEMBLY( ch_assembly_plot_summary.collect() )

    ch_assembly_png = PLOT_ASSEMBLY.out.png
    ch_versions     = ch_versions.mix( PLOT_ASSEMBLY.out.versions )

    // MODULE: GFFREAD as EXTRACT_PROTEINS
    // Only for geno/genome mode; each fasta is joined on id with its matching
    // annotation, which allows one annotation per fasta
    ch_gffread_inputs = val_mode !in [ 'geno', 'genome' ]
        ? Channel.empty()
        : ch_fasta
            .map { sample_meta, assembly -> [ sample_meta.id, sample_meta, assembly ] }
            .join(
                ch_gxf.map { annotation_meta, annotation -> [ annotation_meta.id, annotation ] }
            )
            .map { _id, sample_meta, assembly, annotation -> [ sample_meta, annotation, assembly ] }

    EXTRACT_PROTEINS(
        ch_gffread_inputs.map { sample_meta, annotation, _a -> [ sample_meta, annotation ] },
        ch_gffread_inputs.map { _s, _g, assembly -> assembly }
    )

    ch_proteins = EXTRACT_PROTEINS.out.gffread_fasta
    ch_versions = ch_versions.mix( EXTRACT_PROTEINS.out.versions.first() )

    // MODULE: BUSCO_BUSCO as BUSCO_ANNOTATION
    // Same layout as the assembly inputs, with the mode fixed to 'proteins'
    ch_busco_annotation_inputs = ch_proteins
        .combine( Channel.of('proteins') )
        .combine( Channel.fromList(val_lineages) )
        .map { sample_meta, proteins, busco_mode, busco_lineage ->
            [
                sample_meta + [ mode:busco_mode, lineage:busco_lineage ],
                proteins, busco_mode, busco_lineage
            ]
        }
        .combine( ch_db_path )
        .combine( ch_config_path )

    BUSCO_ANNOTATION(
        ch_busco_annotation_inputs.map { sample_meta, proteins, _m, _l, _d, _c -> [ sample_meta, proteins ] },
        ch_busco_annotation_inputs.map { _s, _p, busco_mode, _l, _d, _c -> busco_mode },
        ch_busco_annotation_inputs.map { _s, _p, _m, busco_lineage, _d, _c -> busco_lineage },
        ch_busco_annotation_inputs.map { _s, _p, _m, _l, db_path, _c -> db_path },
        ch_busco_annotation_inputs.map { _s, _p, _m, _l, _d, config_path -> config_path }
    )

    ch_annotation_batch_summary        = BUSCO_ANNOTATION.out.batch_summary
    ch_annotation_short_summaries_txt  = BUSCO_ANNOTATION.out.short_summaries_txt
    ch_annotation_short_summaries_json = BUSCO_ANNOTATION.out.short_summaries_json
    ch_annotation_full_table           = BUSCO_ANNOTATION.out.full_table
    ch_versions                        = ch_versions.mix( BUSCO_ANNOTATION.out.versions.first() )

    // MODULE: BUSCO_GENERATEPLOT as PLOT_ANNOTATION
    // Same renaming as for assemblies, with a '.proteins' marker in the file name
    ch_annotation_plot_summary = ch_annotation_short_summaries_txt
        .map { meta, summary ->
            def lineage_name = meta.lineage - '_odb'
            [
                "short_summary.specific.${meta.lineage}.${meta.id}_${lineage_name}.proteins.txt",
                summary.text
            ]
        }
        .collectFile()

    PLOT_ANNOTATION( ch_annotation_plot_summary.collect() )

    ch_annotation_png = PLOT_ANNOTATION.out.png
    ch_versions       = ch_versions.mix( PLOT_ANNOTATION.out.versions )

    emit:
    assembly_batch_summary          = ch_assembly_batch_summary             // channel: [ meta3, txt ]; meta3 ~ meta + [ mode:val(mode), lineage:val(lineage) ]
    assembly_short_summaries_txt    = ch_assembly_short_summaries_txt       // channel: [ meta3, txt ]
    assembly_short_summaries_json   = ch_assembly_short_summaries_json      // channel: [ meta3, json ]
    assembly_full_table             = ch_assembly_full_table                // channel: [ meta3, tsv ]
    assembly_plot_summary_txt       = ch_assembly_plot_summary              // channel: [ txt ]
    assembly_png                    = ch_assembly_png                       // channel: [ png ]
    annotation_batch_summary        = ch_annotation_batch_summary           // channel: [ meta3, txt ]
    annotation_short_summaries_txt  = ch_annotation_short_summaries_txt     // channel: [ meta3, txt ]
    annotation_short_summaries_json = ch_annotation_short_summaries_json    // channel: [ meta3, json ]
    annotation_full_table           = ch_annotation_full_table              // channel: [ meta3, tsv ]
    annotation_plot_summary_txt     = ch_annotation_plot_summary            // channel: [ txt ]
    annotation_png                  = ch_annotation_png                     // channel: [ png ]
    versions                        = ch_versions                           // channel: [ versions.yml ]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
# Metadata for the fasta_gxf_busco_plot subworkflow: components used, input channels/values and emitted channels.
name: "fasta_gxf_busco_plot"
description: |
  Runs BUSCO for input assemblies and their annotations in GFF/GFF3/GTF format, and creates summary plots using `BUSCO/generate_plot.py` script
keywords:
  - genome
  - annotation
  - busco
  - plot
components:
  - busco/busco
  - busco/generateplot
  - gffread
input:
  - ch_fasta:
      type: file
      description: |
        Channel containing FASTA files
        Structure:[ val(meta), fasta ]
      pattern: "*.{fa,faa,fsa,fas,fasta}(.gz)?"
  - ch_gxf:
      type: file
      description: |
        Channel containing GFF/GFF3/GTF files
        Structure:[ val(meta2), gxf ]
      pattern: "*.{gff,gff3,gtf}"
  - val_mode:
      type: string
      description: |
        String containing BUSCO mode to apply to ch_fasta files
        Structure:val(mode)
  - val_lineages:
      type: array
      description: |
        Array of strings representing BUSCO lineage datasets
        Structure:[ val(lineage) ]
  - val_busco_lineages_path:
      type: path
      description: |
        Path where BUSCO lineages are located or downloaded if not already there. If this input is `[]`,
        BUSCO will download the datasets in the task work directory
        Structure:val(busco_lineages_path)
  - val_busco_config:
      type: path
      description: |
        Path to BUSCO config. It is optional and can be set to `[]`
        Structure:val(busco_config)
output:
  - assembly_batch_summary:
      type: file
      description: |
        Channel containing BUSCO batch summaries corresponding to fasta files
        Structure: [ val(meta), txt ]
      pattern: "*.txt"
  - assembly_short_summaries_txt:
      type: file
      description: |
        Channel containing BUSCO short summaries corresponding to fasta files
        Structure: [ val(meta), txt ]
      pattern: "*.txt"
  - assembly_short_summaries_json:
      type: file
      description: |
        Channel containing BUSCO short summaries corresponding to fasta files
        Structure: [ val(meta), json ]
      pattern: "*.json"
  - assembly_full_table:
      # type added to match the other file outputs of this subworkflow
      type: file
      description: |
        Channel containing complete results in a tabular format with scores and lengths of BUSCO matches,
        and coordinates (for genome mode) or gene/protein IDs (for transcriptome or protein modes)
        Structure: [ val(meta), tsv ]
      pattern: "*.tsv"
  - assembly_plot_summary_txt:
      type: file
      description: |
        Channel containing BUSCO short summaries corresponding to fasta files renamed to include lineage in sample id
        Structure: [ txt ]
      pattern: "*.txt"
  - assembly_png:
      type: file
      description: |
        Channel containing summary plot for assemblies
        Structure: png
      pattern: "*.png"
  - annotation_batch_summary:
      type: file
      description: |
        Channel containing BUSCO batch summaries corresponding to annotation files
        Structure: [ val(meta), txt ]
      pattern: "*.txt"
  - annotation_short_summaries_txt:
      type: file
      description: |
        Channel containing BUSCO short summaries corresponding to annotation files
        Structure: [ val(meta), txt ]
      pattern: "*.txt"
  - annotation_short_summaries_json:
      type: file
      description: |
        Channel containing BUSCO short summaries corresponding to annotation files
        Structure: [ val(meta), json ]
      pattern: "*.json"
  - annotation_full_table:
      # type added to match the other file outputs of this subworkflow
      type: file
      description: |
        Channel containing complete results in a tabular format with scores and lengths of BUSCO matches,
        protein IDs in protein mode
        Structure: [ val(meta), tsv ]
      pattern: "*.tsv"
  - annotation_plot_summary_txt:
      type: file
      description: |
        Channel containing BUSCO short summaries corresponding to annotation files renamed to include lineage in sample id
        Structure: [ txt ]
      pattern: "*.txt"
  - annotation_png:
      type: file
      description: |
        Channel containing summary plot for annotations
        Structure: png
      pattern: "*.png"
  - versions:
      type: file
      description: |
        File containing software versions
        Structure: [ path(versions.yml) ]
      pattern: "versions.yml"
authors:
  - "@GallVp"
maintainers:
  - "@GallVp"
  - "@FernandoDuarteF"
Oops, something went wrong.