Skip to content

Commit

Permalink
Merge pull request #102 from nf-core/genome_ideogram
Browse files Browse the repository at this point in the history
Added BUSCO ideogram for ``GENOME`` mode
  • Loading branch information
FernandoDuarteF authored Dec 10, 2024
2 parents ae76a10 + fe07df8 commit e7df634
Show file tree
Hide file tree
Showing 8 changed files with 129 additions and 19 deletions.
52 changes: 52 additions & 0 deletions bin/plot_markers1.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env Rscript

# Build a karyotype table from a headerless, tab-separated table of
# sequence names and sequence lengths.
#
# Modified from https://gitlab.com/ezlab/busco_protocol/-/blob/main/support_protocol2/plot_markers/scripts/plot_markers1.R?ref_type=heads
#
# Usage:
#   plot_markers1.R <file_name_for_karyotype> <out_name>
#
# Arguments:
#   file_name_for_karyotype  TSV without header; the first column is used as
#                            the sequence name and the last column as its length
#   out_name                 label written to the "species" column and used as
#                            the prefix of the output file name
#
# Output:
#   <out_name>_karyotype.txt  tab-separated, with header, columns:
#                             Chr, Start, End, species, size, color

# load libraries
# NOTE(review): nothing in this script calls RIdeogram directly; the load is
# kept so that the script's start-up behaviour is unchanged.
require(RIdeogram)

args <- commandArgs(trailingOnly = TRUE)
# Fail early with a usage message instead of an obscure read.csv() error.
if (length(args) < 2) {
    stop("Usage: plot_markers1.R <file_name_for_karyotype> <out_name>", call. = FALSE)
}
file_name_for_karyotype <- args[1]
out_name <- args[2] # how to name the karyotype

##### karyotype-------
lengths_table <- read.csv(file = file_name_for_karyotype, sep = "\t", header = FALSE, stringsAsFactors = FALSE)
if (ncol(lengths_table) < 2) {
    stop("Expected at least two tab-separated columns (name, length) in ", file_name_for_karyotype, call. = FALSE)
}

# Build the table by column name, already in its final order, rather than
# appending columns and renaming them by position.
# The length is read from the LAST column: for a two-column input this is
# identical to using column 2, and it stays correct if the input carries extra
# columns between name and length.
# NOTE(review): "25252" has five digits whereas a hex colour needs six
# (e.g. "252525"); the value is kept unchanged here - confirm against the
# script that consumes this table.
karyotype <- data.frame(
    Chr = lengths_table[[1]],
    Start = 1,
    End = lengths_table[[ncol(lengths_table)]],
    species = out_name,
    size = 12,
    color = "25252",
    stringsAsFactors = FALSE
)

# debugging code
# cat() interprets the newline escape; print() would show it as a literal "\n".
cat("Karyotype table\n")
print(head(karyotype))

write.table(karyotype, file = paste0(out_name, "_karyotype.txt"), quote = FALSE, row.names = FALSE, col.names = TRUE, sep = "\t")
























12 changes: 10 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,17 @@ process {
]
}

withName: 'PLOT_BUSCO_IDEOGRAM' {
withName: 'GENOME_ANNOTATION_BUSCO_IDEOGRAM' {
publishDir = [
path: { "${params.outdir}/busco_ideogram" },
path: { "${params.outdir}/genome_annotation_busco_ideogram" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'GENOME_ONLY_BUSCO_IDEOGRAM' {
publishDir = [
path: { "${params.outdir}/genome_busco_ideogram" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process PLOT_BUSCO_IDEOGRAM {
process GENOME_ANNOTATION_BUSCO_IDEOGRAM {
tag "${genusspeci}_${lineage}"
label 'process_single'

Expand Down
41 changes: 41 additions & 0 deletions modules/local/genome_only_busco_ideogram.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Draw a BUSCO ideogram for a genome that has no annotation.
//
// Input : [ meta, genome FASTA, BUSCO full table ]
// Output: every *.svg and *.png file left in the work directory, plus versions.yml
//
// Steps (all in the script block below):
//   1. extract status / sequence / start / end of the non-missing BUSCOs
//   2. compute per-sequence lengths with seqkit
//   3. turn those lengths into a karyotype table (plot_markers1.R)
//   4. plot the ideogram (plot_busco_ideogram.R)
process GENOME_ONLY_BUSCO_IDEOGRAM {
    tag "$meta.id"
    label 'process_single'

    // NOTE(review): the script below calls `seqkit`, which is not listed in this
    // conda spec - confirm whether conda-based runs need it added (with a pinned version).
    conda "bioconda::r-rideogram=0.2.2 bioconda::r-svglite=2.1.1"
    container "community.wave.seqera.io/library/seqkit_r-dplyr_r-optparse_r-readr_pruned:92b750716d244919"

    input:
    tuple val(meta), path(genome), path(busco_full_table)

    output:
    tuple val(meta), path("*.svg"), emit: svg
    tuple val(meta), path("*.png"), emit: png
    path "versions.yml"           , emit: versions

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    # Keep columns 2-5 (status, sequence, start, end) of every non-comment row
    # whose status is not "Missing". A single awk pass is used instead of
    # grep | cut | grep so that:
    #   - only lines that START with '#' are treated as comments
    #   - only the status column is compared against "Missing"
    #   - an empty result does not produce a non-zero exit status
    awk -F'\\t' 'BEGIN { OFS = "\\t" } !/^#/ && NF >= 5 && \$2 != "Missing" { print \$2, \$3, \$4, \$5 }' \\
        ${busco_full_table} > ${prefix}_busco_coordinates.txt

    # Get chromosome lengths:
    seqkit fx2tab -i -n -l ${genome} > ${prefix}_for_karyotype.txt

    # Call script for table wrangling (writes ${prefix}_karyotype.txt)
    plot_markers1.R ${prefix}_for_karyotype.txt ${prefix}

    # Call script for plotting
    plot_busco_ideogram.R \\
        --busco_output ${prefix}_busco_coordinates.txt \\
        --karyotype ${prefix}_karyotype.txt \\
        --prefix ${prefix} \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        r-base: \$(Rscript -e "cat(as.character(getRversion()))")
        r-rideogram: \$(Rscript -e "cat(as.character(packageVersion('RIdeogram')))")
        seqkit: \$(seqkit version | sed 's/seqkit v//')
    END_VERSIONS
    """
}
4 changes: 2 additions & 2 deletions modules/nf-core/busco/busco/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ process BUSCO_BUSCO {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/busco:5.7.1--pyhdfd78af_0':
'biocontainers/busco:5.7.1--pyhdfd78af_0' }"
'https://depot.galaxyproject.org/singularity/busco:5.8.0--pyhdfd78af_0':
'biocontainers/busco:5.8.0--pyhdfd78af_0' }"

input:
tuple val(meta), path(fasta, stageAs:'tmp_input/*')
Expand Down
8 changes: 3 additions & 5 deletions subworkflows/local/genome_and_annotation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ include { LONGEST } from '../../modules/local/longes
include { BUSCO_BUSCO } from '../../modules/nf-core/busco/busco/main'
include { QUAST } from '../../modules/nf-core/quast/main'
include { AGAT_SPSTATISTICS } from '../../modules/nf-core/agat/spstatistics/main'
include { PLOT_BUSCO_IDEOGRAM } from '../../modules/local/plot_busco_ideogram'
include { GENOME_ANNOTATION_BUSCO_IDEOGRAM } from '../../modules/local/genome_annotation_busco_ideogram'
include { GFFREAD } from '../../modules/nf-core/gffread/main'
include { ORTHOFINDER } from '../../modules/nf-core/orthofinder/main'
include { FASTAVALIDATOR } from '../../modules/nf-core/fastavalidator/main'
Expand Down Expand Up @@ -52,7 +52,7 @@ workflow GENOME_AND_ANNOTATION {
| combine(ch_gff_agat, by:0) // by:0 | Only combine when both channels share the same id
| combine(ch_gff_long, by:0)
| multiMap {
meta, fasta, gff_unfilt, gff_filt ->
meta, fasta, gff_unfilt, gff_filt -> // null is probably not necessary
fasta : fasta ? tuple( meta, file(fasta) ) : null // channel: [ val(meta), [ fasta ] ]
gff_unfilt : gff_unfilt ? tuple( meta, file(gff_unfilt) ) : null // channel: [ val(meta), [ gff ] ], unfiltered
gff_filt : gff_filt ? tuple( meta, file(gff_filt) ) : null // channel: [ val(meta), [ gff ] ], filtered for longest isoform
Expand Down Expand Up @@ -134,8 +134,6 @@ workflow GENOME_AND_ANNOTATION {
// MODULE: Run BUSCO
//

//GFFREAD.out.gffread_fasta.collect().view()

BUSCO_BUSCO (
GFFREAD.out.gffread_fasta,
"proteins", // hardcoded
Expand Down Expand Up @@ -182,7 +180,7 @@ workflow GENOME_AND_ANNOTATION {
}
}

PLOT_BUSCO_IDEOGRAM ( ch_plot_input )//removed this temporarily:, ch_karyotype
GENOME_ANNOTATION_BUSCO_IDEOGRAM ( ch_plot_input )

ch_tree_data = ch_tree_data.mix(BUSCO_BUSCO.out.batch_summary.collect { meta, file -> file })

Expand Down
19 changes: 15 additions & 4 deletions subworkflows/local/genome.nf → subworkflows/local/genome_only.nf
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@

include { QUAST } from '../../modules/nf-core/quast/main'
include { BUSCO_BUSCO } from '../../modules/nf-core/busco/busco/main'
include { GENOME_ONLY_BUSCO_IDEOGRAM } from '../../modules/local/genome_only_busco_ideogram'

workflow GENOME {
workflow GENOME_ONLY {

take:
ch_fasta // channel: [ val(meta), [ fasta ] ]

main:

ch_versions = Channel.empty()
ch_versions = Channel.empty()

QUAST (
ch_fasta,
[[],[]],
[[],[]]
)
ch_versions = ch_versions.mix(QUAST.out.versions.first())
ch_versions = ch_versions.mix(QUAST.out.versions.first())

BUSCO_BUSCO (
ch_fasta,
Expand All @@ -25,7 +26,17 @@ workflow GENOME {
params.busco_lineages_path ?: [],
params.busco_config ?: []
)
ch_versions = ch_versions.mix(BUSCO_BUSCO.out.versions.first())
ch_versions = ch_versions.mix(BUSCO_BUSCO.out.versions.first())
ch_full_table = BUSCO_BUSCO.out.full_table

// Combined ch_fasta and BUSCO output channel into a single channel for ideogram
ch_input_ideo = ch_fasta
| combine(ch_full_table, by:0)


GENOME_ONLY_BUSCO_IDEOGRAM (
ch_input_ideo
)

emit:
quast_results = QUAST.out.results // channel: [ val(meta), [tsv] ]
Expand Down
10 changes: 5 additions & 5 deletions workflows/genomeqc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ include { CREATE_PATH } from '../modules/local/create_pa
include { NCBIGENOMEDOWNLOAD } from '../modules/nf-core/ncbigenomedownload/main'
include { PIGZ_UNCOMPRESS as UNCOMPRESS_FASTA } from '../modules/nf-core/pigz/uncompress/main'
include { PIGZ_UNCOMPRESS as UNCOMPRESS_GFF } from '../modules/nf-core/pigz/uncompress/main'
include { GENOME } from '../subworkflows/local/genome'
include { GENOME_ONLY } from '../subworkflows/local/genome_only'
include { GENOME_AND_ANNOTATION } from '../subworkflows/local/genome_and_annotation'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { TREE_SUMMARY } from '../modules/local/tree_summary'
Expand Down Expand Up @@ -201,13 +201,13 @@ workflow GENOMEQC {

// Run genome only or genome + gff
if (params.genome_only) {
GENOME (
GENOME_ONLY (
ch_input.fasta
)
ch_multiqc_files = ch_multiqc_files
| mix(GENOME.out.quast_results.map { meta, results -> results })
| mix(GENOME.out.busco_short_summaries.map { meta, txt -> txt })
ch_versions = ch_versions.mix(GENOME.out.versions.first())
| mix(GENOME_ONLY.out.quast_results.map { meta, results -> results })
| mix(GENOME_ONLY.out.busco_short_summaries.map { meta, txt -> txt })
ch_versions = ch_versions.mix(GENOME_ONLY.out.versions.first())
} else {
GENOME_AND_ANNOTATION (
ch_input.fasta,
Expand Down

0 comments on commit e7df634

Please sign in to comment.