Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Freyja integration #268

Merged
merged 6 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions configs/container.config
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ process {
withLabel: covarplot { container = 'nanozoo/covarplot:0.0.2--2c6e300' }
withLabel: demultiplex { container = 'nanozoo/guppy_cpu:5.0.7-1--47b84be' }
withLabel: fastcov { container = 'raverjay/fastcov:0.1.3--ba8c8cf6ae19' }
withLabel: freyja { container = 'staphb/freyja:1.5.0-03_27_2024-00-48-2024-03-27' }
withLabel: ggplot2 { container = 'nanozoo/r_ggpubr:0.2.5--4b52011' }
withLabel: kraken2 { container = 'nanozoo/kraken2:2.1.1--d5ded30'}
withLabel: krona { container = 'nanozoo/krona:2.7.1--e7615f7'}
Expand Down
1 change: 1 addition & 0 deletions configs/local.config
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ process {
withLabel: covarplot { cpus = 1 }
withLabel: demultiplex { cpus = params.max_cores }
withLabel: fastcov { cpus = params.cores }
withLabel: freyja { cpus = params.cores}
withLabel: ggplot2 { cpus = params.cores }
withLabel: guppy_cpu { cpus = params.max_cores }
withLabel: guppy_gpu { cpus = params.max_cores }
Expand Down
1 change: 1 addition & 0 deletions configs/nodes.config
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ process {
withLabel: demultiplex { cpus = 12; memory = '12 GB' }
withLabel: fastcov { cpus = 4; memory = '8 GB' }
withLabel: fasttree { cpus = 4; memory = '2 GB' }
withLabel: freyja { cpus = 4; memory = '16 GB' }
withLabel: ggplot2 { cpus = 2; memory = '2 GB' }
withLabel: guppy_cpu { cpus = 60; memory = '60 GB' }
withLabel: guppy_gpu { cpus = 10; memory = '16 GB' }
Expand Down
3 changes: 3 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,12 @@ params {
buildDB = false
cloudProcess = false
extended = false
freyja = false
freyja_update = false
guppy_cpu = false
guppy_model = 'dna_r9.4.1_450bps_hac.cfg'
krakendb = ''
lcs = false
localguppy = false
medaka_model = 'r941_min_hac_g507'
nanopolish = ''
Expand Down
32 changes: 22 additions & 10 deletions poreCov.nf
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,14 @@ if (params.minLength && !params.minLength.toString().matches("[0-9]+")) { exit 5
if (params.maxLength && !params.maxLength.toString().matches("[0-9]+")) { exit 5, "Please provide an integer number (e.g. 300) as maximum read length via [--maxLength]" }
if (params.nanopolish == true && (params.fastq || params.fastq_pass) ) { exit 5, "Please provide sequencing_summary.txt via [--nanopolish]" }
if (!workflow.profile.contains('test_fast5')) { if (params.nanopolish && !params.fast5 ) { exit 5, "Please provide a fast5 dir for nanopolish [--fast5]" } }

// check correct usage of param-flags
if (params.extended && !params.samples ) { exit 5, "When using --extended you need to specify also a sample.csv via [--samples]" }
if (!params.freyja == true && !params.freyja == false) {exit 5, "Please provide no input to [--freyja]"}
if (!params.lcs == true && !params.lcs == false) {exit 5, "Please provide no input to [--lcs]"}
if (params.screen_reads && !params.lcs && !params.freyja) {exit 5, "When using [--screen_reads] you also need to use at least one: [--freyja] or [--lcs]"}
if (!params.screen_reads && params.lcs) {exit 5, "[--lcs] requires [--screen_reads] to work"}
if (!params.screen_reads && params.freyja) {exit 5, "[--freyja] requires [--screen_reads] to work"}

// validating sample table
if (params.samples) {
Expand Down Expand Up @@ -315,7 +322,7 @@ include { create_summary_report_wf } from './workflows/create_summary_report.nf'
include { determine_lineage_wf } from './workflows/determine_lineage.nf'
include { determine_mutations_wf } from './workflows/determine_mutations.nf'
include { genome_quality_wf } from './workflows/genome_quality.nf'
include { read_classification_wf } from './workflows/read_classification'
include { read_classification_wf; read_screening_freyja_wf; read_screening_lsc_wf} from './workflows/read_classification'
include { read_qc_wf } from './workflows/read_qc.nf'
include { rki_report_wf } from './workflows/provide_rki.nf'

Expand Down Expand Up @@ -348,11 +355,11 @@ workflow {
// use medaka or nanopolish artic reconstruction
if (params.nanopolish) {
artic_ncov_np_wf(filtered_reads_ch, dir_input_ch, basecalling_wf.out[1], artic_ncov_np_wf)
fasta_input_ch = artic_ncov_np_wf.out[0]
fasta_input_ch = artic_ncov_np_wf.out.assembly[0]
}
else if (!params.nanopolish) {
artic_ncov_wf(filtered_reads_ch, params.artic_normalize)
fasta_input_ch = artic_ncov_wf.out[0]
fasta_input_ch = artic_ncov_wf.out.assembly[0]
}
}
// fastq input via dir and or files
Expand Down Expand Up @@ -384,7 +391,7 @@ workflow {
}
else if (!params.nanopolish) {
artic_ncov_wf(filtered_reads_ch, params.artic_normalize)
fasta_input_ch = artic_ncov_wf.out
fasta_input_ch = artic_ncov_wf.out.assembly
}
}

Expand All @@ -408,14 +415,16 @@ workflow {
// 4. Summary output
if (params.fasta || workflow.profile.contains('test_fasta')) {
taxonomic_read_classification_ch = Channel.from( ['deactivated', 'deactivated', 'deactivated'] ).collect()
linage_read_classification_ch = Channel.from( ['deactivated', 'deactivated'] ).collect()
alignments_ch = Channel.from( ['deactivated'] )
} else {
taxonomic_read_classification_ch = read_classification_wf.out.kraken
if (params.screen_reads) {
linage_read_classification_ch = read_classification_wf.out.lcs
} else {
linage_read_classification_ch = Channel.from( ['deactivated', 'deactivated'] ).collect()
if (params.lcs) {
read_screening_lsc_wf(filtered_reads_ch)
}
if (params.freyja) {
read_screening_freyja_wf(artic_ncov_wf.out.binary_alignment.combine(reference_for_qc_input_ch))
}
}
alignments_ch = align_to_reference(filtered_reads_ch.combine(reference_for_qc_input_ch))
}
Expand All @@ -427,7 +436,7 @@ workflow {
else { samples_table_ch = Channel.from( ['deactivated'] ) }
*/
create_summary_report_wf(determine_lineage_wf.out, genome_quality_wf.out[0], determine_mutations_wf.out,
taxonomic_read_classification_ch, linage_read_classification_ch, alignments_ch, samples_file_ch)
taxonomic_read_classification_ch, alignments_ch, samples_file_ch)

}

Expand Down Expand Up @@ -479,12 +488,15 @@ ${c_yellow}Workflow control (optional)${c_reset}
--nanopolish use nanopolish instead of medaka for ARTIC (needs --fast5)
to skip basecalling use --fastq or --fastq_pass and provide a sequencing_summary.txt in addition to --fast5
e.g --nanopolish sequencing_summary.txt
--screen_reads Determines the Pangolineage of each individual read (takes time)
--screen_reads Determines the Pangolineage of each individual read (takes time, needs --freyja and/or --lcs)
--scorpio Skip Scorpio in pangolin run [default: $params.scorpio]
${c_dim}From pangolin version 4, Scorpio overwrites Usher results which leads to many unassigned samples
Can be turned on with --scorpio${c_reset}

${c_yellow}Parameters - Lineage detection on reads (see screen_reads, optional)${c_reset}
--freyja activate read-screening via freyja
--freyja_update update freyja's barcode-db prior to running
--lcs activate read-screening via lcs
--lcs_ucsc_version Create marker table based on a specific UCSC SARS-CoV-2 tree (e.g. '2022-05-01'). Use 'predefined'
to use the marker table from the repo (most probably not up-to-date) [default: $params.lcs_ucsc_version]
${c_dim}See https://hgdownload.soe.ucsc.edu/goldenPath/wuhCor1/UShER_SARS-CoV-2 for available trees.${c_reset}
Expand Down
7 changes: 7 additions & 0 deletions workflows/artic_nanopore_nCov19.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ workflow artic_ncov_wf {

artic_medaka_custom_bed(fastq.combine(external_primer_schemes).combine(primerBed), normalise_threshold)
assembly = artic_medaka_custom_bed.out.fasta
binary_alignment = artic_medaka_custom_bed.out.fullbam

// plot amplicon coverage
covarplot_custom_bed(artic_medaka_custom_bed.out.covarplot.combine(primerBed))
Expand All @@ -25,6 +26,7 @@ workflow artic_ncov_wf {

artic_medaka(fastq.combine(external_primer_schemes), normalise_threshold)
assembly = artic_medaka.out.fasta
binary_alignment = artic_medaka.out.fullbam

// plot amplicon coverage
covarplot(artic_medaka.out.covarplot.combine(external_primer_schemes))
Expand All @@ -33,9 +35,11 @@ workflow artic_ncov_wf {

// error logging
no_genomes_at_all = assembly.ifEmpty{ log.info "\033[0;33mCould not generate any genomes, please check your reads $params.output/$params.readqcdir\033[0m" }
binary_alignment.ifEmpty{ log.info "\033[0;33mCould not generate any genomes, please check your reads $params.output/$params.readqcdir\033[0m" }

emit:
assembly
binary_alignment
}

workflow artic_ncov_np_wf {
Expand All @@ -62,6 +66,7 @@ workflow artic_ncov_np_wf {
)

assembly = artic_nanopolish_custom_bed.out.fasta
binary_alignment = artic_nanopolish_custom_bed.out.fullbam

// plot amplicon coverage
covarplot_custom_bed(artic_nanopolish_custom_bed.out.covarplot.combine(primerBed))
Expand All @@ -81,11 +86,13 @@ workflow artic_ncov_np_wf {
)

assembly = artic_nanopolish.out.fasta
binary_alignment = artic_nanopolish.out.fullbam

// plot amplicon coverage
covarplot(artic_nanopolish.out.covarplot.combine(external_primer_schemes))
}

emit:
assembly
binary_alignment
}
9 changes: 0 additions & 9 deletions workflows/create_summary_report.nf
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
include { summary_report; summary_report_fasta; summary_report_default } from './process/summary_report'
include { plot_coverages } from '../modules/plot_coverages.nf'
include { get_variants_classification } from '../modules/get_variants_classification.nf'
include { lcs_plot } from './process/lcs_sc2'

workflow create_summary_report_wf {
take:
pangolin
president
nextclade
kraken2
lcs
alignments
samples_table

Expand All @@ -20,7 +18,6 @@ workflow create_summary_report_wf {
pangolin_results = pangolin.map {it -> it[1]}.collectFile(name: 'pangolin_results.csv', skip: 1, keepHeader: true)
president_results = president.map {it -> it[1]}.collectFile(name: 'president_results.tsv', skip: 1, keepHeader: true)
nextclade_results = nextclade.map {it -> it[1]}.collectFile(name: 'nextclade_results.tsv', skip: 1, keepHeader: true)
lcs_results = lcs.map {it -> it[1]}.collectFile(name: 'lcs_results.tsv', skip: 1, keepHeader: true, storeDir: "${params.output}/${params.lineagedir}/")

alignment_files = alignments.map {it -> it[0]}.collect()
if (params.fasta || workflow.profile.contains('test_fasta')) {
Expand All @@ -35,11 +32,5 @@ workflow create_summary_report_wf {

if (params.samples) { summary_report(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots, samples_table) }
else { summary_report_default(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots) }

}

if (params.screen_reads){
lcs_plot(lcs.map {it -> it[1]}.collectFile(name: 'lcs_results.tsv', skip: 1, keepHeader: true), params.lcs_cutoff)
}

}
96 changes: 96 additions & 0 deletions workflows/process/freyja.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
process freyja {
label 'freyja'
errorStrategy 'retry'
maxRetries 1
publishDir "${params.output}/${params.lineagedir}/${name}/lineage-proportion-by-reads", mode: 'copy', pattern: "*"

input:
tuple val(name), path(bam_file), path(reference)
output:
tuple val(name), path("*_freyja_aggregate.tsv"), emit: aggregate
tuple val(name), path("*_freyja_variants.tsv"), path("*_freyja_depths.tsv"), path("*_freyja_demix.tsv"), path("*_freyja_aggregate.tsv"), emit: fullresults
tuple val(name), path("*_freyja_plot.svg"), path("*_freyja_plot.png"), emit: plots, optional: true

script:
if ( params.freyja_update )
{ freyja_update_cmd = "mkdir data; freyja update --outdir data"
// here we will download the latest Freyja data again instead of using the files obtained when installing the conda env
freyja_ref_data = "data"
}
else
{ freyja_update_cmd = " "
// this is the original path where inside of the container the Freyja data is stored
freyja_ref_data = "/opt/conda/envs/freyja-env/lib/python3.10/site-packages/freyja/data/"
}
"""
sed -i "s/^>.*\$/>MN908947.3/" ${reference} #rename reference-sequence as Alignment is done with this sequence-name even so using the same reference freyja will detect deviating names and throw an error
mkdir -p freyja_result/

${freyja_update_cmd}

freyja variants ${bam_file} \
--variants ${name}_freyja_variants.tsv \
--depths ${name}_freyja_depths.tsv \
--ref ${reference}

freyja demix ${name}_freyja_variants.tsv \
${name}_freyja_depths.tsv \
--output freyja_result/${name}_freyja_demix.tsv \
--barcodes ${freyja_ref_data}/usher_barcodes.csv \
--lineageyml ${freyja_ref_data}/lineages.yml

freyja aggregate freyja_result/ \
--output ${name}_freyja_aggregate.tsv \

freyja plot ${name}_freyja_aggregate.tsv \
--output ${name}_freyja_plot.svg \
--lineages \
--mincov 10.0 #default minCoverage: 60 \
--lineageyml ${freyja_ref_data}/lineages.yml

freyja plot ${name}_freyja_aggregate.tsv \
--output ${name}_freyja_plot.png \
--lineages \
--mincov 10.0 #default minCoverage: 60 \
--lineageyml ${freyja_ref_data}/lineages.yml

mv freyja_result/${name}_freyja_demix.tsv \$PWD #freyha_demix.tsv is put into a folder before to give it to freyja aggregate
"""
stub:
"""
touch stub_freyja_variants.tsv \
stub_freyja_depths.tsv \
stub_freyja_demix.tsv \
stub_freyja_aggregate.tsv \
stub_freyja_plot.png \
stub_freyja_plot.svg
"""
}

process freyja_plot {
label 'freyja'
publishDir "${params.output}/${params.lineagedir}/", mode: 'copy', pattern: "*"

input:
path(aggregate_file)
output:
tuple path("*_freyja_plot.svg"), path("*_freyja_plot.png"), emit: results, optional: true

script:
"""
freyja plot ${aggregate_file} \
--output combined_freyja_plot.svg \
--lineages \
--mincov 10.0 #default minCoverage: 60

freyja plot ${aggregate_file} \
--output combined_freyja_plot.png \
--lineages \
--mincov 10.0 #default minCoverage: 60
"""
stub:
"""
touch stub_combined_freyja_plot.png \
stub_combined_freyja_plot.svg
"""
}
4 changes: 4 additions & 0 deletions workflows/process/lcs_sc2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,8 @@ process lcs_plot {
"""
lcs_bar_plot.R '${tsv}' ${cutoff}
"""
stub:
"""
touch stub.png
"""
}
49 changes: 35 additions & 14 deletions workflows/read_classification.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
include { kraken2 } from './process/kraken2.nf'
include { krona } from './process/krona.nf'
include { download_database_kraken2 } from './process/download_database_kraken2.nf'
include { lcs_sc2; lcs_ucsc_markers_table } from './process/lcs_sc2'
include { freyja; freyja_plot } from './process/freyja.nf'
include { lcs_plot; lcs_sc2; lcs_ucsc_markers_table } from './process/lcs_sc2'

workflow read_classification_wf {
take:
Expand All @@ -18,20 +19,40 @@ workflow read_classification_wf {

// visuals
krona(kraken2.out)

// Metagenomic analysis
// calculate mixed/ pooled samples using LCS, https://github.com/rvalieris/LCS
if (params.screen_reads) {
if (params.lcs_variant_groups == 'default') { lcs_variant_groups_ch = Channel.empty() }
else { lcs_variant_groups_ch = Channel.fromPath("${params.lcs_variant_groups}", checkIfExists: true)}

lcs_ucsc_markers_table(lcs_variant_groups_ch.ifEmpty([]))
lcs_sc2(fastq.combine(lcs_ucsc_markers_table.out))
lcs_output = lcs_sc2.out
}
else { lcs_output = Channel.empty()}

emit:
kraken = kraken2.out
lcs = lcs_output
}

workflow read_screening_freyja_wf {
take:
alignment
main:
freyja(alignment)
freya_result_ch = freyja.out.aggregate.map{it -> it[1]}.collectFile(name: 'freyja_results.tsv', skip: 1, keepHeader: true, storeDir: "${params.output}/${params.lineagedir}/")
freyja_plot(freya_result_ch)

emit:
freyja_results = freyja.out.aggregate
freyja_plots = freyja_plot.out
}

workflow read_screening_lsc_wf {
take:
fastq
main:
// Metagenomic analysis
// calculate mixed/ pooled samples using LCS, https://github.com/rvalieris/LCS
if (params.lcs_variant_groups == 'default') { lcs_variant_groups_ch = Channel.empty() }
else { lcs_variant_groups_ch = Channel.fromPath("${params.lcs_variant_groups}", checkIfExists: true)}

lcs_ucsc_markers_table(lcs_variant_groups_ch.ifEmpty([]))
lcs_sc2(fastq.combine(lcs_ucsc_markers_table.out))

lcs_result_ch = lcs_sc2.out.map{it -> it[1]}.collectFile(name: 'lcs_results.tsv', skip: 1, keepHeader: true, storeDir: "${params.output}/${params.lineagedir}/")
lcs_plot(lcs_result_ch, params.lcs_cutoff)

emit:
lcs_results = lcs_sc2.out
lcs_plots = lcs_plot.out
}
Loading