Merge pull request #268 from replikation/freyja_integration
Freyja integration
replikation authored May 17, 2024
2 parents a95483a + 3e05798 commit ff6cb14
Showing 11 changed files with 172 additions and 35 deletions.
1 change: 1 addition & 0 deletions configs/container.config
@@ -7,6 +7,7 @@ process {
withLabel: covarplot { container = 'nanozoo/covarplot:0.0.2--2c6e300' }
withLabel: demultiplex { container = 'nanozoo/guppy_cpu:5.0.7-1--47b84be' }
withLabel: fastcov { container = 'raverjay/fastcov:0.1.3--ba8c8cf6ae19' }
withLabel: freyja { container = 'staphb/freyja:1.5.0-03_27_2024-00-48-2024-03-27' }
withLabel: ggplot2 { container = 'nanozoo/r_ggpubr:0.2.5--4b52011' }
withLabel: kraken2 { container = 'nanozoo/kraken2:2.1.1--d5ded30'}
withLabel: krona { container = 'nanozoo/krona:2.7.1--e7615f7'}
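The new freyja label can be repointed at another image without editing the repository. A minimal sketch, assuming a custom config file passed via -c (the image tag here is hypothetical):

// custom.config — override the container for all processes labelled 'freyja'
process {
    withLabel: freyja { container = 'staphb/freyja:1.5.1' }
}

// usage: nextflow run replikation/poreCov -c custom.config ...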
1 change: 1 addition & 0 deletions configs/local.config
@@ -6,6 +6,7 @@ process {
withLabel: covarplot { cpus = 1 }
withLabel: demultiplex { cpus = params.max_cores }
withLabel: fastcov { cpus = params.cores }
withLabel: freyja { cpus = params.cores}
withLabel: ggplot2 { cpus = params.cores }
withLabel: guppy_cpu { cpus = params.max_cores }
withLabel: guppy_gpu { cpus = params.max_cores }
1 change: 1 addition & 0 deletions configs/nodes.config
@@ -5,6 +5,7 @@ process {
withLabel: demultiplex { cpus = 12; memory = '12 GB' }
withLabel: fastcov { cpus = 4; memory = '8 GB' }
withLabel: fasttree { cpus = 4; memory = '2 GB' }
withLabel: freyja { cpus = 4; memory = '16 GB' }
withLabel: ggplot2 { cpus = 2; memory = '2 GB' }
withLabel: guppy_cpu { cpus = 60; memory = '60 GB' }
withLabel: guppy_gpu { cpus = 10; memory = '16 GB' }
3 changes: 3 additions & 0 deletions nextflow.config
@@ -32,9 +32,12 @@ params {
buildDB = false
cloudProcess = false
extended = false
freyja = false
freyja_update = false
guppy_cpu = false
guppy_model = 'dna_r9.4.1_450bps_hac.cfg'
krakendb = ''
lcs = false
localguppy = false
medaka_model = 'r941_min_hac_g507'
nanopolish = ''
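Together with the existing --screen_reads switch, the new parameters enable an invocation along these lines — a sketch only, with placeholder input path and profiles:

nextflow run replikation/poreCov \
    --fastq_pass 'fastq_pass/' \
    --screen_reads --freyja --freyja_update \
    -profile local,docker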
32 changes: 22 additions & 10 deletions poreCov.nf
@@ -119,7 +119,14 @@ if (params.minLength && !params.minLength.toString().matches("[0-9]+")) { exit 5
if (params.maxLength && !params.maxLength.toString().matches("[0-9]+")) { exit 5, "Please provide an integer number (e.g. 300) as maximum read length via [--maxLength]" }
if (params.nanopolish == true && (params.fastq || params.fastq_pass) ) { exit 5, "Please provide sequencing_summary.txt via [--nanopolish]" }
if (!workflow.profile.contains('test_fast5')) { if (params.nanopolish && !params.fast5 ) { exit 5, "Please provide a fast5 dir for nanopolish [--fast5]" } }

// check correct usage of param-flags
if (params.extended && !params.samples ) { exit 5, "When using --extended you also need to specify a sample.csv via [--samples]" }
if (params.freyja != true && params.freyja != false) { exit 5, "Please provide no input to [--freyja], it is an on/off flag" }
if (params.lcs != true && params.lcs != false) { exit 5, "Please provide no input to [--lcs], it is an on/off flag" }
if (params.screen_reads && !params.lcs && !params.freyja) { exit 5, "When using [--screen_reads] you also need at least one of [--freyja] or [--lcs]" }
if (!params.screen_reads && params.lcs) { exit 5, "[--lcs] requires [--screen_reads] to work" }
if (!params.screen_reads && params.freyja) { exit 5, "[--freyja] requires [--screen_reads] to work" }
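The guards encode a simple dependency, illustrated below: --screen_reads selects the mode, --freyja/--lcs select the screening tool, and neither side works alone (flag combinations shown are illustrative):

--screen_reads --freyja          # ok: screen reads with Freyja
--screen_reads --lcs --freyja    # ok: run both screeners
--screen_reads                   # exit 5: no screening tool selected
--freyja                         # exit 5: missing --screen_reads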

// validating sample table
if (params.samples) {
@@ -315,7 +322,7 @@ include { create_summary_report_wf } from './workflows/create_summary_report.nf'
include { determine_lineage_wf } from './workflows/determine_lineage.nf'
include { determine_mutations_wf } from './workflows/determine_mutations.nf'
include { genome_quality_wf } from './workflows/genome_quality.nf'
include { read_classification_wf } from './workflows/read_classification'
include { read_classification_wf; read_screening_freyja_wf; read_screening_lsc_wf} from './workflows/read_classification'
include { read_qc_wf } from './workflows/read_qc.nf'
include { rki_report_wf } from './workflows/provide_rki.nf'

@@ -348,11 +355,11 @@ workflow {
// use medaka or nanopolish artic reconstruction
if (params.nanopolish) {
artic_ncov_np_wf(filtered_reads_ch, dir_input_ch, basecalling_wf.out[1], artic_ncov_np_wf)
fasta_input_ch = artic_ncov_np_wf.out[0]
fasta_input_ch = artic_ncov_np_wf.out.assembly
}
else if (!params.nanopolish) {
artic_ncov_wf(filtered_reads_ch, params.artic_normalize)
fasta_input_ch = artic_ncov_wf.out[0]
fasta_input_ch = artic_ncov_wf.out.assembly
}
}
// fastq input via dir and or files
@@ -384,7 +391,7 @@ workflow {
}
else if (!params.nanopolish) {
artic_ncov_wf(filtered_reads_ch, params.artic_normalize)
fasta_input_ch = artic_ncov_wf.out
fasta_input_ch = artic_ncov_wf.out.assembly
}
}

@@ -408,14 +415,16 @@
// 4. Summary output
if (params.fasta || workflow.profile.contains('test_fasta')) {
taxonomic_read_classification_ch = Channel.from( ['deactivated', 'deactivated', 'deactivated'] ).collect()
linage_read_classification_ch = Channel.from( ['deactivated', 'deactivated'] ).collect()
alignments_ch = Channel.from( ['deactivated'] )
} else {
taxonomic_read_classification_ch = read_classification_wf.out.kraken
if (params.screen_reads) {
linage_read_classification_ch = read_classification_wf.out.lcs
} else {
linage_read_classification_ch = Channel.from( ['deactivated', 'deactivated'] ).collect()
if (params.lcs) {
read_screening_lsc_wf(filtered_reads_ch)
}
if (params.freyja) {
read_screening_freyja_wf(artic_ncov_wf.out.binary_alignment.combine(reference_for_qc_input_ch))
}
}
alignments_ch = align_to_reference(filtered_reads_ch.combine(reference_for_qc_input_ch))
}
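The combine call hands the freyja process one tuple per sample. A minimal sketch of the resulting channel shape (sample and file names are hypothetical):

// [val(name), path(bam)] combined with [path(reference)] -> [val(name), path(bam), path(reference)]
ch_bam = Channel.of( ['sample01', file('sample01.primertrimmed.sorted.bam')] )
ch_ref = Channel.of( file('nCoV-2019.reference.fasta') )
ch_bam.combine(ch_ref).view()
// prints: [sample01, sample01.primertrimmed.sorted.bam, nCoV-2019.reference.fasta]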
@@ -427,7 +436,7 @@
else { samples_table_ch = Channel.from( ['deactivated'] ) }
*/
create_summary_report_wf(determine_lineage_wf.out, genome_quality_wf.out[0], determine_mutations_wf.out,
taxonomic_read_classification_ch, linage_read_classification_ch, alignments_ch, samples_file_ch)
taxonomic_read_classification_ch, alignments_ch, samples_file_ch)

}

@@ -479,12 +488,15 @@ ${c_yellow}Workflow control (optional)${c_reset}
--nanopolish use nanopolish instead of medaka for ARTIC (needs --fast5)
to skip basecalling use --fastq or --fastq_pass and provide a sequencing_summary.txt in addition to --fast5
e.g --nanopolish sequencing_summary.txt
--screen_reads Determines the Pangolineage of each individual read (takes time)
--screen_reads Determines the Pango lineage of each individual read (takes time, needs --freyja and/or --lcs)
--scorpio Skip Scorpio in pangolin run [default: $params.scorpio]
${c_dim}From pangolin version 4, Scorpio overwrites Usher results which leads to many unassigned samples
Can be turned on with --scorpio${c_reset}
${c_yellow}Parameters - Lineage detection on reads (see screen_reads, optional)${c_reset}
--freyja activate read screening via Freyja
--freyja_update update Freyja's barcode DB before running
--lcs activate read screening via LCS
--lcs_ucsc_version Create marker table based on a specific UCSC SARS-CoV-2 tree (e.g. '2022-05-01'). Use 'predefined'
to use the marker table from the repo (most probably not up-to-date) [default: $params.lcs_ucsc_version]
${c_dim}See https://hgdownload.soe.ucsc.edu/goldenPath/wuhCor1/UShER_SARS-CoV-2 for available trees.${c_reset}
7 changes: 7 additions & 0 deletions workflows/artic_nanopore_nCov19.nf
@@ -14,6 +14,7 @@ workflow artic_ncov_wf {

artic_medaka_custom_bed(fastq.combine(external_primer_schemes).combine(primerBed), normalise_threshold)
assembly = artic_medaka_custom_bed.out.fasta
binary_alignment = artic_medaka_custom_bed.out.fullbam

// plot amplicon coverage
covarplot_custom_bed(artic_medaka_custom_bed.out.covarplot.combine(primerBed))
@@ -25,6 +26,7 @@

artic_medaka(fastq.combine(external_primer_schemes), normalise_threshold)
assembly = artic_medaka.out.fasta
binary_alignment = artic_medaka.out.fullbam

// plot amplicon coverage
covarplot(artic_medaka.out.covarplot.combine(external_primer_schemes))
@@ -33,9 +35,11 @@

// error logging
no_genomes_at_all = assembly.ifEmpty{ log.info "\033[0;33mCould not generate any genomes, please check your reads $params.output/$params.readqcdir\033[0m" }
binary_alignment.ifEmpty{ log.info "\033[0;33mCould not generate any alignments, please check your reads $params.output/$params.readqcdir\033[0m" }

emit:
assembly
binary_alignment
}

workflow artic_ncov_np_wf {
@@ -62,6 +66,7 @@
)

assembly = artic_nanopolish_custom_bed.out.fasta
binary_alignment = artic_nanopolish_custom_bed.out.fullbam

// plot amplicon coverage
covarplot_custom_bed(artic_nanopolish_custom_bed.out.covarplot.combine(primerBed))
@@ -81,11 +86,13 @@
)

assembly = artic_nanopolish.out.fasta
binary_alignment = artic_nanopolish.out.fullbam

// plot amplicon coverage
covarplot(artic_nanopolish.out.covarplot.combine(external_primer_schemes))
}

emit:
assembly
binary_alignment
}
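With both ARTIC workflows now emitting named outputs, downstream code can address them by name instead of by position — a sketch, not code from this PR:

artic_ncov_wf(filtered_reads_ch, params.artic_normalize)
fasta_input_ch = artic_ncov_wf.out.assembly          // consensus genomes
bam_ch         = artic_ncov_wf.out.binary_alignment  // primer-trimmed BAMs, e.g. for freyja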
4 changes: 2 additions & 2 deletions workflows/basecalling.nf
@@ -21,8 +21,8 @@ workflow basecalling_wf {
// nanopore sequencing summary
pycoqc(guppy_summary)

if (!params.single && !params.guppy_cpu) { guppy_gpu.out.reads.ifEmpty{ log.info "\033[0;33 Could not retrieve reads for any barcode!\033[0m" } }
if (!params.single && params.guppy_cpu) { guppy_cpu.out.reads.ifEmpty{ log.info "\033[0;33 Could not retrieve reads for any barcode!\033[0m" } }
if (!params.single && !params.guppy_cpu) { guppy_gpu.out.reads.ifEmpty{ log.info "\033[0;33mCould not retrieve reads for any barcode!\033[0m" } }
if (!params.single && params.guppy_cpu) { guppy_cpu.out.reads.ifEmpty{ log.info "\033[0;33mCould not retrieve reads for any barcode!\033[0m" } }

// adjust channels to a clean val(name), path(reads) channel
if (params.single) { fastq_channel = guppy_basecalls }
9 changes: 0 additions & 9 deletions workflows/create_summary_report.nf
@@ -1,15 +1,13 @@
include { summary_report; summary_report_fasta; summary_report_default } from './process/summary_report'
include { plot_coverages } from '../modules/plot_coverages.nf'
include { get_variants_classification } from '../modules/get_variants_classification.nf'
include { lcs_plot } from './process/lcs_sc2'

workflow create_summary_report_wf {
take:
pangolin
president
nextclade
kraken2
lcs
alignments
samples_table

@@ -20,7 +18,6 @@ workflow create_summary_report_wf {
pangolin_results = pangolin.map {it -> it[1]}.collectFile(name: 'pangolin_results.csv', skip: 1, keepHeader: true)
president_results = president.map {it -> it[1]}.collectFile(name: 'president_results.tsv', skip: 1, keepHeader: true)
nextclade_results = nextclade.map {it -> it[1]}.collectFile(name: 'nextclade_results.tsv', skip: 1, keepHeader: true)
lcs_results = lcs.map {it -> it[1]}.collectFile(name: 'lcs_results.tsv', skip: 1, keepHeader: true, storeDir: "${params.output}/${params.lineagedir}/")

alignment_files = alignments.map {it -> it[0]}.collect()
if (params.fasta || workflow.profile.contains('test_fasta')) {
@@ -35,11 +32,5 @@

if (params.samples) { summary_report(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots, samples_table) }
else { summary_report_default(version_ch, variants_table_ch, pangolin_results, president_results, nextclade_results, kraken2_results, coverage_plots) }

}

if (params.screen_reads){
lcs_plot(lcs.map {it -> it[1]}.collectFile(name: 'lcs_results.tsv', skip: 1, keepHeader: true), params.lcs_cutoff)
}

}
96 changes: 96 additions & 0 deletions workflows/process/freyja.nf
@@ -0,0 +1,96 @@
process freyja {
label 'freyja'
errorStrategy 'retry'
maxRetries 1
publishDir "${params.output}/${params.lineagedir}/${name}/lineage-proportion-by-reads", mode: 'copy', pattern: "*"

input:
tuple val(name), path(bam_file), path(reference)
output:
tuple val(name), path("*_freyja_aggregate.tsv"), emit: aggregate
tuple val(name), path("*_freyja_variants.tsv"), path("*_freyja_depths.tsv"), path("*_freyja_demix.tsv"), path("*_freyja_aggregate.tsv"), emit: fullresults
tuple val(name), path("*_freyja_plot.svg"), path("*_freyja_plot.png"), emit: plots, optional: true

script:
if ( params.freyja_update )
{ freyja_update_cmd = "mkdir data; freyja update --outdir data"
// download the latest Freyja data instead of reusing the files shipped with the conda environment
freyja_ref_data = "data"
}
else
{ freyja_update_cmd = " "
// default path where the Freyja data is stored inside the container
freyja_ref_data = "/opt/conda/envs/freyja-env/lib/python3.10/site-packages/freyja/data/"
}
"""
# rename the reference sequence: even when the same reference was used for alignment, freyja compares sequence names and errors on deviating names
sed -i "s/^>.*\$/>MN908947.3/" ${reference}
mkdir -p freyja_result/
${freyja_update_cmd}
freyja variants ${bam_file} \
    --variants ${name}_freyja_variants.tsv \
    --depths ${name}_freyja_depths.tsv \
    --ref ${reference}
freyja demix ${name}_freyja_variants.tsv \
    ${name}_freyja_depths.tsv \
    --output freyja_result/${name}_freyja_demix.tsv \
    --barcodes ${freyja_ref_data}/usher_barcodes.csv \
    --lineageyml ${freyja_ref_data}/lineages.yml
freyja aggregate freyja_result/ \
    --output ${name}_freyja_aggregate.tsv
# --mincov lowered to 10.0 (freyja's default minimum coverage is 60)
freyja plot ${name}_freyja_aggregate.tsv \
    --output ${name}_freyja_plot.svg \
    --lineages \
    --mincov 10.0 \
    --lineageyml ${freyja_ref_data}/lineages.yml
freyja plot ${name}_freyja_aggregate.tsv \
    --output ${name}_freyja_plot.png \
    --lineages \
    --mincov 10.0 \
    --lineageyml ${freyja_ref_data}/lineages.yml
# the demix table was written into freyja_result/ so freyja aggregate could pick it up; move it back for publishing
mv freyja_result/${name}_freyja_demix.tsv \$PWD
"""
stub:
"""
touch stub_freyja_variants.tsv \
stub_freyja_depths.tsv \
stub_freyja_demix.tsv \
stub_freyja_aggregate.tsv \
stub_freyja_plot.png \
stub_freyja_plot.svg
"""
}

process freyja_plot {
label 'freyja'
publishDir "${params.output}/${params.lineagedir}/", mode: 'copy', pattern: "*"

input:
path(aggregate_file)
output:
tuple path("*_freyja_plot.svg"), path("*_freyja_plot.png"), emit: results, optional: true

script:
"""
# --mincov lowered to 10.0 (freyja's default minimum coverage is 60)
freyja plot ${aggregate_file} \
    --output combined_freyja_plot.svg \
    --lineages \
    --mincov 10.0
freyja plot ${aggregate_file} \
    --output combined_freyja_plot.png \
    --lineages \
    --mincov 10.0
"""
stub:
"""
touch stub_combined_freyja_plot.png \
stub_combined_freyja_plot.svg
"""
}
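The separate freyja_plot process exists so the per-sample aggregate tables can be merged into one combined figure. One plausible wiring, assuming the freyja process above (workflow and channel names are hypothetical, not the PR's actual read_screening_freyja_wf):

workflow freyja_combined_plot_wf {
    take: bam_and_ref    // tuple val(name), path(bam), path(reference)
    main:
        freyja(bam_and_ref)
        // concatenate all per-sample aggregate tables, then plot them together
        freyja_plot( freyja.out.aggregate
                        .map { name, tsv -> tsv }
                        .collectFile(name: 'all_samples_aggregate.tsv', skip: 1, keepHeader: true) )
}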
4 changes: 4 additions & 0 deletions workflows/process/lcs_sc2.nf
@@ -90,4 +90,8 @@ process lcs_plot {
"""
lcs_bar_plot.R '${tsv}' ${cutoff}
"""
stub:
"""
touch stub.png
"""
}
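The added stub block lets lcs_plot participate in Nextflow's dry-run mode, which replaces each process script with its stub — for example (profile choice is a placeholder):

nextflow run replikation/poreCov -stub-run -profile test_fasta,docker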