diff --git a/conf/modules.config b/conf/modules.config index 72ee95a6..724e243c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -80,7 +80,7 @@ process { // errorStrategy = 'terminate' // publishDir = [ // mode: params.publish_dir_mode, - // path: "${params.outdir}/read_quality/raw_reads", + // path: "${params.outdir}/ReadQuality/raw_reads", // pattern: "*.json" // ] //} @@ -134,7 +134,7 @@ process { publishDir = [ [ mode: params.publish_dir_mode, - path: "${params.outdir}/read_quality/RawReadData", + path: "${params.outdir}/ReadQuality/RawReadData", pattern: "*.json" ], ] @@ -151,7 +151,7 @@ process { ext.args = '--quiet' publishDir = [ [ - path: "${params.outdir}/read_quality/fastqc", + path: "${params.outdir}/ReadQuality/fastqc", mode: params.publish_dir_mode, pattern: "*{${params.fastqc.html_ext},${params.fastqc.zip_ext}}", ] @@ -165,7 +165,7 @@ process { // scratch = false publishDir = [ [ - path: "${params.outdir}/read_quality/sub_sampled_reads", + path: "${params.outdir}/ReadQuality/SubSampledReads", mode: params.publish_dir_mode, pattern: "*${params.seqtk.reads_ext}" ] @@ -179,7 +179,7 @@ withName: SEQTK_SIZE { // scratch = false publishDir = [ [ - path: "${params.outdir}/Assembly/quality/BaseCounts", + path: "${params.outdir}/ReadQuality/SubSampledReads/BaseCounts", mode: params.publish_dir_mode, pattern: "*.txt" ] @@ -292,7 +292,7 @@ withName: SEQTK_SIZE { stageInMode = params.stage_in_mode publishDir = [ [ - path: "${params.outdir}/read_quality/mash_sketch", + path: "${params.outdir}/ReadQuality/mash_sketch", mode: params.publish_dir_mode, pattern: "*${params.mash.sketch_ext}" ] @@ -386,7 +386,7 @@ withName: SEQTK_SIZE { errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } maxForks = 20 maxErrors = 3 - //dir_out = process.toString().contains("QC_READS") ? "read_quality/${params.mash.output_dir}/mash" : "taxon_determination/mash" + //dir_out = process.toString().contains("QC_READS") ? 
"ReadQuality/${params.mash.output_dir}/mash" : "taxon_determination/mash" ext.args = "-w" //container = params.mash.container ext.containers = params.mash @@ -394,7 +394,7 @@ withName: SEQTK_SIZE { stageInMode = params.stage_in_mode publishDir = [ [ - path: "${params.outdir}/read_quality/${params.mash.output_dir}/mash", + path: "${params.outdir}/ReadQuality/${params.mash.output_dir}/mash", mode: params.publish_dir_mode, pattern: "*${params.mash.output_reads_ext}" ], @@ -422,7 +422,7 @@ withName: SEQTK_SIZE { stageInMode = params.stage_in_mode publishDir = [ [ - path: { "${params.outdir}/read_quality/${params.r_contaminants.output_dir}"}, + path: { "${params.outdir}/ReadQuality/${params.r_contaminants.output_dir}"}, mode: params.publish_dir_mode, pattern: "*.gz" // specifying this outside of config as yet to handle singletons TODO decide on singleton usage ] @@ -545,12 +545,12 @@ withName: SEQTK_SIZE { // scratch = false publishDir = [ [ - path: { "${params.outdir}/read_quality/fastp/trimmed_reads"}, + path: { "${params.outdir}/ReadQuality/fastp/trimmed_reads"}, mode: params.publish_dir_mode, pattern: "*${params.fastp.fastq_ext}" ], [ - path: { "${params.outdir}/read_quality/fastp/reports"}, + path: { "${params.outdir}/ReadQuality/fastp/reports"}, mode: params.publish_dir_mode, pattern: "*{${params.fastp.json_ext},${params.fastp.html_ext}}" ] @@ -566,7 +566,7 @@ withName: SEQTK_SIZE { stageInMode = params.stage_in_mode publishDir = [ [ - path: { "${params.outdir}/read_quality/chopper/trimmed_reads"}, + path: { "${params.outdir}/ReadQuality/chopper/trimmed_reads"}, mode: params.publish_dir_mode, pattern: "*${params.chopper.fastq_ext}" ] @@ -691,17 +691,17 @@ withName: SEQTK_SIZE { ext.containers = params.kat publishDir = [ [ - path: { "${params.outdir}/read_quality/kat/hist/histogram" }, + path: { "${params.outdir}/ReadQuality/kat/hist/histogram" }, mode: params.publish_dir_mode, pattern: "*${params.kat.hist_ext}" ], [ - path: { 
"${params.outdir}/read_quality/kat/hist/json" }, + path: { "${params.outdir}/ReadQuality/kat/hist/json" }, mode: params.publish_dir_mode, pattern: "*${params.kat.json_ext}" ], [ - path: { "${params.outdir}/read_quality/kat/hist/png" }, + path: { "${params.outdir}/ReadQuality/kat/hist/png" }, mode: params.publish_dir_mode, pattern: "*${params.kat.png_ext}" ] diff --git a/nextflow.config b/nextflow.config index aaeaa721..69dcbc10 100644 --- a/nextflow.config +++ b/nextflow.config @@ -175,7 +175,7 @@ params { seqtk_size { singularity = 'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' docker = 'quay.io/biocontainers/seqtk:1.3--h5bf99c6_3' - report_tag = "SeqtkSize" + report_tag = "SeqtkBaseCount" } // FASTP options @@ -290,6 +290,11 @@ params { // sample_header = "Assembly" //} + //quast_genome_size{ + // genome_size_field = "Total length (>= 1000 bp)" + // report_tag = "QuastGenomeSize" + //} + checkm { // TODO add to trouble shooting if checkm fails and provides EOF errors, to try changing the container diff --git a/subworkflows/local/assemble_reads.nf b/subworkflows/local/assemble_reads.nf index 7439cfa7..83d15e9e 100644 --- a/subworkflows/local/assemble_reads.nf +++ b/subworkflows/local/assemble_reads.nf @@ -1,6 +1,7 @@ // Workflow for assembling reads // TODO add in read sub-sampling to reduce errors +include { SEQTK_SIZE } from "../../modules/local/seqtk_size.nf" include { SPADES_ASSEMBLE } from "../../modules/local/spades_assemble.nf" include { FLYE_ASSEMBLE } from "../../modules/local/flye_assemble.nf" include { MINIMAP2_INDEX } from "../../modules/local/minimap2_index.nf" @@ -16,6 +17,16 @@ workflow ASSEMBLE_READS{ main: versions = Channel.empty() final_contigs = Channel.empty() + reports = Channel.empty() + + // no report channel here? 
+ base_counts = SEQTK_SIZE(sample_data) + reports = reports.mix(base_counts.map{ + meta, file_bc -> tuple(meta, extract_base_count(meta, file_bc)); + }) + + versions = versions.mix(base_counts.versions) + def platform_comp = params.platform.replaceAll("\\s", "").toString() // strip whitespace from entries if(platform_comp == params.opt_platforms.illumina){ @@ -52,7 +63,6 @@ workflow ASSEMBLE_READS{ minimap2_idx = MINIMAP2_INDEX(ch_assembled.contigs) //TODO Move mapping to its own workflow versions = versions.mix(minimap2_idx.versions) - //TODO no idea if I am doing this right, get input in the future ch_mapping_data = sample_data.join(minimap2_idx.index) //Decided to leave Racon out of the polishing work flow but to wrap this in statement in a optional value in the future output_paf = Channel.value(true) @@ -70,4 +80,16 @@ workflow ASSEMBLE_READS{ graphs = ch_assembled.graphs final_contigs = final_contigs versions = versions + reports = reports +} + + +// Returns the base count (as a long) parsed from the report file passed as data. +// The file is read as tab-separated rows; the count is the second value of the first row. meta is accepted but unused. +def extract_base_count(meta, data){ + def base_count_pos = 1; + def rows = data.splitCsv(header: false, sep: '\t') // first row holds the report; its second value is the base count + def base_count = rows[0][base_count_pos]; + def base_long = base_count.toLong(); + return base_long } diff --git a/subworkflows/local/qc_assemblies.nf b/subworkflows/local/qc_assemblies.nf index 13160b45..b04c3829 100644 --- a/subworkflows/local/qc_assemblies.nf +++ b/subworkflows/local/qc_assemblies.nf @@ -1,7 +1,6 @@ // Run annotate and apply QC metrics include { QUAST } from "../../modules/local/quast_assembly.nf" include { CHECKM_LINEAGEWF } from "../../modules/local/checkm_lineagewf.nf" - include { MLST } from "../../modules/local/mlst.nf" workflow QC_ASSEMBLIES { diff --git a/workflows/CleanAssemble.nf b/workflows/CleanAssemble.nf index db709597..df3d6bc1 100644 --- a/workflows/CleanAssemble.nf +++ b/workflows/CleanAssemble.nf @@ -120,6 +120,7 @@ workflow CLEAN_ASSEMBLE_READS { // Isolate workflow ch_assembled_reads = ASSEMBLE_READS(ch_trimmed_reads) 
+ ch_reports = ch_reports.mix(ch_assembled_reads.reports) ch_versions = ch_versions.mix(ch_assembled_reads.versions) if(!params.skip_polishing){