diff --git a/README.md b/README.md index 435c56a..c846c47 100644 --- a/README.md +++ b/README.md @@ -62,10 +62,9 @@ sample-01 ├── sample-01_20211125165316_provenance.yml ├── sample-01_fastp.csv ├── sample-01_fastp.json -├── sample-01_prokka.gbk -├── sample-01_prokka.gff -├── sample-01_quast.json -├── sample-01_quast.tsv +├── sample-01_shovill_prokka.gbk +├── sample-01_shovill_prokka.gff +├── sample-01_shovill_quast.csv ├── sample-01_shovill.fa └── sample-01_shovill.log ``` @@ -76,16 +75,16 @@ Including the tool name suffixes to output files allows re-analysis of the same sample-01 ├── sample-01_20211125165316_provenance.yml ├── sample-01_20211128122118_provenance.yml -├── sample-01_bakta.gbk -├── sample-01_bakta.gff -├── sample-01_bakta.json -├── sample-01_bakta.log +├── sample-01_unicycler_bakta.gbk +├── sample-01_unicycler_bakta.gff +├── sample-01_unicycler_bakta.json +├── sample-01_unicycler_bakta.log ├── sample-01_fastp.csv ├── sample-01_fastp.json -├── sample-01_prokka.gbk -├── sample-01_prokka.gff -├── sample-01_quast.json -├── sample-01_quast.tsv +├── sample-01_shovill_prokka.gbk +├── sample-01_shovill_prokka.gff +├── sample-01_shovill_quast.csv +├── sample-01_unicycler_quast.csv ├── sample-01_shovill.fa ├── sample-01_shovill.log ├── sample-01_unicycler.fa @@ -104,7 +103,7 @@ For each pipeline invocation, each sample will produce a `provenance.yml` file w - tool_name: prokka tool_version: 1.14.5 - tool_name: quast - tool_version: v5.0.2 + tool_version: 5.0.2 - input_filename: sample-01_R1.fastq.gz sha256: 4ac3055ac5f03114a005aff033e7018ea98486cbebdae669880e3f0511ed21bb - input_filename: sample-01_R2.fastq.gz diff --git a/bin/parse_quast_report.py b/bin/parse_quast_report.py index c16fc01..d60c2b0 100755 --- a/bin/parse_quast_report.py +++ b/bin/parse_quast_report.py @@ -4,6 +4,7 @@ import collections import csv import json +import sys def parse_transposed_quast_report(transposed_quast_report_path): @@ -92,8 +93,35 @@ def main(): parser.add_argument('transposed_quast_report') args = parser.parse_args() + output_fieldnames = [ + 'assembly_id', + 'total_length', + 'num_contigs', + 'largest_contig', + 'assembly_N50', + 'assembly_N75', + 'assembly_L50', + 'assembly_L75', + 'num_contigs_gt_0_bp', + 'num_contigs_gt_1000_bp', + 'num_contigs_gt_5000_bp', + 'num_contigs_gt_10000_bp', + 'num_contigs_gt_25000_bp', + 'num_contigs_gt_50000_bp', + 'total_length_gt_0_bp', + 'total_length_gt_1000_bp', + 'total_length_gt_5000_bp', + 'total_length_gt_10000_bp', + 'total_length_gt_25000_bp', + 'total_length_gt_50000_bp', + 'num_N_per_100_kb', + ] + report = parse_transposed_quast_report(args.transposed_quast_report) - print(json.dumps(report, indent=2)) + writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames) + writer.writeheader() + for record in report: + writer.writerow(record) if __name__ == '__main__': diff --git a/modules/quast.nf b/modules/quast.nf index a4d4081..7c874e5 100644 --- a/modules/quast.nf +++ b/modules/quast.nf @@ -2,8 +2,6 @@ process quast { tag { sample_id } - publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_${assembler}_quast.tsv", mode: 'copy' - input: tuple val(sample_id), path(assembly), val(assembler) @@ -13,7 +11,7 @@ process quast { script: """ - printf -- "- tool_name: quast\\n tool_version: \$(quast --version | cut -d ' ' -f 2)\\n" > ${sample_id}_${assembler}_quast_provenance.yml + printf -- "- tool_name: quast\\n tool_version: \$(quast --version | cut -d ' ' -f 2 | tr -d 'v')\\n" > ${sample_id}_${assembler}_quast_provenance.yml quast --threads ${task.cpus} ${assembly} --space-efficient --fast --output-dir ${sample_id} mv ${sample_id}/transposed_report.tsv ${sample_id}_${assembler}_quast.tsv """ @@ -25,16 +23,16 @@ process parse_quast_report { executor 'local' - publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_${assembler}_quast.json", mode: 'copy' + publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_${assembler}_quast.csv", mode: 'copy' input: tuple val(sample_id), path(quast_report), val(assembler) output: - tuple val(sample_id), path("${sample_id}_${assembler}_quast.json") + tuple val(sample_id), path("${sample_id}_${assembler}_quast.csv") script: """ - parse_quast_report.py ${quast_report} > ${sample_id}_${assembler}_quast.json + parse_quast_report.py ${quast_report} > ${sample_id}_${assembler}_quast.csv """ } diff --git a/modules/unicycler.nf b/modules/unicycler.nf index 73ff173..adfff2f 100644 --- a/modules/unicycler.nf +++ b/modules/unicycler.nf @@ -15,7 +15,7 @@ process unicycler { script: """ - printf -- "- tool_name: unicycler\\n tool_version: \$(unicycler --version | cut -d ' ' -f 2)\\n" > ${sample_id}_unicycler_provenance.yml + printf -- "- tool_name: unicycler\\n tool_version: \$(unicycler --version | cut -d ' ' -f 2 | tr -d 'v')\\n" > ${sample_id}_unicycler_provenance.yml unicycler --threads ${task.cpus} -1 ${reads_1} -2 ${reads_2} -o ${sample_id}_assembly sed 's/^>/>${sample_id}_/' ${sample_id}_assembly/assembly.fasta > ${sample_id}_unicycler.fa cp ${sample_id}_assembly/assembly.gfa ${sample_id}_unicycler.gfa