Skip to content

Commit

Permalink
Parse quast report to csv (#16)
Browse files: browse the repository at this point in the history
* Parse quast report to csv

* Update README, fix version parsing
  • Loading branch information
dfornika authored Dec 2, 2021
1 parent 7b2c6f6 commit f8a6416
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 21 deletions.
25 changes: 12 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,9 @@ sample-01
├── sample-01_20211125165316_provenance.yml
├── sample-01_fastp.csv
├── sample-01_fastp.json
├── sample-01_prokka.gbk
├── sample-01_prokka.gff
├── sample-01_quast.json
├── sample-01_quast.tsv
├── sample-01_shovill_prokka.gbk
├── sample-01_shovill_prokka.gff
├── sample-01_shovill_quast.csv
├── sample-01_shovill.fa
└── sample-01_shovill.log
```
Expand All @@ -76,16 +75,16 @@ Including the tool name suffixes to output files allows re-analysis of the same
sample-01
├── sample-01_20211125165316_provenance.yml
├── sample-01_20211128122118_provenance.yml
├── sample-01_bakta.gbk
├── sample-01_bakta.gff
├── sample-01_bakta.json
├── sample-01_bakta.log
├── sample-01_unicycler_bakta.gbk
├── sample-01_unicycler_bakta.gff
├── sample-01_unicycler_bakta.json
├── sample-01_unicycler_bakta.log
├── sample-01_fastp.csv
├── sample-01_fastp.json
├── sample-01_prokka.gbk
├── sample-01_prokka.gff
├── sample-01_quast.json
├── sample-01_quast.tsv
├── sample-01_shovill_prokka.gbk
├── sample-01_shovill_prokka.gff
├── sample-01_shovill_quast.csv
├── sample-01_unicycler_quast.csv
├── sample-01_shovill.fa
├── sample-01_shovill.log
├── sample-01_unicycler.fa
Expand All @@ -104,7 +103,7 @@ For each pipeline invocation, each sample will produce a `provenance.yml` file w
- tool_name: prokka
tool_version: 1.14.5
- tool_name: quast
tool_version: v5.0.2
tool_version: 5.0.2
- input_filename: sample-01_R1.fastq.gz
sha256: 4ac3055ac5f03114a005aff033e7018ea98486cbebdae669880e3f0511ed21bb
- input_filename: sample-01_R2.fastq.gz
Expand Down
30 changes: 29 additions & 1 deletion bin/parse_quast_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import collections
import csv
import json
import sys


def parse_transposed_quast_report(transposed_quast_report_path):
Expand Down Expand Up @@ -92,8 +93,35 @@ def main():
parser.add_argument('transposed_quast_report')
args = parser.parse_args()

output_fieldnames = [
'assembly_id',
'total_length',
'num_contigs',
'largest_contig',
'assembly_N50',
'assembly_N75',
'assembly_L50',
'assembly_L75',
'num_contigs_gt_0_bp',
'num_contigs_gt_1000_bp',
'num_contigs_gt_5000_bp',
'num_contigs_gt_10000_bp',
'num_contigs_gt_25000_bp',
'num_contigs_gt_50000_bp',
'total_length_gt_0_bp',
'total_length_gt_1000_bp',
'total_length_gt_5000_bp',
'total_length_gt_10000_bp',
'total_length_gt_25000_bp',
'total_length_gt_50000_bp',
'num_N_per_100_kb',
]

report = parse_transposed_quast_report(args.transposed_quast_report)
print(json.dumps(report, indent=2))
writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames)
writer.writeheader()
for record in report:
writer.writerow(record)


if __name__ == '__main__':
Expand Down
10 changes: 4 additions & 6 deletions modules/quast.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ process quast {

tag { sample_id }

publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_${assembler}_quast.tsv", mode: 'copy'

input:
tuple val(sample_id), path(assembly), val(assembler)

Expand All @@ -13,7 +11,7 @@ process quast {

script:
"""
printf -- "- tool_name: quast\\n tool_version: \$(quast --version | cut -d ' ' -f 2)\\n" > ${sample_id}_${assembler}_quast_provenance.yml
printf -- "- tool_name: quast\\n tool_version: \$(quast --version | cut -d ' ' -f 2 | tr -d 'v')\\n" > ${sample_id}_${assembler}_quast_provenance.yml
quast --threads ${task.cpus} ${assembly} --space-efficient --fast --output-dir ${sample_id}
mv ${sample_id}/transposed_report.tsv ${sample_id}_${assembler}_quast.tsv
"""
Expand All @@ -25,16 +23,16 @@ process parse_quast_report {

executor 'local'

publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_${assembler}_quast.json", mode: 'copy'
publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_${assembler}_quast.csv", mode: 'copy'

input:
tuple val(sample_id), path(quast_report), val(assembler)

output:
tuple val(sample_id), path("${sample_id}_${assembler}_quast.json")
tuple val(sample_id), path("${sample_id}_${assembler}_quast.csv")

script:
"""
parse_quast_report.py ${quast_report} > ${sample_id}_${assembler}_quast.json
parse_quast_report.py ${quast_report} > ${sample_id}_${assembler}_quast.csv
"""
}
2 changes: 1 addition & 1 deletion modules/unicycler.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ process unicycler {

script:
"""
printf -- "- tool_name: unicycler\\n tool_version: \$(unicycler --version | cut -d ' ' -f 2)\\n" > ${sample_id}_unicycler_provenance.yml
printf -- "- tool_name: unicycler\\n tool_version: \$(unicycler --version | cut -d ' ' -f 2 | tr -d 'v')\\n" > ${sample_id}_unicycler_provenance.yml
unicycler --threads ${task.cpus} -1 ${reads_1} -2 ${reads_2} -o ${sample_id}_assembly
sed 's/^>/>${sample_id}_/' ${sample_id}_assembly/assembly.fasta > ${sample_id}_unicycler.fa
cp ${sample_id}_assembly/assembly.gfa ${sample_id}_unicycler.gfa
Expand Down

0 comments on commit f8a6416

Please sign in to comment.