Skip to content

Commit

Permalink
Merge pull request #60 from AndreaGuarracino/gzip_output
Browse files Browse the repository at this point in the history
added flag to compress the output files
  • Loading branch information
subwaystation authored Mar 23, 2021
2 parents 4e59b8f + f0c12c0 commit e253556
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 70 deletions.
128 changes: 79 additions & 49 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -37,31 +37,31 @@ if (!params.file_name_prefix) {
} else if (params.file_name_prefix == "pggb") {
// fancy naming scheme
file_name_prefix_display = ".pggb"
alignment_prefix = """-\
${aligner}-\
s${params.alignment_segment_length}-\
l${params.alignment_block_length}-\
p${params.alignment_map_pct_id}-\
n${params.alignment_n_secondary}-\
${edyeet_align_pct_id_display}\
K${params.alignment_mash_kmer}\
${alignment_merge_cmd}\
${alignment_split_cmd}\
${alignment_exclude_cmd}\
""".stripIndent()
seqwish_prefix = """\
.seqwish-\
k${params.seqwish_min_match_length}-\
B${params.seqwish_transclose_batch}\
""".stripIndent()
smoothxg_prefix = """${seqwish_prefix}\
.smoothxg-\
w${params.smoothxg_max_block_weight}-\
j${params.smoothxg_max_path_jump}-\
e${params.smoothxg_max_edge_jump}-\
I${params.smoothxg_block_id_min}-\
p${smoothxg_poa_params_display}-M-J0.7-K-G150\
""".stripIndent()
alignment_prefix = """-\
${aligner}-\
s${params.alignment_segment_length}-\
l${params.alignment_block_length}-\
p${params.alignment_map_pct_id}-\
n${params.alignment_n_secondary}-\
${edyeet_align_pct_id_display}\
K${params.alignment_mash_kmer}\
${alignment_merge_cmd}\
${alignment_split_cmd}\
${alignment_exclude_cmd}\
"""
seqwish_prefix = """${alignment_prefix}\
.seqwish-\
k${params.seqwish_min_match_length}-\
B${params.seqwish_transclose_batch}\
"""
smoothxg_prefix = """${seqwish_prefix}\
.smoothxg-\
w${params.smoothxg_max_block_weight}-\
j${params.smoothxg_max_path_jump}-\
e${params.smoothxg_max_edge_jump}-\
I${params.smoothxg_block_id_min}-\
p${smoothxg_poa_params_display}-M-J0.7-K-G150\
"""
} else {
// take the given prefix
file_name_prefix_display= "${params.file_name_prefix}.pggb"
Expand All @@ -86,11 +86,13 @@ if (!params.file_name_prefix || params.file_name_prefix == "pggb") {
}

process edyeet {
publishDir "${params.outdir}/alignment", mode: "${params.publish_dir_mode}"

input:
tuple val(f), path(fasta)
tuple val(f), path(fasta)

output:
tuple val(f), path(fasta), path("${f}${alignment_prefix}.paf")
tuple val(f), path("${f}${alignment_prefix}.paf")

"""
edyeet ${alignment_exclude_cmd} \
Expand All @@ -104,16 +106,18 @@ process edyeet {
-k ${params.alignment_mash_kmer} \
-t ${task.cpus} \
$fasta $fasta \
>${f}${alignment_prefix}.paf
>${f}${alignment_prefix}.paf
"""
}

process wfmash {
publishDir "${params.outdir}/alignment", mode: "${params.publish_dir_mode}"

input:
tuple val(f), path(fasta)
tuple val(f), path(fasta)

output:
tuple val(f), path(fasta), path("${f}${alignment_prefix}.paf")
tuple val(f), path("${f}${alignment_prefix}.paf")

"""
wfmash ${alignment_exclude_cmd} \
Expand All @@ -126,15 +130,16 @@ process wfmash {
-k ${params.alignment_mash_kmer} \
-t ${task.cpus} \
$fasta $fasta \
>${f}${alignment_prefix}.paf
>${f}${alignment_prefix}.paf
"""
}

process seqwish {
publishDir "${params.outdir}/seqwish", mode: "${params.publish_dir_mode}"

input:
tuple val(f), path(fasta), path(alignment)
tuple val(f), path(fasta)
path(alignment)

output:
tuple val(f), path("${f}${seqwish_prefix}.gfa")
Expand Down Expand Up @@ -186,11 +191,13 @@ process smoothxg {
}

process odgiBuild {
publishDir "${params.outdir}/odgi_build", mode: "${params.publish_dir_mode}"

input:
path(graph)
path(graph)

output:
path("${graph}.og")
path("${graph}.og")

"""
odgi build -g $graph -o ${graph}.og -P -t ${task.cpus}
Expand All @@ -200,11 +207,11 @@ process odgiBuild {
process odgiStats {
publishDir "${params.outdir}/odgi_stats", mode: "${params.publish_dir_mode}"

input:
path(graph)
input:
path(graph)

output:
path("${graph}.stats")
path("${graph}.stats")

"""
odgi stats -i "${graph}" -S -s -d -l > "${graph}.stats" 2>&1
Expand All @@ -215,10 +222,10 @@ process odgiViz {
publishDir "${params.outdir}/odgi_viz", mode: "${params.publish_dir_mode}"

input:
path(graph)
path(graph)

output:
path("${graph}.viz_mqc.png")
path("${graph}.viz_mqc.png")

"""
odgi viz \
Expand Down Expand Up @@ -275,6 +282,20 @@ process odgiDraw {
"""
}

// Gzip-compress one pipeline output file with pigz and publish the resulting
// archive under <outdir>/compressed_outputs.
process pigzOutputFiles {
    publishDir "${params.outdir}/compressed_outputs", mode: "${params.publish_dir_mode}"

    input:
    // Named `graph`, but the workflow also routes alignment (.paf) and MSA (.maf)
    // files through this process, not only .gfa/.og graphs.
    path(graph)

    output:
    // pigz appends ".gz" to the input file name.
    path("${graph}.gz")

    script:
    // pigz flags: -q quiet, -p number of compression threads, -k keep the
    // uncompressed input, -f force (overwrite existing output / follow links).
    // NOTE(review): -f is presumably required because the staged input is a
    // symlink in the task work directory -- confirm before removing it.
    """
    pigz -q -p ${task.cpus} ${graph} -f -k
    """
}

// TODO ONCE OUR CUSTOM MULTIQC VERSION IS IN A MULTIQC RELEASE, WE CAN CHANGE THIS
process multiQC {
publishDir "${params.outdir}", mode: "${params.publish_dir_mode}"
Expand All @@ -298,15 +319,15 @@ workflow {
main:
if (params.wfmash == false) {
edyeet(fasta)
seqwish(edyeet.out)
seqwish(fasta, edyeet.out.collect{it[1]})
} else {
wfmash(fasta)
seqwish(wfmash.out)
seqwish(fasta, wfmash.out.collect{it[1]})
}
smoothxg(seqwish.out)
if (params.do_stats) {
odgiBuild(seqwish.out.collect{it[1]}.mix(smoothxg.out.gfa_smooth, smoothxg.out.consensus_smooth.flatten()))
odgiStats(odgiBuild.out)
if (params.do_stats) {
odgiBuild(seqwish.out.collect{it[1]}.mix(smoothxg.out.gfa_smooth, smoothxg.out.consensus_smooth.flatten()))
odgiStats(odgiBuild.out)
}
else {
odgiBuild(smoothxg.out.gfa_smooth)
Expand All @@ -322,6 +343,14 @@ workflow {
odgiDrawOut = odgiDraw(odgiLayout.out)
}

if (params.do_compression) {
if (params.wfmash == false) {
pigzOutputFiles(seqwish.out.collect{it[1]}.mix(smoothxg.out.gfa_smooth, smoothxg.out.consensus_smooth.flatten(), odgiBuild.out, smoothxg.out.maf_smooth, edyeet.out.collect{it[1]}))
} else {
pigzOutputFiles(seqwish.out.collect{it[1]}.mix(smoothxg.out.gfa_smooth, smoothxg.out.consensus_smooth.flatten(), odgiBuild.out, smoothxg.out.maf_smooth, wfmash.out.collect{it[1]}))
}
}

multiQC(
odgiStats.out.collect().ifEmpty([]),
odgiVizOut.collect().ifEmpty([]),
Expand Down Expand Up @@ -391,7 +420,7 @@ def helpMessage() {
--smoothxg_ratio_contain [n] minimum short length / long length ratio to compare sequences for the containment
metric in the clustering [default: 0]
--smoothxg_poa_params [str] score parameters for POA in the form of match,mismatch,gap1,ext1,gap2,ext2
[default: 1,4,6,2,26,1]
[default: 1,4,6,2,26,1]
Visualization options:
--do_viz Generate 1D visualisations of the built graphs [default: OFF]
Expand All @@ -405,6 +434,7 @@ def helpMessage() {
--max_multiqc_email_size [str] Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB)
-name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
--file_name_prefix [str] Prefix for the output file names. If 'pggb', the file names will be very verbose and contain all parameters for each process. [default: --input]
--do_compression Compress alignment (.paf), graph (.gfa, .og), and MSA (.maf) outputs [default: OFF]
AWSBatch options:
--awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch
Expand All @@ -415,7 +445,7 @@ def helpMessage() {

// Has the run name been specified by the user?
// this has the bonus effect of catching both -name and --name
// TODO INVOKE THIS AGAIN ONCE IT IS CLEAR HOW TO ADD A NAME TO THE RUN
// TODO INVOKE THIS AGAIN ONCE IT IS CLEAR HOW TO ADD A NAME TO THE RUN
// TODO ERROR: You used a core Nextflow option with two hyphens: '--name'. Please resubmit with '-name'
/*
custom_runName = params.name
Expand Down Expand Up @@ -664,11 +694,11 @@ process output_documentation {
publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode

input:
file output_docs from ch_output_docs
file images from ch_output_docs_images
file output_docs from ch_output_docs
file images from ch_output_docs_images

output:
file 'results_description.html'
file 'results_description.html'

script:
"""
Expand Down
2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ params {
help = false
tracedir = "${params.outdir}/pipeline_info"

do_compression = false

// Config options
custom_config_version = 'master'
custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
Expand Down
38 changes: 17 additions & 21 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,17 @@
"help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
},
"do_compression": {
"type": "boolean",
"fa_icon": "fas fa-file-archive",
"description": "Compress alignment (.paf), graph (.gfa, .og), and MSA (.maf) output files."
},
"do_stats": {
"type": "boolean",
"default": true,
"hidden": true,
"fa_icon": "fas fa-file-csv"
"fa_icon": "fas fa-file-csv",
"description": "Perform statistics evaluation."
}
}
},
Expand Down Expand Up @@ -72,26 +78,22 @@
"alignment_n_secondary": {
"type": "integer",
"default": 10,
"description": "Number of secondary mappings to retain in 'map' filter mode.",
"fa_icon": "fab fa-draft2digital"
"description": "Number of secondary mappings to retain in 'map' filter mode."
},
"alignment_segment_length": {
"type": "integer",
"default": 10000,
"description": "Segment length for mapping.",
"fa_icon": "fab fa-draft2digital"
"description": "Segment length for mapping."
},
"alignment_block_length": {
"type": "integer",
"default": 30000,
"description": "Minimum block length filter for mapping.",
"fa_icon": "fab fa-draft2digital"
"description": "Minimum block length filter for mapping."
},
"alignment_mash_kmer": {
"type": "integer",
"default": 16,
"description": "Kmer size for mashmap.",
"fa_icon": "fab fa-draft2digital"
"description": "Kmer size for mashmap."
},
"alignment_merge_segments": {
"type": "boolean",
Expand Down Expand Up @@ -119,14 +121,12 @@
"seqwish_min_match_length": {
"type": "integer",
"default": 19,
"description": "Ignore exact matches below this length.",
"fa_icon": "fab fa-draft2digital"
"description": "Ignore exact matches below this length."
},
"seqwish_transclose_batch": {
"type": "integer",
"default": 1000000,
"description": "Number of bp to use for transitive closure batch.",
"fa_icon": "fab fa-draft2digital"
"description": "Number of bp to use for transitive closure batch."
}
},
"fa_icon": "fas fa-dna"
Expand All @@ -140,26 +140,22 @@
"smoothxg_max_block_weight": {
"type": "integer",
"default": 10000,
"description": "Maximum seed sequence in block.",
"fa_icon": "fab fa-draft2digital"
"description": "Maximum seed sequence in block."
},
"smoothxg_max_path_jump": {
"type": "integer",
"default": 5000,
"description": "Maximum path jump to include in block.",
"fa_icon": "fab fa-draft2digital"
"description": "Maximum path jump to include in block."
},
"smoothxg_max_edge_jump": {
"type": "integer",
"default": 5000,
"description": "Maximum edge jump before breaking.",
"fa_icon": "fab fa-draft2digital"
"description": "Maximum edge jump before breaking."
},
"smoothxg_max_poa_length": {
"type": "integer",
"default": 10000,
"description": "Maximum sequence length to put into POA.",
"fa_icon": "fab fa-draft2digital"
"description": "Maximum sequence length to put into POA."
},
"smoothxg_consensus_spec": {
"type": "string",
Expand Down

0 comments on commit e253556

Please sign in to comment.