From b017662e6a37ae52c51b120f3193fa017f64bb45 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Tue, 30 Nov 2021 15:57:12 +0100 Subject: [PATCH 01/41] parallelize vg_deconstruct --- main.nf | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/main.nf b/main.nf index c4fc67ba..2cbb8960 100644 --- a/main.nf +++ b/main.nf @@ -348,19 +348,16 @@ process vg_deconstruct { publishDir "${params.outdir}/vg_deconstruct", mode: "${params.publish_dir_mode}" input: - path(graph) + tuple path(graph), val(vcf_spec) output: path("${graph}.*.vcf") """ - for s in \$(echo "${params.vcf_spec}" | tr ',' ' '); - do - ref=\$(echo "\$s" | cut -f 1 -d:) - delim=\$(echo "\$s" | cut -f 2 -d:) - vcf="${graph}".\$(echo \$ref | tr '/|' '_').vcf - vg deconstruct -P \$ref -H \$delim -e -a -t "${task.cpus}" "${graph}" > \$vcf - done + ref=\$(echo "$vcf_spec" | cut -f 1 -d:) + delim=\$(echo "$vcf_spec" | cut -f 2 -d:) + vcf="${graph}".\$(echo \$ref | tr '/|' '_').vcf + vg deconstruct -P \$ref -H \$delim -e -a -t "${task.cpus}" "${graph}" > \$vcf """ } @@ -422,8 +419,12 @@ workflow { odgiDrawOut = odgiDraw(odgiLayout.out) } + ch_vg_deconstruct = Channel.empty() + ch_vcf_spec = Channel.empty() if (params.vcf_spec != false) { - vg_deconstruct(gfaffix.out.gfa_norm) + ch_vcf_spec = Channel.from(params.vcf_spec).splitCsv().flatten() + ch_vg_deconstruct = vg_deconstruct(gfaffix.out.gfa_norm.combine(ch_vcf_spec)) + // TODO add bcftools } multiQC( From 293694e4610398c05805fea8b10a24b24b687769 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 1 Dec 2021 09:57:57 +0100 Subject: [PATCH 02/41] change wfmash_poa_params default to asm5 --- main.nf | 2 +- nextflow.config | 2 +- nextflow_schema.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index 2cbb8960..8d1373d8 100644 --- a/main.nf +++ b/main.nf @@ -503,7 +503,7 @@ def helpMessage() { --smoothxg_pad_max_depth [n] path depth at which we don't pad the POA problem [default: 100] 
--smoothxg_poa_padding [n] pad each end of each sequence in POA with N*(longest_poas_seq) bp [default: 0.03] --smoothxg_poa_params [str] score parameters for POA in the form of match,mismatch,gap1,ext1,gap2,ext2 - [default: 1,9,16,2,41,1] + [default: 1,19,39,3,81,1] --smoothxg_write_maf [n] write MAF output representing merged POA blocks [default: OFF] Visualization options: diff --git a/nextflow.config b/nextflow.config index 5ad6b56f..d1ec66e0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -51,7 +51,7 @@ params { // - asm5, --poa-params 1,19,39,3,81,1, ~0.1 divergence // - asm10, --poa-params 1,9,16,2,41,1, ~1 divergence // - asm20, --poa-params 1,4,6,2,26,1, ~5% divergence - smoothxg_poa_params = "1,9,16,2,41,1" + smoothxg_poa_params = "1,19,39,3,81,1" smoothxg_write_maf = false smoothxg_consensus_spec = false smoothxg_consensus_prefix = "Consensus_" diff --git a/nextflow_schema.json b/nextflow_schema.json index 84f105bc..f1380e24 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -176,7 +176,7 @@ }, "smoothxg_poa_params": { "type": "string", - "default": "1,9,16,2,41,1", + "default": "1,19,39,3,81,1", "description": "Score parameters for POA in the form of match,mismatch,gap1,ext1,gap2,ext2.", "fa_icon": "fab fa-superpowers" }, From 676b05103c4d3fc6cb448b1d74eb3520389aa495 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 1 Dec 2021 10:01:53 +0100 Subject: [PATCH 03/41] fix defaults of seqwish process --- nextflow_schema.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index f1380e24..ae277ffd 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -104,13 +104,13 @@ "properties": { "seqwish_min_match_length": { "type": "integer", - "default": 19, + "default": 47, "description": "Ignore exact matches below this length.", "fa_icon": "fas fa-project-diagram" }, "seqwish_transclose_batch": { "type": "integer", - "default": 1000000, + "default": 10000000, 
"description": "Number of bp to use for transitive closure batch.", "fa_icon": "fas fa-project-diagram" } @@ -191,7 +191,7 @@ "title": "VCF options", "type": "object", "description": "Optios for vg deconstruct.", - "default": "", + "default": "false", "properties": { "vcf_spec": { "type": "string", From 3222483761c5570a3355af70b097fa1c485f78f9 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Tue, 6 Sep 2022 15:44:00 +0200 Subject: [PATCH 04/41] cosmetic changes --- README.md | 29 +++++++++++++++++++++-------- main.nf | 23 +++++++++++++---------- nextflow.config | 5 +++-- nextflow_schema.json | 9 ++++++--- 4 files changed, 43 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 52cc788f..f0842de5 100644 --- a/README.md +++ b/README.md @@ -10,11 +10,11 @@ ## Introduction -**Warning:** This pipeline is currently UNDER CONSTRUCTION. Some features may not work or not work as intended! +> **Warning:** This pipeline is currently UNDER CONSTRUCTION. Some features may not work or not work as intended! **nf-core/pangenome** is a bioinformatics best-practise analysis pipeline for the rendering of a collection of sequences into a pangenome graph. -Its goal is to build a graph that is locally directed and acyclic while preserving large-scale variation. Maintaining local linearity is important for interpretation, visualization, mapping, comparative genomics, and reuse of pangenome graphs**. +Its goal is to build a graph that is locally directed and acyclic while preserving large-scale variation. Maintaining local linearity is important for interpretation, visualization, mapping, comparative genomics, and reuse of pangenome graphs. The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. 
@@ -68,12 +68,12 @@ Many thanks to all who have helped out and contributed along the way, including | Name | Affiliation | |----------------------------------------------------------|---------------------------------------------------------------------------------------| -| [Philipp Ehmele](https://github.com/imipenem) | [University of Hamburg, Hamburg, Germany](https://www.uni-hamburg.de/en.html) | +| [Philipp Ehmele](https://github.com/imipenem) | [Institute of Computational Biology, Helmholtz Zentrum München, Munich, Germany](https://www.helmholtz-muenchen.de/icb/index.html) | | [Erik Garrison](https://github.com/ekg) | [The University of Tennessee Health Science Center, Memphis, Tennessee, TN, USA](https://uthsc.edu/)| -| [Andrea Guarracino](https://github.com/AndreaGuarracino) | [University of Rome Tor Vergata, Rome, Italy](http://www.scienze.uniroma2.it/) | +| [Andrea Guarracino](https://github.com/AndreaGuarracino) | [Genomics Research Centre, Human Technopole, Milan, Italy](https://humantechnopole.it/en/) | | [Michael Heuer](https://github.com/heuermh) | [UC Berkeley, USA](https://rise.cs.berkeley.edu) | -| [Lukas Heumos](https://github.com/zethson) | [Institute of Computational Biology, Helmholtz Zentrum München, Munich, Germany](https://www.helmholtz-muenchen.de/icb/index.html) \\ [Institute of Lung Biology and Disease and Comprehensive Pneumology Center, Helmholtz Zentrum München, Munich, Germany](https://www.helmholtz-muenchen.de/ilbd/the-institute/cpc/index.html) | -| [Simon Heumos](https://github.com/subwaystation) | [Quantitative Biology Center (QBiC) Tübingen, University of Tübingen, Germany](https://uni-tuebingen.de/en/research/research-infrastructure/quantitative-biology-center-qbic/) | +| [Lukas Heumos](https://github.com/zethson) | [Institute of Computational Biology, Helmholtz Zentrum München, Munich, Germany](https://www.helmholtz-muenchen.de/icb/index.html)
[Institute of Lung Biology and Disease and Comprehensive Pneumology Center, Helmholtz Zentrum München, Munich, Germany](https://www.helmholtz-muenchen.de/ilbd/the-institute/cpc/index.html) | +| [Simon Heumos](https://github.com/subwaystation) | [Quantitative Biology Center (QBiC) Tübingen, University of Tübingen, Germany](https://uni-tuebingen.de/en/research/research-infrastructure/quantitative-biology-center-qbic/)
[Biomedical Data Science, Department of Computer Science, University of Tübingen, Germany](https://uni-tuebingen.de/en/faculties/faculty-of-science/departments/computer-science/department/) | > \* Listed in alphabetical order @@ -100,8 +100,21 @@ In addition, references of tools and data used in this pipeline are as follows: > **ODGI: understanding pangenome graphs.** > -> Andrea Guarracino, Simon Heumos, Sven Nahnsen, Pjotr Prins & Erik Garrison. +> Andrea Guarracino*, Simon Heumos*, Sven Nahnsen, Pjotr Prins & Erik Garrison. > -> _bioRxiv_ 2021 Nov 11 doi: [10.1101/2021.11.10.467921](https://doi.org/10.1101/2021.11.10.467921). +> _Bioinformatics_ 2022 Jul 01 doi: [10.1093/bioinformatics/btac308](https://doi.org/10.1093/bioinformatics/btac308). +> +> **contributed equally* + +> **Unbiased pangenome graphs** +> +> Erik Garrison, Andrea Guarracino. +> +> _bioRxiv_ 2022 Feb 02 doi: [10.1101/2022.02.14.480413](https://doi.org/10.1101/2022.02.14.480413). + +## Attention + +### MultiQC Report +In the resulting MultiQC report, in the **Detailed ODGI stats table**, it says `smoothxg`. To be clear, these are the stats of the graph after polishing with `gfaffix`! Some tools were hardcoded in the ODGI MultiQC module, but hopefully this will be fixed in the future. 
\ No newline at end of file diff --git a/main.nf b/main.nf index 8d1373d8..a4c8427b 100644 --- a/main.nf +++ b/main.nf @@ -29,12 +29,15 @@ def wfmash_prefix = "wfmash" def seqwish_prefix = ".seqwish" def smoothxg_prefix = ".smoothxg" def n_haps = 0 -def do_1d = false -def do_2d = false +def do_1d = true +def do_2d = true -if (params.viz) { - do_1d = true - do_2d = true +if (params.no_viz) { + do_1d = false +} + +if (params.no_layout) { + do_2d = false } def make_file_prefix = { f -> """\ @@ -129,7 +132,7 @@ process wfmash { -k ${params.wfmash_mash_kmer} \ -t ${task.cpus} \ $fasta $fasta \ - >${f}${wfmash_prefix}.paf + >${f}.${wfmash_prefix}.paf """ } @@ -406,16 +409,16 @@ workflow { smoothxg(seqwish.out) gfaffix(smoothxg.out.gfa_smooth) - odgiBuild(seqwish.out.collect{it[1]}.mix(smoothxg.out.consensus_smooth.flatten(), gfaffix.out.gfa_norm)) - odgiStats(odgiBuild.out) + odgiBuild(seqwish.out.collect{it[1]}.mix(smoothxg.out.consensus_smooth.flatten())) + odgiStats(odgiBuild.out.mix(gfaffix.out.og_norm)) odgiVizOut = Channel.empty() if (do_1d) { - odgiVizOut = odgiViz(odgiBuild.out.filter( ~/.*smoothxg.*/ )) + odgiVizOut = odgiViz(gfaffix.out.og_norm) } odgiDrawOut = Channel.empty() if (do_2d) { - odgiLayout(odgiBuild.out.filter( ~/.*smoothxg.*/ )) + odgiLayout(gfaffix.out.og_norm) odgiDrawOut = odgiDraw(odgiLayout.out) } diff --git a/nextflow.config b/nextflow.config index d1ec66e0..2b3e2356 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,7 +20,8 @@ params { // nf-core: Specify your pipeline's command line flags // Visualization - viz = false + no_viz = false + no_layout = false // Alignment options wfmash_map_pct_id = 90 @@ -224,4 +225,4 @@ def check_max(obj, type) { return obj } } -} +} \ No newline at end of file diff --git a/nextflow_schema.json b/nextflow_schema.json index ae277ffd..985f60f2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -206,10 +206,13 @@ "default": "", "fa_icon": "fas fa-project-diagram", "properties": { - "viz": { 
+ "no_viz": { "type": "boolean", - "description": "Generate 1D and 2D visualisations of the built graphs", - "fa_icon": "fas fa-project-diagram" + "description": "Set if you don't want the 1D visualizations." + }, + "no_layout": { + "type": "boolean", + "description": "Set if you don't want the computational expensive 2D layout." } } }, From f011d7cb60e063b6efd55198b2d5c0fd593a7ee3 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Tue, 6 Sep 2022 15:58:35 +0200 Subject: [PATCH 05/41] update tests --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6f342a0f..a85bf234 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,7 +56,7 @@ jobs: # We also test basic visualization and reporting options here run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --viz + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --no_viz --no_layout nextflow run ${GITHUB_WORKSPACE} -profile test,docker --smoothxg consensus_spec 10,100,1000 nextflow run ${GITHUB_WORKSPACE} -profile test,docker --vcf_spec "gi|568815561:#,gi|568815567:#" nextflow run ${GITHUB_WORKSPACE} -profile test,docker --smoothxg_write_maf From eca88899a653e121fac3967565007acaae4bc259 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Tue, 6 Sep 2022 15:58:55 +0200 Subject: [PATCH 06/41] fix CI, allow optional PAF input --- main.nf | 21 +++++++++++++-------- nextflow.config | 3 +++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/main.nf b/main.nf index a4c8427b..d6e120c2 100644 --- a/main.nf +++ b/main.nf @@ -119,7 +119,7 @@ process wfmash { tuple val(f), path(fasta) output: - tuple val(f), path("${f}${wfmash_prefix}.paf") + tuple val(f), path("${f}.${wfmash_prefix}.paf") """ wfmash ${wfmash_exclude_cmd} \ @@ -397,14 +397,19 @@ workflow { wfmashAlign(fasta.combine(splitApproxMappingsInChunks.out.flatten())) } } else 
{ - if (params.wfmash_chunks == 1) { - wfmash(fasta) - seqwish(fasta, wfmash.out.collect{it[1]}) + if (params.paf != false) { + paf_ch = Channel.fromPath(params.paf) + seqwish(fasta, paf_ch) } else { - wfmashMap(fasta) - splitApproxMappingsInChunks(wfmashMap.out) - wfmashAlign(fasta.combine(splitApproxMappingsInChunks.out.flatten())) - seqwish(fasta, wfmashAlign.out.collect()) + if (params.wfmash_chunks == 1) { + wfmash(fasta) + seqwish(fasta, wfmash.out.collect{it[1]}) + } else { + wfmashMap(fasta) + splitApproxMappingsInChunks(wfmashMap.out) + wfmashAlign(fasta.combine(splitApproxMappingsInChunks.out.flatten())) + seqwish(fasta, wfmashAlign.out.collect()) + } } smoothxg(seqwish.out) gfaffix(smoothxg.out.gfa_smooth) diff --git a/nextflow.config b/nextflow.config index 2b3e2356..642701b8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,9 @@ params { // Input options input = null + // Optional PAF input + paf = false + // Output options outdir = "./results" publish_dir_mode = "copy" From 958dc7e1647211e7ce153f4b14cabd6caf662899 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Tue, 6 Sep 2022 16:07:59 +0200 Subject: [PATCH 07/41] node v14 for linting --- .github/workflows/linting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index fcde400c..8ef67cd1 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -14,7 +14,7 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-node@v1 with: - node-version: '10' + node-version: '14' - name: Install markdownlint run: npm install -g markdownlint-cli - name: Run Markdownlint From 7458f6bc6ecd729ea2226e87b92399c57bebb06f Mon Sep 17 00:00:00 2001 From: subwaystation Date: Tue, 6 Sep 2022 16:09:59 +0200 Subject: [PATCH 08/41] address linting issue --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f0842de5..6b370dda 100644 --- a/README.md 
+++ b/README.md @@ -104,7 +104,7 @@ In addition, references of tools and data used in this pipeline are as follows: > > _Bioinformatics_ 2022 Jul 01 doi: [10.1093/bioinformatics/btac308](https://doi.org/10.1093/bioinformatics/btac308). > -> **contributed equally* +> *_contributed equally_ > **Unbiased pangenome graphs** > @@ -117,4 +117,5 @@ In addition, references of tools and data used in this pipeline are as follows: ## Attention ### MultiQC Report -In the resulting MultiQC report, in the **Detailed ODGI stats table**, it says `smoothxg`. To be clear, these are the stats of the graph after polishing with `gfaffix`! Some tools were hardcoded in the ODGI MultiQC module, but hopefully this will be fixed in the future. \ No newline at end of file + +In the resulting MultiQC report, in the **Detailed ODGI stats table**, it says `smoothxg`. To be clear, these are the stats of the graph after polishing with `gfaffix`! Some tools were hardcoded in the ODGI MultiQC module, but hopefully this will be fixed in the future. 
From 654a55486646918162d3aa98ffb5ce12a385da37 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Tue, 6 Sep 2022 16:11:17 +0200 Subject: [PATCH 09/41] update YAML linting --- .github/workflows/linting.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8ef67cd1..186fdada 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -53,7 +53,7 @@ jobs: - uses: actions/checkout@v1 - uses: actions/setup-node@v1 with: - node-version: '10' + node-version: '14' - name: Install yaml-lint run: npm install -g yaml-lint - name: Run yaml-lint @@ -101,7 +101,7 @@ jobs: - uses: actions/setup-python@v1 with: - python-version: '3.6' + python-version: '3.9' architecture: 'x64' - name: Install dependencies From 014d8a1db8b2799352a66cdd6596a73a851ad314 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Tue, 6 Sep 2022 16:27:22 +0200 Subject: [PATCH 10/41] fix nf-core lint --- .github/workflows/linting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 186fdada..ad77dd56 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -114,7 +114,7 @@ jobs: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt lint ${GITHUB_WORKSPACE} --markdown lint_results.md + run: nf-core -l lint_log.txt lint -d ${GITHUB_WORKSPACE} --markdown lint_results.md - name: Save PR number if: ${{ always() }} From b007463a980202135782042d87895c5f215df218 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Tue, 6 Sep 2022 16:42:08 +0200 Subject: [PATCH 11/41] add missing PAF parameter [skip ci] --- nextflow_schema.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 985f60f2..5d314dca 100644 --- 
a/nextflow_schema.json +++ b/nextflow_schema.json @@ -32,6 +32,11 @@ "fa_icon": "fas fa-envelope", "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, + "paf": { + "type": "string", + "fa_icon": "fas fa-align-center", + "description": "Optional input to skip the all vs. all alignment phase directly starting with seqwish." } } }, From 8d0e15e5eda5c82ff25549a399cc5057d11059f4 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 7 Sep 2022 10:59:30 +0200 Subject: [PATCH 12/41] define required parameters --- main.nf | 27 ++++++++++++++++++++++++--- nextflow.config | 3 ++- nextflow_schema.json | 5 ++++- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index d6e120c2..2ffd1d5d 100644 --- a/main.nf +++ b/main.nf @@ -17,6 +17,26 @@ if (params.help){ exit 0 } +if (params.input == null) { + log.info""" + + Mandatory argument --input missing! For more details run with --help. + + """.stripIndent() + + exit 1 +} + +if (params.wfmash_n_mappings == null) { + log.info""" + + Mandatory argument --wfmash_n_mappings missing! For more details run with --help. + + """.stripIndent() + + exit 1 +} + ch_multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists: true) // ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() @@ -24,6 +44,7 @@ ch_multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists def wfmash_merge_cmd = params.wfmash_merge_segments ? "-M" : "" def wfmash_exclude_cmd = params.wfmash_exclude_delim ? "-Y${params.wfmash_exclude_delim}" : "-X" def wfmash_split_cmd = params.wfmash_no_splits ? 
"-N" : "" +def wfmash_n_mappings_minus_1 = params.wfmash_n_mappings - 1 def smoothxg_poa_params_display = params.smoothxg_poa_params.replaceAll(/,/, "_") def wfmash_prefix = "wfmash" def seqwish_prefix = ".seqwish" @@ -66,7 +87,7 @@ process wfmashMap { ${wfmash_merge_cmd} \ ${wfmash_split_cmd} \ -p ${params.wfmash_map_pct_id} \ - -n ${params.wfmash_n_mappings} \ + -n ${wfmash_n_mappings_minus_1} \ -k ${params.wfmash_mash_kmer} \ -t ${task.cpus} \ -m \ @@ -103,7 +124,7 @@ process wfmashAlign { ${wfmash_merge_cmd} \ ${wfmash_split_cmd} \ -p ${params.wfmash_map_pct_id} \ - -n ${params.wfmash_n_mappings} \ + -n ${wfmash_n_mappings_minus_1} \ -k ${params.wfmash_mash_kmer} \ -t ${task.cpus} \ -i $paf \ @@ -128,7 +149,7 @@ process wfmash { ${wfmash_merge_cmd} \ ${wfmash_split_cmd} \ -p ${params.wfmash_map_pct_id} \ - -n ${params.wfmash_n_mappings} \ + -n ${wfmash_n_mappings_minus_1} \ -k ${params.wfmash_mash_kmer} \ -t ${task.cpus} \ $fasta $fasta \ diff --git a/nextflow.config b/nextflow.config index 642701b8..842f986b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,6 +11,7 @@ params { // Workflow flags // Input options + // FASTA input = null // Optional PAF input @@ -28,7 +29,7 @@ params { // Alignment options wfmash_map_pct_id = 90 - wfmash_n_mappings = 10 // default could be the number of input sequences + wfmash_n_mappings = null // default could be the number of input sequences, but then I would have to add another process wfmash_segment_length = 3000 wfmash_block_length = 3 * wfmash_segment_length wfmash_mash_kmer = 16 diff --git a/nextflow_schema.json b/nextflow_schema.json index 5d314dca..024cec7c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -99,7 +99,10 @@ "type": "boolean", "description": "If this parameter is set, only the wfmash alignment step of the pipeline is executed. This option is offered for users who want to use wfmash on a cluster." 
} - } + }, + "required": [ + "wfmash_n_mappings" + ] }, "seqwish_options": { "title": "Seqwish options", From 4df1464cb086bc4b549f0b08d8cfa9e30bc749bc Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 7 Sep 2022 11:03:31 +0200 Subject: [PATCH 13/41] we actually want to call it n_mappings --- main.nf | 4 ++-- nextflow.config | 3 ++- nextflow_schema.json | 19 ++++++++----------- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/main.nf b/main.nf index 2ffd1d5d..d1791e26 100644 --- a/main.nf +++ b/main.nf @@ -27,7 +27,7 @@ if (params.input == null) { exit 1 } -if (params.wfmash_n_mappings == null) { +if (params.n_mappings == null) { log.info""" Mandatory argument --wfmash_n_mappings missing! For more details run with --help. @@ -44,7 +44,7 @@ ch_multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists def wfmash_merge_cmd = params.wfmash_merge_segments ? "-M" : "" def wfmash_exclude_cmd = params.wfmash_exclude_delim ? "-Y${params.wfmash_exclude_delim}" : "-X" def wfmash_split_cmd = params.wfmash_no_splits ? 
"-N" : "" -def wfmash_n_mappings_minus_1 = params.wfmash_n_mappings - 1 +def wfmash_n_mappings_minus_1 = params.n_mappings - 1 def smoothxg_poa_params_display = params.smoothxg_poa_params.replaceAll(/,/, "_") def wfmash_prefix = "wfmash" def seqwish_prefix = ".seqwish" diff --git a/nextflow.config b/nextflow.config index 842f986b..a1072ff3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,8 @@ params { // Input options // FASTA input = null + // number of mappings + n_mappings = null // default could be the number of input sequences, but then I would have to add another process // Optional PAF input paf = false @@ -29,7 +31,6 @@ params { // Alignment options wfmash_map_pct_id = 90 - wfmash_n_mappings = null // default could be the number of input sequences, but then I would have to add another process wfmash_segment_length = 3000 wfmash_block_length = 3 * wfmash_segment_length wfmash_mash_kmer = 16 diff --git a/nextflow_schema.json b/nextflow_schema.json index 024cec7c..2beaef92 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -11,7 +11,8 @@ "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", "required": [ - "input" + "input", + "n_mappings" ], "properties": { "input": { @@ -20,6 +21,11 @@ "description": "Input FASTA file.", "help_text": "Use this to specify the location of your input FASTA file. For example:\n\n```bash\n--input 'path/to/data/input.fa.gz'\n```\n\n." }, + "n_mappings": { + "type": "string", + "fa_icon": "fas fa-dna", + "description": "Number of mappings to retain for each segment." 
+ }, "outdir": { "type": "string", "description": "The output directory where the results will be saved.", @@ -52,12 +58,6 @@ "description": "percent identity in the wfmash mashmap step", "fa_icon": "fas fa-align-center" }, - "wfmash_n_mappings": { - "type": "integer", - "default": 10, - "description": "number of secondary mappings to retain in 'map' filter mode", - "fa_icon": "fas fa-align-center" - }, "wfmash_segment_length": { "type": "integer", "default": 3000, @@ -99,10 +99,7 @@ "type": "boolean", "description": "If this parameter is set, only the wfmash alignment step of the pipeline is executed. This option is offered for users who want to use wfmash on a cluster." } - }, - "required": [ - "wfmash_n_mappings" - ] + } }, "seqwish_options": { "title": "Seqwish options", From 42dd229b810bf2c37c8bd2d76b3afb93d44cc09a Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 7 Sep 2022 11:37:21 +0200 Subject: [PATCH 14/41] add wfmash_block_length_cmd --- main.nf | 9 +++++---- nextflow.config | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index d1791e26..e45ce306 100644 --- a/main.nf +++ b/main.nf @@ -44,6 +44,7 @@ ch_multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists def wfmash_merge_cmd = params.wfmash_merge_segments ? "-M" : "" def wfmash_exclude_cmd = params.wfmash_exclude_delim ? "-Y${params.wfmash_exclude_delim}" : "-X" def wfmash_split_cmd = params.wfmash_no_splits ? "-N" : "" +def wfmash_block_length_cmd = params.wfmash_block_length ? 
"-l${params.wfmash_block_length}" : "" def wfmash_n_mappings_minus_1 = params.n_mappings - 1 def smoothxg_poa_params_display = params.smoothxg_poa_params.replaceAll(/,/, "_") def wfmash_prefix = "wfmash" @@ -68,7 +69,7 @@ ${f.getName()}\ fasta = channel.fromPath("${params.input}").map { f -> tuple(make_file_prefix(f), f) } if (!params.smoothxg_num_haps) { - n_haps = params.wfmash_n_mappings + n_haps = params.n_mappings } process wfmashMap { @@ -83,7 +84,7 @@ process wfmashMap { """ wfmash ${wfmash_exclude_cmd} \ -s ${params.wfmash_segment_length} \ - -l ${params.wfmash_block_length} \ + ${wfmash_block_length_cmd} \ ${wfmash_merge_cmd} \ ${wfmash_split_cmd} \ -p ${params.wfmash_map_pct_id} \ @@ -120,7 +121,7 @@ process wfmashAlign { """ wfmash ${wfmash_exclude_cmd} \ -s ${params.wfmash_segment_length} \ - -l ${params.wfmash_block_length} \ + ${wfmash_block_length_cmd} \ ${wfmash_merge_cmd} \ ${wfmash_split_cmd} \ -p ${params.wfmash_map_pct_id} \ @@ -145,7 +146,7 @@ process wfmash { """ wfmash ${wfmash_exclude_cmd} \ -s ${params.wfmash_segment_length} \ - -l ${params.wfmash_block_length} \ + ${wfmash_block_length_cmd} \ ${wfmash_merge_cmd} \ ${wfmash_split_cmd} \ -p ${params.wfmash_map_pct_id} \ diff --git a/nextflow.config b/nextflow.config index a1072ff3..a3dbab41 100644 --- a/nextflow.config +++ b/nextflow.config @@ -32,7 +32,7 @@ params { // Alignment options wfmash_map_pct_id = 90 wfmash_segment_length = 3000 - wfmash_block_length = 3 * wfmash_segment_length + wfmash_block_length = null wfmash_mash_kmer = 16 wfmash_merge_segments = false wfmash_no_splits = false From 2b8933b5bf4e8d38dac71c3e1a2f6b9bb21445f9 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 7 Sep 2022 11:42:18 +0200 Subject: [PATCH 15/41] wfmash_mash_kmer_cmd --- main.nf | 7 ++++--- nextflow.config | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index e45ce306..60023258 100644 --- a/main.nf +++ b/main.nf @@ -45,6 +45,7 @@ def wfmash_merge_cmd = 
params.wfmash_merge_segments ? "-M" : "" def wfmash_exclude_cmd = params.wfmash_exclude_delim ? "-Y${params.wfmash_exclude_delim}" : "-X" def wfmash_split_cmd = params.wfmash_no_splits ? "-N" : "" def wfmash_block_length_cmd = params.wfmash_block_length ? "-l${params.wfmash_block_length}" : "" +def wfmash_mash_kmer_cmd = params.wfmash_mash_kmer ? "-k${params.wfmash_mash_kmer}" : "" def wfmash_n_mappings_minus_1 = params.n_mappings - 1 def smoothxg_poa_params_display = params.smoothxg_poa_params.replaceAll(/,/, "_") def wfmash_prefix = "wfmash" @@ -87,9 +88,9 @@ process wfmashMap { ${wfmash_block_length_cmd} \ ${wfmash_merge_cmd} \ ${wfmash_split_cmd} \ + ${wfmash_mash_kmer_cmd} \ -p ${params.wfmash_map_pct_id} \ -n ${wfmash_n_mappings_minus_1} \ - -k ${params.wfmash_mash_kmer} \ -t ${task.cpus} \ -m \ $fasta $fasta \ @@ -124,9 +125,9 @@ process wfmashAlign { ${wfmash_block_length_cmd} \ ${wfmash_merge_cmd} \ ${wfmash_split_cmd} \ + ${wfmash_mash_kmer_cmd} \ -p ${params.wfmash_map_pct_id} \ -n ${wfmash_n_mappings_minus_1} \ - -k ${params.wfmash_mash_kmer} \ -t ${task.cpus} \ -i $paf \ $fasta $fasta \ @@ -149,9 +150,9 @@ process wfmash { ${wfmash_block_length_cmd} \ ${wfmash_merge_cmd} \ ${wfmash_split_cmd} \ + ${wfmash_mash_kmer_cmd} \ -p ${params.wfmash_map_pct_id} \ -n ${wfmash_n_mappings_minus_1} \ - -k ${params.wfmash_mash_kmer} \ -t ${task.cpus} \ $fasta $fasta \ >${f}.${wfmash_prefix}.paf diff --git a/nextflow.config b/nextflow.config index a3dbab41..65df6306 100644 --- a/nextflow.config +++ b/nextflow.config @@ -33,7 +33,7 @@ params { wfmash_map_pct_id = 90 wfmash_segment_length = 3000 wfmash_block_length = null - wfmash_mash_kmer = 16 + wfmash_mash_kmer = null wfmash_merge_segments = false wfmash_no_splits = false wfmash_exclude_delim = false From 7028e27a8d4090c5e7f49c8c648b350956e9437d Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 7 Sep 2022 12:52:12 +0200 Subject: [PATCH 16/41] wfmash_mash_kmer_thres_cmd --- main.nf | 4 ++++ nextflow.config | 1 
+ 2 files changed, 5 insertions(+) diff --git a/main.nf b/main.nf index 60023258..c7a9e678 100644 --- a/main.nf +++ b/main.nf @@ -46,6 +46,7 @@ def wfmash_exclude_cmd = params.wfmash_exclude_delim ? "-Y${params.wfmash_exclud def wfmash_split_cmd = params.wfmash_no_splits ? "-N" : "" def wfmash_block_length_cmd = params.wfmash_block_length ? "-l${params.wfmash_block_length}" : "" def wfmash_mash_kmer_cmd = params.wfmash_mash_kmer ? "-k${params.wfmash_mash_kmer}" : "" +def wfmash_kmer_thres_cmd = params.wfmash_mash_kmer_thres ? "-H${params.wfmash_kmer_thres}" : "" def wfmash_n_mappings_minus_1 = params.n_mappings - 1 def smoothxg_poa_params_display = params.smoothxg_poa_params.replaceAll(/,/, "_") def wfmash_prefix = "wfmash" @@ -89,6 +90,7 @@ process wfmashMap { ${wfmash_merge_cmd} \ ${wfmash_split_cmd} \ ${wfmash_mash_kmer_cmd} \ + ${wfmash_kmer_thres_cmd} \ -p ${params.wfmash_map_pct_id} \ -n ${wfmash_n_mappings_minus_1} \ -t ${task.cpus} \ @@ -126,6 +128,7 @@ process wfmashAlign { ${wfmash_merge_cmd} \ ${wfmash_split_cmd} \ ${wfmash_mash_kmer_cmd} \ + ${wfmash_kmer_thres_cmd} \ -p ${params.wfmash_map_pct_id} \ -n ${wfmash_n_mappings_minus_1} \ -t ${task.cpus} \ @@ -151,6 +154,7 @@ process wfmash { ${wfmash_merge_cmd} \ ${wfmash_split_cmd} \ ${wfmash_mash_kmer_cmd} \ + ${wfmash_kmer_thres_cmd} \ -p ${params.wfmash_map_pct_id} \ -n ${wfmash_n_mappings_minus_1} \ -t ${task.cpus} \ diff --git a/nextflow.config b/nextflow.config index 65df6306..4bf38f24 100644 --- a/nextflow.config +++ b/nextflow.config @@ -34,6 +34,7 @@ params { wfmash_segment_length = 3000 wfmash_block_length = null wfmash_mash_kmer = null + wfmash_mash_kmer_thres = null wfmash_merge_segments = false wfmash_no_splits = false wfmash_exclude_delim = false From ab257cf389783df1802d25b3950786c9efd6107e Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 7 Sep 2022 16:04:50 +0200 Subject: [PATCH 17/41] wfmash_sparse_map --- main.nf | 27 ++++++++++++++++++++++----- nextflow.config | 1 + 2 files 
changed, 23 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index c7a9e678..b6a2791f 100644 --- a/main.nf +++ b/main.nf @@ -41,6 +41,11 @@ ch_multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists // ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() // We can't change global parameters inside this scope, so we build the ones we need locally +def n_haps = 0 +if (!params.smoothxg_num_haps) { + n_haps = params.n_mappings +} + def wfmash_merge_cmd = params.wfmash_merge_segments ? "-M" : "" def wfmash_exclude_cmd = params.wfmash_exclude_delim ? "-Y${params.wfmash_exclude_delim}" : "-X" def wfmash_split_cmd = params.wfmash_no_splits ? "-N" : "" @@ -48,11 +53,24 @@ def wfmash_block_length_cmd = params.wfmash_block_length ? "-l${params.wfmash_bl def wfmash_mash_kmer_cmd = params.wfmash_mash_kmer ? "-k${params.wfmash_mash_kmer}" : "" def wfmash_kmer_thres_cmd = params.wfmash_mash_kmer_thres ? 
"-H${params.wfmash_kmer_thres}" : "" def wfmash_n_mappings_minus_1 = params.n_mappings - 1 +def wfmash_sparse_map_cmd = "" +if (params.wfmash_sparse_map == "auto") { + n = n_haps + x = Math.log(n)/n * 10 + wfmash_sparse_map_frac = 1 + if (x >= 1) { + wfmash_sparse_map_frac = x + } + wfmash_sparse_map_cmd = "-x${wfmash_sparse_map_frac}" +} else { + if (params.wfmash_sparse_map != null) { + wfmash_sparse_map_cmd = "-x${params.wfmash_sparse_map}" + } +} def smoothxg_poa_params_display = params.smoothxg_poa_params.replaceAll(/,/, "_") def wfmash_prefix = "wfmash" def seqwish_prefix = ".seqwish" def smoothxg_prefix = ".smoothxg" -def n_haps = 0 def do_1d = true def do_2d = true @@ -70,10 +88,6 @@ ${f.getName()}\ fasta = channel.fromPath("${params.input}").map { f -> tuple(make_file_prefix(f), f) } -if (!params.smoothxg_num_haps) { - n_haps = params.n_mappings -} - process wfmashMap { publishDir "${params.outdir}/wfmash_map", mode: "${params.publish_dir_mode}" @@ -91,6 +105,7 @@ process wfmashMap { ${wfmash_split_cmd} \ ${wfmash_mash_kmer_cmd} \ ${wfmash_kmer_thres_cmd} \ + ${wfmash_sparse_map_cmd} \ -p ${params.wfmash_map_pct_id} \ -n ${wfmash_n_mappings_minus_1} \ -t ${task.cpus} \ @@ -129,6 +144,7 @@ process wfmashAlign { ${wfmash_split_cmd} \ ${wfmash_mash_kmer_cmd} \ ${wfmash_kmer_thres_cmd} \ + ${wfmash_sparse_map_cmd} \ -p ${params.wfmash_map_pct_id} \ -n ${wfmash_n_mappings_minus_1} \ -t ${task.cpus} \ @@ -155,6 +171,7 @@ process wfmash { ${wfmash_split_cmd} \ ${wfmash_mash_kmer_cmd} \ ${wfmash_kmer_thres_cmd} \ + ${wfmash_sparse_map_cmd} \ -p ${params.wfmash_map_pct_id} \ -n ${wfmash_n_mappings_minus_1} \ -t ${task.cpus} \ diff --git a/nextflow.config b/nextflow.config index 4bf38f24..264e9aab 100644 --- a/nextflow.config +++ b/nextflow.config @@ -35,6 +35,7 @@ params { wfmash_block_length = null wfmash_mash_kmer = null wfmash_mash_kmer_thres = null + wfmash_sparse_map = null wfmash_merge_segments = false wfmash_no_splits = false wfmash_exclude_delim = false 
From 99c54059b6fc75d06462ae7ed022ff3e1c9b562f Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 7 Sep 2022 16:09:52 +0200 Subject: [PATCH 18/41] smoothxg_block_id_min --- main.nf | 5 +++-- nextflow.config | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index b6a2791f..aadf8ed7 100644 --- a/main.nf +++ b/main.nf @@ -68,6 +68,7 @@ if (params.wfmash_sparse_map == "auto") { } } def smoothxg_poa_params_display = params.smoothxg_poa_params.replaceAll(/,/, "_") +def smoothxg_block_id_min = params.wfmash_map_pct_id / 100.0 def wfmash_prefix = "wfmash" def seqwish_prefix = ".seqwish" def smoothxg_prefix = ".smoothxg" @@ -243,7 +244,7 @@ process smoothxg { -w \$(echo "\$poa_length * ${n_haps}" | bc) \ -K \ -X 100 \ - -I ${params.smoothxg_block_id_min} \ + -I ${smoothxg_block_id_min} \ -R ${params.smoothxg_block_ratio_min} \ -j ${params.smoothxg_max_path_jump} \ -e ${params.smoothxg_max_edge_jump} \ @@ -267,7 +268,7 @@ process smoothxg { -w \$(echo "\$poa_length * ${n_haps}" | bc) \ -K \ -X 100 \ - -I ${params.smoothxg_block_id_min} \ + -I ${smoothxg_block_id_min} \ -R ${params.smoothxg_block_ratio_min} \ -j ${params.smoothxg_max_path_jump} \ -e ${params.smoothxg_max_edge_jump} \ diff --git a/nextflow.config b/nextflow.config index 264e9aab..d9f16283 100644 --- a/nextflow.config +++ b/nextflow.config @@ -51,7 +51,7 @@ params { smoothxg_max_path_jump = 0 smoothxg_max_edge_jump = 0 smoothxg_poa_length = "4001,4507" - smoothxg_block_id_min = 0.95 + smoothxg_block_id_min = null smoothxg_block_ratio_min = 0 smoothxg_pad_max_depth = 100 smoothxg_poa_padding = 0.03 From c8a0153f2b368b9b9d206d6c9b7036f912bdb5b2 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 7 Sep 2022 16:20:45 +0200 Subject: [PATCH 19/41] smoothxg_poa_params_cmd --- main.nf | 23 ++++++++++++++++++++--- nextflow.config | 2 +- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index aadf8ed7..45c63d10 100644 --- a/main.nf +++ b/main.nf 
@@ -67,8 +67,25 @@ if (params.wfmash_sparse_map == "auto") { wfmash_sparse_map_cmd = "-x${params.wfmash_sparse_map}" } } -def smoothxg_poa_params_display = params.smoothxg_poa_params.replaceAll(/,/, "_") def smoothxg_block_id_min = params.wfmash_map_pct_id / 100.0 +// TODO: CHANGE TO LARGE P ONCE WE ARE THERE +def smoothxg_poa_params_cmd = "" +if (params.smoothxg_poa_params == null) { + smoothxg_poa_params = "-p 1,19,39,3,81,1" +} else { + if (params.smoothxg_poa_params == "asm5") { + smoothxg_poa_params = "-p 1,19,39,3,81,1" + } else if (params.smoothxg_poa_params == "asm10") { + smoothxg_poa_params = "-p 1,9,16,2,41,1" + } else if (params.smoothxg_poa_params == "asm15") { + smoothxg_poa_params = "-p 1,7,11,2,33,1" + } else if (params.smoothxg_poa_params == "asm20") { + smoothxg_poa_params = "-p 1,4,6,2,26,1" + } else { + smoothxg_poa_params = "-p${params.smoothxg_poa_params}" + } +} +def smoothxg_poa_params_display = smoothxg_poa_params.replaceAll(/,/, "_") def wfmash_prefix = "wfmash" def seqwish_prefix = ".seqwish" def smoothxg_prefix = ".smoothxg" @@ -249,7 +266,7 @@ process smoothxg { -j ${params.smoothxg_max_path_jump} \ -e ${params.smoothxg_max_edge_jump} \ -l \$poa_length \ - -p ${params.smoothxg_poa_params} \ + ${smoothxg_poa_params} \ -O ${params.smoothxg_poa_padding} \ -Y \$(echo "${params.smoothxg_pad_max_depth} * ${n_haps}" | bc) \ -d 0 -D 0 \ @@ -273,7 +290,7 @@ process smoothxg { -j ${params.smoothxg_max_path_jump} \ -e ${params.smoothxg_max_edge_jump} \ -l \$poa_length \ - -p ${params.smoothxg_poa_params} \ + ${smoothxg_poa_params} \ -O ${params.smoothxg_poa_padding} \ -Y \$(echo "${params.smoothxg_pad_max_depth} * ${n_haps}" | bc) \ -d 0 -D 0 \ diff --git a/nextflow.config b/nextflow.config index d9f16283..870df93b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -59,7 +59,7 @@ params { // - asm5, --poa-params 1,19,39,3,81,1, ~0.1 divergence // - asm10, --poa-params 1,9,16,2,41,1, ~1 divergence // - asm20, --poa-params 1,4,6,2,26,1, ~5% 
divergence - smoothxg_poa_params = "1,19,39,3,81,1" + smoothxg_poa_params = null smoothxg_write_maf = false smoothxg_consensus_spec = false smoothxg_consensus_prefix = "Consensus_" From 8d50d00afc3643e7516c0b904bf41358c0bbd336 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 7 Sep 2022 16:31:32 +0200 Subject: [PATCH 20/41] seqwish sparse factor, but not activated --- main.nf | 2 ++ nextflow.config | 1 + 2 files changed, 3 insertions(+) diff --git a/main.nf b/main.nf index 45c63d10..b87ebd8d 100644 --- a/main.nf +++ b/main.nf @@ -67,6 +67,7 @@ if (params.wfmash_sparse_map == "auto") { wfmash_sparse_map_cmd = "-x${params.wfmash_sparse_map}" } } + def smoothxg_block_id_min = params.wfmash_map_pct_id / 100.0 // TODO: CHANGE TO LARGE P ONCE WE ARE THERE def smoothxg_poa_params_cmd = "" @@ -198,6 +199,7 @@ process wfmash { """ } +/// TODO -f ${params.seqwish_sparse_factor} \ process seqwish { publishDir "${params.outdir}/seqwish", mode: "${params.publish_dir_mode}" diff --git a/nextflow.config b/nextflow.config index 870df93b..bcb4b2b8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -45,6 +45,7 @@ params { // Seqwish options seqwish_min_match_length = 47 seqwish_transclose_batch = 10000000 + seqwish_sparse_factor = 0 // Smoothxg options smoothxg_num_haps = false From 000b34305f703380026d749592611b6c95f26339 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Thu, 8 Sep 2022 09:38:07 +0200 Subject: [PATCH 21/41] smoothxg_temp_dir --- main.nf | 3 +++ nextflow.config | 1 + 2 files changed, 4 insertions(+) diff --git a/main.nf b/main.nf index b87ebd8d..77763ed6 100644 --- a/main.nf +++ b/main.nf @@ -87,6 +87,7 @@ if (params.smoothxg_poa_params == null) { } } def smoothxg_poa_params_display = smoothxg_poa_params.replaceAll(/,/, "_") +def smoothxg_temp_dir = params.smoothxg_temp_dir ? 
"-b${params.smoothxg_temp_dir}" : "" def wfmash_prefix = "wfmash" def seqwish_prefix = ".seqwish" def smoothxg_prefix = ".smoothxg" @@ -261,6 +262,7 @@ process smoothxg { -T ${task.cpus} \ -g \$input_gfa \ -w \$(echo "\$poa_length * ${n_haps}" | bc) \ + ${smoothxg_temp_dir} \ -K \ -X 100 \ -I ${smoothxg_block_id_min} \ @@ -285,6 +287,7 @@ process smoothxg { -T ${task.cpus} \ -g \$input_gfa \ -w \$(echo "\$poa_length * ${n_haps}" | bc) \ + ${smoothxg_temp_dir} \ -K \ -X 100 \ -I ${smoothxg_block_id_min} \ diff --git a/nextflow.config b/nextflow.config index bcb4b2b8..430128e1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -64,6 +64,7 @@ params { smoothxg_write_maf = false smoothxg_consensus_spec = false smoothxg_consensus_prefix = "Consensus_" + smoothxg_temp_dir = null // vcf_spec = "gi|568815561:#,gi|568815567:#" vcf_spec = false From d86eb1762ebdfb6dfc2a80ee9054440882928515 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Thu, 8 Sep 2022 09:51:18 +0200 Subject: [PATCH 22/41] smoothxg_xpoa_stuff --- main.nf | 9 +++++++-- nextflow.config | 3 +++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 77763ed6..798afd5d 100644 --- a/main.nf +++ b/main.nf @@ -88,6 +88,9 @@ if (params.smoothxg_poa_params == null) { } def smoothxg_poa_params_display = smoothxg_poa_params.replaceAll(/,/, "_") def smoothxg_temp_dir = params.smoothxg_temp_dir ? "-b${params.smoothxg_temp_dir}" : "" +def smoothxg_keep_intermediate_files = params.smoothxg_keep_intermediate_files ? "-K" : "" +def smoothxg_xpoa = params.smoothxg_run_abpoa ? "" : "-S" +def smoothxg_poa_mode = params.smoothxg_run_global_poa ? 
"-Z" : "" def wfmash_prefix = "wfmash" def seqwish_prefix = ".seqwish" def smoothxg_prefix = ".smoothxg" @@ -263,7 +266,7 @@ process smoothxg { -g \$input_gfa \ -w \$(echo "\$poa_length * ${n_haps}" | bc) \ ${smoothxg_temp_dir} \ - -K \ + ${smoothxg_keep_intermediate_files} \ -X 100 \ -I ${smoothxg_block_id_min} \ -R ${params.smoothxg_block_ratio_min} \ @@ -274,6 +277,7 @@ process smoothxg { -O ${params.smoothxg_poa_padding} \ -Y \$(echo "${params.smoothxg_pad_max_depth} * ${n_haps}" | bc) \ -d 0 -D 0 \ + ${smoothxg_xpoa} ${smoothxg_poa_mode} \ -V \ -o smooth.\$i.gfa else @@ -288,7 +292,7 @@ process smoothxg { -g \$input_gfa \ -w \$(echo "\$poa_length * ${n_haps}" | bc) \ ${smoothxg_temp_dir} \ - -K \ + ${smoothxg_keep_intermediate_files} \ -X 100 \ -I ${smoothxg_block_id_min} \ -R ${params.smoothxg_block_ratio_min} \ @@ -299,6 +303,7 @@ process smoothxg { -O ${params.smoothxg_poa_padding} \ -Y \$(echo "${params.smoothxg_pad_max_depth} * ${n_haps}" | bc) \ -d 0 -D 0 \ + ${smoothxg_xpoa} ${smoothxg_poa_mode} \ \$maf_params \ -Q ${params.smoothxg_consensus_prefix} \ \$consensus_params \ diff --git a/nextflow.config b/nextflow.config index 430128e1..980445b5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -65,6 +65,9 @@ params { smoothxg_consensus_spec = false smoothxg_consensus_prefix = "Consensus_" smoothxg_temp_dir = null + smoothxg_keep_intermediate_files = null + smoothxg_run_abpoa = null + smoothxg_run_global_poa = null // vcf_spec = "gi|568815561:#,gi|568815567:#" vcf_spec = false From 71c767a88bc878fabfe59249727cd37e21d4423a Mon Sep 17 00:00:00 2001 From: subwaystation Date: Thu, 8 Sep 2022 10:09:53 +0200 Subject: [PATCH 23/41] deactivate consensus graph --- main.nf | 17 ++++++++++++----- nextflow.config | 4 +++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index 798afd5d..5059096c 100644 --- a/main.nf +++ b/main.nf @@ -67,6 +67,9 @@ if (params.wfmash_sparse_map == "auto") { wfmash_sparse_map_cmd = 
"-x${params.wfmash_sparse_map}" } } +def wfmash_temp_dir = params.wfmash_temp_dir ? "-B${params.wfmash_temp_dir}" : "" + +def seqwish_temp_dir = params.seqwish_temp_dir ? "--temp-dir${params.seqwish_temp_dir}" : "" def smoothxg_block_id_min = params.wfmash_map_pct_id / 100.0 // TODO: CHANGE TO LARGE P ONCE WE ARE THERE @@ -81,7 +84,7 @@ if (params.smoothxg_poa_params == null) { } else if (params.smoothxg_poa_params == "asm15") { smoothxg_poa_params = "-p 1,7,11,2,33,1" } else if (params.smoothxg_poa_params == "asm20") { - smoothxg_poa_params = "-p 1,4,6,2,26,1" + smoothxg_poa_params = "-p 1,4,6,2,26,1"B } else { smoothxg_poa_params = "-p${params.smoothxg_poa_params}" } @@ -91,9 +94,13 @@ def smoothxg_temp_dir = params.smoothxg_temp_dir ? "-b${params.smoothxg_temp_dir def smoothxg_keep_intermediate_files = params.smoothxg_keep_intermediate_files ? "-K" : "" def smoothxg_xpoa = params.smoothxg_run_abpoa ? "" : "-S" def smoothxg_poa_mode = params.smoothxg_run_global_poa ? "-Z" : "" +// disabling consensus graph mode +def smoothxg_consensus_spec = false + def wfmash_prefix = "wfmash" def seqwish_prefix = ".seqwish" def smoothxg_prefix = ".smoothxg" + def do_1d = true def do_2d = true @@ -131,6 +138,7 @@ process wfmashMap { ${wfmash_sparse_map_cmd} \ -p ${params.wfmash_map_pct_id} \ -n ${wfmash_n_mappings_minus_1} \ + ${wfmash_temp_dir} \ -t ${task.cpus} \ -m \ $fasta $fasta \ @@ -170,6 +178,7 @@ process wfmashAlign { ${wfmash_sparse_map_cmd} \ -p ${params.wfmash_map_pct_id} \ -n ${wfmash_n_mappings_minus_1} \ + ${wfmash_temp_dir} \ -t ${task.cpus} \ -i $paf \ $fasta $fasta \ @@ -197,6 +206,7 @@ process wfmash { ${wfmash_sparse_map_cmd} \ -p ${params.wfmash_map_pct_id} \ -n ${wfmash_n_mappings_minus_1} \ + ${wfmash_temp_dir} \ -t ${task.cpus} \ $fasta $fasta \ >${f}.${wfmash_prefix}.paf @@ -229,6 +239,7 @@ process seqwish { -k ${params.seqwish_min_match_length} \ -g ${f}${seqwish_prefix}.gfa -P \ -B ${params.seqwish_transclose_batch} \ + ${seqwish_temp_dir} \ -P """ } 
@@ -283,9 +294,6 @@ process smoothxg { else poa_length=\$(echo ${params.smoothxg_poa_length} | cut -f \$i -d,) consensus_params="-V" - if [[ ${params.smoothxg_consensus_spec} != false ]]; then - consensus_params="-C ${f}.cons,${params.smoothxg_consensus_spec}" - fi smoothxg \ -t ${task.cpus} \ -T ${task.cpus} \ @@ -305,7 +313,6 @@ process smoothxg { -d 0 -D 0 \ ${smoothxg_xpoa} ${smoothxg_poa_mode} \ \$maf_params \ - -Q ${params.smoothxg_consensus_prefix} \ \$consensus_params \ -o ${f}${smoothxg_prefix}.gfa fi diff --git a/nextflow.config b/nextflow.config index 980445b5..a7693f2c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -41,11 +41,13 @@ params { wfmash_exclude_delim = false wfmash_chunks = 1 wfmash_only = false + wfmash_temp_dir = null // Seqwish options seqwish_min_match_length = 47 seqwish_transclose_batch = 10000000 seqwish_sparse_factor = 0 + seqwish_temp_dir = null // Smoothxg options smoothxg_num_haps = false @@ -62,7 +64,7 @@ params { // - asm20, --poa-params 1,4,6,2,26,1, ~5% divergence smoothxg_poa_params = null smoothxg_write_maf = false - smoothxg_consensus_spec = false + // smoothxg_consensus_spec = false smoothxg_consensus_prefix = "Consensus_" smoothxg_temp_dir = null smoothxg_keep_intermediate_files = null From 2faea2b4f2d6dda3e0c55b7fd618180de3811b91 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Thu, 8 Sep 2022 11:37:14 +0200 Subject: [PATCH 24/41] update to the most recent PGGB Dockerfile --- Dockerfile | 2 +- main.nf | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0845e437..e1d5a1a0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ghcr.io/pangenome/pggb:20211103204137531f85 +FROM ghcr.io/pangenome/pggb:20220823114803a8b1ea LABEL authors="Simon Heumos, Michael Heuer, Lukas Heumos, Erik Garrison, Andrea Guarracino" \ description="Docker image containing all software requirements for the nf-core/pangenome pipeline" diff --git a/main.nf 
b/main.nf index 5059096c..a9259e32 100644 --- a/main.nf +++ b/main.nf @@ -117,12 +117,16 @@ ${f.getName()}\ """ } fasta = channel.fromPath("${params.input}").map { f -> tuple(make_file_prefix(f), f) } +fai = channel.fromPath("${params.input}.fai").collect() +gzi = channel.fromPath("${params.input}.gzi").collect() process wfmashMap { publishDir "${params.outdir}/wfmash_map", mode: "${params.publish_dir_mode}" input: tuple val(f), path(fasta) + path(fai) + path(gzi) output: tuple val(f), path("${f}.${wfmash_prefix}.map.paf") @@ -162,7 +166,9 @@ process wfmashAlign { publishDir "${params.outdir}/wfmash_align", mode: "${params.publish_dir_mode}" input: - tuple val(f), path(fasta), path(paf) + tuple val(f), path(fasta), path(paf) + path(fai) + path(gzi) output: path("${paf}.align.paf") @@ -191,6 +197,8 @@ process wfmash { input: tuple val(f), path(fasta) + path(fai) + path(gzi) output: tuple val(f), path("${f}.${wfmash_prefix}.paf") @@ -425,7 +433,6 @@ process odgiDraw { """ } -// TODO we can parallelize this for each reference given in ${params.vcf_spec} process vg_deconstruct { publishDir "${params.outdir}/vg_deconstruct", mode: "${params.publish_dir_mode}" @@ -469,11 +476,12 @@ workflow { if (params.wfmash_only) { // TODO Once we changed the way we changed the publish_dir_mode, we have to emit the .paf file as default, else not if (params.wfmash_chunks == 1) { - wfmash(fasta) + wfmash(fasta, fai, gzi) } else { - wfmashMap(fasta) + wfmashMap(fasta, fai, gzi) splitApproxMappingsInChunks(wfmashMap.out) - wfmashAlign(fasta.combine(splitApproxMappingsInChunks.out.flatten())) + // TODO update this once I understood it + wfmashAlign(fasta.combine(splitApproxMappingsInChunks.out.flatten()), fai, gzi) } } else { if (params.paf != false) { @@ -481,12 +489,12 @@ workflow { seqwish(fasta, paf_ch) } else { if (params.wfmash_chunks == 1) { - wfmash(fasta) + wfmash(fasta, fai, gzi) seqwish(fasta, wfmash.out.collect{it[1]}) } else { - wfmashMap(fasta) + wfmashMap(fasta, fai, gzi) 
splitApproxMappingsInChunks(wfmashMap.out) - wfmashAlign(fasta.combine(splitApproxMappingsInChunks.out.flatten())) + wfmashAlign(fasta.combine(splitApproxMappingsInChunks.out.flatten()), fai, gzi) seqwish(fasta, wfmashAlign.out.collect()) } } From 1a4e6c87b32c5118a1be9f9f9e8ae2838ca94053 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Thu, 8 Sep 2022 11:39:32 +0200 Subject: [PATCH 25/41] seqwish_sparse_factor --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index a9259e32..196a2f68 100644 --- a/main.nf +++ b/main.nf @@ -221,7 +221,6 @@ process wfmash { """ } -/// TODO -f ${params.seqwish_sparse_factor} \ process seqwish { publishDir "${params.outdir}/seqwish", mode: "${params.publish_dir_mode}" @@ -245,6 +244,7 @@ process seqwish { -s $fasta \ -p \$input \ -k ${params.seqwish_min_match_length} \ + -f ${params.seqwish_sparse_factor} \ -g ${f}${seqwish_prefix}.gfa -P \ -B ${params.seqwish_transclose_batch} \ ${seqwish_temp_dir} \ From f99cefab6a3831029c70ecb4da12cd6db47e2291 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Thu, 8 Sep 2022 13:44:25 +0200 Subject: [PATCH 26/41] update default values in nextflow.config --- main.nf | 3 +-- nextflow.config | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index 196a2f68..3586e204 100644 --- a/main.nf +++ b/main.nf @@ -301,7 +301,6 @@ process smoothxg { -o smooth.\$i.gfa else poa_length=\$(echo ${params.smoothxg_poa_length} | cut -f \$i -d,) - consensus_params="-V" smoothxg \ -t ${task.cpus} \ -T ${task.cpus} \ @@ -321,7 +320,7 @@ process smoothxg { -d 0 -D 0 \ ${smoothxg_xpoa} ${smoothxg_poa_mode} \ \$maf_params \ - \$consensus_params \ + -V \ -o ${f}${smoothxg_prefix}.gfa fi done diff --git a/nextflow.config b/nextflow.config index a7693f2c..9f689c4e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,7 +31,7 @@ params { // Alignment options wfmash_map_pct_id = 90 - wfmash_segment_length = 3000 + wfmash_segment_length = 
5000 wfmash_block_length = null wfmash_mash_kmer = null wfmash_mash_kmer_thres = null @@ -44,7 +44,7 @@ params { wfmash_temp_dir = null // Seqwish options - seqwish_min_match_length = 47 + seqwish_min_match_length = 19 seqwish_transclose_batch = 10000000 seqwish_sparse_factor = 0 seqwish_temp_dir = null @@ -53,11 +53,11 @@ params { smoothxg_num_haps = false smoothxg_max_path_jump = 0 smoothxg_max_edge_jump = 0 - smoothxg_poa_length = "4001,4507" + smoothxg_poa_length = "700,900,1100" smoothxg_block_id_min = null smoothxg_block_ratio_min = 0 smoothxg_pad_max_depth = 100 - smoothxg_poa_padding = 0.03 + smoothxg_poa_padding = 0.001 // poa param suggestions from minimap2 // - asm5, --poa-params 1,19,39,3,81,1, ~0.1 divergence // - asm10, --poa-params 1,9,16,2,41,1, ~1 divergence From ec46421621dd05d86fb9f855a1673bfbe7fa81cd Mon Sep 17 00:00:00 2001 From: subwaystation Date: Thu, 8 Sep 2022 15:32:54 +0200 Subject: [PATCH 27/41] added vcfwave step in vg_deconstruct --- main.nf | 44 +++++++++++++++++++++++++++++++++++++------- nextflow.config | 2 +- nextflow_schema.json | 4 ++-- 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/main.nf b/main.nf index 3586e204..99107186 100644 --- a/main.nf +++ b/main.nf @@ -439,13 +439,34 @@ process vg_deconstruct { tuple path(graph), val(vcf_spec) output: - path("${graph}.*.vcf") + path("${graph}.*.vcf"), emit: vg_deconstruct_vcf + path("*.vcf.stats"), optional: true, emit: vg_deconstruct_bcftools_stats """ ref=\$(echo "$vcf_spec" | cut -f 1 -d:) delim=\$(echo "$vcf_spec" | cut -f 2 -d:) + pop_length=\$(echo "$vcf_spec" | cut -f 3 -d:) + if [[ -z \$pop_length ]]; then + pop_length=0 + fi vcf="${graph}".\$(echo \$ref | tr '/|' '_').vcf vg deconstruct -P \$ref -H \$delim -e -a -t "${task.cpus}" "${graph}" > \$vcf + bcftools stats \$vcf > \$vcf.stats + + if [[ \$pop_length -gt 0 ]]; then + vcf_decomposed=${graph}.final.\$(echo \$ref | tr '/|' '_').decomposed.vcf + vcf_decomposed_tmp=\$vcf_decomposed.tmp.vcf + bgzip -c -@ 
${task.cpus} \$vcf > \$vcf.gz + vcfbub -l 0 -a \$pop_length --input \$vcf.gz | vcfwave -I 1000 -t ${task.cpus} > \$vcf_decomposed_tmp + + #TODO: to remove when vcfwave will be bug-free + # The TYPE info sometimes is wrong/missing + # There are variants without the ALT allele + bcftools annotate -x INFO/TYPE \$vcf_decomposed_tmp | awk '\$5 != "."' > \$vcf_decomposed + rm \$vcf_decomposed_tmp \$vcf.gz + + bcftools stats \$vcf_decomposed > \$vcf_decomposed.stats +fi """ } @@ -454,6 +475,7 @@ process multiQC { publishDir "${params.outdir}", mode: "${params.publish_dir_mode}" input: + path vg_deconstruct_bcftools_stats path odgi_stats path odgi_viz path odgi_draw @@ -513,20 +535,28 @@ workflow { odgiDrawOut = odgiDraw(odgiLayout.out) } - ch_vg_deconstruct = Channel.empty() ch_vcf_spec = Channel.empty() + vg_deconstruct = Channel.empty() if (params.vcf_spec != false) { ch_vcf_spec = Channel.from(params.vcf_spec).splitCsv().flatten() - ch_vg_deconstruct = vg_deconstruct(gfaffix.out.gfa_norm.combine(ch_vcf_spec)) + vg_deconstruct(gfaffix.out.gfa_norm.combine(ch_vcf_spec)) // TODO add bcftools - } - - multiQC( + multiQC( + vg_deconstruct.out.vg_deconstruct_bcftools_stats.collect().ifEmpty([]), odgiStats.out.collect().ifEmpty([]), odgiVizOut.collect().ifEmpty([]), odgiDrawOut.collect().ifEmpty([]), ch_multiqc_config - ) + ) + } else { + multiQC( + vg_deconstruct.collect().ifEmpty([]), + odgiStats.out.collect().ifEmpty([]), + odgiVizOut.collect().ifEmpty([]), + odgiDrawOut.collect().ifEmpty([]), + ch_multiqc_config + ) + } } } diff --git a/nextflow.config b/nextflow.config index 9f689c4e..f0d14530 100644 --- a/nextflow.config +++ b/nextflow.config @@ -71,7 +71,7 @@ params { smoothxg_run_abpoa = null smoothxg_run_global_poa = null - // vcf_spec = "gi|568815561:#,gi|568815567:#" + // vcf_spec = "gi|568815561:#,gi|568815567:#:10000" vcf_spec = false // Boilerplate options diff --git a/nextflow_schema.json b/nextflow_schema.json index 2beaef92..a728f981 100644 --- 
a/nextflow_schema.json +++ b/nextflow_schema.json @@ -22,7 +22,7 @@ "help_text": "Use this to specify the location of your input FASTA file. For example:\n\n```bash\n--input 'path/to/data/input.fa.gz'\n```\n\n." }, "n_mappings": { - "type": "string", + "type": "integer", "fa_icon": "fas fa-dna", "description": "Number of mappings to retain for each segment." }, @@ -129,7 +129,7 @@ "default": "", "properties": { "smoothxg_num_haps": { - "type": "string", + "type": "integer", "default": "wfmash_n_mappings", "description": "number of haplotypes in the given FASTA" }, From a5379b98c86b856d1aca5a6d7617f9a7ca9eb8c2 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Thu, 8 Sep 2022 16:32:51 +0200 Subject: [PATCH 28/41] check for input indices and create them on the fly if required --- main.nf | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 99107186..d6367432 100644 --- a/main.nf +++ b/main.nf @@ -117,8 +117,24 @@ ${f.getName()}\ """ } fasta = channel.fromPath("${params.input}").map { f -> tuple(make_file_prefix(f), f) } -fai = channel.fromPath("${params.input}.fai").collect() -gzi = channel.fromPath("${params.input}.gzi").collect() +fai_path = file("${params.input}.fai") +gzi_path = file("${params.input}.gzi") + +process samtoolsFaidx { + publishDir "${params.outdir}/samtools_faidx", mode: "${params.publish_dir_mode}" + + input: + tuple val(f), path(fasta) + + output: + path("${f}.fai"), emit: samtools_fai + path("${f}.gzi"), emit: samtools_gzi + + """ + samtools faidx $fasta + """ +} + process wfmashMap { publishDir "${params.outdir}/wfmash_map", mode: "${params.publish_dir_mode}" @@ -494,6 +510,14 @@ process multiQC { workflow { main: + if (!fai_path.exists() || !gzi_path.exists()) { // the assumption is that none of the files exist if only one does not exist + samtoolsFaidx(fasta) + fai = samtoolsFaidx.out.samtools_fai.collect() + gzi = samtoolsFaidx.out.samtools_gzi.collect() + } else { + fai = 
channel.fromPath("${params.input}.fai").collect() + gzi = channel.fromPath("${params.input}.gzi").collect() + } if (params.wfmash_only) { // TODO Once we changed the way we changed the publish_dir_mode, we have to emit the .paf file as default, else not if (params.wfmash_chunks == 1) { From 5d14f7de14153e6d5f43ebf2fb9e127bc91a17d1 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Fri, 9 Sep 2022 15:29:17 +0200 Subject: [PATCH 29/41] help message correction --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index d6367432..da76bd86 100644 --- a/main.nf +++ b/main.nf @@ -30,7 +30,7 @@ if (params.input == null) { if (params.n_mappings == null) { log.info""" - Mandatory argument --wfmash_n_mappings missing! For more details run with --help. + Mandatory argument --n_mappings missing! For more details run with --help. """.stripIndent() From 45eb867ddf55c2ae052641e784294240639188c0 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Mon, 12 Sep 2022 14:31:20 +0200 Subject: [PATCH 30/41] if you happy and you know it clap your hands! 
--- main.nf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index da76bd86..f35d03db 100644 --- a/main.nf +++ b/main.nf @@ -75,18 +75,18 @@ def smoothxg_block_id_min = params.wfmash_map_pct_id / 100.0 // TODO: CHANGE TO LARGE P ONCE WE ARE THERE def smoothxg_poa_params_cmd = "" if (params.smoothxg_poa_params == null) { - smoothxg_poa_params = "-p 1,19,39,3,81,1" + smoothxg_poa_params = "-P 1,19,39,3,81,1" } else { if (params.smoothxg_poa_params == "asm5") { - smoothxg_poa_params = "-p 1,19,39,3,81,1" + smoothxg_poa_params = "-P 1,19,39,3,81,1" } else if (params.smoothxg_poa_params == "asm10") { - smoothxg_poa_params = "-p 1,9,16,2,41,1" + smoothxg_poa_params = "-P 1,9,16,2,41,1" } else if (params.smoothxg_poa_params == "asm15") { - smoothxg_poa_params = "-p 1,7,11,2,33,1" + smoothxg_poa_params = "-P 1,7,11,2,33,1" } else if (params.smoothxg_poa_params == "asm20") { - smoothxg_poa_params = "-p 1,4,6,2,26,1"B + smoothxg_poa_params = "-P 1,4,6,2,26,1"B } else { - smoothxg_poa_params = "-p${params.smoothxg_poa_params}" + smoothxg_poa_params = "-P${params.smoothxg_poa_params}" } } def smoothxg_poa_params_display = smoothxg_poa_params.replaceAll(/,/, "_") From 2e022b5ee8f2d85237b2a57db6ad9a69ba78a4d5 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Mon, 12 Sep 2022 14:32:39 +0200 Subject: [PATCH 31/41] make the CI happy --- .github/workflows/ci.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a85bf234..60c30f2f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,10 +55,10 @@ jobs: # Remember that you can parallelise this by using strategy.matrix # We also test basic visualization and reporting options here run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --no_viz --no_layout - nextflow run ${GITHUB_WORKSPACE} -profile test,docker 
--smoothxg consensus_spec 10,100,1000 - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --vcf_spec "gi|568815561:#,gi|568815567:#" - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --smoothxg_write_maf - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --wfmash_chunks 2 - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --wfmash_only + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_mappings 11 + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_mappings 11 --no_viz --no_layout + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_mappings 11 --smoothxg consensus_spec 10,100,1000 + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_mappings 11 --vcf_spec "gi|568815561:#,gi|568815567:#" + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_mappings 11 --smoothxg_write_maf + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_mappings 11 --wfmash_chunks 2 + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --n_mappings 11 --wfmash_only From d49b11f62c01a1a38fcf3fa1f98c0049df6b33a5 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Mon, 12 Sep 2022 15:16:10 +0200 Subject: [PATCH 32/41] bring schema up to date --- nextflow.config | 22 ++++++++--------- nextflow_schema.json | 57 +++++++++++++++++++++++++++++++++----------- 2 files changed, 54 insertions(+), 25 deletions(-) diff --git a/nextflow.config b/nextflow.config index f0d14530..851f7b3f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,7 +17,7 @@ params { n_mappings = null // default could be the number of input sequences, but then I would have to add another process // Optional PAF input - paf = false + paf = null // Output options outdir = "./results" @@ -38,7 +38,7 @@ params { wfmash_sparse_map = null wfmash_merge_segments = false wfmash_no_splits = false - wfmash_exclude_delim = false + wfmash_exclude_delim = null wfmash_chunks = 1 wfmash_only = false wfmash_temp_dir = null @@ -50,7 +50,7 @@ params { seqwish_temp_dir = null // 
Smoothxg options - smoothxg_num_haps = false + smoothxg_num_haps = null smoothxg_max_path_jump = 0 smoothxg_max_edge_jump = 0 smoothxg_poa_length = "700,900,1100" @@ -72,12 +72,12 @@ params { smoothxg_run_global_poa = null // vcf_spec = "gi|568815561:#,gi|568815567:#:10000" - vcf_spec = false + vcf_spec = null // Boilerplate options - multiqc_config = false - email = false - email_on_fail = false + multiqc_config = null + email = null + email_on_fail = null max_multiqc_email_size = 25.MB plaintext_email = false monochrome_logs = false @@ -87,10 +87,10 @@ params { // Config options custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - hostnames = false - config_profile_description = false - config_profile_contact = false - config_profile_url = false + hostnames = null + config_profile_description = null + config_profile_contact = null + config_profile_url = null validate_params = true show_hidden_params = false schema_ignore_params = 'genomes,input_paths' diff --git a/nextflow_schema.json b/nextflow_schema.json index a728f981..9e1de71f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -60,7 +60,7 @@ }, "wfmash_segment_length": { "type": "integer", - "default": 3000, + "default": 5000, "description": "segment length for mapping", "fa_icon": "fas fa-align-center" }, @@ -71,10 +71,13 @@ }, "wfmash_mash_kmer": { "type": "integer", - "default": 16, "description": "kmer size for mashmap", "fa_icon": "fas fa-align-center" }, + "wfmash_mash_kmer_thres": { + "type": "number", + "description": "ignore the top % most-frequent kmers [default: 0.001]" + }, "wfmash_merge_segments": { "type": "boolean", "description": "merge successive mappings", @@ -98,6 +101,14 @@ "wfmash_only": { "type": "boolean", "description": "If this parameter is set, only the wfmash alignment step of the pipeline is executed. This option is offered for users who want to use wfmash on a cluster." 
+ }, + "wfmash_sparse_map": { + "type": "string", + "description": "keep this fraction of mappings ('auto' for giant component heuristic) [default: 1.0]" + }, + "wfmash_temp_dir": { + "type": "string", + "description": "directory for temporary files" } } }, @@ -109,7 +120,7 @@ "properties": { "seqwish_min_match_length": { "type": "integer", - "default": 47, + "default": 19, "description": "Ignore exact matches below this length.", "fa_icon": "fas fa-project-diagram" }, @@ -118,6 +129,14 @@ "default": 10000000, "description": "Number of bp to use for transitive closure batch.", "fa_icon": "fas fa-project-diagram" + }, + "seqwish_sparse_factor": { + "type": "integer", + "default": 0, + "description": "keep this randomly selected fraction of input matches [default: no sparsification]" + }, + "seqwish_temp_dir": { + "type": "string" } }, "fa_icon": "fas fa-project-diagram" @@ -130,7 +149,7 @@ "properties": { "smoothxg_num_haps": { "type": "integer", - "default": "wfmash_n_mappings", + "default": 0, "description": "number of haplotypes in the given FASTA" }, "smoothxg_max_path_jump": { @@ -145,14 +164,9 @@ }, "smoothxg_poa_length": { "type": "string", - "default": "4001,4507", + "default": "700,900,1100", "description": "maximum sequence length to put into POA, can be a comma-separated list; for each element smoothxg will be executed once" }, - "smoothxg_consensus_spec": { - "type": "string", - "description": "Consensus graph specification: write the consensus graph to BASENAME.cons_[spec].gfa; where each spec contains at least a min_len parameter (which defines the length of divergences from consensus paths to preserve in the output), optionally a file containing reference paths to preserve in the output, a flag (y/n) indicating whether we should also use the POA consensus paths, a minimum coverage of consensus paths to retain (min_cov), and a maximum allele length (max_len, defaults to 1e6); implies -a; example: 
cons,100,1000:refs1.txt:n,1000:refs2.txt:y:2.3:1000000,10000.", - "fa_icon": "fab fa-superpowers" - }, "smoothxg_consensus_prefix": { "type": "string", "default": "Consensus_", @@ -166,8 +180,7 @@ "smoothxg_block_id_min": { "type": "number", "description": "Split blocks into groups connected by this identity threshold.", - "fa_icon": "fas fa-percentage", - "default": 0.95 + "fa_icon": "fas fa-percentage" }, "smoothxg_pad_max_depth": { "type": "integer", @@ -176,7 +189,7 @@ }, "smoothxg_poa_padding": { "type": "number", - "default": 0.03, + "default": 0.001, "description": "pad each end of each sequence in POA with N*(longest_poas_seq) bp" }, "smoothxg_poa_params": { @@ -185,9 +198,25 @@ "description": "Score parameters for POA in the form of match,mismatch,gap1,ext1,gap2,ext2.", "fa_icon": "fab fa-superpowers" }, + "smoothxg_run_abpoa": { + "type": "boolean", + "description": "run abPOA [default: SPOA]" + }, + "smoothxg_run_global_poa": { + "type": "string", + "default": null + }, "smoothxg_write_maf": { "type": "boolean", "description": "write MAF output representing merged POA blocks" + }, + "smoothxg_keep_intermediate_files": { + "type": "string", + "description": "keep intermediate graphs during smoothxg step" + }, + "smoothxg_temp_dir": { + "type": "string", + "description": "directory for temporary files" } }, "fa_icon": "fas fa-project-diagram" @@ -423,4 +452,4 @@ "$ref": "#/definitions/institutional_config_options" } ] -} \ No newline at end of file +} From 9173fc5d445229feabfb8bc29aeee4d945ae14fb Mon Sep 17 00:00:00 2001 From: subwaystation Date: Mon, 12 Sep 2022 15:16:24 +0200 Subject: [PATCH 33/41] bring schema up to date --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 9e1de71f..ea0a02de 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -174,7 +174,7 @@ }, "smoothxg_block_ratio_min": { "type": "number", - "default": 0, + "default": 0.0, 
"description": "minimum small / large length ratio to cluster in a block" }, "smoothxg_block_id_min": { @@ -204,7 +204,7 @@ }, "smoothxg_run_global_poa": { "type": "string", - "default": null + "default": "None" }, "smoothxg_write_maf": { "type": "boolean", From a2247ba58e389a8c1cb66c2f341e24885697febc Mon Sep 17 00:00:00 2001 From: subwaystation Date: Mon, 12 Sep 2022 15:35:21 +0200 Subject: [PATCH 34/41] update help message --- main.nf | 49 ++++++++++++++++++++++---------------------- nextflow_schema.json | 13 ++++++------ 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/main.nf b/main.nf index f35d03db..be6aa648 100644 --- a/main.nf +++ b/main.nf @@ -610,58 +610,57 @@ def helpMessage() { nextflow run nf-core/pangenome --input 'data/input.fa.gz' -profile docker Mandatory arguments: - --input [file] Path to input FASTA (must be surrounded with quotes) + --input [file] Path to bgzipped input FASTA (must be surrounded with quotes) + --n_mappings [int] Number of mappings to retain for each segment. -profile [str] Configuration profile to use. Can use multiple (comma separated) Available: conda, docker, singularity, test, awsbatch, and more + PAF options: + --paf [file] Optional input to skip the all vs. all alignment wfmash phase directly starting with seqwish.
Wfmash options: --wfmash_map_pct_id [n] percent identity in the wfmash mashmap step [default: 90] - --wfmash_n_mappings [n] number of secondary mappings to retain in 'map' filter mode [default: 10] - --wfmash_segment_length [n] segment length for mapping [default: 3000] - --wfmash_block_length [n] minimum block length filter for mapping [default: 3 * wfmash_segment_length] - --wfmash_mash_kmer [n] kmer size for mashmap [default: 16] + --wfmash_segment_length [n] segment length for mapping [default: 5000] + --wfmash_block_length [n] minimum block length filter for mapping + --wfmash_mash_kmer [n] kmer size for mashmap + --wfmash_mash_kmer_thres [n] ignore the top % most-frequent kmers [default: 0.001] --wfmash_merge_segments merge successive mappings [default: OFF] --wfmash_no_splits disable splitting of input sequences during mapping [default: OFF] --wfmash_exclude--delim [c] skip mappings between sequences with the same name prefix before the given delimiter character [default: all-vs-all and !self] --wfmash_chunks The number of files to generate from the approximate wfmash mappings to scale across a whole cluster. It is recommended to set this to the number of available nodes. If only one machine is available, leave it at 1. [default: 1] --wfmash_only If this parameter is set, only the wfmash alignment step of the pipeline is executed. This option is offered for users who want to use wfmash on a cluster. 
[default: OFF] + --wfmash_sparse_map keep this fraction of mappings ('auto' for giant component heuristic) [default: 1.0] + --wfmash_temp_dir [str] directory for temporary files Seqwish options: - --seqwish_min_match_length [n] ignore exact matches below this length [default: 47] + --seqwish_min_match_length [n] ignore exact matches below this length [default: 19] --seqwish_transclose_batch [n] number of bp to use for transitive closure batch [default: 10000000] + --seqwish_sparse_factor [n] keep this randomly selected fraction of input matches [default: no sparsification] + --seqwish_temp_dir [str] directory for temporary files Smoothxg options: --smoothxg_num_haps [n] number of haplotypes in the given FASTA [default: wfmash_n_mappings] --smoothxg_max_path_jump [n] maximum path jump to include in block [default: 0] --smoothxg_max_edge_jump [n] maximum edge jump before breaking [default: 0] - --smoothxg_max_poa_length [n] maximum sequence length to put into POA, can be a comma-separated list; - for each element smoothxg will be executed once [default: 4001,4507] - --smoothxg_consensus_spec [str] consensus graph specification: write the consensus graph to - BASENAME.cons_[spec].gfa; where each spec contains at least a min_len parameter - (which defines the length of divergences from consensus paths to preserve in the - output), optionally a file containing reference paths to preserve in the output, - a flag (y/n) indicating whether we should also use the POA consensus paths, a - minimum coverage of consensus paths to retain (min_cov), and a maximum allele - length (max_len, defaults to 1e6); implies -a; example: - cons,100,1000:refs1.txt:n,1000:refs2.txt:y:2.3:1000000,10000 - [default: OFF] + --smoothxg_poa_length [n] maximum sequence length to put into POA, can be a comma-separated list; + for each element smoothxg will be executed once [default: 700,900,1100] --smoothxg_consensus_prefix [n] use this prefix for consensus path names [default: Consensus_] 
--smoothxg_block_ratio_min [n] minimum small / large length ratio to cluster in a block [default: 0.0] - --smoothxg_block_id_min [n] split blocks into groups connected by this identity threshold [default: 0.95] + --smoothxg_block_id_min [n] split blocks into groups connected by this identity threshold [default: wfmash_map_pct_id / 100.0] --smoothxg_pad_max_depth [n] path depth at which we don't pad the POA problem [default: 100] --smoothxg_poa_padding [n] pad each end of each sequence in POA with N*(longest_poas_seq) bp [default: 0.03] - --smoothxg_poa_params [str] score parameters for POA in the form of match,mismatch,gap1,ext1,gap2,ext2 - [default: 1,19,39,3,81,1] + --smoothxg_poa_params [str] score parameters for POA in the form of match,mismatch,gap1,ext1,gap2,ext2; may also be given as presets: asm5, asm10, asm15, asm20 [default: 1,19,39,3,81,1 = asm5] + --smoothxg_run_abpoa run abPOA [default: SPOA] + --smoothxg_run_global_poa run the POA in global mode [default: local mode] --smoothxg_write_maf [n] write MAF output representing merged POA blocks [default: OFF] + --smoothxg_keep_intermediate_files keep intermediate graphs during smoothxg step + --smoothxg_temp_dir [str] directory for temporary files Visualization options: - --viz Generate 1D and 2D visualisations of the built graphs [default: OFF] + --no_viz Set if you don't want the 1D visualizations. + --no_layout Set if you don't want the computationally expensive 2D layout. VCF options: - --vcf_spec specify a set of VCFs to produce with SPEC = REF:DELIM[,REF:DELIM]* - the paths matching ^REF are used as a reference, while the sample haplotypes - are derived from path names, e.g. when DELIM=# and with '-V chm13:#', - a path named HG002#1#ctg would be assigned to sample HG002 phase 1 [default: OFF] + --vcf_spec specify a set of VCFs to produce with SPEC = REF:DELIM[:LEN][,REF:DELIM[:LEN]]* the paths matching ^REF are used as a reference, while the sample haplotypes are derived from path names, e.g.
when DELIM=# and with '-V chm13:#', a path named HG002#1#ctg would be assigned to sample HG002 phase 1. If LEN is specified and greater than 0, the VCFs are decomposed, filtering sites whose max allele length is greater than LEN. [default: off] Other options: --outdir [file] The output directory where the results will be saved [default: ./results] diff --git a/nextflow_schema.json b/nextflow_schema.json index ea0a02de..87d04218 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -136,7 +136,8 @@ "description": "keep this randomly selected fraction of input matches [default: no sparsification]" }, "seqwish_temp_dir": { - "type": "string" + "type": "string", + "description": "directory for temporary files" } }, "fa_icon": "fas fa-project-diagram" @@ -174,8 +175,8 @@ }, "smoothxg_block_ratio_min": { "type": "number", - "default": 0.0, - "description": "minimum small / large length ratio to cluster in a block" + "description": "minimum small / large length ratio to cluster in a block", + "default": 0 }, "smoothxg_block_id_min": { "type": "number", @@ -195,7 +196,7 @@ "smoothxg_poa_params": { "type": "string", "default": "1,19,39,3,81,1", - "description": "Score parameters for POA in the form of match,mismatch,gap1,ext1,gap2,ext2.", + "description": "score parameters for POA in the form of match,mismatch,gap1,ext1,gap2,ext2; may also be given as presets: asm5, asm10, asm15, asm20 [default: 1,19,39,3,81,1 = asm5]", "fa_icon": "fab fa-superpowers" }, "smoothxg_run_abpoa": { @@ -204,7 +205,7 @@ }, "smoothxg_run_global_poa": { "type": "string", - "default": "None" + "description": "run the POA in global mode [default: local mode]" }, "smoothxg_write_maf": { "type": "boolean", @@ -229,7 +230,7 @@ "properties": { "vcf_spec": { "type": "string", - "description": "specify a set of VCFs to produce with SPEC = REF:DELIM[,REF:DELIM]* the paths matching ^REF are used as a reference, while the sample haplotypes are derived from path names, e.g.
when DELIM=# and with '-V chm13:#', a path named HG002#1#ctg would be assigned to sample HG002 phase 1" + "description": "specify a set of VCFs to produce with SPEC = REF:DELIM[:LEN][,REF:DELIM[:LEN]]* the paths matching ^REF are used as a reference, while the sample haplotypes are derived from path names, e.g. when DELIM=# and with '-V chm13:#', a path named HG002#1#ctg would be assigned to sample HG002 phase 1. If LEN is specified and greater than 0, the VCFs are decomposed, filtering sites whose max allele length is greater than LEN. [default: off]" } } }, From 44e5943eab15e4d8bdfc5044bf4be4c394249055 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Mon, 12 Sep 2022 16:03:04 +0200 Subject: [PATCH 35/41] another bug fix --- main.nf | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index be6aa648..6f5700c7 100644 --- a/main.nf +++ b/main.nf @@ -92,7 +92,10 @@ if (params.smoothxg_poa_params == null) { def smoothxg_poa_params_display = smoothxg_poa_params.replaceAll(/,/, "_") def smoothxg_temp_dir = params.smoothxg_temp_dir ? "-b${params.smoothxg_temp_dir}" : "" def smoothxg_keep_intermediate_files = params.smoothxg_keep_intermediate_files ? "-K" : "" -def smoothxg_xpoa = params.smoothxg_run_abpoa ? "" : "-S" +def smoothxg_xpoa = "-S" +if (params.smoothxg_run_abpoa != null) { + smoothxg_xpoa = "" +} def smoothxg_poa_mode = params.smoothxg_run_global_poa ?
"-Z" : "" // disabling consensus graph mode def smoothxg_consensus_spec = false @@ -312,7 +315,8 @@ process smoothxg { -O ${params.smoothxg_poa_padding} \ -Y \$(echo "${params.smoothxg_pad_max_depth} * ${n_haps}" | bc) \ -d 0 -D 0 \ - ${smoothxg_xpoa} ${smoothxg_poa_mode} \ + ${smoothxg_xpoa} \ + ${smoothxg_poa_mode} \ -V \ -o smooth.\$i.gfa else @@ -334,7 +338,8 @@ process smoothxg { -O ${params.smoothxg_poa_padding} \ -Y \$(echo "${params.smoothxg_pad_max_depth} * ${n_haps}" | bc) \ -d 0 -D 0 \ - ${smoothxg_xpoa} ${smoothxg_poa_mode} \ + ${smoothxg_xpoa} \ + ${smoothxg_poa_mode} \ \$maf_params \ -V \ -o ${f}${smoothxg_prefix}.gfa @@ -529,7 +534,7 @@ workflow { wfmashAlign(fasta.combine(splitApproxMappingsInChunks.out.flatten()), fai, gzi) } } else { - if (params.paf != false) { + if (params.paf != null) { paf_ch = Channel.fromPath(params.paf) seqwish(fasta, paf_ch) } else { @@ -561,7 +566,7 @@ workflow { ch_vcf_spec = Channel.empty() vg_deconstruct = Channel.empty() - if (params.vcf_spec != false) { + if (params.vcf_spec != null) { ch_vcf_spec = Channel.from(params.vcf_spec).splitCsv().flatten() vg_deconstruct(gfaffix.out.gfa_norm.combine(ch_vcf_spec)) // TODO add bcftools From d74f0dc3c5c7b46f0e8d81038edba621a2c6aeb5 Mon Sep 17 00:00:00 2001 From: Simon Heumos Date: Mon, 12 Sep 2022 16:36:43 +0200 Subject: [PATCH 36/41] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6b370dda..702bc621 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool 4. 
Test the workflow on a minimal dataset ```bash - nextflow run nf-core/pangenome -profile test, + nextflow run nf-core/pangenome -profile test, --n_mappings 11 ``` [//]: # (```bash nextflow run nf-core/pangenome -profile test,```) From e36231632e6e12dab690c88c3182ca22544bf246 Mon Sep 17 00:00:00 2001 From: Simon Heumos Date: Mon, 12 Sep 2022 16:37:55 +0200 Subject: [PATCH 37/41] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 702bc621..957fd16d 100644 --- a/README.md +++ b/README.md @@ -45,10 +45,10 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool 5. Start running your own analysis! ```bash - nextflow run nf-core/pangenome -profile --input "input.fa.gz" + nextflow run nf-core/pangenome -profile --input "input.fa.gz" --n_mappings 11 ``` -See [usage docs](https://nf-co.re/pangenome/usage) for all of the available options when running the pipeline. +Be careful, the input FASTA must have been compressed with [bgzip](http://www.htslib.org/doc/bgzip.html). See [usage docs](https://nf-co.re/pangenome/usage) for all of the available options when running the pipeline. 
## Pipeline Summary From ca551b12e351df7dc725156d12d1a2bef9678c6f Mon Sep 17 00:00:00 2001 From: subwaystation Date: Wed, 14 Sep 2022 13:02:34 +0200 Subject: [PATCH 38/41] seqwish_groovey_magic --- main.nf | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/main.nf b/main.nf index 6f5700c7..acd46e43 100644 --- a/main.nf +++ b/main.nf @@ -190,7 +190,7 @@ process wfmashAlign { path(gzi) output: - path("${paf}.align.paf") + path("${paf}.align.paf"), emit: paf """ wfmash ${wfmash_exclude_cmd} \ @@ -245,23 +245,18 @@ process seqwish { input: tuple val(f), path(fasta) - path(pafs) + path(paf) output: tuple val(f), path("${f}${seqwish_prefix}.gfa") script: + def input = paf.join(',') """ - if [[ \$(ls *.paf | wc -l) == 1 ]]; then - input=$pafs - else - input=\$(ls *.paf | tr '\\\n' ',') - input=\${input::-1} - fi seqwish \ -t ${task.cpus} \ -s $fasta \ - -p \$input \ + -p $input \ -k ${params.seqwish_min_match_length} \ -f ${params.seqwish_sparse_factor} \ -g ${f}${seqwish_prefix}.gfa -P \ From 59e348f7eb69385f9893014e18a246c1168677f0 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Fri, 16 Sep 2022 14:22:23 +0200 Subject: [PATCH 39/41] odgi sort/layout power up --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index e1d5a1a0..417278cb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ghcr.io/pangenome/pggb:20220823114803a8b1ea +FROM ghcr.io/pangenome/pggb:20220915184416303dfa LABEL authors="Simon Heumos, Michael Heuer, Lukas Heumos, Erik Garrison, Andrea Guarracino" \ description="Docker image containing all software requirements for the nf-core/pangenome pipeline" From d45e6d9994dd3e92178e4473eb1ebf115c6cd4ba Mon Sep 17 00:00:00 2001 From: subwaystation Date: Thu, 22 Sep 2022 09:26:06 +0200 Subject: [PATCH 40/41] update Dockerfile to use the most recent biwflambda update --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile 
index 417278cb..61ac8872 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ghcr.io/pangenome/pggb:20220915184416303dfa +FROM ghcr.io/pangenome/pggb:2022092009132668cb88 LABEL authors="Simon Heumos, Michael Heuer, Lukas Heumos, Erik Garrison, Andrea Guarracino" \ description="Docker image containing all software requirements for the nf-core/pangenome pipeline" From 985df215f350ffa58130218dbecc97d441211425 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Fri, 23 Sep 2022 09:39:48 +0200 Subject: [PATCH 41/41] diagnostic vizzz --- Dockerfile | 2 +- assets/multiqc_config.yaml | 6 ++++++ main.nf | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 61ac8872..9b6af64a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ghcr.io/pangenome/pggb:2022092009132668cb88 +FROM ghcr.io/pangenome/pggb:2022092217355879ede8 LABEL authors="Simon Heumos, Michael Heuer, Lukas Heumos, Erik Garrison, Andrea Guarracino" \ description="Docker image containing all software requirements for the nf-core/pangenome pipeline" diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 779d97b4..c2d56baa 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -1,5 +1,8 @@ # Report section config for nice titles and descriptions custom_data: + odgi_O: + section_name: ODGI Compressed 1D visualization + description: This image shows a 1D rendering of the built pangenome graph. The graph nodes are arranged from left to right, forming the pangenome sequence. Summarization of path coverage across all paths. A heatmap color-coding from https://colorbrewer2.org/#type=diverging&scheme=RdBu&n=11 is used. Dark blue means highest coverage. Dark red means lowest coverage. The path names are placed on the left. The black lines under the paths are the links, which represent the graph topology. 
odgi_viz: section_name: ODGI 1D visualization description: This image shows a 1D rendering of the built pangenome graph. The graph nodes are arranged from left to right, forming the pangenome sequence. The colored bars represent the paths versus the pangenome sequence in a binary matrix. The path names are placed on the left. The black lines under the paths are the links, which represent the graph topology. @@ -18,6 +21,8 @@ custom_data: # Custom search patterns to find the image outputs sp: + odgi_O: + fn: "*O_multiqc.png" odgi_draw: fn: "*draw_multiqc.png" odgi_viz: @@ -40,6 +45,7 @@ module_order: # Set the order that the custom content plots should come in custom_content: order: + - odgi_O - odgi_viz - odgi_viz_pos - odgi_viz_inv diff --git a/main.nf b/main.nf index acd46e43..6b57f499 100644 --- a/main.nf +++ b/main.nf @@ -405,6 +405,7 @@ process odgiViz { odgi viz -i $graph -o ${graph}.viz_pos_multiqc.png -x 1500 -y 500 -a 10 -I ${params.smoothxg_consensus_prefix} -u -d odgi viz -i $graph -o ${graph}.viz_depth_multiqc.png -x 1500 -y 500 -a 10 -I ${params.smoothxg_consensus_prefix} -m odgi viz -i $graph -o ${graph}.viz_inv_multiqc.png -x 1500 -y 500 -a 10 -I ${params.smoothxg_consensus_prefix} -z + odgi viz -i $graph -o ${graph}.viz_O_multiqc.png -x 1500 -y 500 -a 10 -I ${params.smoothxg_consensus_prefix} -O """ }