From 1dda68a6a68e1a0d6b08c6d2aa03a7876337c49d Mon Sep 17 00:00:00 2001
From: Dan Fornika <dfornika@gmail.com>
Date: Fri, 31 May 2024 20:18:12 -0700
Subject: [PATCH] fix producing provenance (#23)

---
 README.md       |  3 ++-
 main.nf         | 15 ++++++---------
 nextflow.config |  1 +
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 4eb3053..86ed7a2 100644
--- a/README.md
+++ b/README.md
@@ -69,13 +69,14 @@ nextflow run BCCDC-PHL/alignment-variants \
 
 ...which would produce files named: `test_fastp.csv` and `test_qualimap_bamqc.csv`.
 
-If long reads are available, they can be included with the `--fastq_input_long` flag:
+If long reads are available, provide them with `--fastq_input_long` and also pass the `--align_long_reads` flag, which is off by default:
 
 ```
 nextflow run BCCDC-PHL/alignment-variants \
   --ref /path/to/ref.fa \
   --fastq_input /path/to/fastqs \
   --fastq_input_long /path/to/long_fastqs \
+  --align_long_reads \
   --outdir /path/to/outputs
 ```
 
diff --git a/main.nf b/main.nf
index f9f3c58..fbc0b06 100644
--- a/main.nf
+++ b/main.nf
@@ -45,7 +45,6 @@ workflow {
 	ch_nanopore_fastq = Channel.fromPath( params.fastq_nanopore_search_path ).map{ it -> [it.getName().split('_')[0], it] }.unique{ it -> it[0] }
     }
 
-    
 
     main:
     ch_illumina_sample_ids = ch_illumina_fastq.map{ it -> it[0] }
@@ -113,17 +112,15 @@ workflow {
     // [sample_id, [provenance_file_1.yml, provenance_file_2.yml, provenance_file_3.yml...]]
     // At each step, we add another provenance file to the list using the << operator...
     // ...and then concatenate them all together in the 'collect_provenance' process.
-    ch_provenance = ch_provenance.combine(ch_pipeline_provenance).map{ it ->        [it[0], [it[1]]] }
-    ch_provenance = ch_provenance.join(hash_ref.out.provenance).map{ it ->          [it[0], it[1] << it[2]] }
+    ch_provenance = ch_provenance.combine(ch_pipeline_provenance).map{ it ->            [it[0], [it[1]]] }
+    ch_provenance = ch_provenance.join(hash_ref.out.provenance).map{ it ->              [it[0], it[1] << it[2]] }
     ch_provenance = ch_provenance.join(hash_fastq_short.out.provenance).map{ it ->  [it[0], it[1] << it[2]] }
-    // Check the length of the collected nanopore sample IDs to determine if we need to collect nanopore provenance
-    // I need to pull the value out of the channel and convert it to a list to get the length
-    ch_illumina_sample_ids.toList().size().view()
-    if (false) {
+    ch_provenance = ch_provenance.join(fastp.out.provenance).map{ it ->             [it[0], it[1] << it[2]] }
+    if (params.align_long_reads) {
 	ch_provenance = ch_provenance.join(hash_fastq_long.out.provenance).map{ it ->   [it[0], it[1] << it[2]] }
     }
-    ch_provenance = ch_provenance.join(bwa_mem.out.provenance).map{ it ->           [it[0], it[1] << it[2]] }
-    if (false) {
+    ch_provenance = ch_provenance.join(bwa_mem.out.provenance).map{ it ->               [it[0], it[1] << it[2]] }
+    if (params.align_long_reads) {
 	ch_provenance = ch_provenance.join(nanoq_pre_filter.out.provenance).map{ it ->  [it[0], it[1] << it[2]] }
 	ch_provenance = ch_provenance.join(filtlong.out.provenance).map{ it ->          [it[0], it[1] << it[2]] }
 	ch_provenance = ch_provenance.join(nanoq_post_filter.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
diff --git a/nextflow.config b/nextflow.config
index 4641c0a..4b71980 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -29,6 +29,7 @@ params {
     coverage_by_depth_limit = 500
     qualimap_memory = '4G'
     skip_alignment_cleaning = false
+    align_long_reads = false
     ref = 'NO_FILE'
     outdir = 'results'
     collect_outputs = false