From 1dda68a6a68e1a0d6b08c6d2aa03a7876337c49d Mon Sep 17 00:00:00 2001 From: Dan Fornika Date: Fri, 31 May 2024 20:18:12 -0700 Subject: [PATCH] fix producing provenance (#23) --- README.md | 3 ++- main.nf | 15 ++++++--------- nextflow.config | 1 + 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 4eb3053..86ed7a2 100644 --- a/README.md +++ b/README.md @@ -69,13 +69,14 @@ nextflow run BCCDC-PHL/alignment-variants \ ...which would produce files named: `test_fastp.csv` and `test_qualimap_bamqc.csv`. -If long reads are available, they can be included with the `--fastq_input_long` flag: +If long reads are available, they can be included with the `--fastq_input_long` and `--align_long_reads` flags: ``` nextflow run BCCDC-PHL/alignment-variants \ --ref /path/to/ref.fa \ --fastq_input /path/to/fastqs \ --fastq_input_long /path/to/long_fastqs \ + --align_long_reads \ --outdir /path/to/outputs ``` diff --git a/main.nf b/main.nf index f9f3c58..fbc0b06 100644 --- a/main.nf +++ b/main.nf @@ -45,7 +45,6 @@ workflow { ch_nanopore_fastq = Channel.fromPath( params.fastq_nanopore_search_path ).map{ it -> [it.getName().split('_')[0], it] }.unique{ it -> it[0] } } - main: ch_illumina_sample_ids = ch_illumina_fastq.map{ it -> it[0] } @@ -113,17 +112,15 @@ workflow { // [sample_id, [provenance_file_1.yml, provenance_file_2.yml, provenance_file_3.yml...]] // At each step, we add another provenance file to the list using the << operator... // ...and then concatenate them all together in the 'collect_provenance' process. - ch_provenance = ch_provenance.combine(ch_pipeline_provenance).map{ it -> [it[0], [it[1]]] } - ch_provenance = ch_provenance.join(hash_ref.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + ch_provenance = ch_provenance.combine(ch_pipeline_provenance).map{ it -> [it[0], [it[1]]] } + ch_provenance = ch_provenance.join(hash_ref.out.provenance).map{ it -> [it[0], it[1] << it[2]] } ch_provenance = ch_provenance.join(hash_fastq_short.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - // Check the length of the collected nanopore sample IDs to determine if we need to collect nanopore provenance - // I need to pull the value out of the channel and convert it to a list to get the length - ch_illumina_sample_ids.toList().size().view() - if (false) { + ch_provenance = ch_provenance.join(fastp.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + if (params.align_long_reads) { ch_provenance = ch_provenance.join(hash_fastq_long.out.provenance).map{ it -> [it[0], it[1] << it[2]] } } - ch_provenance = ch_provenance.join(bwa_mem.out.provenance).map{ it -> [it[0], it[1] << it[2]] } - if (false) { + ch_provenance = ch_provenance.join(bwa_mem.out.provenance).map{ it -> [it[0], it[1] << it[2]] } + if (params.align_long_reads) { ch_provenance = ch_provenance.join(nanoq_pre_filter.out.provenance).map{ it -> [it[0], it[1] << it[2]] } ch_provenance = ch_provenance.join(filtlong.out.provenance).map{ it -> [it[0], it[1] << it[2]] } ch_provenance = ch_provenance.join(nanoq_post_filter.out.provenance).map{ it -> [it[0], it[1] << it[2]] } diff --git a/nextflow.config b/nextflow.config index 4641c0a..4b71980 100644 --- a/nextflow.config +++ b/nextflow.config @@ -29,6 +29,7 @@ params { coverage_by_depth_limit = 500 qualimap_memory = '4G' skip_alignment_cleaning = false + align_long_reads = false ref = 'NO_FILE' outdir = 'results' collect_outputs = false