Skip to content

Commit

Permalink
fastuniq is now an option
Browse files Browse the repository at this point in the history
  • Loading branch information
agalitsyna committed Feb 22, 2022
1 parent d98f702 commit 058e85d
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 23 deletions.
4 changes: 2 additions & 2 deletions params-redc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ protocol:
dedup_crop: 50
trimmomatic_params: 'SLIDINGWINDOW:5:26 MINLEN:0'
merge_groups: true
dedup_method: fastuniq
run_fastuniq: true

# Scheme of oligos mapping:
oligos:
Expand Down Expand Up @@ -221,7 +221,7 @@ filters:
RNADNASamePos: '(chrom_dna==chrom_rna2) & (strand_dna!=strand_rna2) & (np.abs(start_dna-end_rna2)<=1)'

# Filter 1: Read is not PCR duplication
F1_notDup: '(isUnique)'
F1_notDup: '(isUnique)' # Note that this filter has no effect if protocol.run_fastuniq is false, since the isUnique column is only produced by the fastuniq dedup step
# Filter 2: there is a bridge in the read, with last AG letters, it is not cut by the quality filter
F2_goodBridge: '(start_hit_bridge_forward_R1<9999) & oligo_ga_at_35'
# Filter 3: Reverse read starts with GGG
Expand Down
43 changes: 23 additions & 20 deletions redc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ def protocol = params.getOrDefault('protocol', [:])
def chunksize = protocol.getOrDefault('chunksize', 100000000000)
def check_restriction = protocol.getOrDefault('check_restriction', false)
def RenzymesPreloaded = Genome.getOrDefault('restricted', [:])
def dedupMethod = protocol.getOrDefault('dedup_method', 'fastuniq')
def runFastuniq = protocol.getOrDefault('run_fastuniq', false)

// Check number of oligos, short oligos and fragments:
nOligos = params.getOrDefault('oligos', [:]).keySet().size
nShortOligos = params.getOrDefault('short_oligos', [:]).keySet().size
nFragments = params.getOrDefault('fragments', [:]).keySet().size
nOligos = params.getOrDefault('oligos', [:]).keySet().size()
nShortOligos = params.getOrDefault('short_oligos', [:]).keySet().size()
nFragments = params.getOrDefault('fragments', [:]).keySet().size()

// Include modules and subworkflows
include { INPUT_CHECK_DOWNLOAD } from './subworkflows/local/input_check' addParams( options: [:] )
Expand All @@ -73,12 +73,9 @@ include { GENOME_PREPARE_RNA_ANNOTATIONS } from './modules/local/genome_prepare_
include { FASTQC } from './modules/nf-core/software/fastqc/main' addParams( options: [:] )
include { FASTQ2TSV as TABLE_FASTQ2TSV } from './modules/local/fastq2table/main' addParams( options: [:] )

if (dedupMethod=="fastuniq"){
if (runFastuniq){
include { DEDUP_FASTUNIQ as TABLE_DEDUP } from './subworkflows/local/dedup_fastuniq' addParams( options: [:] )
}
include { TRIMTABLE_CREATE as TABLE_TRIM } from './subworkflows/local/trimtable_create' addParams( options: [:] )

include { RNADNATOOLS_TABLE_ALIGN as TABLE_DEDUP_ALIGN } from './modules/rnadnatools/table_align/main' addParams( options: [
include { RNADNATOOLS_TABLE_ALIGN as TABLE_DEDUP_ALIGN } from './modules/rnadnatools/table_align/main' addParams( options: [
args: [
input_format: 'tsv',
ref_format: 'parquet',
Expand All @@ -88,6 +85,10 @@ include { RNADNATOOLS_TABLE_ALIGN as TABLE_DEDUP_ALIGN } from './modules/rnadnat
params: '--no-input-header --key-column 0 --ref-colname readID \
--fill-values ".",0 --drop-key --new-colnames readID,isUnique'
], suffix:'.fastuniq'] )
}
include { TRIMTABLE_CREATE as TABLE_TRIM } from './subworkflows/local/trimtable_create' addParams( options: [:] )



include { OLIGOS_MAP } from './subworkflows/local/oligos_map' addParams( options: [:] )

Expand Down Expand Up @@ -273,20 +274,22 @@ workflow REDC {
/* Deduplicate input sequences */

// Run subworkflow that trims first basepairs of reads and runs fastuniq on them:
if (dedupMethod=="fastuniq"){
if (runFastuniq){
FastuniqOut = TABLE_DEDUP( Fastq )

// Align deduplicated reads with read table
// (we want to guarantee the same order of reads in each table):
DedupAlignInput = Table.combine(FastuniqOut).filter{ it[0].original_id==it[2].id }
.multiMap{ it ->
reference: [ it[0], it[3] ]
table: [ it[0], it[1] ]
}
TableDedup = TABLE_DEDUP_ALIGN( DedupAlignInput ).table.map{ removeKeys(it, dedupKeys) }

} else {
FastuniqOut = Channel.empty()
TableDedup = Channel.empty()
}

// Align deduplicated reads with read table
// (we want to guarantee the same order of reads in each table):
DedupAlignInput = Table.combine(FastuniqOut).filter{ it[0].original_id==it[2].id }
.multiMap{ it ->
reference: [ it[0], it[3] ]
table: [ it[0], it[1] ]
}
TableDedup = TABLE_DEDUP_ALIGN( DedupAlignInput ).table.map{ removeKeys(it, dedupKeys) }

/* Collect fragment columns data. */
// params.fragments has the list of new columns with expressions that will be evaluated for each fragment
Expand Down Expand Up @@ -654,4 +657,4 @@ def removeKeys( it, ks ) {

def array = [ meta, *it[1..-1] ]
return array
}
}
2 changes: 1 addition & 1 deletion subworkflows/local/oligos_map.nf
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ workflow OLIGOS_MAP {

/* Step 3: Format the tables and channel: */
HitsOligosGrouped = HitsOligosStream
.map{ it -> [ it[0].id, it ] }.view()
.map{ it -> [ it[0].id, it ] }
.groupTuple(by: 0, sort: { a, b -> a[0].idx <=> b[0].idx }, size: nOligos)
.map{ it -> it.collect()[1].collect{ item -> [item[0], file(item[1]), item[0].oligo+'_R'+item[0].side] }.transpose() }
.multiMap{meta, files, suffixes ->
Expand Down

0 comments on commit 058e85d

Please sign in to comment.