Skip to content

Commit

Permalink
fastuniq is now an option
Browse files Browse the repository at this point in the history
  • Loading branch information
agalitsyna committed Feb 22, 2022
1 parent d98f702 commit 058e85d
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 23 deletions.
4 changes: 2 additions & 2 deletions params-redc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ protocol:
dedup_crop: 50
trimmomatic_params: 'SLIDINGWINDOW:5:26 MINLEN:0'
merge_groups: true
dedup_method: fastuniq
run_fastuniq: true

# Scheme of oligos mapping:
oligos:
Expand Down Expand Up @@ -221,7 +221,7 @@ filters:
RNADNASamePos: '(chrom_dna==chrom_rna2) & (strand_dna!=strand_rna2) & (np.abs(start_dna-end_rna2)<=1)'

# Filter 1: Read is not PCR duplication
F1_notDup: '(isUnique)'
F1_notDup: '(isUnique)' # Note that this filter has no effect if protocol.run_fastuniq is false, since the isUnique column is only produced by the fastuniq dedup step
# Filter 2: there is a bridge in the read, with last AG letters, it is not cut by the quality filter
F2_goodBridge: '(start_hit_bridge_forward_R1<9999) & oligo_ga_at_35'
# Filter 3: Reverse read starts with GGG
Expand Down
43 changes: 23 additions & 20 deletions redc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ def protocol = params.getOrDefault('protocol', [:])
def chunksize = protocol.getOrDefault('chunksize', 100000000000)
def check_restriction = protocol.getOrDefault('check_restriction', false)
def RenzymesPreloaded = Genome.getOrDefault('restricted', [:])
def dedupMethod = protocol.getOrDefault('dedup_method', 'fastuniq')
def runFastuniq = protocol.getOrDefault('run_fastuniq', false)

// Check number of oligos, short oligos and fragments:
nOligos = params.getOrDefault('oligos', [:]).keySet().size
nShortOligos = params.getOrDefault('short_oligos', [:]).keySet().size
nFragments = params.getOrDefault('fragments', [:]).keySet().size
nOligos = params.getOrDefault('oligos', [:]).keySet().size()
nShortOligos = params.getOrDefault('short_oligos', [:]).keySet().size()
nFragments = params.getOrDefault('fragments', [:]).keySet().size()

// Include modules and subworkflows
include { INPUT_CHECK_DOWNLOAD } from './subworkflows/local/input_check' addParams( options: [:] )
Expand All @@ -73,12 +73,9 @@ include { GENOME_PREPARE_RNA_ANNOTATIONS } from './modules/local/genome_prepare_
include { FASTQC } from './modules/nf-core/software/fastqc/main' addParams( options: [:] )
include { FASTQ2TSV as TABLE_FASTQ2TSV } from './modules/local/fastq2table/main' addParams( options: [:] )

if (dedupMethod=="fastuniq"){
if (runFastuniq){
include { DEDUP_FASTUNIQ as TABLE_DEDUP } from './subworkflows/local/dedup_fastuniq' addParams( options: [:] )
}
include { TRIMTABLE_CREATE as TABLE_TRIM } from './subworkflows/local/trimtable_create' addParams( options: [:] )

include { RNADNATOOLS_TABLE_ALIGN as TABLE_DEDUP_ALIGN } from './modules/rnadnatools/table_align/main' addParams( options: [
include { RNADNATOOLS_TABLE_ALIGN as TABLE_DEDUP_ALIGN } from './modules/rnadnatools/table_align/main' addParams( options: [
args: [
input_format: 'tsv',
ref_format: 'parquet',
Expand All @@ -88,6 +85,10 @@ include { RNADNATOOLS_TABLE_ALIGN as TABLE_DEDUP_ALIGN } from './modules/rnadnat
params: '--no-input-header --key-column 0 --ref-colname readID \
--fill-values ".",0 --drop-key --new-colnames readID,isUnique'
], suffix:'.fastuniq'] )
}
include { TRIMTABLE_CREATE as TABLE_TRIM } from './subworkflows/local/trimtable_create' addParams( options: [:] )



include { OLIGOS_MAP } from './subworkflows/local/oligos_map' addParams( options: [:] )

Expand Down Expand Up @@ -273,20 +274,22 @@ workflow REDC {
/* Deduplicate input sequences */

// Run subworkflow that trims first basepairs of reads and runs fastuniq on them:
if (dedupMethod=="fastuniq"){
if (runFastuniq){
FastuniqOut = TABLE_DEDUP( Fastq )

// Align deduplicated reads with read table
// (we want to guarantee the same order of reads in each table):
DedupAlignInput = Table.combine(FastuniqOut).filter{ it[0].original_id==it[2].id }
.multiMap{ it ->
reference: [ it[0], it[3] ]
table: [ it[0], it[1] ]
}
TableDedup = TABLE_DEDUP_ALIGN( DedupAlignInput ).table.map{ removeKeys(it, dedupKeys) }

} else {
FastuniqOut = Channel.empty()
TableDedup = Channel.empty()
}

// Align deduplicated reads with read table
// (we want to guarantee the same order of reads in each table):
DedupAlignInput = Table.combine(FastuniqOut).filter{ it[0].original_id==it[2].id }
.multiMap{ it ->
reference: [ it[0], it[3] ]
table: [ it[0], it[1] ]
}
TableDedup = TABLE_DEDUP_ALIGN( DedupAlignInput ).table.map{ removeKeys(it, dedupKeys) }

/* Collect fragment columns data. */
// params.fragments has the list of new columns with expressions that will be evaluated for each fragment
Expand Down Expand Up @@ -654,4 +657,4 @@ def removeKeys( it, ks ) {

def array = [ meta, *it[1..-1] ]
return array
}
}
2 changes: 1 addition & 1 deletion subworkflows/local/oligos_map.nf
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ workflow OLIGOS_MAP {

/* Step 3: Format the tables and channel: */
HitsOligosGrouped = HitsOligosStream
.map{ it -> [ it[0].id, it ] }.view()
.map{ it -> [ it[0].id, it ] }
.groupTuple(by: 0, sort: { a, b -> a[0].idx <=> b[0].idx }, size: nOligos)
.map{ it -> it.collect()[1].collect{ item -> [item[0], file(item[1]), item[0].oligo+'_R'+item[0].side] }.transpose() }
.multiMap{meta, files, suffixes ->
Expand Down

0 comments on commit 058e85d

Please sign in to comment.