Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RNAseq preprocessing: rrna sequences make more sense as a channel of FASTAs, port tests #5988

Merged
merged 4 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
ch_salmon_index // channel: /path/to/salmon/index/ (optional)
ch_sortmerna_index // channel: /path/to/sortmerna/index/ (optional)
ch_bbsplit_index // channel: /path/to/bbsplit/index/ (optional)
ch_ribo_db // channel: /path/to/ Text file containing paths to fasta files (one per line) that will be used to create the database for SortMeRNA. (optional)
ch_rrna_fastas // channel: one or more FASTA files containing rRNA sequences to be passed to SortMeRNA (optional)
skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads.
skip_fastqc // boolean: true/false
skip_trimming // boolean: true/false
Expand Down Expand Up @@ -233,8 +233,7 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
// MODULE: Remove ribosomal RNA reads
//
if (remove_ribo_rna) {
ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines())
.map { row -> file(row, checkIfExists: true) }
ch_sortmerna_fastas = ch_rrna_fastas
.collect()
.map{ ['rrna_refs', it] }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ input:
- ch_bbsplit_index:
type: file
description: Path to directory or tar.gz archive for pre-built BBSplit index
- ch_ribo_db:
- ch_rrna_fastas:
type: file
description: |
Channel with text file containing paths to fasta files (one per line)
that will be used to create the database for SortMeRNA
Channel containing one or more FASTA files containing rRNA sequences
for use with SortMeRNA
- skip_bbsplit:
type: boolean
description: Whether to skip BBSplit for removal of non-reference genome reads
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
nextflow_function {

name "Test Functions"
script "../main.nf"

//
// Test function for deriving strandedness from Salmon numbers
//

// Unstranded case: the fixture JSON has equal ISF and ISR counts (100 each)
// and zero for every other category. The recorded snapshot for this test
// expects "unstranded" with forward/reverse fragments of 50.0 / 50.0.
test("Test Function getSalmonInferredStrandedness unstranded") {

function "getSalmonInferredStrandedness"

when {
// The block below writes the fixture JSON into the nf-test work directory and
// passes it as input[0]. input[1] (0.8) and input[2] (0.1) are presumably the
// stranded / unstranded thresholds -- TODO confirm against the function
// signature in ../main.nf.
function {
"""
import groovy.json.JsonOutput

// Define the JSON contents for the test
def json_contents = JsonOutput.toJson([
"SF": 0,
"SR": 0,
"ISF": 100,
"ISR": 100,
"IU": 0,
"U": 0
])
def jsonFile = file("${workDir}/salmonUnstranded.json")
jsonFile.write(json_contents)

input[0] = jsonFile
input[1] = 0.8
input[2] = 0.1
"""
}
}

then {
// The call must succeed and its return value must match the stored snapshot.
assertAll(
{ assert function.success },
{ assert snapshot(function.result).match() }
)
}

}

// Forward case: ISF is the only non-zero count (100) in the fixture JSON.
// The recorded snapshot for this test expects "forward" with
// forward/reverse fragments of 100.0 / 0.0.
test("Test Function getSalmonInferredStrandedness forward") {

function "getSalmonInferredStrandedness"

when {
// The block below writes the fixture JSON into the nf-test work directory and
// passes it as input[0]. input[1] (0.8) and input[2] (0.1) are presumably the
// stranded / unstranded thresholds -- TODO confirm against the function
// signature in ../main.nf.
function {
"""
import groovy.json.JsonOutput

def json_contents = JsonOutput.toJson([
"SF": 0,
"SR": 0,
"ISF": 100,
"ISR": 0,
"IU": 0,
"U": 0
])
def jsonFile = file("${workDir}/salmonForward.json")
jsonFile.write(json_contents)

input[0] = jsonFile
input[1] = 0.8
input[2] = 0.1
"""
}
}

then {
// The call must succeed and its return value must match the stored snapshot.
assertAll(
{ assert function.success },
{ assert snapshot(function.result).match() }
)
}

}

// Reverse case: ISR is the only non-zero count (100) in the fixture JSON.
// The recorded snapshot for this test expects "reverse" with
// forward/reverse fragments of 0.0 / 100.0.
test("Test Function getSalmonInferredStrandedness reverse") {

function "getSalmonInferredStrandedness"

when {
// The block below writes the fixture JSON into the nf-test work directory and
// passes it as input[0]. input[1] (0.8) and input[2] (0.1) are presumably the
// stranded / unstranded thresholds -- TODO confirm against the function
// signature in ../main.nf.
function {
"""
import groovy.json.JsonOutput

def json_contents = JsonOutput.toJson([
"SF": 0,
"SR": 0,
"ISF": 0,
"ISR": 100,
"IU": 0,
"U": 0
])
def jsonFile = file("${workDir}/salmonReverse.json")
jsonFile.write(json_contents)

input[0] = jsonFile
input[1] = 0.8
input[2] = 0.1
"""
}
}

then {
// The call must succeed and its return value must match the stored snapshot.
assertAll(
{ assert function.success },
{ assert snapshot(function.result).match() }
)
}

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"Test Function getSalmonInferredStrandedness forward": {
"content": [
{
"inferred_strandedness": "forward",
"forwardFragments": 100.0,
"reverseFragments": 0.0,
"unstrandedFragments": 0.0
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
"timestamp": "2024-07-17T10:28:59.284820592"
},
"Test Function getSalmonInferredStrandedness unstranded": {
"content": [
{
"inferred_strandedness": "unstranded",
"forwardFragments": 50.0,
"reverseFragments": 50.0,
"unstrandedFragments": 0.0
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
"timestamp": "2024-07-17T10:28:47.514036696"
},
"Test Function getSalmonInferredStrandedness reverse": {
"content": [
{
"inferred_strandedness": "reverse",
"forwardFragments": 0.0,
"reverseFragments": 100.0,
"unstrandedFragments": 0.0
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
"timestamp": "2024-07-17T10:29:11.43961965"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,14 @@ nextflow_workflow {
]
])

ch_ribo_db = file('ribo_db.txt')
ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta')

input[0] = ch_reads
input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta
input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta
input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // ch_gtf
input[4] = [] // ch_salmon_index
input[5] = [] // ch_sortmerna_index
input[6] = [] // ch_bbsplit_index
input[7] = ch_ribo_db // ch_ribo_db
input[7] = Channel.of(file('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta', checkIfExists: true)) // ch_rrna_fastas
input[8] = true // skip_bbsplit
input[9] = false // skip_fastqc
input[10] = false // skip_trimming
Expand Down Expand Up @@ -91,17 +88,14 @@ nextflow_workflow {
]
])

ch_ribo_db = file('ribo_db.txt')
ch_ribo_db.append('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta')

input[0] = ch_reads
input[1] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)) // ch_fasta
input[2] = Channel.of(file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/transcriptome.fasta", checkIfExists: true)) // ch_transcript_fasta
input[3] = Channel.of(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)) // ch_gtf
input[4] = [] // ch_salmon_index
input[5] = [] // ch_sortmerna_index
input[6] = [] // ch_bbsplit_index
input[7] = ch_ribo_db // ch_ribo_db
input[7] = Channel.of(file('https://raw.githubusercontent.com/biocore/sortmerna/v4.3.4/data/rRNA_databases/rfam-5.8s-database-id98.fasta', checkIfExists: true)) // ch_rrna_fastas
input[8] = true // skip_bbsplit
input[9] = false // skip_fastqc
input[10] = false // skip_trimming
Expand Down Expand Up @@ -136,4 +130,6 @@ nextflow_workflow {
)
}
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -14,81 +14,81 @@
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.3"
},
"timestamp": "2024-02-24T16:19:13.057802"
"timestamp": "2024-07-17T10:24:00.044553245"
},
"trimgalore_test_pe_reads_2_lines": {
"content": "eccf3e9e74589ff01c77fce7f4548e41",
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.3"
},
"timestamp": "2024-02-24T17:44:07.667653"
"timestamp": "2024-07-17T10:24:26.838793051"
},
"fastp_test_pe_reads_1_size": {
"content": [
4508
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.3"
},
"timestamp": "2024-02-24T17:43:46.173892"
"timestamp": "2024-07-17T10:23:59.889337984"
},
"trimgalore_test_pe_reads_1_size": {
"content": [
4508
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.3"
},
"timestamp": "2024-02-24T17:44:07.642318"
"timestamp": "2024-07-17T10:24:26.778599725"
},
"trimgalore_test_pe_reads_1_lines": {
"content": "3868fc1caf09367141d2bbf47e158823",
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.3"
},
"timestamp": "2024-02-24T17:44:07.641186"
"timestamp": "2024-07-17T10:24:26.774975135"
},
"fastp_test_pe_reads_2_lines": {
"content": "eccf3e9e74589ff01c77fce7f4548e41",
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.3"
},
"timestamp": "2024-02-24T17:43:46.235022"
"timestamp": "2024-07-17T10:23:59.997625278"
},
"fastp_test_pe_reads_2_size": {
"content": [
4508
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.3"
},
"timestamp": "2024-02-24T17:43:46.242006"
"timestamp": "2024-07-17T10:24:00.042449965"
},
"trimgalore_test_pe_reads_2_size": {
"content": [
4508
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.3"
},
"timestamp": "2024-02-24T17:44:07.668644"
"timestamp": "2024-07-17T10:24:26.841434261"
},
"fastp_test_pe_reads_1_lines": {
"content": "3868fc1caf09367141d2bbf47e158823",
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.3"
},
"timestamp": "2024-02-24T17:43:46.161535"
"timestamp": "2024-07-17T10:23:59.882844295"
},
"trimgalore_read_count": {
"content": [
Expand All @@ -105,8 +105,8 @@
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.3"
},
"timestamp": "2024-02-24T17:44:07.669435"
"timestamp": "2024-07-17T10:24:26.84402498"
}
}
Loading