Skip to content

Commit

Permalink
Add param --quality_type
Browse files Browse the repository at this point in the history
  • Loading branch information
d4straub committed Nov 19, 2024
1 parent 4052833 commit 49202ab
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 5 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Added`

- [#801](https://github.com/nf-core/ampliseq/pull/801) - New parameter `--quality_type` specifies the type of quality scores in the raw read data; the default is `Auto` (i.e. the default behavior is unchanged)

### `Changed`

### `Fixed`
Expand Down
6 changes: 4 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ process {
max_len = params.max_len ?: "Inf"
withName: DADA2_FILTNTRIM {
ext.args = [
'maxN = 0, truncQ = 2, trimRight = 0, minQ = 0, rm.lowcomplex = 0, orient.fwd = NULL, matchIDs = FALSE, id.sep = "\\\\s", id.field = NULL, n = 1e+05, OMP = TRUE, qualityType = "Auto"',
'maxN = 0, truncQ = 2, trimRight = 0, minQ = 0, rm.lowcomplex = 0, orient.fwd = NULL, matchIDs = FALSE, id.sep = "\\\\s", id.field = NULL, n = 1e+05, OMP = TRUE',
"qualityType = \"${params.quality_type}\"",
params.pacbio || params.iontorrent || params.single_end ? "maxEE = ${params.max_ee}" : "maxEE = c(${params.max_ee}, ${params.max_ee})",
params.pacbio ? "trimLeft = 0, minLen = ${params.min_len}, maxLen = $max_len, rm.phix = FALSE" :
params.iontorrent ? "trimLeft = 15, minLen = ${params.min_len}, maxLen = $max_len, rm.phix = TRUE" :
Expand Down Expand Up @@ -179,7 +180,8 @@ process {
ext.seed = "${params.seed}"
ext.prefix = { meta.region ? "region-${meta.region}_run-${meta.run}" : "${meta.run}" }
ext.args = [
'nbases = 1e8, nreads = NULL, randomize = TRUE, MAX_CONSIST = 10, OMEGA_C = 0, qualityType = "Auto"',
'nbases = 1e8, nreads = NULL, randomize = TRUE, MAX_CONSIST = 10, OMEGA_C = 0',
"qualityType = \"${params.quality_type}\"",
params.pacbio ? "errorEstimationFunction = PacBioErrfun" : "errorEstimationFunction = loessErrfun"
].join(',').replaceAll('(,)*$', "")
publishDir = [
Expand Down
21 changes: 18 additions & 3 deletions modules/local/dada2_denoising.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ process DADA2_DENOISING {

script:
def prefix = task.ext.prefix ?: "prefix"
def quality_type = task.ext.quality_type ?: "Auto"
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
if (!meta.single_end) {
Expand All @@ -39,9 +40,17 @@ process DADA2_DENOISING {
#denoising
sink(file = "${prefix}.dada.log")
dadaFs <- dada(filtFs, err = errF, $args, multithread = $task.cpus)
if ("${quality_type}" == "Auto") {
# Avoid using memory-inefficient derepFastq() if not necessary
dadaFs <- dada(filtFs, err = errF, $args, multithread = $task.cpus)
dadaRs <- dada(filtRs, err = errR, $args, multithread = $task.cpus)
} else {
derepFs <- derepFastq(filtFs, qualityType="${quality_type}")
dadaFs <- dada(derepFs, err = errF, $args, multithread = $task.cpus)
derepRs <- derepFastq(filtRs, qualityType="${quality_type}")
dadaRs <- dada(derepRs, err = errR, $args, multithread = $task.cpus)
}
saveRDS(dadaFs, "${prefix}_1.dada.rds")
dadaRs <- dada(filtRs, err = errR, $args, multithread = $task.cpus)
saveRDS(dadaRs, "${prefix}_2.dada.rds")
sink(file = NULL)
Expand All @@ -66,7 +75,13 @@ process DADA2_DENOISING {
#denoising
sink(file = "${prefix}.dada.log")
dadaFs <- dada(filtFs, err = errF, $args, multithread = $task.cpus)
if ("${quality_type}" == "Auto") {
# Avoid using memory-inefficient derepFastq() if not necessary
dadaFs <- dada(filtFs, err = errF, $args, multithread = $task.cpus)
} else {
derepFs <- derepFastq(filtFs, qualityType="${quality_type}")
dadaFs <- dada(derepFs, err = errF, $args, multithread = $task.cpus)
}
saveRDS(dadaFs, "${prefix}.dada.rds")
sink(file = NULL)
Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ params {
extension = "/*_R{1,2}_001.fastq.gz"
pacbio = false
iontorrent = false
quality_type = "Auto"
FW_primer = null
RV_primer = null
classifier = null
Expand Down
8 changes: 8 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,14 @@
"help_text": "This will cause the pipeline to\n- not truncate input reads unless `--trunclenf` and `--trunclenr` override the defaults\n- remove reverse complement primers from the end of reads in case the read length exceeds the amplicon length",
"fa_icon": "fas fa-align-justify"
},
"quality_type": {
"type": "string",
"default": "Auto",
"description": "Type of quality scores in raw read data",
"help_text": "From R package 'ShortRead' function 'readFastq': Representation to be used for quality scores, must be one of `Auto` (infer automatically), `FastqQuality` (Phred-like base 33 encoding), `SFastqQuality` (Illumina base 64 encoding).",
"enum": ["Auto", "FastqQuality", "SFastqQuality"],
"fa_icon": "fab fa-amilia"
},
"multiple_sequencing_runs": {
"type": "boolean",
"description": "If using `--input_folder`: samples were sequenced in multiple sequencing runs",
Expand Down

0 comments on commit 49202ab

Please sign in to comment.