Skip to content

Commit

Permalink
Adds fq/lint for early validation of FASTQs
Browse files Browse the repository at this point in the history
Validating FASTQs early prevents running the pipeline on invalid FASTQ files, which makes the pipeline more efficient at achieving its ultimate objective of checking FASTQ validity.

It adds 3 more parameters:
 - `--skip_linting` which skips (disables) the linting of FASTQs; linting runs by default
 - `--fq_lint_args` which is a string of arguments to pass to the linting tool
 - `--continue_with_lint_fail` which is a boolean to determine whether to continue if the linting fails

Between these three options, the user has a high degree of control over how the pipeline lints, which should handle most use cases.

Closes #31
  • Loading branch information
adamrtalbot committed Nov 2, 2024
1 parent 5e56fc3 commit 6563d8f
Show file tree
Hide file tree
Showing 13 changed files with 348 additions and 4 deletions.
9 changes: 9 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]

// Process-specific settings applied to the FQ_LINT process.
withName: 'FQ_LINT' {
// Expose the user-supplied `params.fq_lint_args` string as `ext.args`
// (wrapped in a closure, so it is evaluated lazily).
ext.args = { params.fq_lint_args }
// Pick the error strategy per task, based on its exit status:
//   1. exit status in 130..145, or equal to 104  -> 'retry'
//   2. otherwise, if params.continue_with_lint_fail is truthy -> 'ignore'
//   3. otherwise -> 'finish'
// NOTE(review): the retryable exit codes are presumably scheduler/resource
// related terminations rather than genuine lint failures -- confirm.
errorStrategy = {
task.exitStatus in ((130..145) + 104) ? 'retry' :
params.continue_with_lint_fail ? 'ignore' :
'finish'
}
}

withName: SEQTK_SAMPLE {
ext.args = '-s100'
}
Expand Down
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
"git_sha": "666652151335353eef2fcd58880bcef5bc2928e1",
"installed_by": ["modules"]
},
"fq/lint": {
"branch": "master",
"git_sha": "a1abf90966a2a4016d3c3e41e228bfcbd4811ccc",
"installed_by": ["modules"]
},
"multiqc": {
"branch": "master",
"git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d",
Expand Down
5 changes: 5 additions & 0 deletions modules/nf-core/fq/lint/environment.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions modules/nf-core/fq/lint/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions modules/nf-core/fq/lint/meta.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

63 changes: 63 additions & 0 deletions modules/nf-core/fq/lint/tests/main.nf.test

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions modules/nf-core/fq/lint/tests/main.nf.test.snap

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions modules/nf-core/fq/lint/tests/tags.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ params {
// Input options
input = null
sample_size = 0

// Options
skip_linting = false
fq_lint_args = ""
continue_with_lint_fail = false


// References
genome = null
fasta = null
Expand Down
44 changes: 40 additions & 4 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"required": ["input", "outdir"],
"required": [
"input",
"outdir"
],
"properties": {
"input": {
"type": "string",
Expand All @@ -31,7 +34,6 @@
},
"outdir": {
"type": "string",
"default": null,
"format": "directory-path",
"description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
"fa_icon": "fas fa-folder-open"
Expand All @@ -50,6 +52,30 @@
}
}
},
"validation_options": {
    "title": "Validation options",
    "type": "object",
    "description": "Options for validating and screening FASTQ files.",
    "properties": {
        "skip_linting": {
            "type": "boolean",
            "default": false,
            "description": "Skip linting of the FASTQs before performing QC on the sequences.",
            "help_text": "By default, FASTQ files will be linted with FQ early in the pipeline. If they fail validation, the pipeline will terminate, preventing expensive quality control steps being performed on the other samples. If `--continue_with_lint_fail` is enabled, quality control will be performed on the remaining samples. Set this option to skip the linting step entirely."
        },
        "fq_lint_args": {
            "type": "string",
            "description": "Arguments to pass to FQ lint",
            "help_text": "Arguments to pass to FQ lint. This can be used to disable overly strict linting. See https://github.com/stjude-rust-labs/fq?tab=readme-ov-file#lint for more information."
        },
        "continue_with_lint_fail": {
            "type": "boolean",
            "description": "Whether to continue with the pipeline if linting fails for a single sample.",
            "help_text": "If set to true, the pipeline will continue with the remaining samples if linting fails for a single sample. If set to false, the pipeline will terminate if linting fails for a single sample."
        }
    }
},
"reference_genome_options": {
"title": "Reference genome options",
"type": "object",
Expand Down Expand Up @@ -156,7 +182,14 @@
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
"enum": [
"symlink",
"rellink",
"link",
"copy",
"copyNoFollow",
"move"
],
"hidden": true
},
"email_on_fail": {
Expand Down Expand Up @@ -233,6 +266,9 @@
{
"$ref": "#/$defs/input_output_options"
},
{
"$ref": "#/$defs/validation_options"
},
{
"$ref": "#/$defs/reference_genome_options"
},
Expand All @@ -243,4 +279,4 @@
"$ref": "#/$defs/generic_options"
}
]
}
}
88 changes: 88 additions & 0 deletions tests/rnaseq.main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Pipeline-level nf-test suite exercising the FASTQ linting options
// (default linting, `skip_linting`, `continue_with_lint_fail`, `fq_lint_args`)
// against the configuration in ./rnaseq.main.nf.test.config.
nextflow_pipeline {

    name "Test Workflow main.nf on NovaSeq6000 data"
    script "../main.nf"
    tag "seqinspector"
    tag "PIPELINE"

    // Default behaviour: linting is on and failures are not ignored.
    test("rnaseq data test fail linting") {

        when {
            config "./rnaseq.main.nf.test.config"
            params {
                outdir = "$outputDir"
            }
        }

        then {
            assertAll(
                // Linting should fail!
                { assert workflow.failed }
            )
        }
    }

    // Linting is skipped via `skip_linting = true`.
    test("rnaseq data test skip linting") {

        when {
            config "./rnaseq.main.nf.test.config"
            params {
                outdir       = "$outputDir"
                skip_linting = true
            }
        }

        then {
            assertAll(
                // The run is still expected to fail even though linting is skipped.
                // NOTE(review): presumably a downstream step rejects the invalid
                // FASTQ input -- confirm this is the intended expectation.
                { assert workflow.failed }
            )
        }
    }

    // Lint failures are ignored via `continue_with_lint_fail = true`,
    // so the workflow is expected to complete.
    test("rnaseq data test ignore linting") {

        when {
            config "./rnaseq.main.nf.test.config"
            params {
                outdir                  = "$outputDir"
                continue_with_lint_fail = true
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                // `.match()` is required: without it the assertion only checks
                // that a snapshot object exists and never compares the files.
                { assert snapshot(
                        path("$outputDir/multiqc/global_report/multiqc_data/multiqc_citations.txt"),
                        path("$outputDir/multiqc/global_report/multiqc_data/multiqc_fastqc.txt"),
                        path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt")
                    ).match()
                }
            )
        }
    }

    // Extra arguments are passed to fq lint through `fq_lint_args`;
    // with validator P001 disabled the workflow is expected to complete.
    test("rnaseq data test add args to fq/lint") {

        when {
            config "./rnaseq.main.nf.test.config"
            params {
                outdir       = "$outputDir"
                fq_lint_args = "--disable-validator P001"
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                // `.match()` is required: without it the assertion only checks
                // that a snapshot object exists and never compares the files.
                { assert snapshot(
                        path("$outputDir/multiqc/global_report/multiqc_data/multiqc_citations.txt"),
                        path("$outputDir/multiqc/global_report/multiqc_data/multiqc_fastqc.txt"),
                        path("$outputDir/multiqc/global_report/multiqc_data/multiqc_general_stats.txt")
                    ).match()
                }
            )
        }
    }
}
Loading

0 comments on commit 6563d8f

Please sign in to comment.