nf-core · mirpedrol · Jun 28, 2023 · Jun 26, 2023 · Jun 27, 2023 · Jun 27, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Add new parameters --reference_fasta and --protospacer ([#45](https://github.com/nf-core/crisprseq/pull/45))
+
 ### Fixed
 
 - Fix warning "module used more than once" ([#25](https://github.com/nf-core/crisprseq/pull/25))

diff --git a/conf/modules.config b/conf/modules.config
@@ -43,6 +43,7 @@ process {
     }
 
     withName: ORIENT_REFERENCE {
+        ext.prefix = { params.reference_fasta ? "${reference.baseName}" : "${meta.id}_reference" }
         publishDir = [
             path: { "${params.outdir}/preprocessing/sequences" },
             mode: params.publish_dir_mode,

diff --git a/docs/usage/targeted.md b/docs/usage/targeted.md
@@ -55,6 +55,54 @@ chr6,chr6-61942198-61942498_R1.fastq.gz,,CAA...GGA,TTTTATGATATTTATCTTTT,TTC...CA
 
 An [example samplesheet](https://nf-co.re/crisprseq/1.0/assets/samplesheet.csv) has been provided with the pipeline.
 
+## Other input parameters
+
+### Reference
+
+If you want to provide the same reference for every sample, you can select a genome with `--genome` or provide a reference FASTA file with `--reference_fasta`.
+Using either of these two parameters will override any reference sequence provided through an input sample sheet.
+
+Please refer to the [nf-core website](https://nf-co.re/usage/reference_genomes) for general usage docs and guidelines regarding reference genomes.
+
+### Protospacer
+
+If you want to provide the same protospacer sequence for every sample, you can provide the sequence with the parameter `--protospacer`.
+Using this parameter will override any protospacer sequence provided through an input sample sheet.
+
+Providing a protospacer, either through a sample sheet or by using the parameter `--protospacer`, is required.
+
+## Alignment options
+
+By default, the pipeline uses `minimap2` (i.e. `--aligner minimap2`) to map the sequenced FASTQ reads to the reference.
+You also have the option to select other alignment tools by using the parameter `--aligner`. Possible options are `minimap2`, `bwa` or `bowtie2`.
+
+The default alignment with `minimap2` uses adapted parameters which were seen to improve the alignment and reduce potential sequencing or alignment errors.
+The default parameters are:
+
+- A matching score of 29
+- A mismatching penalty of 17
+- A gap open penalty of 25
+- A gap extension penalty of 2
+
+Please refer to the original [CRISPR-Analytics](https://doi.org/10.1371/journal.pcbi.1011137) publication to see the benchmarking of such parameters.
+
+In order to customise such parameters, you can override the arguments given to `minimap2` by creating a configuration file and providing it to your Nextflow run with `-c`:
+
+```groovy
+// Custom config file custom.config
+process {
+    withName: MINIMAP2_ALIGN_ORIGINAL {
+        ext.args = '-A 29 -B 17 -O 25 -E 2'
+    }
+}
+```
+
+Command:
+
+```bash
+nextflow run nf-core/crisprseq --input samplesheet.csv --analysis targeted --outdir <OUTDIR> -profile docker -c custom.config
+```
+
 ## Running the pipeline
 
 The typical command for running the pipeline is as follows:

diff --git a/main.nf b/main.nf
@@ -18,7 +18,7 @@ nextflow.enable.dsl = 2
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta')
+params.reference_fasta = params.reference_fasta ?: WorkflowMain.getGenomeAttribute(params, 'fasta')
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/modules/local/orient_reference.nf b/modules/local/orient_reference.nf
@@ -11,16 +11,16 @@ process ORIENT_REFERENCE {
     tuple val(meta), file(reference), val(protospacer)
 
     output:
-    tuple val(meta), path('*_reference-correctOrient.fasta') , emit: reference
-    path "versions.yml"                                      , emit: versions
+    tuple val(meta), path('*-correctOrient.fasta') , emit: reference
+    path "versions.yml"                            , emit: versions
 
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
     revComp_reference.R \\
         $reference \\
-        ${meta.id}_reference-correctOrient.fasta \\
+        ${prefix}-correctOrient.fasta \\
         $protospacer;
 
     cat <<-END_VERSIONS > versions.yml

diff --git a/nextflow.config b/nextflow.config
@@ -13,6 +13,7 @@ params {
     input                      = null
     analysis                   = null
     aligner                    = 'minimap2'
+    protospacer                = null
     library                    = null
     crisprcleanr               = null
     rra_contrasts              = null
@@ -26,6 +27,7 @@ params {
     genome                     = null
     igenomes_base              = 's3://ngi-igenomes/igenomes'
     igenomes_ignore            = false
+    reference_fasta            = null
 
     // MultiQC options
     multiqc_config             = null

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -48,8 +48,8 @@
                 }
             }
         },
-        "alignment_parameters": {
-            "title": "Alignment parameters",
+        "targeted_parameters": {
+            "title": "Targeted parameters",
             "type": "object",
             "description": "",
             "default": "",
@@ -60,6 +60,11 @@
                     "default": "minimap2",
                     "fa_icon": "fas fa-align-justify",
                     "enum": ["minimap2", "bwa", "bowtie2"]
+                },
+                "protospacer": {
+                    "type": "string",
+                    "fa_icon": "fas fa-grip-lines",
+                    "description": "Provide the same protospacer sequence for all samples. Will override protospacer sequences provided by an input samplesheet."
                 }
             }
         },
@@ -114,14 +119,12 @@
                     "fa_icon": "fas fa-book",
                     "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
                 },
-                "fasta": {
+                "reference_fasta": {
                     "type": "string",
-                    "format": "file-path",
-                    "mimetype": "text/plain",
                     "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
-                    "description": "Path to FASTA genome file.",
-                    "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
-                    "fa_icon": "far fa-file-code"
+                    "description": "Path to the reference FASTA file. Will override reference sequences provided by an input sample sheet.",
+                    "fa_icon": "far fa-file-alt",
+                    "format": "file-path"
                 },
                 "igenomes_base": {
                     "type": "string",
@@ -332,7 +335,7 @@
             "$ref": "#/definitions/input_output_options"
         },
         {
-            "$ref": "#/definitions/alignment_parameters"
+            "$ref": "#/definitions/targeted_parameters"
         },
         {
             "$ref": "#/definitions/screening_parameters"

diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf
@@ -120,7 +120,7 @@ def create_protospacer_channel(LinkedHashMap row) {
 
     // add protospacer sequence to meta
     def protospacer_meta = []
-    if (row.protospacer.length() <= 0) {
+    if (row.protospacer.length() <= 0 && !params.protospacer) {
         exit 1, "ERROR: Please check input samplesheet -> Protospacer sequence is not provided!\n"
     } else {
         protospacer_meta = [ meta, row.protospacer ]

diff --git a/workflows/crisprseq_screening.nf b/workflows/crisprseq_screening.nf
@@ -10,7 +10,7 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
 WorkflowCrisprseq.initialise(params, log)
 
 // Check input path parameters to see if they exist
-def checkPathParamList = [ params.multiqc_config, params.fasta, params.library, params.mle_design_matrix ]
+def checkPathParamList = [ params.multiqc_config, params.reference_fasta, params.library, params.mle_design_matrix ]
 for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
 
 // Check mandatory parameters

diff --git a/workflows/crisprseq_targeted.nf b/workflows/crisprseq_targeted.nf
@@ -10,7 +10,7 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
 WorkflowCrisprseq.initialise(params, log)
 
 // Check input path parameters to see if they exist
-def checkPathParamList = [ params.input, params.multiqc_config ]
+def checkPathParamList = [ params.input, params.multiqc_config, params.reference_fasta ]
 for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
 
 // Check mandatory parameters
@@ -114,12 +114,6 @@ workflow CRISPRSEQ_TARGETED {
     .set { ch_fastq }
     ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
 
-    INPUT_CHECK.out.reference
-    .map {
-        meta, fastq ->
-            [ meta - meta.subMap('id') + [id: meta.id.split('_')[0..-2].join('_')], fastq ]
-    }
-
     //
     // MODULE: Add reference sequences to file
     //
@@ -148,14 +142,42 @@ workflow CRISPRSEQ_TARGETED {
 
     // Join channels with reference and protospacer
     // to channel: [ meta, reference, protospacer]
-    SEQ_TO_FILE_REF.out.file
-        .join(INPUT_CHECK.out.protospacer
-            .map {
-                meta, fastq ->
-                    [ meta - meta.subMap('id') + [id: meta.id.split('_')[0..-2].join('_')], fastq ]
-            },
-            by: 0)
-        .set{ reference_protospacer }
+    if (!params.reference_fasta && !params.protospacer) {
+        SEQ_TO_FILE_REF.out.file
+            .join(INPUT_CHECK.out.protospacer
+                .map {
+                    meta, fastq ->
+                        [ meta - meta.subMap('id') + [id: meta.id.split('_')[0..-2].join('_')], fastq ]
+                },
+                by: 0)
+            .set{ reference_protospacer }
+    } else if (!params.reference_fasta) {
+        // If a protospacer was provided through the --protospacer param instead of the samplesheet
+        ch_protospacer = Channel.of(params.protospacer)
+        SEQ_TO_FILE_REF.out.file
+            .combine(ch_protospacer)
+            .set{ reference_protospacer }
+    } else if (!params.protospacer) {
+        // If a reference was provided through a fasta file or igenomes instead of the samplesheet
+        ch_reference = Channel.fromPath(params.reference_fasta)
+        INPUT_CHECK.out.protospacer
+            .combine(ch_reference)
+            .map{ meta, protospacer, reference ->
+                [ meta - meta.subMap('id') + [id: meta.id.split('_')[0..-2].join('_')], reference, protospacer ]
+            }
+            .set{ reference_protospacer }
+    } else {
+        ch_reference = Channel.fromPath(params.reference_fasta)
+        ch_protospacer = Channel.of(params.protospacer)
+        INPUT_CHECK.out.reads
+            .combine(ch_reference)
+            .combine(ch_protospacer)
+            .map{ meta, reads, reference, protospacer ->
+                [meta - meta.subMap('id') + [id: meta.id.split('_')[0..-2].join('_')], reference, protospacer]
+            }
+            .set{ reference_protospacer }
+    }
+
 
     //
     // MODULE: Prepare reference sequence