diff --git a/CHANGELOG.md b/CHANGELOG.md index c53cb89f..09385ad7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Add new parameters --reference_fasta and --protospacer ([#45](https://github.com/nf-core/crisprseq/pull/45)) + ### Fixed - Fix warning "module used more than once" ([#25](https://github.com/nf-core/crisprseq/pull/25)) diff --git a/conf/modules.config b/conf/modules.config index a68bb4ab..ad961c41 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -43,6 +43,7 @@ process { } withName: ORIENT_REFERENCE { + ext.prefix = { params.reference_fasta ? "${reference.baseName}" : "${meta.id}_reference" } publishDir = [ path: { "${params.outdir}/preprocessing/sequences" }, mode: params.publish_dir_mode, diff --git a/docs/usage/targeted.md b/docs/usage/targeted.md index 667d03df..8f415d0b 100644 --- a/docs/usage/targeted.md +++ b/docs/usage/targeted.md @@ -55,6 +55,54 @@ chr6,chr6-61942198-61942498_R1.fastq.gz,,CAA...GGA,TTTTATGATATTTATCTTTT,TTC...CA An [example samplesheet](https://nf-co.re/crisprseq/1.0/assets/samplesheet.csv) has been provided with the pipeline. +## Other input parameters + +### Reference + +If you want to provide the same reference for every sample, you can select a genome with `--genome` or provide a reference FASTA file with `--reference_fasta`. +Using either of these two parameters will override any reference sequence provided through an input sample sheet. + +Please refer to the [nf-core website](https://nf-co.re/usage/reference_genomes) for general usage docs and guidelines regarding reference genomes. + +### Protospacer + +If you want to provide the same protospacer sequence for every sample, you can provide the sequence with the parameter `--protospacer`. +Using this parameter will override any protospacer sequence provided through an input sample sheet. 
+ +Providing a protospacer, either through a sample sheet or by using the parameter `--protospacer`, is required. + +## Alignment options + +By default, the pipeline uses `minimap2` (i.e. `--aligner minimap2`) to map the sequenced FASTQ reads to the reference. +You also have the option to select other alignment tools by using the parameter `--aligner`. Possible options are `minimap2`, `bwa` or `bowtie2`. + +The default alignment with `minimap2` uses adapted parameters which were seen to improve the alignment and reduce potential sequencing or alignment errors. +The default parameters are: + +- A matching score of 29 +- A mismatching penalty of 17 +- A gap open penalty of 25 +- A gap extension penalty of 2. + +Please refer to the original [CRISPR-Analytics](https://doi.org/10.1371/journal.pcbi.1011137) publication to see the benchmarking of such parameters. + +In order to customise such parameters, you can override the arguments given to `minimap2` by creating a configuration file and providing it to your nextflow run with `-c`: + +```groovy +// Custom config file custom.config +process { + withName: MINIMAP2_ALIGN_ORIGINAL { + ext.args = '-A 29 -B 17 -O 25 -E 2' + } +} +``` + +Command: + +```bash +nextflow run nf-core/crisprseq --input samplesheet.csv --analysis targeted --outdir <OUTDIR> -profile docker -c custom.config +``` + ## Running the pipeline The typical command for running the pipeline is as follows: diff --git a/main.nf b/main.nf index bef576ad..14acb614 100644 --- a/main.nf +++ b/main.nf @@ -18,7 +18,7 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.reference_fasta = params.reference_fasta ?: WorkflowMain.getGenomeAttribute(params, 'fasta') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/modules/local/orient_reference.nf b/modules/local/orient_reference.nf index 
4a6de794..842e1f70 100644 --- a/modules/local/orient_reference.nf +++ b/modules/local/orient_reference.nf @@ -11,8 +11,8 @@ process ORIENT_REFERENCE { tuple val(meta), file(reference), val(protospacer) output: - tuple val(meta), path('*_reference-correctOrient.fasta') , emit: reference - path "versions.yml" , emit: versions + tuple val(meta), path('*-correctOrient.fasta') , emit: reference + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' @@ -20,7 +20,7 @@ process ORIENT_REFERENCE { """ revComp_reference.R \\ $reference \\ - ${meta.id}_reference-correctOrient.fasta \\ + ${prefix}-correctOrient.fasta \\ $protospacer; cat <<-END_VERSIONS > versions.yml diff --git a/nextflow.config b/nextflow.config index 04f1f5bc..a1fa8021 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,7 @@ params { input = null analysis = null aligner = 'minimap2' + protospacer = null library = null crisprcleanr = null rra_contrasts = null @@ -26,6 +27,7 @@ params { genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false + reference_fasta = null // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index bfb3788f..b38ebd95 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -48,8 +48,8 @@ } } }, - "alignment_parameters": { - "title": "Alignment parameters", + "targeted_parameters": { + "title": "Targeted parameters", "type": "object", "description": "", "default": "", @@ -60,6 +60,11 @@ "default": "minimap2", "fa_icon": "fas fa-align-justify", "enum": ["minimap2", "bwa", "bowtie2"] + }, + "protospacer": { + "type": "string", + "fa_icon": "fas fa-grip-lines", + "description": "Provide the same protospacer sequence for all samples. Will override protospacer sequences provided by an input samplesheet." 
} } }, @@ -114,14 +119,12 @@ "fa_icon": "fas fa-book", "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, - "fasta": { + "reference_fasta": { "type": "string", - "format": "file-path", - "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" + "description": "Path to the reference FASTA file. Will override reference sequences provided by an input sample sheet.", + "fa_icon": "far fa-file-alt", + "format": "file-path" }, "igenomes_base": { "type": "string", @@ -332,7 +335,7 @@ "$ref": "#/definitions/input_output_options" }, { - "$ref": "#/definitions/alignment_parameters" + "$ref": "#/definitions/targeted_parameters" }, { "$ref": "#/definitions/screening_parameters" diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 08f9fb3c..bd12e09f 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -120,7 +120,7 @@ def create_protospacer_channel(LinkedHashMap row) { // add protospacer sequence to meta def protospacer_meta = [] - if (row.protospacer.length() <= 0) { + if (row.protospacer.length() <= 0 && !params.protospacer) { exit 1, "ERROR: Please check input samplesheet -> Protospacer sequence is not provided!\n" } else { protospacer_meta = [ meta, row.protospacer ] diff --git a/workflows/crisprseq_screening.nf b/workflows/crisprseq_screening.nf index e35c15cc..6379faa0 100644 --- 
a/workflows/crisprseq_screening.nf +++ b/workflows/crisprseq_screening.nf @@ -10,7 +10,7 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) WorkflowCrisprseq.initialise(params, log) // Check input path parameters to see if they exist -def checkPathParamList = [ params.multiqc_config, params.fasta, params.library, params.mle_design_matrix ] +def checkPathParamList = [ params.multiqc_config, params.reference_fasta, params.library, params.mle_design_matrix ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters diff --git a/workflows/crisprseq_targeted.nf b/workflows/crisprseq_targeted.nf index 99aa168f..eeb155e6 100644 --- a/workflows/crisprseq_targeted.nf +++ b/workflows/crisprseq_targeted.nf @@ -10,7 +10,7 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) WorkflowCrisprseq.initialise(params, log) // Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config ] +def checkPathParamList = [ params.input, params.multiqc_config, params.reference_fasta ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } // Check mandatory parameters @@ -114,12 +114,6 @@ workflow CRISPRSEQ_TARGETED { .set { ch_fastq } ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - INPUT_CHECK.out.reference - .map { - meta, fastq -> - [ meta - meta.subMap('id') + [id: meta.id.split('_')[0..-2].join('_')], fastq ] - } - // // MODULE: Add reference sequences to file // @@ -148,14 +142,42 @@ workflow CRISPRSEQ_TARGETED { // Join channels with reference and protospacer // to channel: [ meta, reference, protospacer] - SEQ_TO_FILE_REF.out.file - .join(INPUT_CHECK.out.protospacer - .map { - meta, fastq -> - [ meta - meta.subMap('id') + [id: meta.id.split('_')[0..-2].join('_')], fastq ] - }, - by: 0) - .set{ reference_protospacer } + if (!params.reference_fasta && !params.protospacer) { + SEQ_TO_FILE_REF.out.file 
+ .join(INPUT_CHECK.out.protospacer + .map { + meta, fastq -> + [ meta - meta.subMap('id') + [id: meta.id.split('_')[0..-2].join('_')], fastq ] + }, + by: 0) + .set{ reference_protospacer } + } else if (!params.reference_fasta) { + // If a protospacer was provided through the --protospacer param instead of the samplesheet + ch_protospacer = Channel.of(params.protospacer) + SEQ_TO_FILE_REF.out.file + .combine(ch_protospacer) + .set{ reference_protospacer } + } else if (!params.protospacer) { + // If a reference was provided through a fasta file or igenomes instead of the samplesheet + ch_reference = Channel.fromPath(params.reference_fasta) + INPUT_CHECK.out.protospacer + .combine(ch_reference) + .map{ meta, protospacer, reference -> + [ meta - meta.subMap('id') + [id: meta.id.split('_')[0..-2].join('_')], reference, protospacer ] + } + .set{ reference_protospacer } + } else { + ch_reference = Channel.fromPath(params.reference_fasta) + ch_protospacer = Channel.of(params.protospacer) + INPUT_CHECK.out.reads + .combine(ch_reference) + .combine(ch_protospacer) + .map{ meta, reads, reference, protospacer -> + [meta - meta.subMap('id') + [id: meta.id.split('_')[0..-2].join('_')], reference, protospacer] + } + .set{ reference_protospacer } + } + // // MODULE: Prepare reference sequence