Skip to content

Commit

Permalink
Merge branch 'dev' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurenceKuhl authored Dec 14, 2023
2 parents 0e4573f + e0fd8d3 commit bfcc005
Show file tree
Hide file tree
Showing 12 changed files with 148 additions and 50 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ jobs:
- "latest-everything"
ANALYSIS:
- "test_screening"
- "test_screening_paired"
- "test_targeted"
- "test_umis"
steps:
Expand Down
8 changes: 6 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
ext.prefix = 'count_table'

}

withName: MAGECK_MLE {
Expand Down Expand Up @@ -131,7 +130,12 @@ process {
}

withName: CUTADAPT {
ext.args = '-g file:overrepresented.fasta -N'
if(params.analysis == 'targeted') {
ext.args = '-g file:overrepresented.fasta -N'
}
if(params.analysis == 'screening' && params.cutadapt) {
ext.args = "-a ${params.cutadapt}"
}
publishDir = [
path: { "${params.outdir}/preprocessing/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
mode: params.publish_dir_mode,
Expand Down
26 changes: 26 additions & 0 deletions conf/test_screening_paired.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/crisprseq -profile test_screening_paired,<conda/docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/

params {
config_profile_name = 'Test screening profile paired-end'
config_profile_description = 'Minimal test dataset to check pipeline function for paired-end data'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'

// Input data (paired-end screening samplesheet and sgRNA library from nf-core/test-datasets)
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/samplesheet_test_paired.csv'
analysis = 'screening'
library = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/brunello_target_sequence.txt"
}
3 changes: 3 additions & 0 deletions conf/test_targeted.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,7 @@ params {

// Aligner
aligner = 'minimap2'

// Steps
overrepresented = true
}
15 changes: 15 additions & 0 deletions docs/output/screening.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d

- [Preprocessing](#preprocessing)
- [FastQC](#fastqc) - Read Quality Control
- [cutadapt](#cutadapt) - Trimming reads from fastq files
- [Counting](#counting)
- [MAGeCK count](#mageck-count) - Mapping reads to reference
- [CNV correction](#cnv-correction)
Expand All @@ -42,6 +43,20 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d

[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/).

### cutadapt

<details markdown="1">
<summary>Output files</summary>

- `cutadapt/`
- `*.log`: log file of the executed command and its output
- `*.trim.fastq.gz`: trimmed fastq files

</details>

[cutadapt](https://cutadapt.readthedocs.io/en/stable/) finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads. MAGeCK count normally detects and trims adapter sequences automatically; however, if trimming lengths differ between reads, cutadapt can be used instead, as mentioned [here](https://sourceforge.net/p/mageck/wiki/advanced_tutorial/).
For further reading and documentation see the [cutadapt helper page](https://cutadapt.readthedocs.io/en/stable/guide.html).

## Counting

### MAGeCK count
Expand Down
14 changes: 8 additions & 6 deletions docs/usage/screening.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,17 @@ After the alignment step, the pipeline currently supports 3 algorithms to detect

MAGeCK RRA performs robust ranking aggregation to identify genes that are consistently ranked highly across multiple replicate screens. To run MAGeCK RRA, the file provided to `--rra_contrasts` contains two columns: treatment and reference. These two columns should be separated by a semicolon (;) and the file should have the `.csv` extension. You can also integrate several samples/conditions by comma-separating them. Please find an example below:

| treatment | reference |
| ----------------------- | ------------------- |
| treatment1 | control1 |
| treatment1,treatment2 | control1,control2 |
| ----------------------- | ------------------- |
| treatment1 | control1 |
| reference | treatment |
| ----------------- | --------------------- |
| control1 | treatment1 |
| control1,control2 | treatment1,treatment2 |

A full example can be found [here](https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/full_test/samplesheet_full.csv).

### cutadapt

MAGeCK is normally able to automatically determine the trimming length and sgRNA length in most cases. Therefore, you don't need to use this step unless MAGeCK fails to do so by itself. If the nucleotide length in front of the sgRNA varies between different reads, you can use cutadapt to remove the adapter sequences by using the flag `--cutadapt ADAPTER`.

### MAGeCK mle

MAGeCK MLE uses a maximum likelihood estimation approach to estimate the effects of gene knockout on cell fitness. It models the read count data of guide RNAs targeting each gene and estimates the dropout probability for each gene. MAGeCK mle requires a design matrix. The design matrix is a `txt` file indicating the effects of different conditions on different samples.
Expand Down
10 changes: 1 addition & 9 deletions modules/nf-core/cutadapt/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

37 changes: 25 additions & 12 deletions modules/nf-core/mageck/count/mageck-count.diff

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 7 additions & 6 deletions modules/nf-core/mageck/count/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 8 additions & 6 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ params {
protospacer = null
library = null
crisprcleanr = null
cutadapt = null
rra_contrasts = null
mle_design_matrix = null
count_table = null
Expand Down Expand Up @@ -192,12 +193,13 @@ profiles {
executor.cpus = 4
executor.memory = 8.GB
}
test { includeConfig 'conf/test_targeted.config' }
test_targeted { includeConfig 'conf/test_targeted.config' }
test_full { includeConfig 'conf/test_full.config' }
test_umis { includeConfig 'conf/test_umis.config' }
test_screening_full { includeConfig 'conf/test_screening_full.config' }
test_screening { includeConfig 'conf/test_screening.config' }
test { includeConfig 'conf/test_targeted.config' }
test_targeted { includeConfig 'conf/test_targeted.config' }
test_full { includeConfig 'conf/test_full.config' }
test_umis { includeConfig 'conf/test_umis.config' }
test_screening_full { includeConfig 'conf/test_screening_full.config' }
test_screening { includeConfig 'conf/test_screening.config' }
test_screening_paired { includeConfig 'conf/test_screening_paired.config' }
}

// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
Expand Down
6 changes: 5 additions & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"required": ["input", "outdir", "analysis"],
"required": ["outdir", "analysis"],
"properties": {
"input": {
"type": "string",
Expand Down Expand Up @@ -183,6 +183,10 @@
"type": "string",
"description": "sgRNA library annotation for crisprcleanR"
},
"cutadapt": {
"type": "string",
"description": "cutadapt adapter for screening analysis"
},
"min_reads": {
"type": "number",
"description": "a filter threshold value for sgRNAs, based on their average counts in the control sample",
Expand Down
51 changes: 43 additions & 8 deletions workflows/crisprseq_screening.nf
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil
// MODULE: Installed directly from nf-core/modules
//
include { FASTQC } from '../modules/nf-core/fastqc/main'
include { CUTADAPT } from '../modules/nf-core/cutadapt/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { MAGECK_COUNT } from '../modules/nf-core/mageck/count/main'
include { MAGECK_MLE } from '../modules/nf-core/mageck/mle/main'
Expand Down Expand Up @@ -87,36 +88,70 @@ workflow CRISPRSEQ_SCREENING {
Channel.fromSamplesheet("input")
.map{ meta, fastq_1, fastq_2, x, y, z ->
// x (reference), y (protospacer), and z (template) are part of the targeted workflows and we don't need them
return [ meta + [ single_end:fastq_2?false:true ], fastq_2?[ fastq_1, fastq_2 ]:[ fastq_1 ] ]
}
return [ meta + [ single_end:fastq_2?false:true ], fastq_2?[ fastq_1, fastq_2 ]:[ fastq_1 ] ] }
.set { ch_input }


//
// MODULE: Run FastQC
//
FASTQC (
ch_input
)
ch_versions = ch_versions.mix(FASTQC.out.versions.first())


ch_versions = ch_versions.mix(FASTQC.out.versions.first())
empty_channel = Channel.value([[]])
ch_input_cutadapt = ch_input.combine(Channel.value([[]]))

if(params.cutadapt) {
CUTADAPT(
ch_input_cutadapt
)
ch_versions = ch_versions.mix(CUTADAPT.out.versions)

CUTADAPT.out.reads
.map{ meta, fastq ->
[meta, [fastq]]
}
.set { ch_input }
}

// this is to concatenate everything for mageck count

ch_input
.map { meta, fastq ->
[meta.condition, fastq, meta.single_end]
.map { meta, fastqs ->
if(fastqs.size() == 1){
[meta.condition, [fastqs[0]], meta.single_end, []]
} else {
[meta.condition, [fastqs[0]], meta.single_end, [fastqs[1]]]
}
}
// if one element is paired-end and the other single-end throw an error
// otherwise just concatenate the conditions and the fastqs
.reduce { a, b ->
if(a[2] != b[2] ) {
error "Your samplesheet contains a mix of single-end and paired-end data. This is not supported."
}
return ["${a[0]},${b[0]}", a[1] + b[1], b[2]]
return ["${a[0]},${b[0]}", a[1] + b[1], b[2] ,a[3] + b[3]]
}
.map { condition, fastqs, single_end ->
[[id: condition, single_end: single_end], fastqs]
.map { condition, fastqs_1, single_end, fastqs_2 ->
[[id: condition, single_end: single_end], fastqs_1, fastqs_2]
}
.last()
.set { joined }

//
// MODULE: Run FastQC
//
FASTQC (
ch_input
)


ch_versions = ch_versions.mix(FASTQC.out.versions.first())



//
// MODULE: Run mageck count
Expand Down

0 comments on commit bfcc005

Please sign in to comment.