Skip to content

Commit

Permalink
add sketch modules but still need nftest
Browse files Browse the repository at this point in the history
  • Loading branch information
jiahang1233333 committed Oct 30, 2024
1 parent 5e56fc3 commit 0b26de2
Show file tree
Hide file tree
Showing 9 changed files with 151 additions and 16 deletions.
9 changes: 2 additions & 7 deletions assets/samplesheet.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,2 @@
sample,fastq_1,fastq_2,rundir,tags
SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1
SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1
SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane2
SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group1
SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group2
SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group3
sample,fastq_1,fastq_2,rundir,tags,reference
test2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz,,,https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.fasta
6 changes: 6 additions & 0 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@
"pattern": "^\\S+\\.f(ast)?q\\.gz$",
"errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
},
"reference": {
"type": "string",
"format": "file-path",
"pattern": "^\\S+\\.(fasta|fa|fna|tar)(\\.gz)?$",
"errorMessage": "Reference file cannot contain spaces and must have extension '.fasta', '.fa', '.fna' or '.tar', optionally followed by '.gz'"
},
"rundir": {
"type": "string",
"format": "path",
Expand Down
5 changes: 4 additions & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ params {
// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv'
//input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv'

// Local samplesheet that adds an extra 'reference' column
input = './assets/samplesheet.csv'

// Genome references
genome = 'R64-1-1'
Expand Down
7 changes: 7 additions & 0 deletions modules/local/sylph/profile/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the local sylph/profile module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Version is pinned without a "v" prefix so it matches the 0.6.1 build
  # referenced by the module's container images.
  - bioconda::sylph=0.6.1
47 changes: 47 additions & 0 deletions modules/local/sylph/profile/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//
// SYLPH_PROFILE: run `sylph profile` on a reads sketch and a genome database sketch.
//
// Input : tuple of [ meta, sketch_fastq, sketch_fastq_genome ]
// Output: profile_out - tuple of [ meta, profile_out.tsv ]
//         versions    - versions.yml recording the sylph version
//
process SYLPH_PROFILE {
    tag "$meta.id"
    label 'process_high'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sylph:0.6.1--h4ac6f70_0' :
        'biocontainers/sylph:0.6.1--h4ac6f70_0' }"

    input:
    tuple val(meta), path(sketch_fastq), path(sketch_fastq_genome)

    output:
    tuple val(meta), path('profile_out.tsv'), emit: profile_out
    // versions.yml is written by both script and stub, so it is declared here
    // to let the workflow collect it.
    path "versions.yml"                     , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    """
    sylph profile \\
        -t $task.cpus \\
        $args \\
        $sketch_fastq \\
        $sketch_fastq_genome \\
        -o profile_out.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sylph: \$(sylph -V|awk '{print \$2}')
    END_VERSIONS
    """

    stub:
    """
    touch profile_out.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sylph: \$(sylph -V|awk '{print \$2}')
    END_VERSIONS
    """
}
7 changes: 7 additions & 0 deletions modules/local/sylph/sketch/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the local sylph/sketch module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Version is pinned without a "v" prefix so it matches the 0.6.1 build
  # referenced by the module's container images.
  - bioconda::sylph=0.6.1
52 changes: 52 additions & 0 deletions modules/local/sylph/sketch/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
//
// SYLPH_SKETCH: run `sylph sketch` on a sample's reads and its reference genome.
//
// Input : tuple of [ meta, reads, reference ]
//         reads holds one FASTQ when meta.single_end is true, otherwise two.
// Output: sketch_fastq_genome - tuple of [ meta, my_sketches/*.sylsp, database.syldb ]
//         versions            - versions.yml recording the sylph version
//
process SYLPH_SKETCH {
    tag "$meta.id"
    label 'process_high'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sylph:0.6.1--h4ac6f70_0' :
        'biocontainers/sylph:0.6.1--h4ac6f70_0' }"

    input:
    tuple val(meta), path(reads), path(reference)

    output:
    tuple val(meta), path('my_sketches/*.sylsp'), path('database.syldb'), emit: sketch_fastq_genome
    // versions.yml is written by both script and stub, so it is declared here
    // to let the workflow collect it.
    path "versions.yml"                                                 , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Single-end reads go through -r, paired-end reads through -1/-2.
    def fastq = meta.single_end ? "-r ${reads[0]}" : "-1 ${reads[0]} -2 ${reads[1]}"
    """
    sylph sketch \\
        -t $task.cpus \\
        $args \\
        $fastq \\
        -g $reference \\
        -S $prefix \\
        -d my_sketches \\
        -c 200 \\
        -k 31

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sylph: \$(sylph -V|awk '{print \$2}')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def end = meta.single_end ? "" : ".paired"
    // The stub must create every declared output: a .sylsp file inside
    // my_sketches/ and the database.syldb file.
    """
    mkdir -p my_sketches
    touch my_sketches/${prefix}${end}.sylsp
    touch database.syldb

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sylph: \$(sylph -V|awk '{print \$2}')
    END_VERSIONS
    """
}
15 changes: 9 additions & 6 deletions subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -77,20 +77,22 @@ workflow PIPELINE_INITIALISATION {
.toList()
.flatMap { it.withIndex().collect { entry, idx -> entry + "${idx+1}" } }
.map {
meta, fastq_1, fastq_2, idx ->
meta, fastq_1, fastq_2, reference, idx ->
def tags = meta.tags ? meta.tags.tokenize(":") : []
def updated_meta = meta + [ id:"${meta.sample}_${idx}", tags:tags ]
if (!fastq_2) {
return [
updated_meta.id,
updated_meta + [ single_end:true ],
[ fastq_1 ]
[ fastq_1 ],
reference
]
} else {
return [
updated_meta.id,
updated_meta + [ single_end:false ],
[ fastq_1, fastq_2 ]
[ fastq_1, fastq_2 ],
reference
]
}
}
Expand All @@ -103,11 +105,12 @@ workflow PIPELINE_INITIALISATION {
// meta, fastqs ->
// return [ meta, fastqs.flatten() ]
// }
.view{"dadadad$it"}
.set { ch_samplesheet }

ch_samplesheet
.map {
meta, fastqs -> meta.tags
meta, fastqs, reference -> meta.tags
}
.flatten()
.unique()
Expand Down Expand Up @@ -189,15 +192,15 @@ def validateInputParameters() {
// Validate channels from input samplesheet
//
def validateInputSamplesheet(input) {
def (metas, fastqs) = input[1..2]
def (metas, fastqs, reference) = input[1..3]

// Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1
if (!endedness_ok) {
error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
}

return [ metas[0], fastqs ]
return [ metas[0], fastqs, reference ]
}
//
// Get attribute from genome config file e.g. fasta
Expand Down
19 changes: 17 additions & 2 deletions workflows/seqinspector.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
include { SEQTK_SAMPLE } from '../modules/nf-core/seqtk/sample/main'
include { FASTQC } from '../modules/nf-core/fastqc/main'

include {SYLPH_SKETCH } from '../modules/local/sylph/sketch/main'
include {SYLPH_PROFILE } from '../modules/local/sylph/profile/main'

include { MULTIQC as MULTIQC_GLOBAL } from '../modules/nf-core/multiqc/main'
include { MULTIQC as MULTIQC_PER_TAG } from '../modules/nf-core/multiqc/main'

Expand Down Expand Up @@ -38,7 +41,7 @@ workflow SEQINSPECTOR {
if (params.sample_size > 0 ) {
ch_sample_sized = SEQTK_SAMPLE(
ch_samplesheet.map {
meta, reads -> [meta, reads, params.sample_size]
meta, reads, reference -> [meta, reads, params.sample_size]
}
).reads
ch_versions = ch_versions.mix(SEQTK_SAMPLE.out.versions.first())
Expand All @@ -52,7 +55,7 @@ workflow SEQINSPECTOR {
//
FASTQC (
ch_sample_sized.map {
meta, subsampled -> [meta, subsampled]
meta, subsampled, reference -> [meta, subsampled]
}
)
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip)
Expand All @@ -69,6 +72,18 @@ workflow SEQINSPECTOR {
newLine: true
).set { ch_collated_versions }

//
// MODULE: Run SYLPH
//
SYLPH_SKETCH (
ch_samplesheet
)

sketch_files = SYLPH_SKETCH.out.sketch_fastq_genome

SYLPH_PROFILE (
sketch_files
)

//
// MODULE: MultiQC
Expand Down

0 comments on commit 0b26de2

Please sign in to comment.