Add sylph sketch/profile modules and a per-sample 'reference' samplesheet column.

What this patch does:
  * assets/samplesheet.csv, assets/schema_input.json: add an optional
    'reference' column (FASTA or tar archive, optionally gzipped).
  * conf/test.config: point the test profile at the bundled samplesheet.
  * modules/local/sylph/{sketch,profile}: new local modules wrapping
    `sylph sketch` and `sylph profile`; both emit versions.yml.
  * subworkflows/local/utils_nfcore_seqinspector_pipeline: carry 'reference'
    through the samplesheet channel as a third tuple element.
  * workflows/seqinspector.nf: run SYLPH_SKETCH -> SYLPH_PROFILE on the
    samplesheet channel.

NOTE(review): this patch was re-flowed from a whitespace-collapsed copy. The
indentation of context lines (and the alignment inside `include { ... }`
lines) is reconstructed and must be confirmed against the target tree, or the
patch applied with `git apply --ignore-whitespace`. Blob ids on the `index`
lines are carried over from the original patch and are stale for edited files.

diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
index ba2542d..e809af0 100644
--- a/assets/samplesheet.csv
+++ b/assets/samplesheet.csv
@@ -1,7 +1,2 @@
-sample,fastq_1,fastq_2,rundir,tags
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A2_S2_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A2_S2_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane1
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A3_S3_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A3_S3_L002_R2_001.fastq.gz,/path/to/rundir,paired_sample:lane2
-SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group1
-SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group2
-SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,,/path/to/rundir,group3
+sample,fastq_1,fastq_2,rundir,tags,reference
+test2,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test2_2.fastq.gz,,,https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.fasta
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 97ec617..0e4c91a 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -27,6 +27,12 @@
                 "pattern": "^\\S+\\.f(ast)?q\\.gz$",
                 "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
             },
+            "reference": {
+                "type": "string",
+                "format": "file-path",
+                "pattern": "^\\S+\\.(fasta|fa|fna|tar)(\\.gz)?$",
+                "errorMessage": "Reference file cannot contain spaces and must have extension '.fasta', '.fa', '.fna' or '.tar', optionally followed by '.gz'"
+            },
             "rundir": {
                 "type": "string",
                 "format": "path",
diff --git a/conf/test.config b/conf/test.config
index 76a7ad0..ac6807f 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -25,7 +25,10 @@ params {
     // Input data
     // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
     // TODO nf-core: Give any required params for the test so that command line flags are not needed
-    input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv'
+    //input = params.pipelines_testdata_base_path + 'seqinspector/testdata/NovaSeq6000/samplesheet.csv'
+
+    // Bundled samplesheet that carries the extra 'reference' column; resolved against projectDir so the test profile works from any launch directory
+    input = "${projectDir}/assets/samplesheet.csv"
 
     // Genome references
     genome = 'R64-1-1'
diff --git a/modules/local/sylph/profile/environment.yml b/modules/local/sylph/profile/environment.yml
new file mode 100644
index 0000000..54521da
--- /dev/null
+++ b/modules/local/sylph/profile/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::sylph=0.6.1
diff --git a/modules/local/sylph/profile/main.nf b/modules/local/sylph/profile/main.nf
new file mode 100644
index 0000000..29c76e2
--- /dev/null
+++ b/modules/local/sylph/profile/main.nf
@@ -0,0 +1,48 @@
+// Profile one sample's read sketch(es) against a genome database sketch with `sylph profile`.
+process SYLPH_PROFILE {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/sylph:0.6.1--h4ac6f70_0' :
+        'biocontainers/sylph:0.6.1--h4ac6f70_0' }"
+
+    input:
+    // meta: sample metadata map; the two paths are passed to sylph in this order
+    tuple val(meta), path(sketch_fastq), path(sketch_fastq_genome)
+
+    output:
+    tuple val(meta), path('profile_out.tsv'), emit: profile_out
+    // Declared so the versions.yml written below can be collected by the workflow
+    path 'versions.yml'                     , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    sylph profile \\
+        -t $task.cpus \\
+        $args \\
+        $sketch_fastq \\
+        $sketch_fastq_genome \\
+        -o profile_out.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sylph: \$(sylph -V|awk '{print \$2}')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch profile_out.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sylph: \$(sylph -V|awk '{print \$2}')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/sylph/sketch/environment.yml b/modules/local/sylph/sketch/environment.yml
new file mode 100644
index 0000000..54521da
--- /dev/null
+++ b/modules/local/sylph/sketch/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::sylph=0.6.1
diff --git a/modules/local/sylph/sketch/main.nf b/modules/local/sylph/sketch/main.nf
new file mode 100644
index 0000000..d360061
--- /dev/null
+++ b/modules/local/sylph/sketch/main.nf
@@ -0,0 +1,58 @@
+// Sketch a sample's reads and its reference genome(s) with `sylph sketch`.
+process SYLPH_SKETCH {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/sylph:0.6.1--h4ac6f70_0' :
+        'biocontainers/sylph:0.6.1--h4ac6f70_0' }"
+
+    input:
+    tuple val(meta), path(reads), path(reference)
+
+    output:
+    tuple val(meta), path('my_sketches/*.sylsp'), path('database.syldb'), emit: sketch_fastq_genome
+    // Declared so the versions.yml written below can be collected by the workflow
+    path 'versions.yml'                                                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Single-end reads go through -r, paired-end mates through -1/-2
+    def fastq = meta.single_end ? "-r ${reads[0]}" : "-1 ${reads[0]} -2 ${reads[1]}"
+    """
+    sylph sketch \\
+        -t $task.cpus \\
+        $args \\
+        $fastq \\
+        -g $reference \\
+        -S $prefix \\
+        -d my_sketches \\
+        -c 200 \\
+        -k 31
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sylph: \$(sylph -V|awk '{print \$2}')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def end = meta.single_end ? '' : '.paired'
+    // Create every declared output at the path the output block expects
+    """
+    mkdir -p my_sketches
+    touch my_sketches/${prefix}${end}.sylsp
+    touch database.syldb
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sylph: \$(sylph -V|awk '{print \$2}')
+    END_VERSIONS
+    """
+}
diff --git a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf
index a84e064..64c0566 100644
--- a/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_seqinspector_pipeline/main.nf
@@ -77,20 +77,22 @@ workflow PIPELINE_INITIALISATION {
         .toList()
         .flatMap { it.withIndex().collect { entry, idx -> entry + "${idx+1}" } }
         .map {
-            meta, fastq_1, fastq_2, idx ->
+            meta, fastq_1, fastq_2, reference, idx ->
                 def tags = meta.tags ? meta.tags.tokenize(":") : []
                 def updated_meta = meta + [ id:"${meta.sample}_${idx}", tags:tags ]
                 if (!fastq_2) {
                     return [
                         updated_meta.id,
                         updated_meta + [ single_end:true ],
-                        [ fastq_1 ]
+                        [ fastq_1 ],
+                        reference
                     ]
                 } else {
                     return [
                         updated_meta.id,
                         updated_meta + [ single_end:false ],
-                        [ fastq_1, fastq_2 ]
+                        [ fastq_1, fastq_2 ],
+                        reference
                     ]
                 }
         }
@@ -103,11 +105,11 @@ workflow PIPELINE_INITIALISATION {
         // meta, fastqs ->
         //     return [ meta, fastqs.flatten() ]
         // }
         .set { ch_samplesheet }
 
     ch_samplesheet
         .map {
-            meta, fastqs -> meta.tags
+            meta, fastqs, reference -> meta.tags
         }
         .flatten()
         .unique()
@@ -189,7 +191,7 @@ def validateInputParameters() {
 // Validate channels from input samplesheet
 //
 def validateInputSamplesheet(input) {
-    def (metas, fastqs) = input[1..2]
+    def (metas, fastqs, reference) = input[1..3]
 
     // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
     def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1
@@ -197,7 +199,7 @@ def validateInputSamplesheet(input) {
         error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
     }
 
-    return [ metas[0], fastqs ]
+    return [ metas[0], fastqs, reference ]
 }
 //
 // Get attribute from genome config file e.g. fasta
diff --git a/workflows/seqinspector.nf b/workflows/seqinspector.nf
index 7a2dfae..e7f8287 100644
--- a/workflows/seqinspector.nf
+++ b/workflows/seqinspector.nf
@@ -7,6 +7,9 @@
 include { SEQTK_SAMPLE                  } from '../modules/nf-core/seqtk/sample/main'
 include { FASTQC                        } from '../modules/nf-core/fastqc/main'
 
+include { SYLPH_SKETCH                  } from '../modules/local/sylph/sketch/main'
+include { SYLPH_PROFILE                 } from '../modules/local/sylph/profile/main'
+
 include { MULTIQC as MULTIQC_GLOBAL     } from '../modules/nf-core/multiqc/main'
 include { MULTIQC as MULTIQC_PER_TAG    } from '../modules/nf-core/multiqc/main'
 
@@ -38,7 +41,7 @@ workflow SEQINSPECTOR {
     if (params.sample_size > 0 ) {
         ch_sample_sized = SEQTK_SAMPLE(
             ch_samplesheet.map {
-                meta, reads -> [meta, reads, params.sample_size]
+                meta, reads, reference -> [meta, reads, params.sample_size]
             }
         ).reads
         ch_versions = ch_versions.mix(SEQTK_SAMPLE.out.versions.first())
@@ -52,7 +55,7 @@ workflow SEQINSPECTOR {
     //
     FASTQC (
         ch_sample_sized.map {
-            meta, subsampled -> [meta, subsampled]
+            entry -> [ entry[0], entry[1] ] // arity-agnostic: keeps [meta, reads] whether or not a reference element is present
         }
     )
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip)
@@ -69,6 +72,20 @@ workflow SEQINSPECTOR {
             newLine: true
         ).set { ch_collated_versions }
 
+    //
+    // MODULE: Run SYLPH (sketch reads + reference, then profile the sketches)
+    //
+    // NOTE(review): the versions channel is collated just above this point, so
+    // SYLPH_*.out.versions is not captured here - confirm and move this block if needed.
+    SYLPH_SKETCH (
+        ch_samplesheet
+    )
+
+    sketch_files = SYLPH_SKETCH.out.sketch_fastq_genome
+
+    SYLPH_PROFILE (
+        sketch_files
+    )
 
     //
     // MODULE: MultiQC