Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retire snpeff_genome #1656

Merged
merged 9 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 13 additions & 26 deletions conf/igenomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ params {
known_indels_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_phase1.indels.b37.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.b37.vcf.gz'
mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/Control-FREEC/out100m2_hg19.gem"
ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_woChr.bed"
snpeff_db = '87'
snpeff_genome = 'GRCh37'
snpeff_db = 'GRCh37.87'
vep_cache_version = '111'
vep_genome = 'GRCh37'
vep_species = 'homo_sapiens'
Expand Down Expand Up @@ -73,8 +72,7 @@ params {
pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz"
pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi"
sentieon_dnascope_model = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Sentieon/SentieonDNAscopeModel1.1.model"
snpeff_db = '105'
snpeff_genome = 'GRCh38'
snpeff_db = 'GRCh38.105'
vep_cache_version = '111'
vep_genome = 'GRCh38'
vep_species = 'homo_sapiens'
Expand All @@ -84,8 +82,7 @@ params {
fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa"
ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_woChr.bed"
readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt"
snpeff_db = '87'
snpeff_genome = 'GRCh37'
snpeff_db = 'GRCh37.87'
vep_cache_version = '111'
vep_genome = 'GRCh37'
vep_species = 'homo_sapiens'
Expand All @@ -94,8 +91,7 @@ params {
bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa"
ngscheckmate_bed ="${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/NGSCheckMate/SNP_GRCh38_hg38_wChr.bed"
snpeff_db = '105'
snpeff_genome = 'GRCh38'
snpeff_db = 'GRCh38.105'
vep_cache_version = '111'
vep_genome = 'GRCh38'
vep_species = 'homo_sapiens'
Expand All @@ -118,8 +114,7 @@ params {
known_indels_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz.tbi"
mappability = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Control-FREEC/GRCm38_68_mm10.gem"
readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt"
snpeff_db = '99'
snpeff_genome = 'GRCm38'
snpeff_db = 'GRCm38.99'
vep_cache_version = '102'
vep_genome = 'GRCm38'
vep_species = 'mus_musculus'
Expand All @@ -138,17 +133,15 @@ params {
bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa"
readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt"
snpeff_db = '75'
snpeff_genome = 'UMD3.1'
snpeff_db = 'UMD3.1.75'
vep_cache_version = '94'
vep_genome = 'UMD3.1'
vep_species = 'bos_taurus'
}
'WBcel235' {
bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa"
snpeff_db = '105'
snpeff_genome = 'WBcel235'
snpeff_db = 'WBcel235.105'
vep_cache_version = '111'
vep_genome = 'WBcel235'
vep_species = 'caenorhabditis_elegans'
Expand All @@ -157,8 +150,7 @@ params {
bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa"
readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt"
snpeff_db = '99'
snpeff_genome = 'CanFam3.1'
snpeff_db = 'CanFam3.1.99'
vep_cache_version = '104'
vep_genome = 'CanFam3.1'
vep_species = 'canis_lupus_familiaris'
Expand Down Expand Up @@ -215,8 +207,7 @@ params {
'R64-1-1' {
bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa"
snpeff_db = '105'
snpeff_genome = 'R64-1-1'
snpeff_db = 'R64-1-1.105'
vep_cache_version = '111'
vep_genome = 'R64-1-1'
vep_species = 'saccharomyces_cerevisiae'
Expand All @@ -243,8 +234,7 @@ params {
'hg38' {
bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa"
snpeff_db = '105'
snpeff_genome = 'GRCh38'
snpeff_db = 'GRCh38.105'
vep_cache_version = '111'
vep_genome = 'GRCh38'
vep_species = 'homo_sapiens'
Expand All @@ -253,8 +243,7 @@ params {
bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa"
readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt"
snpeff_db = '87'
snpeff_genome = 'GRCh37'
snpeff_db = 'GRCh37.87'
vep_cache_version = '111'
vep_genome = 'GRCh37'
vep_species = 'homo_sapiens'
Expand All @@ -263,8 +252,7 @@ params {
bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/"
fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa"
readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt"
snpeff_db = '99'
snpeff_genome = 'GRCm38'
snpeff_db = 'GRCm38.99'
vep_cache_version = '102'
vep_genome = 'GRCm38'
vep_species = 'mus_musculus'
Expand Down Expand Up @@ -334,8 +322,7 @@ params {
known_indels_tbi = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi"
known_indels_vqsr = '--resource:mills,known=false,training=true,truth=true,prior=10.0 mills_and_1000G.indels.vcf.gz'
ngscheckmate_bed = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed"
snpeff_db = '105'
snpeff_genome = 'WBcel235'
snpeff_db = 'WBcel235.105'
vep_cache_version = '111'
vep_genome = 'WBcel235'
vep_species = 'caenorhabditis_elegans'
Expand Down
10 changes: 7 additions & 3 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -898,16 +898,14 @@ By default all is specified in the [igenomes.config](https://github.com/nf-core/
Explanation can be found for all params in the documentation:

- [snpeff_db](https://nf-co.re/sarek/parameters#snpeff_db)
- [snpeff_genome](https://nf-co.re/sarek/parameters#snpeff_genome)
- [vep_genome](https://nf-co.re/sarek/parameters#vep_genome)
- [vep_species](https://nf-co.re/sarek/parameters#vep_species)
- [vep_cache_version](https://nf-co.re/sarek/parameters#vep_cache_version)

With the previous example of `GRCh38`, these are the values that were used for these params:

```bash
snpeff_db = '105'
snpeff_genome = 'GRCh38'
snpeff_db = 'GRCh38.105'
vep_cache_version = '110'
vep_genome = 'GRCh38'
vep_species = 'homo_sapiens'
Expand Down Expand Up @@ -1014,6 +1012,12 @@ This command could be used to point to the recently downloaded cache and run Snp
nextflow run nf-core/sarek --outdir results --vep_cache /path_to/my-own-cache/vep_cache --snpeff_cache /path_to/my-own-cache/snpeff_cache --tools vep,snpeff --input samplesheet_vcf.csv
```

Here is an example of how sarek can be used to download the SnpEff cache for Candida auris:

```bash
nextflow run nf-core/sarek --outdir results --outdir_cache /path_to/my-own-cache --tools snpeff --download_cache --build_only_index --input false --snpeff_db _candida_auris_gca_001189475 --step annotate --genome null --igenomes_ignore
```

### Create containers with pre-downloaded cache

nf-core is no longer maintaining containers with pre-downloaded cache. Hosting the cache within the container is not recommended as it can cause a number of problems. Instead, we recommend using an external cache. The following is left for legacy reasons.
Expand Down
6 changes: 2 additions & 4 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ params.pon = getGenomeAttribute('pon')
params.pon_tbi = getGenomeAttribute('pon_tbi')
params.sentieon_dnascope_model = getGenomeAttribute('sentieon_dnascope_model')
params.snpeff_db = getGenomeAttribute('snpeff_db')
params.snpeff_genome = getGenomeAttribute('snpeff_genome')
params.vep_cache_version = getGenomeAttribute('vep_cache_version')
params.vep_genome = getGenomeAttribute('vep_genome')
params.vep_species = getGenomeAttribute('vep_species')
Expand Down Expand Up @@ -235,7 +234,7 @@ workflow NFCORE_SAREK {
if (params.download_cache) {
// Assuming that even if the cache is provided, if the user specifies download_cache, sarek will download the cache
ensemblvep_info = Channel.of([ [ id:"${params.vep_cache_version}_${params.vep_genome}" ], params.vep_genome, params.vep_species, params.vep_cache_version ])
snpeff_info = Channel.of([ [ id:"${params.snpeff_genome}.${params.snpeff_db}" ], params.snpeff_genome, params.snpeff_db ])
snpeff_info = Channel.of([ [ id:"${params.snpeff_db}" ], params.snpeff_db ])
DOWNLOAD_CACHE_SNPEFF_VEP(ensemblvep_info, snpeff_info)
snpeff_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.snpeff_cache
vep_cache = DOWNLOAD_CACHE_SNPEFF_VEP.out.ensemblvep_cache.map{ meta, cache -> [ cache ] }
Expand All @@ -244,9 +243,8 @@ workflow NFCORE_SAREK {
} else {
// Looks for cache information either locally or on the cloud
ANNOTATION_CACHE_INITIALISATION(
(params.snpeff_cache && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))),
(params.snpeff_db && params.tools && (params.tools.split(',').contains("snpeff") || params.tools.split(',').contains('merge'))),
asp8200 marked this conversation as resolved.
Show resolved Hide resolved
params.snpeff_cache,
params.snpeff_genome,
params.snpeff_db,
(params.vep_cache && params.tools && (params.tools.split(',').contains("vep") || params.tools.split(',').contains('merge'))),
params.vep_cache,
Expand Down
6 changes: 3 additions & 3 deletions modules/nf-core/snpeff/download/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions modules/nf-core/snpeff/snpeff/tests/main.nf.test

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions modules/nf-core/snpeff/snpeff/tests/nextflow.config

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 0 additions & 6 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -766,12 +766,6 @@
"description": "snpEff DB version.",
"help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the database to be used for annotation.\nAlternatively, database names can be listed with `snpEff databases`."
},
"snpeff_genome": {
"type": "string",
"fa_icon": "fas fa-microscope",
"description": "snpEff genome.",
"help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the genome when looking for local cache, or cloud based cache."
},
"vep_genome": {
"type": "string",
"fa_icon": "fas fa-microscope",
Expand Down
7 changes: 3 additions & 4 deletions subworkflows/local/annotation_cache_initialisation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ workflow ANNOTATION_CACHE_INITIALISATION {
take:
snpeff_enabled
snpeff_cache
snpeff_genome
snpeff_db
vep_enabled
vep_cache
Expand All @@ -24,8 +23,8 @@ workflow ANNOTATION_CACHE_INITIALISATION {

main:
if (snpeff_enabled) {
def snpeff_annotation_cache_key = (snpeff_cache == "s3://annotation-cache/snpeff_cache/") ? "${snpeff_genome}.${snpeff_db}/" : ""
def snpeff_cache_dir = "${snpeff_annotation_cache_key}${snpeff_genome}.${snpeff_db}"
def snpeff_annotation_cache_key = (snpeff_cache == "s3://annotation-cache/snpeff_cache/") ? "${snpeff_db}/" : ""
def snpeff_cache_dir = "${snpeff_annotation_cache_key}${snpeff_db}"
def snpeff_cache_path_full = file("$snpeff_cache/$snpeff_cache_dir", type: 'dir')
if ( !snpeff_cache_path_full.exists() || !snpeff_cache_path_full.isDirectory() ) {
if (snpeff_cache == "s3://annotation-cache/snpeff_cache/") {
Expand All @@ -35,7 +34,7 @@ workflow ANNOTATION_CACHE_INITIALISATION {
}
}
snpeff_cache = Channel.fromPath(file("${snpeff_cache}/${snpeff_annotation_cache_key}"), checkIfExists: true).collect()
.map{ cache -> [ [ id:"${snpeff_genome}.${snpeff_db}" ], cache ] }
.map{ cache -> [ [ id:"${snpeff_db}" ], cache ] }
} else snpeff_cache = []

if (vep_enabled) {
Expand Down
2 changes: 1 addition & 1 deletion tests/test_annotation_cache.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
exit_code: 1

- name: Fail to locate snpEff cache
command: nextflow run main.nf -profile test,annotation --snpeff_cache s3://annotation-cache/snpeff_cache/ --snpeff_genome na --tools snpeff --input false --build_only_index --outdir results
command: nextflow run main.nf -profile test,annotation --snpeff_cache s3://annotation-cache/snpeff_cache/ --snpeff_db na --tools snpeff --input false --build_only_index --outdir results
tags:
- annotation
- cache
Expand Down
2 changes: 1 addition & 1 deletion workflows/sarek/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,7 @@ workflow SAREK {
vcf_to_annotate.map{meta, vcf -> [ meta + [ file_name: vcf.baseName ], vcf ] },
vep_fasta,
params.tools,
params.snpeff_genome ? "${params.snpeff_genome}.${params.snpeff_db}" : "${params.genome}.${params.snpeff_db}",
params.snpeff_db,
maxulysse marked this conversation as resolved.
Show resolved Hide resolved
snpeff_cache,
vep_genome,
vep_species,
Expand Down
Loading