From 0055a318d4069be8814493392d1bebe6f7fbb57e Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Tue, 21 Nov 2023 18:03:59 +0100 Subject: [PATCH] feat: build annonars regions --- Snakefile | 4 ++ rules/output/annonars/regions.smk | 54 +++++++++++++++++++++++++ rules/output/annonars/regions.spec.yaml | 14 +++++++ 3 files changed, 72 insertions(+) create mode 100644 rules/output/annonars/regions.smk create mode 100644 rules/output/annonars/regions.spec.yaml diff --git a/Snakefile b/Snakefile index c0f1dea..c1ea664 100644 --- a/Snakefile +++ b/Snakefile @@ -165,8 +165,11 @@ rule all: f"output/full/annonars/gnomad-sv-exomes-grch38-{DV.gnomad_cnv4}+{PV.annonars}/rocksdb/IDENTITY", f"output/full/annonars/gnomad-sv-genomes-grch37-{DV.gnomad_sv}+{PV.annonars}/rocksdb/IDENTITY", f"output/full/annonars/gnomad-sv-genomes-grch38-{DV.gnomad_sv4}+{PV.annonars}/rocksdb/IDENTITY", + # ----- sequence annotation f"output/full/annonars/functional-grch37-{DV.refseq_fe_37}+{PV.annonars}/rocksdb/IDENTITY", f"output/full/annonars/functional-grch38-{DV.refseq_fe_38}+{PV.annonars}/rocksdb/IDENTITY", + f"output/full/annonars/regions-grch37-{DV.today}+{PV.annonars}/rocksdb/IDENTITY", + f"output/full/annonars/regions-grch38-{DV.today}+{PV.annonars}/rocksdb/IDENTITY", # ----- conservation f"output/full/annonars/cons-grch37-{DV.ucsc_cons_37}+{PV.annonars}/rocksdb/IDENTITY", f"output/full/annonars/cons-grch38-{DV.ucsc_cons_38}+{PV.annonars}/rocksdb/IDENTITY", @@ -389,6 +392,7 @@ include: "rules/output/annonars/gnomad_sv.smk" include: "rules/output/annonars/helix.smk" include: "rules/output/annonars/genes.smk" include: "rules/output/annonars/functional.smk" +include: "rules/output/annonars/regions.smk" # ---- worker include: "rules/output/worker/patho_mms.smk" include: "rules/output/worker/clinvar.smk" diff --git a/rules/output/annonars/regions.smk b/rules/output/annonars/regions.smk new file mode 100644 index 0000000..a93ad97 --- /dev/null +++ b/rules/output/annonars/regions.smk @@ -0,0 +1,54 
@@
+## Rules to build the annonars regions annotation database.
+
+
+rule work_annonars_regions_download:  # -- download clingen regions
+    output:
+        "work/download/clingen/{genome_release}/{today}/ClinGen_region_curation_list_{genome_release}.tsv",
+    shell:
+        r"""
+        if [[ "{wildcards.genome_release}" == "grch37" ]]; then  # map wildcard to the casing used in the remote file name
+            GENOME=GRCh37
+        else
+            GENOME=GRCh38
+        fi
+
+        wget -O {output} \
+            ftp://ftp.clinicalgenome.org/ClinGen_region_curation_list_$GENOME.tsv
+        """
+
+rule output_annonars_regions:  # -- build annonars regions RocksDB file
+    input:
+        "work/download/clingen/{genome_release}/{date}/ClinGen_region_curation_list_{genome_release}.tsv",
+    output:
+        rocksdb_identity=(
+            "output/full/annonars/regions-{genome_release}-{date}+{v_annonars}/"
+            "rocksdb/IDENTITY"
+        ),
+        spec_yaml=(
+            "output/full/annonars/regions-{genome_release}-{date}+{v_annonars}/spec.yaml"
+        ),
+    wildcard_constraints:
+        v_refseq=RE_VERSION,  # NOTE(review): this rule has no v_refseq wildcard; looks like a copy-paste leftover
+        v_annonars=RE_VERSION,
+    shell:
+        r"""
+        if [[ "$(date +%Y%m%d)" != "{wildcards.date}" ]] && [[ "{FORCE_TODAY}" != "True" ]]; then  # refuse dates other than today unless FORCE_TODAY is True
+            >&2 echo "{wildcards.date} is not today"
+            exit 1
+        fi
+
+        annonars regions import -vvv \
+            --genome-release {wildcards.genome_release} \
+            --path-in-clingen {input} \
+            --path-out-rocksdb $(dirname {output.rocksdb_identity})
+
+        varfish-db-downloader tpl \
+            --template rules/output/annonars/regions.spec.yaml \
+            --value today={TODAY} \
+            --value genome_release={wildcards.genome_release} \
+            --value version={wildcards.date}+{wildcards.v_annonars} \
+            \
+            --value v_annonars={wildcards.v_annonars} \
+            --value v_downloader={PV.downloader} \
+        > {output.spec_yaml}
+        """
diff --git a/rules/output/annonars/regions.spec.yaml b/rules/output/annonars/regions.spec.yaml
new file mode 100644
index 0000000..88e5206
--- /dev/null
+++ b/rules/output/annonars/regions.spec.yaml
@@ -0,0 +1,14 @@
+dc.identifier: annonars/regions:{{ version }}-{{ genome_release }}
+dc.title: annonars regions annotation RocksDB
+dc.creator: VarFish Developer Teams
+dc.format: application/x-rocksdb
+dc.date: {{ today }}
+x-version: {{ version }}
+x-genome-release: {{ genome_release }}
+dc.description: |
+  RocksDB with region annotation.
+dc.source:
+  - https://search.clinicalgenome.org/kb/gene-dosage
+x-created-from:
+  - name: ClinGen Region Dosage Pathogenicity
+    version: {{ today }}