Skip to content

Commit

Permalink
feat: download of ClinGen dosage sensitivity (#61)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Nov 7, 2023
1 parent a2e63f0 commit 17fa976
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 1 deletion.
9 changes: 8 additions & 1 deletion Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ rule all:
f"work/annos/grch38/features/cons/{DV.ucsc_cons_38}/ucsc_conservation.tsv",
f"work/annos/grch38/features/ensembl/{DV.ensembl_38}/ensembl_genes.bed.gz",
f"work/annos/grch38/features/refseq/{DV.refseq_38}/refseq_genes.bed.gz",
# f"work/annos/grch38/features/clingen_dosage/{DV.today}/clingen_dosage_sensitivity_regions.bed.gz",
#
# == output directory ===================================================================
#
Expand Down Expand Up @@ -161,6 +162,9 @@ rule all:
# ----- conservation
f"output/full/annonars/cons-grch37-{DV.ucsc_cons_37}+{PV.annonars}/rocksdb/IDENTITY",
f"output/full/annonars/cons-grch38-{DV.ucsc_cons_38}+{PV.annonars}/rocksdb/IDENTITY",
# ----- features
f"output/full/annonars/clingen-dosage-grch37/{DV.today}/clingen_region_curation_list.bed.gz",
f"output/full/annonars/clingen-dosage-grch38/{DV.today}/clingen_region_curation_list.bed.gz",
# ----- genes
f"output/full/annonars/genes-{DV.acmg_sf}+{DV.gnomad_constraints}+{DV.dbnsfp}+{DV.hpo}+{DV.orphapacket}+{DV.today}+{PV.annonars}/rocksdb/IDENTITY",
# -- worker data
Expand Down Expand Up @@ -339,6 +343,7 @@ include: "rules/work/genes/shet.smk"
include: "rules/work/reference/human.smk"
# Features (position and not variant specific).
include: "rules/work/annos/features/cons.smk"
include: "rules/work/annos/features/clingen_dosage.smk"
include: "rules/work/annos/features/ensembl.smk"
include: "rules/work/annos/features/refseq.smk"
include: "rules/work/annos/features/tads.smk"
Expand All @@ -363,7 +368,7 @@ include: "rules/work/annos/strucvars/clinvar.smk"
include: "rules/output/mehari/freqs.smk"
# ---- viguno
include: "rules/output/viguno/hpo.smk"
# ------ annonars
# ---- annonars
include: "rules/output/annonars/cadd.smk"
include: "rules/output/annonars/cons.smk"
include: "rules/output/annonars/dbnsfp.smk"
Expand All @@ -374,6 +379,8 @@ include: "rules/output/annonars/gnomad_genomes.smk"
include: "rules/output/annonars/gnomad_mtdna.smk"
include: "rules/output/annonars/helix.smk"
include: "rules/output/annonars/genes.smk"
# ------ features
include: "rules/output/annonars/clingen_dosage.smk"
# ---- worker
include: "rules/output/worker/patho_mms.smk"
include: "rules/output/worker/clinvar.smk"
Expand Down
3 changes: 3 additions & 0 deletions download_urls.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
- url: ftp://ftp.clinicalgenome.org/ClinGen_region_curation_list_GRCh37.tsv
- url: ftp://ftp.clinicalgenome.org/ClinGen_region_curation_list_GRCh38.tsv

- url: https://storage.googleapis.com/gtex_analysis_v8/annotations/GTEx_Analysis_v8_Annotations_SampleAttributesDS.txt
excerpt_strategy:
strategy: no-excerpt
Expand Down
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/796c631dc892eda6/url.txt
Git LFS file not shown
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/e424cce724cdc500/url.txt
Git LFS file not shown
48 changes: 48 additions & 0 deletions rules/output/annonars/clingen_dosage.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
## Output rules related to ClinGen dosage sensitivity regions.


rule annos_features_clingen_dosage_download_to_bed:  # -- convert ClinGen dosage sensitivity to BED
    # Converts the downloaded ClinGen region curation TSV into a sorted,
    # bgzip-compressed and tabix-indexed BED file, writes MD5 checksums for the
    # BED file and its index, and renders the accompanying spec YAML.
    input:
        tsv="work/download/annos/{genome_release}/features/clingen_dosage/{date}/clingen_region_curation_list.tsv",
    output:
        bed="output/full/annonars/clingen-dosage-{genome_release}/{date}/clingen_region_curation_list.bed.gz",
        bed_md5="output/full/annonars/clingen-dosage-{genome_release}/{date}/clingen_region_curation_list.bed.gz.md5",
        bed_tbi="output/full/annonars/clingen-dosage-{genome_release}/{date}/clingen_region_curation_list.bed.gz.tbi",
        bed_tbi_md5="output/full/annonars/clingen-dosage-{genome_release}/{date}/clingen_region_curation_list.bed.gz.tbi.md5",
        spec_yaml="output/full/annonars/clingen-dosage-{genome_release}/{date}/clingen_region_curation_list.spec.yaml",
    shell:
        r"""
        # grch37 output uses bare contig names; any other release gets a "chr" prefix.
        if [[ "{wildcards.genome_release}" == "grch37" ]]; then
            chr_prefix=
        else
            chr_prefix=chr
        fi

        # Drop the first 7 lines of the download (presumably the file header),
        # then prepend three BED columns (contig, 0-based start, end) parsed
        # from the "contig:start-end" value in column 4 to each original row.
        tail -n +8 {input.tsv} \
        | awk -v chr_prefix="$chr_prefix" -F $'\t' 'BEGIN {{ OFS=FS }}
            {{
                if ($4 == "tbd") {{
                    next;  # skip, unmatched region
                }}
                split($4, a, /[:-]/);
                sub(/^chr/, "", a[1]);
                print chr_prefix a[1], a[2] - 1, a[3], $0;
            }}' \
        | LC_ALL=C sort -k1,1V -k2,2n \
        | bgzip -c \
        > {output.bed}

        # tabix writes the index next to the BED file, i.e. to the bed_tbi output.
        tabix -f {output.bed}

        md5sum {output.bed} > {output.bed_md5}
        md5sum {output.bed_tbi} > {output.bed_tbi_md5}

        varfish-db-downloader tpl \
            --template rules/output/annonars/clingen_dosage.spec.yaml \
            --value today={wildcards.date} \
            --value genome_release={wildcards.genome_release} \
            --value v_downloader={PV.downloader} \
        > {output.spec_yaml}
        """
13 changes: 13 additions & 0 deletions rules/output/annonars/clingen_dosage.spec.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Spec template for the ClinGen dosage sensitivity BED output.
# Rendered via `varfish-db-downloader tpl`, which is invoked with the values
# `today`, `genome_release` and `v_downloader`; only the first two are used here.
dc.identifier: annonars/features/clingen-dosage:{{ today }}-{{ genome_release }}
dc.title: ClinGen Dosage Sensitivity
dc.creator: NCBI ClinGen Team
dc.contributor:
  - VarFish Developer Teams
dc.format: application/x-bed
# Quoted so that the all-digit date stays a string after rendering.
dc.date: "{{ today }}"
x-version: "{{ today }}"
x-genome-release: "{{ genome_release }}"
dc.description: |
  BED file with ClinGen dosage sensitivity regions.
dc.source:
  - https://search.clinicalgenome.org/kb/downloads#section_dosage
26 changes: 26 additions & 0 deletions rules/work/annos/features/clingen_dosage.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
## Work rules related to ClinGen dosage sensitivity regions.


rule annos_features_clingen_dosage_download:  # -- download ClinGen dosage sensitivity
    # Fetches the ClinGen region curation list for one genome release from the
    # ClinGen FTP server and stores it together with its MD5 checksum.
    output:
        tsv="work/download/annos/{genome_release}/features/clingen_dosage/{date}/clingen_region_curation_list.tsv",
        tsv_md5="work/download/annos/{genome_release}/features/clingen_dosage/{date}/clingen_region_curation_list.tsv.md5",
    shell:
        r"""
        # Only download when the requested date is today, unless FORCE_TODAY is True.
        if [[ "$(date +%Y%m%d)" != "{wildcards.date}" && "{FORCE_TODAY}" != "True" ]]; then
            >&2 echo "{wildcards.date} is not today"
            exit 1
        fi

        # Map the release wildcard to the spelling used in the remote file name;
        # anything other than grch37 is fetched as GRCh38.
        case "{wildcards.genome_release}" in
            grch37) URL_RELEASE=GRCh37 ;;
            *) URL_RELEASE=GRCh38 ;;
        esac

        wget --no-check-certificate \
            -O {output.tsv} \
            ftp://ftp.clinicalgenome.org/ClinGen_region_curation_list_${{URL_RELEASE}}.tsv

        md5sum {output.tsv} > {output.tsv_md5}
        """

0 comments on commit 17fa976

Please sign in to comment.