Skip to content

Commit

Permalink
feat: replace orphapacket by orphadata API access (#84) (#85)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Jan 19, 2024
1 parent 06c800c commit 482bdd0
Show file tree
Hide file tree
Showing 58 changed files with 1,699 additions and 129 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Ignore all pickled data
*.pickle*

# Ignore the workflow directories.
/work/
/output/
Expand Down
15 changes: 10 additions & 5 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,9 @@ rule all:
#
# genes
f"work/download/genes/rcnv/2022/Collins_rCNV_2022.dosage_sensitivity_scores.tsv.gz",
f"work/download/genes/orphapacket/{DV.orphapacket}/orphapacket.tar.gz",
"work/download/genes/alphamissense/1/AlphaMissense_gene_hg38.tsv.gz",
f"work/download/genes/ctd/{DV.today}/CTD_diseases.tsv.gz",
f"work/download/do/{DV.today}/omim-unmapped.csv",
f"work/genes/dbnsfp/{DV.dbnsfp}/genes.tsv.gz",
"work/genes/decipher/v3/decipher_hi_prediction.tsv.gz",
f"work/genes/ensembl/{DV.ensembl}/ensembl_xlink.tsv",
Expand All @@ -96,7 +97,8 @@ rule all:
f"work/genes/gnomad/{DV.gnomad_constraints}/gnomad_constraints.tsv",
f"work/genes/hgnc/{DV.today}/hgnc_info.jsonl",
f"work/genes/omim/{DV.hpo}+{DV.today}/omim_diseases.tsv",
f"work/genes/orphapacket/{DV.orphapacket}+{DV.today}/orpha_diseases.tsv",
f"work/genes/orphadata/{DV.orphadata}/orphadata.jsonl",
f"work/genes/mondo/{DV.today}/mondo.obo",
"work/genes/rcnv/2022/rcnv_collins_2022.tsv",
"work/genes/shet/2019/shet_weghorn_2019.tsv",
f"work/genes/clingen/{DV.today}/ClinGen_gene_curation_list_GRCh37.tsv",
Expand Down Expand Up @@ -177,7 +179,7 @@ rule all:
f"output/full/annonars/cons-grch37-{DV.ucsc_cons_37}+{PV.annonars}/rocksdb/IDENTITY",
f"output/full/annonars/cons-grch38-{DV.ucsc_cons_38}+{PV.annonars}/rocksdb/IDENTITY",
# ----- genes
f"output/full/annonars/genes-{DV.acmg_sf}+{DV.gnomad_constraints}+{DV.dbnsfp}+{DV.hpo}+{DV.orphapacket}+{DV.today}+{PV.annonars}/rocksdb/IDENTITY",
f"output/full/annonars/genes-{DV.acmg_sf}+{DV.gnomad_constraints}+{DV.dbnsfp}+{DV.hpo}+{DV.today}+{PV.annonars}/rocksdb/IDENTITY",
# -- worker data
f"output/full/worker/genes-regions-grch37-{DV.refseq_37}+{PV.worker}/refseq_genes.bin",
f"output/full/worker/genes-regions-grch37-{DV.ensembl_37}+{PV.worker}/ensembl_genes.bin",
Expand Down Expand Up @@ -341,6 +343,9 @@ include: "rules/work/misc/hpo.smk"
include: "rules/work/genes/alphamissense.smk"
include: "rules/work/genes/dbnsfp.smk"
include: "rules/work/genes/clingen.smk"
include: "rules/work/genes/conditions.smk"
include: "rules/work/genes/ctd.smk"
include: "rules/work/genes/do.smk"
include: "rules/work/genes/decipher.smk"
include: "rules/work/genes/ensembl.smk"
include: "rules/work/genes/gnomad.smk"
Expand All @@ -350,11 +355,11 @@ include: "rules/work/genes/mehari_data_tx.smk"
include: "rules/work/genes/ncbi.smk"
include: "rules/work/genes/omim.smk"
include: "rules/work/genes/panelapp.smk"
include: "rules/work/genes/orphapacket.smk"
include: "rules/work/genes/mondo.smk"
include: "rules/work/genes/orphadata.smk"
include: "rules/work/genes/rcnv.smk"
include: "rules/work/genes/shet.smk"
include: "rules/work/genes/domino.smk"
include: "rules/work/genes/clingen.smk"
# Reference sequence--related rules.
include: "rules/work/reference/human.smk"
# Features (position and not variant specific).
Expand Down
55 changes: 50 additions & 5 deletions download_urls.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,48 @@
# Note that these entries only test the availability of the OrphaData API. The file
# excerpt-data/__orphadata__/orphadata.jsonl is used by `genes-orpha-diseases.py` in CI=true mode.
- url: https://api.orphadata.com/rd-cross-referencing/orphacodes
excerpt_strategy:
strategy: no-excerpt
count: null
- url: https://api.orphadata.com/rd-cross-referencing/orphacodes/20?lang=en
excerpt_strategy:
strategy: no-excerpt
count: null
- url: https://api.orphadata.com/rd-associated-genes/orphacodes/20
excerpt_strategy:
strategy: no-excerpt
count: null

- url: https://raw.githubusercontent.com/monarch-initiative/mondo-ingest/main/src/ontology/reports/omim_unmapped_terms.tsv
excerpt_strategy:
strategy: no-excerpt
count: null

- url: https://github.com/DiseaseOntology/HumanDiseaseOntology/raw/main/src/deprecated/DO_NON_Production_Files/omim_import.obo
excerpt_strategy:
strategy: no-excerpt
count: null

- url: https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/main/DOreports/OMIMinDO.tsv
excerpt_strategy:
strategy: no-excerpt
count: null

- url: https://github.com/DiseaseOntology/HumanDiseaseOntology/raw/main/src/deprecated/reports/omim-unmapped.csv
excerpt_strategy:
strategy: no-excerpt
count: null

- url: https://ctdbase.org/reports/CTD_diseases.tsv.gz
excerpt_strategy:
strategy: no-excerpt
count: null

- url: http://purl.obolibrary.org/obo/mondo.obo
excerpt_strategy:
strategy: no-excerpt
count: null

- url: https://panelapp.genomicsengland.co.uk/api/v1/entities/
excerpt_strategy:
strategy: no-excerpt
Expand Down Expand Up @@ -101,7 +146,7 @@
strategy: manual
count: null

- url: https://github.com/Orphanet/orphapacket/archive/refs/tags/v10.1.tar.gz
- url: https://data.bioontology.org/ontologies/ORDO/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb&download_format=csv
excerpt_strategy:
strategy: no-excerpt
count: null
Expand Down Expand Up @@ -130,19 +175,19 @@
strategy: no-excerpt
count: null

- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2023-06-06/hp.obo
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2024-01-16/hp.obo
excerpt_strategy:
strategy: no-excerpt
count: null
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2023-06-06/phenotype.hpoa
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2024-01-16/phenotype.hpoa
excerpt_strategy:
strategy: no-excerpt
count: null
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2023-06-06/phenotype_to_genes.txt
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2024-01-16/phenotype_to_genes.txt
excerpt_strategy:
strategy: no-excerpt
count: null
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2023-06-06/genes_to_phenotype.txt
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2024-01-16/genes_to_phenotype.txt
excerpt_strategy:
strategy: no-excerpt
count: null
Expand Down
8 changes: 7 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ dependencies:
- click
- loguru
- numpy
- pydantic
- pronto >=2.5,<3.0
- pyyaml
- requests
- requests-ftp
Expand Down Expand Up @@ -41,9 +43,13 @@ dependencies:
# Parallel (de)compression.
- pigz
# Varfish related
- annonars =0.33.0
- annonars =0.34.0
- viguno =0.2.0
- mehari =0.21.1
- varfish-server-worker =0.10.2
# S3 uploads
- s5cmd =2.1.0
# async HTTP requests
- httpx =0.25.0
- httpcore =0.18.0
- trio
3 changes: 3 additions & 0 deletions excerpt-data/111d8c6e08038f62/20
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/111d8c6e08038f62/url.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/1963f3c58ea066be/omim_unmapped_terms.tsv
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/1963f3c58ea066be/url.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/32c97f6adaf88f01/hp.obo
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/32c97f6adaf88f01/url.txt
Git LFS file not shown
3 changes: 0 additions & 3 deletions excerpt-data/366878c8178827e3/phenotype_to_genes.txt

This file was deleted.

3 changes: 0 additions & 3 deletions excerpt-data/366878c8178827e3/url.txt

This file was deleted.

3 changes: 3 additions & 0 deletions excerpt-data/41961c7350780224/phenotype.hpoa
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/41961c7350780224/url.txt
Git LFS file not shown
3 changes: 0 additions & 3 deletions excerpt-data/5dfdf97f46a7c299/hp.obo

This file was deleted.

3 changes: 0 additions & 3 deletions excerpt-data/5dfdf97f46a7c299/url.txt

This file was deleted.

3 changes: 0 additions & 3 deletions excerpt-data/6045c008f1c0f370/url.txt

This file was deleted.

3 changes: 0 additions & 3 deletions excerpt-data/6045c008f1c0f370/v10.1.tar.gz

This file was deleted.

3 changes: 3 additions & 0 deletions excerpt-data/615312ce3f5fc1bf/OMIMinDO.tsv
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/615312ce3f5fc1bf/url.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/617bebe58c82f24e/CTD_diseases.tsv.gz
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/617bebe58c82f24e/url.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/652646c24140df2a/mondo.obo
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/652646c24140df2a/url.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/6f378db589a4bbb9/orphacodes
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/6f378db589a4bbb9/url.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/91f964d1aa8367a5/20
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/91f964d1aa8367a5/url.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/97187ff23d7d2773/phenotype_to_genes.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/97187ff23d7d2773/url.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/__orphadata__/orphadata.jsonl
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/a0f9f11118d32143/download
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/a0f9f11118d32143/url.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/ac92d419f271d95f/genes_to_phenotype.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/ac92d419f271d95f/url.txt
Git LFS file not shown
3 changes: 0 additions & 3 deletions excerpt-data/afba4597f963e400/phenotype.hpoa

This file was deleted.

3 changes: 0 additions & 3 deletions excerpt-data/afba4597f963e400/url.txt

This file was deleted.

3 changes: 0 additions & 3 deletions excerpt-data/e3f85776a4d3a44b/genes_to_phenotype.txt

This file was deleted.

3 changes: 0 additions & 3 deletions excerpt-data/e3f85776a4d3a44b/url.txt

This file was deleted.

3 changes: 3 additions & 0 deletions excerpt-data/e4097d7046a53998/omim_import.obo
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/e4097d7046a53998/url.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/ede624c2cd588939/omim-unmapped.csv
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/ede624c2cd588939/url.txt
Git LFS file not shown
16 changes: 11 additions & 5 deletions rules/output/annonars/genes.smk
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,21 @@ rule output_annonars_genes: # -- build annonars genes RocksDB file
hgnc="work/genes/hgnc/{date}/hgnc_info.jsonl",
ncbi="work/genes/entrez/{date}/gene_info.jsonl",
omim="work/genes/omim/{v_hpo}+{date}/omim_diseases.tsv",
orpha="work/genes/orphapacket/{v_orpha}+{date}/orpha_diseases.tsv",
orpha="work/genes/orphadata/{date}/orpha_diseases.tsv",
panelapp="work/download/genes/panelapp/{date}/panelapp.jsonl",
conditions="work/genes/conditions/{v_hpo}+{date}/conditions.jsonl",
rcnv="work/genes/rcnv/2022/rcnv_collins_2022.tsv",
shet="work/genes/shet/2019/shet_weghorn_2019.tsv",
gtex="work/genes/annonars/gtex_v8/genes_tpm.jsonl.gz",
domino="work/genes/domino/20190219/domino.tsv",
decipher_hi="work/genes/decipher/v3/decipher_hi_prediction.tsv.gz",
output:
rocksdb_identity=(
"output/full/annonars/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{v_hpo}+{v_orpha}+{date}+{v_annonars}/"
"output/full/annonars/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{v_hpo}+{date}+{v_annonars}/"
"rocksdb/IDENTITY"
),
spec_yaml=(
"output/full/annonars/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{v_hpo}+{v_orpha}+{date}+{v_annonars}/"
"output/full/annonars/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{v_hpo}+{date}+{v_annonars}/"
"spec.yaml"
),
wildcard_constraints:
Expand All @@ -35,11 +36,17 @@ rule output_annonars_genes: # -- build annonars genes RocksDB file
v_annonars=RE_VERSION,
shell:
r"""
if [[ "$(date +%Y%m%d)" != "{wildcards.date}" ]] && [[ "{FORCE_TODAY}" != "True" ]]; then
>&2 echo "{wildcards.date} is not today"
exit 1
fi
annonars gene import \
--path-out-rocksdb $(dirname {output.rocksdb_identity}) \
--path-in-acmg {input.acmg_sf} \
--path-in-clingen-37 {input.clingen_37} \
--path-in-clingen-38 {input.clingen_38} \
--path-in-conditions {input.conditions} \
--path-in-gnomad-constraints {input.gnomad_constraints} \
--path-in-dbnsfp {input.dbnsfp} \
--path-in-hgnc {input.hgnc} \
Expand All @@ -64,7 +71,6 @@ rule output_annonars_genes: # -- build annonars genes RocksDB file
--value date={wildcards.date} \
\
--value v_annonars={wildcards.v_annonars} \
--value v_downloader={PV.downloader} \
--value v_orphapacket={wildcards.v_orpha} \
--value v_downloader={PV.downloader}
> {output.spec_yaml}
"""
2 changes: 1 addition & 1 deletion rules/output/annonars/genes.spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ x-created-from:
- name: OMIM
version: {{ today }}
- name: ORDO
version: {{ v_orphapacket }}
version: {{ v_ordo }}
- name: rCNV pHaplo/pTriplo scores
version: 2022-Collins-et-al
- name: sHet scores
Expand Down
5 changes: 5 additions & 0 deletions rules/output/worker/hgnc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ rule output_hgnc_xlink_binary:
spec_yaml=f"output/full/worker/genes-xlink-{{date}}+{PV.worker}/genes-xlink.spec.yaml",
shell:
r"""
if [[ "$(date +%Y%m%d)" != "{wildcards.date}" ]] && [[ "{FORCE_TODAY}" != "True" ]]; then
>&2 echo "{wildcards.date} is not today"
exit 1
fi
varfish-server-worker db to-bin \
--input-type xlink \
--path-input {input.tsv} \
Expand Down
Loading

0 comments on commit 482bdd0

Please sign in to comment.