Skip to content

Commit

Permalink
feat: upgrade clinvar-this for new ClinVar XML format (#72) (#77)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed May 30, 2024
1 parent 0eb7168 commit 368d9f8
Show file tree
Hide file tree
Showing 16 changed files with 1,070 additions and 22 deletions.
6 changes: 3 additions & 3 deletions .github/actions/acmg-class-by-freq/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ runs:
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/acmg-class-by-freq
key: acmg-class-by-freq-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: acmg-class-by-freq-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output
lookup-only: true

- name: Create requirements.txt
Expand All @@ -50,15 +50,15 @@ runs:
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/acmg-class-by-freq
key: acmg-class-by-freq-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: acmg-class-by-freq-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output

- name: Retrieve cached ClinVar JSONL file
if: |
(steps.check-cache-acmg-class-by-freq-output.outputs.cache-hit != 'true')
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/convert-clinvar
key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output
fail-on-cache-miss: true

- name: Run the ACMG class by freq. generation
Expand Down
4 changes: 2 additions & 2 deletions .github/actions/convert-clinvar/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ runs:
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/convert-clinvar
key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output
lookup-only: true

- name: Create requirements.txt
Expand All @@ -50,7 +50,7 @@ runs:
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/convert-clinvar
key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output

- name: Retrieve cached ClinVar file
if: |
Expand Down
4 changes: 2 additions & 2 deletions .github/actions/convert-clinvar/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ df -h
clinvar-this data xml-to-jsonl \
${CLINVAR_DIR}/ClinVarFullRelease_00-latest_weekly.xml.gz \
${OUTPUT_DIR}/convert-clinvar/clinvar-full-release.jsonl.gz \
$(if [[ "$MAX_RCVS" != "" ]] && [[ "$MAX_RCVS" != "0" ]]; then \
echo --max-records $MAX_RCVS;
$(if [[ "$MAX_RECORDS" != "" ]] && [[ "$MAX_RECORDS" != "0" ]]; then \
echo --max-records $MAX_RECORDS;
fi)


Expand Down
4 changes: 2 additions & 2 deletions .github/actions/download-clinvar/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ df -h

# Check that the release name corresponds to the date of the current weekly release.

curl https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/RCV_xml_old_format/weekly_release/ \
curl https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/weekly_release/ \
> /tmp/lst.html

grep 'latest_weekly.xml.gz"' /tmp/lst.html \
Expand All @@ -32,6 +32,6 @@ fi
# Actually download the file

wget -O $CLINVAR_DIR/ClinVarFullRelease_00-latest_weekly.xml.gz \
https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/RCV_xml_old_format/weekly_release/ClinVarFullRelease_00-latest_weekly.xml.gz
https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/weekly_release/ClinVarVCVRelease_00-latest_weekly.xml.gz

df -h
6 changes: 3 additions & 3 deletions .github/actions/extract-vars/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ runs:
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/extract-vars
key: extract-vars-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: extract-vars-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output
lookup-only: true

- name: Create requirements.txt
Expand All @@ -50,15 +50,15 @@ runs:
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/extract-vars
key: extract-vars-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: extract-vars-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output

- name: Retrieve cached ClinVar JSONL file
if: |
(steps.check-cache-extract-vars-output.outputs.cache-hit != 'true')
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/convert-clinvar
key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output
fail-on-cache-miss: true

- name: Run the variant extraction generation
Expand Down
6 changes: 3 additions & 3 deletions .github/actions/gene-variant-report/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ runs:
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/gene-variant-report
key: gene-variant-report-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: gene-variant-report-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output
lookup-only: true

- name: Create requirements.txt
Expand All @@ -50,15 +50,15 @@ runs:
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/gene-variant-report
key: gene-variant-report-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: gene-variant-report-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output

- name: Retrieve cached ClinVar JSONL file
if: |
(steps.check-cache-gene-variant-report-output.outputs.cache-hit != 'true')
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/convert-clinvar
key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output
fail-on-cache-miss: true

- name: Run the gene variant report generation
Expand Down
6 changes: 3 additions & 3 deletions .github/actions/phenotype-links/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ runs:
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/phenotype-links
key: phenotype-links-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: phenotype-links-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output
lookup-only: true

- name: Create requirements.txt
Expand All @@ -50,15 +50,15 @@ runs:
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/phenotype-links
key: phenotype-links-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: phenotype-links-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output

- name: Retrieve cached ClinVar JSONL file
if: |
(steps.check-cache-phenotype-links-output.outputs.cache-hit != 'true')
uses: actions/cache@v3
with:
path: ${{ env.OUTPUT_DIR }}/convert-clinvar
key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output
key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RECORDS }}-output
fail-on-cache-miss: true

- name: Run the phenotype links generation
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/-build-artifacts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ env:
# Python version
PYTHON_VERSION: "3.11"
# clinvar-this package version
CLINVAR_THIS_VERSION: "0.14.3"
CLINVAR_THIS_VERSION: "0.15.5"
# Helper to get unique cache keys
CACHE_SEED: "0"
# Maximal number of RCVs to process (0 = no limit).
MAX_RCVS: "0"
# Maximal number of records to process (0 = no limit).
MAX_RECORDS: "0"
# Lower verbosity of TQDM progress bar
TQDM_MININTERVAL: "5"

Expand Down
2 changes: 1 addition & 1 deletion release-name.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
20240520
20240528
23 changes: 23 additions & 0 deletions utils/terraform/.terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 368d9f8

Please sign in to comment.