From 3542e25fbc6cfc073a00f80720bef73fb8cdf8dc Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Wed, 6 Sep 2023 17:16:37 +0200 Subject: [PATCH] chore: initializing repository --- .editorconfig | 27 ++++ .gitattributes | 1 + .github/ISSUE_TEMPLATE/bug_report.md | 46 +++++++ .github/ISSUE_TEMPLATE/feature_request.md | 28 ++++ .github/actions/acmg-class-by-freq/action.yml | 100 ++++++++++++++ .github/actions/acmg-class-by-freq/run.sh | 38 ++++++ .github/actions/convert-clinvar/action.yml | 104 +++++++++++++++ .github/actions/convert-clinvar/run.sh | 42 ++++++ .github/actions/download-clinvar/action.yml | 34 +++++ .github/actions/download-clinvar/run.sh | 37 ++++++ .github/actions/free-disk-space/action.yml | 21 +++ .../actions/gene-variant-report/action.yml | 100 ++++++++++++++ .github/actions/gene-variant-report/run.sh | 38 ++++++ .github/actions/phenotype-links/action.yml | 100 ++++++++++++++ .github/actions/phenotype-links/run.sh | 38 ++++++ .github/workflows/-build-artifacts.yml | 122 ++++++++++++++++++ .github/workflows/conventional-prs.yml | 18 +++ .github/workflows/cron.yml | 106 +++++++++++++++ .github/workflows/main.yml | 16 +++ .github/workflows/release.yml | 97 ++++++++++++++ .gitignore | 7 + CHANGELOG.md | 1 + LICENSE.txt | 21 +++ README.md | 36 ++++++ utils/terraform/.gitignore | 2 + utils/terraform/main.tf | 19 +++ utils/terraform/provider.tf | 1 + 27 files changed, 1200 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitattributes create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/actions/acmg-class-by-freq/action.yml create mode 100644 .github/actions/acmg-class-by-freq/run.sh create mode 100644 .github/actions/convert-clinvar/action.yml create mode 100644 .github/actions/convert-clinvar/run.sh create mode 100644 .github/actions/download-clinvar/action.yml create mode 100644 .github/actions/download-clinvar/run.sh create mode 100644 
.github/actions/free-disk-space/action.yml create mode 100644 .github/actions/gene-variant-report/action.yml create mode 100644 .github/actions/gene-variant-report/run.sh create mode 100644 .github/actions/phenotype-links/action.yml create mode 100644 .github/actions/phenotype-links/run.sh create mode 100644 .github/workflows/-build-artifacts.yml create mode 100644 .github/workflows/conventional-prs.yml create mode 100644 .github/workflows/cron.yml create mode 100644 .github/workflows/main.yml create mode 100644 .github/workflows/release.yml create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 LICENSE.txt create mode 100644 README.md create mode 100644 utils/terraform/.gitignore create mode 100644 utils/terraform/main.tf create mode 100644 utils/terraform/provider.tf diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..101ca1d --- /dev/null +++ b/.editorconfig @@ -0,0 +1,27 @@ +# http://editorconfig.org + +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.{py,rst,ini,rs,toml}] +indent_style = space +indent_size = 4 + +[*.{html,css,scss,json,yml}] +indent_style = space +indent_size = 2 + +[*.md] +trim_trailing_whitespace = false + +[Makefile] +indent_style = tab + +[nginx.conf] +indent_style = space +indent_size = 2 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a2ca402 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +utils/* linguist-vendored=false diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..0f38948 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,46 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + + + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. 
Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..609d5be --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,28 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' + +--- + + + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/actions/acmg-class-by-freq/action.yml b/.github/actions/acmg-class-by-freq/action.yml new file mode 100644 index 0000000..de88cc8 --- /dev/null +++ b/.github/actions/acmg-class-by-freq/action.yml @@ -0,0 +1,100 @@ +name: acmg-class-by-freq +description: Create ACMG class by freq. 
file + +inputs: + publish-artifacts: + default: "false" + description: "Whether to publish artifacts to the named release" + release-name: + description: "The name of the release to publish artifacts to" + token: + description: "A Github PAT" + required: true + +runs: + using: "composite" + steps: + - name: Get ClinVar version + id: get-clinvar + run: | + echo "release-name=$(cat release-name.txt || echo 00-latest_weekly)" >> $GITHUB_OUTPUT + shell: bash -l {0} + + - name: Check cache ACMG class by freq. output + id: check-cache-acmg-class-by-freq-output + uses: actions/cache@v3 + with: + path: ${{ env.OUTPUT_DIR }}/acmg-class-by-freq + key: acmg-class-by-freq-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output + lookup-only: true + + - name: Create requirements.txt + run: | + echo clinvar-this==${{ env.CLINVAR_THIS_VERSION }} > requirements.txt + shell: stdbuf -oL bash -l {0} + + - name: Install python and cache dependencies + uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: pip + + - name: Install Python dependencies + run: pip install -r requirements.txt + shell: stdbuf -oL bash -l {0} + + - name: Cache the generated ACMG class by freq. 
+ if: | + (steps.check-cache-acmg-class-by-freq-output.outputs.cache-hit != 'true') || + (inputs.publish-artifacts == 'true') + uses: actions/cache@v3 + with: + path: ${{ env.OUTPUT_DIR }}/acmg-class-by-freq + key: acmg-class-by-freq-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output + + - name: Retrieve cached ClinVar JSONL file + if: | + (steps.check-cache-acmg-class-by-freq-output.outputs.cache-hit != 'true') + uses: actions/cache@v3 + with: + path: ${{ env.OUTPUT_DIR }}/convert-clinvar + key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output + fail-on-cache-miss: true + + - name: Run the ACMG class by freq. generation + if: steps.check-cache-acmg-class-by-freq-output.outputs.cache-hit != 'true' + run: bash .github/actions/acmg-class-by-freq/run.sh + env: + RELEASE_NAME: "${{ steps.get-clinvar.outputs.release-name }}" + shell: stdbuf -oL bash -l {0} + + - name: Publish artifacts + if: inputs.publish-artifacts == 'true' + run: | + set -xv + + version=$(\ + grep ^dc.identifier \ + ${{ env.OUTPUT_DIR }}/acmg-class-by-freq/spec.yaml \ + | rev \ + | cut -d - -f 1 \ + | rev) + + mkdir -p /tmp/for-upload/clinvar-data-acmg-class-by-freq-$version + cp ${{ env.OUTPUT_DIR }}/acmg-class-by-freq/*.jsonl.gz \ + /tmp/for-upload/clinvar-data-acmg-class-by-freq-$version/. 
+ ls -lhR /tmp/for-upload/clinvar-data-acmg-class-by-freq-$version + tar --directory=/tmp/for-upload --create --owner=0:0 --gzip \ + --file /tmp/for-upload/clinvar-data-acmg-class-by-freq-$version.tar.gz \ + clinvar-data-acmg-class-by-freq-$version + + pushd /tmp/for-upload + sha256sum clinvar-data-acmg-class-by-freq-$version.tar.gz \ + > clinvar-data-acmg-class-by-freq-$version.tar.gz.sha256 + popd + + gh release upload --clobber "clinvar-weekly-${{ inputs.release-name }}" \ + /tmp/for-upload/clinvar-data-acmg-class-by-freq-$version.tar.gz* + env: + GITHUB_TOKEN: ${{ inputs.token }} + shell: stdbuf -oL bash -l {0} diff --git a/.github/actions/acmg-class-by-freq/run.sh b/.github/actions/acmg-class-by-freq/run.sh new file mode 100644 index 0000000..6975089 --- /dev/null +++ b/.github/actions/acmg-class-by-freq/run.sh @@ -0,0 +1,38 @@ +#!/usr/bin/bash + +set -euo pipefail +set -x + +mkdir -p ${OUTPUT_DIR}/acmg-class-by-freq + +df -h + + +clinvar-this data acmg-class-by-freq \ + ${OUTPUT_DIR}/convert-clinvar/clinvar-full-release.jsonl.gz \ + ${OUTPUT_DIR}/acmg-class-by-freq/clinvar-acmg-class-by-freq.jsonl.gz + + +cat >${OUTPUT_DIR}/acmg-class-by-freq/spec.yaml <> $GITHUB_OUTPUT + shell: bash -l {0} + + - name: Check cache JSONL output + id: check-cache-convert-clinvar-output + uses: actions/cache@v3 + with: + path: ${{ env.OUTPUT_DIR }}/convert-clinvar + key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output + lookup-only: true + + - name: Create requirements.txt + run: | + echo clinvar-this==${{ env.CLINVAR_THIS_VERSION }} > requirements.txt + shell: stdbuf -oL bash -l {0} + + - name: Install python and cache dependencies + uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: pip + + - name: Install Python dependencies + run: pip install -r requirements.txt + shell: stdbuf -oL bash -l {0} + + - name: Cache the generated JSONL + if: | + 
(steps.check-cache-convert-clinvar-output.outputs.cache-hit != 'true') || + (inputs.publish-artifacts == 'true') + uses: actions/cache@v3 + with: + path: ${{ env.OUTPUT_DIR }}/convert-clinvar + key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output + + - name: Retrieve cached ClinVar file + if: | + (steps.check-cache-convert-clinvar-output.outputs.cache-hit != 'true') + uses: actions/cache@v3 + with: + path: ${{ env.CLINVAR_DIR }} + key: download-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }} + fail-on-cache-miss: true + + - name: Run the conversion to JSONL + if: steps.check-cache-convert-clinvar-output.outputs.cache-hit != 'true' + run: bash .github/actions/convert-clinvar/run.sh + env: + RELEASE_NAME: "${{ steps.get-clinvar.outputs.release-name }}" + shell: stdbuf -oL bash -l {0} + + - name: Publish artifacts + if: inputs.publish-artifacts == 'true' + run: | + set -xv + + version=$(\ + grep ^dc.identifier \ + ${{ env.OUTPUT_DIR }}/convert-clinvar/spec.yaml \ + | rev \ + | cut -d - -f 1 \ + | rev) + + mkdir -p /tmp/for-upload/clinvar-data-jsonl-$version + cp ${{ env.OUTPUT_DIR }}/convert-clinvar/*.jsonl.gz \ + /tmp/for-upload/clinvar-data-jsonl-$version/. + ls -lhR /tmp/for-upload/clinvar-data-jsonl-$version + tar --directory=/tmp/for-upload --create --owner=0:0 --gzip \ + --file /tmp/for-upload/clinvar-data-jsonl-$version.tar.gz \ + clinvar-data-jsonl-$version + + pushd /tmp/for-upload + sha256sum clinvar-data-jsonl-$version.tar.gz \ + > clinvar-data-jsonl-$version.tar.gz.sha256 + + split -d -b 1G \ + clinvar-data-jsonl-$version.tar.gz \ + clinvar-data-jsonl-$version.tar.gz. 
+ popd + + gh release upload --clobber "clinvar-weekly-${{ inputs.release-name }}" \ + /tmp/for-upload/clinvar-data-jsonl-$version.tar.gz.* + env: + GITHUB_TOKEN: ${{ inputs.token }} + shell: stdbuf -oL bash -l {0} diff --git a/.github/actions/convert-clinvar/run.sh b/.github/actions/convert-clinvar/run.sh new file mode 100644 index 0000000..b3a99dc --- /dev/null +++ b/.github/actions/convert-clinvar/run.sh @@ -0,0 +1,42 @@ +#!/usr/bin/bash + +set -euo pipefail +set -x + +mkdir -p ${OUTPUT_DIR}/convert-clinvar + +df -h + + +clinvar-this data xml-to-jsonl \ + ${CLINVAR_DIR}/ClinVarFullRelease_00-latest_weekly.xml.gz \ + ${OUTPUT_DIR}/convert-clinvar/clinvar-full-release.jsonl.gz \ + $(if [[ "$MAX_RCVS" != "" ]] && [[ "$MAX_RCVS" != "0" ]]; then \ + echo --max-records $MAX_RCVS; + fi) + + +cat >${OUTPUT_DIR}/convert-clinvar/spec.yaml <> $GITHUB_OUTPUT + # cf. https://github.com/mamba-org/provision-with-micromamba#important + shell: bash -l {0} + + - name: Check for cache downloaded ClinVar file + id: check-cache-clinvar-file + uses: actions/cache@v3 + with: + path: ${{ env.CLINVAR_DIR }} + key: download-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }} + lookup-only: true + + - name: Cache downloaded ClinVar file + if: steps.check-cache-clinvar-file.outputs.cache-hit != 'true' + uses: actions/cache@v3 + with: + path: ${{ env.CLINVAR_DIR }} + key: download-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }} + + - name: Download reference files if necessary + if: steps.check-cache-clinvar-file.outputs.cache-hit != 'true' + run: bash .github/actions/download-clinvar/run.sh + env: + RELEASE_NAME: "${{ steps.get-clinvar.outputs.release-name }}" + # cf. 
https://github.com/mamba-org/provision-with-micromamba#important + shell: bash -l {0} diff --git a/.github/actions/download-clinvar/run.sh b/.github/actions/download-clinvar/run.sh new file mode 100644 index 0000000..012e08e --- /dev/null +++ b/.github/actions/download-clinvar/run.sh @@ -0,0 +1,37 @@ +#!/usr/bin/bash + +set -euo pipefail +set -x + +mkdir -p $CLINVAR_DIR + +export TMPDIR=$(mktemp -d) +trap "rm -rf $TMPDIR" EXIT ERR + +df -h + +# Check that the release name corresponds to the date of the current weekly release. + +curl https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/weekly_release/ \ +> /tmp/lst.html + +grep 'latest_weekly.xml.gz"' /tmp/lst.html \ +| head -n 1 \ +| cut -d '>' -f 21- \ +| cut -d '<' -f 1 \ +| cut -d ' ' -f 1 \ +| tr -d '-' \ +> /tmp/release-name.txt + +if ! diff /tmp/release-name.txt release-name.txt >/dev/null; then + >&2 echo "Difference in release names" + >&2 diff /tmp/release-name.txt release-name.txt + exit 1 +fi + +# Actually download the file + +wget -O $CLINVAR_DIR/ClinVarFullRelease_00-latest_weekly.xml.gz \ + https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/weekly_release/ClinVarFullRelease_00-latest_weekly.xml.gz + +df -h diff --git a/.github/actions/free-disk-space/action.yml b/.github/actions/free-disk-space/action.yml new file mode 100644 index 0000000..089c646 --- /dev/null +++ b/.github/actions/free-disk-space/action.yml @@ -0,0 +1,21 @@ +name: free-disk-space +description: Free disk space by removing unused packages and images + +runs: + using: "composite" + steps: + - name: Remove unused files + # cf. 
https://github.com/mamba-org/provision-with-micromamba#important + shell: bash -l {0} + run: | + set +x + + sudo df -h + + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo docker image prune --all --force + + sudo df -h diff --git a/.github/actions/gene-variant-report/action.yml b/.github/actions/gene-variant-report/action.yml new file mode 100644 index 0000000..0e23153 --- /dev/null +++ b/.github/actions/gene-variant-report/action.yml @@ -0,0 +1,100 @@ +name: gene-variant-report +description: Create gene variant report file + +inputs: + publish-artifacts: + default: "false" + description: "Whether to publish artifacts to the named release" + release-name: + description: "The name of the release to publish artifacts to" + token: + description: "A Github PAT" + required: true + +runs: + using: "composite" + steps: + - name: Get ClinVar version + id: get-clinvar + run: | + echo "release-name=$(cat release-name.txt || echo 00-latest_weekly)" >> $GITHUB_OUTPUT + shell: bash -l {0} + + - name: Check cache gene variant report output + id: check-cache-gene-variant-report-output + uses: actions/cache@v3 + with: + path: ${{ env.OUTPUT_DIR }}/gene-variant-report + key: gene-variant-report-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output + lookup-only: true + + - name: Create requirements.txt + run: | + echo clinvar-this==${{ env.CLINVAR_THIS_VERSION }} > requirements.txt + shell: stdbuf -oL bash -l {0} + + - name: Install python and cache dependencies + uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: pip + + - name: Install Python dependencies + run: pip install -r requirements.txt + shell: stdbuf -oL bash -l {0} + + - name: Cache the generated gene variant report + if: | + (steps.check-cache-gene-variant-report-output.outputs.cache-hit != 'true') || + 
(inputs.publish-artifacts == 'true') + uses: actions/cache@v3 + with: + path: ${{ env.OUTPUT_DIR }}/gene-variant-report + key: gene-variant-report-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output + + - name: Retrieve cached ClinVar JSONL file + if: | + (steps.check-cache-gene-variant-report-output.outputs.cache-hit != 'true') + uses: actions/cache@v3 + with: + path: ${{ env.OUTPUT_DIR }}/convert-clinvar + key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output + fail-on-cache-miss: true + + - name: Run the gene variant report generation + if: steps.check-cache-gene-variant-report-output.outputs.cache-hit != 'true' + run: bash .github/actions/gene-variant-report/run.sh + env: + RELEASE_NAME: "${{ steps.get-clinvar.outputs.release-name }}" + shell: stdbuf -oL bash -l {0} + + - name: Publish artifacts + if: inputs.publish-artifacts == 'true' + run: | + set -xv + + version=$(\ + grep ^dc.identifier \ + ${{ env.OUTPUT_DIR }}/gene-variant-report/spec.yaml \ + | rev \ + | cut -d - -f 1 \ + | rev) + + mkdir -p /tmp/for-upload/clinvar-data-gene-variant-report-$version + cp ${{ env.OUTPUT_DIR }}/gene-variant-report/*.jsonl.gz \ + /tmp/for-upload/clinvar-data-gene-variant-report-$version/. 
+ ls -lhR /tmp/for-upload/clinvar-data-gene-variant-report-$version + tar --directory=/tmp/for-upload --create --owner=0:0 --gzip \ + --file /tmp/for-upload/clinvar-data-gene-variant-report-$version.tar.gz \ + clinvar-data-gene-variant-report-$version + + pushd /tmp/for-upload + sha256sum clinvar-data-gene-variant-report-$version.tar.gz \ + > clinvar-data-gene-variant-report-$version.tar.gz.sha256 + popd + + gh release upload --clobber "clinvar-weekly-${{ inputs.release-name }}" \ + /tmp/for-upload/clinvar-data-gene-variant-report-$version.tar.gz* + env: + GITHUB_TOKEN: ${{ inputs.token }} + shell: stdbuf -oL bash -l {0} diff --git a/.github/actions/gene-variant-report/run.sh b/.github/actions/gene-variant-report/run.sh new file mode 100644 index 0000000..0079a1b --- /dev/null +++ b/.github/actions/gene-variant-report/run.sh @@ -0,0 +1,38 @@ +#!/usr/bin/bash + +set -euo pipefail +set -x + +mkdir -p ${OUTPUT_DIR}/gene-variant-report + +df -h + + +clinvar-this data gene-variant-report \ + ${OUTPUT_DIR}/convert-clinvar/clinvar-full-release.jsonl.gz \ + ${OUTPUT_DIR}/gene-variant-report/gene-variant-report.jsonl.gz + + +cat >${OUTPUT_DIR}/gene-variant-report/spec.yaml <> $GITHUB_OUTPUT + shell: bash -l {0} + + - name: Check cache phenotype links output + id: check-cache-phenotype-links-output + uses: actions/cache@v3 + with: + path: ${{ env.OUTPUT_DIR }}/phenotype-links + key: phenotype-links-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output + lookup-only: true + + - name: Create requirements.txt + run: | + echo clinvar-this==${{ env.CLINVAR_THIS_VERSION }} > requirements.txt + shell: stdbuf -oL bash -l {0} + + - name: Install python and cache dependencies + uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: pip + + - name: Install Python dependencies + run: pip install -r requirements.txt + shell: stdbuf -oL bash -l {0} + + - name: Cache the generated 
phenotype links + if: | + (steps.check-cache-phenotype-links-output.outputs.cache-hit != 'true') || + (inputs.publish-artifacts == 'true') + uses: actions/cache@v3 + with: + path: ${{ env.OUTPUT_DIR }}/phenotype-links + key: phenotype-links-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output + + - name: Retrieve cached ClinVar JSONL file + if: | + (steps.check-cache-phenotype-links-output.outputs.cache-hit != 'true') + uses: actions/cache@v3 + with: + path: ${{ env.OUTPUT_DIR }}/convert-clinvar + key: convert-clinvar-${{ env.CACHE_SEED }}-${{ steps.get-clinvar.outputs.release-name }}-${{ env.CLINVAR_THIS_VERSION }}-${{ env.MAX_RCVS }}-output + fail-on-cache-miss: true + + - name: Run the phenotype links generation + if: steps.check-cache-phenotype-links-output.outputs.cache-hit != 'true' + run: bash .github/actions/phenotype-links/run.sh + env: + RELEASE_NAME: "${{ steps.get-clinvar.outputs.release-name }}" + shell: stdbuf -oL bash -l {0} + + - name: Publish artifacts + if: inputs.publish-artifacts == 'true' + run: | + set -xv + + version=$(\ + grep ^dc.identifier \ + ${{ env.OUTPUT_DIR }}/phenotype-links/spec.yaml \ + | rev \ + | cut -d - -f 1 \ + | rev) + + mkdir -p /tmp/for-upload/clinvar-data-phenotype-links-$version + cp ${{ env.OUTPUT_DIR }}/phenotype-links/*.jsonl.gz \ + /tmp/for-upload/clinvar-data-phenotype-links-$version/. 
+ ls -lhR /tmp/for-upload/clinvar-data-phenotype-links-$version + tar --directory=/tmp/for-upload --create --owner=0:0 --gzip \ + --file /tmp/for-upload/clinvar-data-phenotype-links-$version.tar.gz \ + clinvar-data-phenotype-links-$version + + pushd /tmp/for-upload + sha256sum clinvar-data-phenotype-links-$version.tar.gz \ + > clinvar-data-phenotype-links-$version.tar.gz.sha256 + popd + + gh release upload --clobber "clinvar-weekly-${{ inputs.release-name }}" \ + /tmp/for-upload/clinvar-data-phenotype-links-$version.tar.gz* + env: + GITHUB_TOKEN: ${{ inputs.token }} + shell: stdbuf -oL bash -l {0} diff --git a/.github/actions/phenotype-links/run.sh b/.github/actions/phenotype-links/run.sh new file mode 100644 index 0000000..b89d2b8 --- /dev/null +++ b/.github/actions/phenotype-links/run.sh @@ -0,0 +1,38 @@ +#!/usr/bin/bash + +set -euo pipefail +set -x + +mkdir -p ${OUTPUT_DIR}/phenotype-links + +df -h + + +clinvar-this data gene-phenotype-links \ + ${OUTPUT_DIR}/convert-clinvar/clinvar-full-release.jsonl.gz \ + ${OUTPUT_DIR}/phenotype-links/clinvar-phenotype-links.jsonl.gz + + +cat >${OUTPUT_DIR}/phenotype-links/spec.yaml <" + + - name: Create new PR if necessary + env: + GITHUB_TOKEN: ${{ secrets.BOT_PAT }} + run: | + set -x + + # Download ClinVar XML listing + curl https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/weekly_release/ \ + > /tmp/lst.html + + # Look for date of the "*-latest_weekly.xml.gz" + grep 'latest_weekly.xml.gz"' /tmp/lst.html \ + | head -n 1 \ + | cut -d '>' -f 21- \ + | cut -d '<' -f 1 \ + | cut -d ' ' -f 1 \ + | tr -d '-' \ + > /tmp/release-name.txt + release_name=$(cat /tmp/release-name.txt) + + set +e + + # Check whether we already have a tag for this + tag_name=clinvar-weekly-$release_name + git tag | grep "${tag_name}$" >/dev/null + tag_needed=$? + # Check whether we already have a branch for this + branch_name=release-${tag_name} + git branch -a | grep "${branch_name}$" >/dev/null + branch_needed=$? 
+ + # Only create a new branch if no tag and no branch exist yet. The + # tag will be created together with the release on the main branch. + if [[ "$tag_needed" -eq 1 ]]; then + # The tag does not exist yet. We may need to create a new branch + # and PR. + if [[ "$branch_needed" -eq 1 ]]; then + >&2 echo "Neither the tag nor the branch exist; create branch and PR" + git checkout -b $branch_name + + cp /tmp/release-name.txt release-name.txt + git add release-name.txt + + git commit -m "chore: weekly ClinVar release $release_name" + git push --set-upstream origin $branch_name + else + >&2 echo "The tag is missing but the branch exists" + git checkout $branch_name + fi + + # If necessary, create new pull request and set it to auto-merge. + gh pr view >/dev/null + pr_needed=$? + if [ "$pr_needed" -eq 1 ]; then + >&2 echo "The tag is missing, branch exists here, now creating PR" + + # Ensure that the "autorelease" label is present. + gh label create "autorelease" --color EDEDED --force + + # Create pull request and set to auto-merge. + set -e + gh pr create --fill --base main --head $branch_name --label autorelease + gh pr merge --auto --squash + else + set -e + >&2 echo "The tag is missing but the PR already exists" + fi + else + set -e + >&2 echo "The tag already exist, move on; nothing to do" + fi diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..9635a79 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,16 @@ +name: CI + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + Build-Artifacts: + uses: ./.github/workflows/-build-artifacts.yml + secrets: inherit + with: + workflow-name: main.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..8d5c880 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,97 @@ +name: Release + +defaults: + run: + # cf. 
https://github.com/mamba-org/provision-with-micromamba#important + shell: bash -l {0} + +on: + push: + branches: + - main + +jobs: + Create-Release: + runs-on: ubuntu-latest + outputs: + is-release: ${{ steps.check-release.outputs.is-release }} + release-name: ${{ steps.check-release.outputs.release-name }} + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: setup git config + run: | + git config user.name "GitHub Actions Bot" + git config user.email "<>" + + - name: Check whether we are on a release commit + id: check-release + run: | + set -euo pipefail + set -x + + msg=$(git log -1 --pretty=%B | head -n 1) + if [[ "$msg" == "chore: weekly ClinVar release "* ]]; then + echo "This is a release commit." + echo is-release=true >> $GITHUB_OUTPUT + # The full name of the weekly release, includes the dash after the year. + echo release-name=$(cat release-name.txt || echo 00-latest_weekly) >> $GITHUB_OUTPUT + # The release name without the dash after the year. + echo release-name=$((cat release-name.txt || echo 00-latest_weekly) | tr -d '-') >> $GITHUB_OUTPUT + else + echo "This is not a release commit." + echo is-release=false >> $GITHUB_OUTPUT + fi + + - name: Create release + if: steps.check-release.outputs.is-release == 'true' + run: | + tag=clinvar-weekly-$RELEASE_NAME + if ! git tag | grep $tag >/dev/null; then + git tag -a $tag -m "Auto-release of weekly ClinVar $RELEASE_NAME" + git push --tags + fi + if ! gh release list | grep $tag >/dev/null; then + gh release create clinvar-weekly-$RELEASE_NAME \ + --title=clinvar-weekly-$RELEASE_NAME \ + --verify-tag \ + --draft=true \ + --notes "This release was created automatically by GitHub Action in our CI." 
+ fi + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + RELEASE_NAME: ${{ steps.check-release.outputs.release-name }} + + Publish-Artifacts: + needs: + - Create-Release + if: needs.Create-Release.outputs.is-release == 'true' + uses: ./.github/workflows/-build-artifacts.yml + secrets: inherit + with: + publish-artifacts: ${{ needs.Create-Release.outputs.is-release }} + release-name: ${{ needs.Create-Release.outputs.release-name }} + workflow-name: release.yml + + Publish-Release: + runs-on: ubuntu-latest + needs: + - Create-Release + - Publish-Artifacts + if: needs.Create-Release.outputs.is-release == 'true' + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Mark release as published + run: | + set -euo pipefail + set -x + + gh release edit clinvar-weekly-$RELEASE_NAME \ + --draft=false \ + --discussion-category=Announcements + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + RELEASE_NAME: ${{ needs.Create-Release.outputs.release-name }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f3000ee --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +# auto-generated +requirements.txt + +*:Zone.Identifier +*~ +.*.sw? 
+/.vscode diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..825c32f --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1 @@ +# Changelog diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..b57c565 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Berlin Institute of Health + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..f686239 --- /dev/null +++ b/README.md @@ -0,0 +1,36 @@ +[![CI](https://github.com/bihealth/clinvar-data-jsonl/actions/workflows/main.yml/badge.svg)](https://github.com/bihealth/clinvar-data-jsonl/actions/workflows/main.yml) + +# (Weekly) ClinVar Data in JSONL Format + +## Following Rolling Releases + +The GitHub actions for this repository follows the ClinVar XML file release on the [ClinVar FTP](https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/weekly_release/). 
+A scheduled action is run every hour that: + +- Checks for the latest file on the ClinVar FTP +- Checks whether a corresponding release or branch exists in this repository +- If it does then nothing else is done +- Otherwise, it will: + - update the repository's `release-name.txt` file in a new branch, + - create a pull request with this branch, and + - set the PR to auto-merge +- The "Publish" action is run on the `main` branch and after the merge will create the corresponding release and deposit the files. + +# Developer Documentation + +The following is for developers of `clinvar-data-jsonl` itself. + +## Managing Project with Terraform + +``` +# export GITHUB_OWNER=bihealth +# export GITHUB_TOKEN=ghp_TOKEN + +# cd utils/terraform +# terraform init +# terraform import github_repository.clinvar-data-jsonl clinvar-data-jsonl +# terraform validate +# terraform fmt +# terraform plan +# terraform apply +``` diff --git a/utils/terraform/.gitignore b/utils/terraform/.gitignore new file mode 100644 index 0000000..5dfe310 --- /dev/null +++ b/utils/terraform/.gitignore @@ -0,0 +1,2 @@ +.terraform* +terraform.tfstate* diff --git a/utils/terraform/main.tf b/utils/terraform/main.tf new file mode 100644 index 0000000..b843fc4 --- /dev/null +++ b/utils/terraform/main.tf @@ -0,0 +1,19 @@ +# Management of the GitHub project. + +resource "github_repository" "clinvar-data-jsonl" { + name = "clinvar-data-jsonl" + description = "Weekly ClinVar releases as JSONL with additonal useful files" + + has_issues = true + visibility = "public" + + allow_auto_merge = true + allow_merge_commit = false + allow_rebase_merge = false + has_downloads = true + has_discussions = true + delete_branch_on_merge = true + + squash_merge_commit_message = "BLANK" + squash_merge_commit_title = "PR_TITLE" +} diff --git a/utils/terraform/provider.tf b/utils/terraform/provider.tf new file mode 100644 index 0000000..0349641 --- /dev/null +++ b/utils/terraform/provider.tf @@ -0,0 +1 @@ +provider "github" {}