diff --git a/.env.sample b/.env.sample index 4b0f09c32..975096bba 100644 --- a/.env.sample +++ b/.env.sample @@ -1,6 +1,7 @@ DEBUG=True -PIXL_DICOM_TRANSFER_TIMEOUT=120 -PIXL_QUERY_TIMEOUT=120 +PIXL_DICOM_TRANSFER_TIMEOUT=240 +PIXL_QUERY_TIMEOUT=10 +CLI_RETRY_SECONDS=300 # PIXL PostgreSQL instance PIXL_DB_HOST=postgres @@ -9,14 +10,6 @@ PIXL_DB_USER=pixl PIXL_DB_PASSWORD= SKIP_ALEMBIC=false -# EMAP UDS -EMAP_UDS_HOST= -EMAP_UDS_PORT= -EMAP_UDS_NAME= -EMAP_UDS_USER= -EMAP_UDS_PASSWORD= -EMAP_UDS_SCHEMA_NAME= - # Exposed ports HASHER_API_PORT= POSTGRES_PORT= @@ -52,11 +45,12 @@ ORTHANC_RAW_AE_TITLE= ORTHANC_AUTOROUTE_RAW_TO_ANON=true ORTHANC_RAW_MAXIMUM_STORAGE_SIZE= // MB ORTHANC_RAW_JOB_HISTORY_SIZE=100 -ORTHANC_RAW_CONCURRENT_JOBS=5 +ORTHANC_CONCURRENT_JOBS=20 ORTHANC_RAW_RECORD_HEADERS= ORTHANC_RAW_HEADER_LOG_PATH= # PIXL Orthanc anon instance +ORTHANC_ANON_URL=http://orthanc-anon:8042 ORTHANC_ANON_USERNAME= ORTHANC_ANON_PASSWORD= ORTHANC_ANON_AE_TITLE= @@ -67,11 +61,17 @@ STUDY_TIME_OFFSET= LOCAL_SALT_VALUE=PIXL -# UCVNAQR DICOM node information -VNAQR_AE_TITLE= -VNAQR_DICOM_PORT= -VNAQR_IP_ADDR= -VNAQR_MODALITY=UCVNAQR +# UCPRIMARYQR DICOM node information +PRIMARY_DICOM_SOURCE_AE_TITLE= +PRIMARY_DICOM_SOURCE_PORT= +PRIMARY_DICOM_SOURCE_IP_ADDR= +PRIMARY_DICOM_SOURCE_MODALITY=UCPRIMARYQR + +# UCSECONDARYQR DICOM node information - commented out vars are optional +#SECONDARY_DICOM_SOURCE_AE_TITLE= +#SECONDARY_DICOM_SOURCE_PORT= +#SECONDARY_DICOM_SOURCE_IP_ADDR= +SECONDARY_DICOM_SOURCE_MODALITY=UCSECONDARYQR # DICOMweb endpoint AZ_DICOM_ENDPOINT_NAME= @@ -88,9 +88,7 @@ RABBITMQ_USERNAME= RABBITMQ_PASSWORD= # Imaging extraction API -PIXL_DICOM_TRANSFER_TIMEOUT=240 -PIXL_QUERY_TIMEOUT=10 -PIXL_MAX_MESSAGES_IN_FLIGHT=50 +PIXL_MAX_MESSAGES_IN_FLIGHT=5 # Project configs directory PROJECT_CONFIGS_DIR=projects/configs diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 000000000..88370c408 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,43 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +name: Bug Report +description: Have you got a bug? Please report it here! +projects: [ "SAFEHR-data/13" ] +body: + - type: markdown + attributes: + value: | + A clear and concise description of what the bug is. Please use a short, concise title for the bug and elaborate here + - type: textarea + id: actual_behaviour + attributes: + label: What happened? + description: A clear and concise description of what actually happens. + placeholder: "If you have a code sample, error messages, stack traces, please provide it here as well" + validations: + required: false + - type: textarea + id: logs + attributes: + label: Relevant log output (optional) + description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. 
+ validations: + required: false + - type: textarea + id: expected_behaviour + attributes: + label: What did you expect? (optional) + description: A clear and concise description of what you expected to happen. + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/feature_request.yaml b/.github/ISSUE_TEMPLATE/feature_request.yaml new file mode 100644 index 000000000..65dde0108 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yaml @@ -0,0 +1,51 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +name: Feature Request +description: Submit a proposal/request for a new feature +projects: [ "SAFEHR-data/13" ] +body: + - type: markdown + attributes: + value: | + A clear and concise description of the feature proposal + - type: textarea + id: motivation + attributes: + label: Motivation + description: Please outline the motivation for the proposal. + placeholder: "Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too" + validations: + required: true + - type: textarea + id: pitch + attributes: + label: Pitch + description: A clear and concise description of what you want to happen. + validations: + required: false + - type: textarea + id: alternatives + attributes: + label: Alternatives + description: A clear and concise description of any alternative solutions or features you've considered, if any. + placeholder: + validations: + required: false + - type: textarea + id: additional_context + attributes: + label: Additional context + description: Add any other context or screenshots about the feature request here. + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/voxl.yml b/.github/ISSUE_TEMPLATE/voxl.yml index c3012d0ca..9fa8da48b 100644 --- a/.github/ISSUE_TEMPLATE/voxl.yml +++ b/.github/ISSUE_TEMPLATE/voxl.yml @@ -13,7 +13,7 @@ # limitations under the License. name: VOXL story description: Create a new story or task for VOXL -projects: [ "UCLH-Foundry/13" ] +projects: [ "SAFEHR-data/13" ] body: - type: markdown attributes: diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..bdf4d2add --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,28 @@ + +## Description +Fixes #{issue_number}: A few words or sentences describing the changes proposed in this pull request (PR). + +## Type of change +Please delete options accordingly to the description. + + +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] This change requires a documentation update + + +### Suggested Checklist + + + +- [ ] I have performed a self-review of my own code. +- [ ] I have made corresponding changes to the documentation. +- [ ] My changes generate no new warnings. 
+- [ ] I have commented my code, particularly in hard-to-understand areas. +- [ ] I have passed on my local host device. (see further details at the [CONTRIBUTING](https://github.com/SAFEHR-data/PIXL/blob/main/CONTRIBUTING.md#local-testing) document) +- [ ] Make sure your branch is up-to-date with main branch. See [CONTRIBUTING](https://github.com/SAFEHR-data/PIXL/blob/main/CONTRIBUTING.md) for a general example to syncronise your branch with the `main` branch. +- [ ] I have requested review to this PR. +- [ ] I have addressed and marked as resolved all the review comments in my PR. +- [ ] Finally, I have selected `squash and merge` + diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f0f592a2e..f95ad02ef 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,6 +17,7 @@ on: push: branches: - main + - "renovate/**" pull_request: workflow_dispatch: @@ -27,14 +28,14 @@ concurrency: jobs: lint: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 timeout-minutes: 5 steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - name: Run pre-commit - uses: pre-commit/action@v3.0.1 + uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 with: extra_args: --all-files @@ -45,18 +46,18 @@ jobs: docker compose config --quiet test: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 timeout-minutes: 30 strategy: fail-fast: false # run all tests if even if one fails matrix: - package_dir: [pixl_core, hasher, pixl_dcmd, cli, pixl_export, pixl_imaging] + package_dir: [pixl_core, hasher, pixl_dcmd, cli, pixl_export, pixl_imaging, pytest-pixl] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - name: Init Python - uses: actions/setup-python@v5 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 with: python-version: "3.11" cache: "pip" @@ -70,14 +71,14 @@ jobs: - name: Run tests and generate coverage report working-directory: ${{ matrix.package_dir }} - run: pytest --cov --cov-report=xml + run: COV_CORE_SOURCE=src COV_CORE_CONFIG=.coveragerc COV_CORE_DATAFILE=.coverage.eager pytest --cov=src --cov-append --cov-report=xml --cov-report=term-missing env: ENV: test AZURE_KEY_VAULT_NAME: test AZURE_KEY_VAULT_SECRET_NAME: test - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v4.2.0 + uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e # v5.1.1 with: directory: ${{ matrix.package_dir }} env: @@ -85,13 +86,13 @@ jobs: system-test: if: ${{ ! 
github.event.pull_request.draft || contains(github.event.pull_request.title, '[force-system-test]') }} - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 timeout-minutes: 30 steps: - - uses: actions/checkout@v4 - - uses: docker/setup-buildx-action@v3 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: docker/setup-buildx-action@c47758b77c9736f4b2ef4073d4d51994fabfe349 # v3 - name: Init Python - uses: actions/setup-python@v5 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 with: python-version: "3.11" cache: "pip" @@ -131,23 +132,54 @@ jobs: ./run-system-test.sh coverage echo FINISHED SYSTEM TEST SCRIPT - - name: Dump export-api docker logs for debugging + - name: Dump queue docker logs for debugging if: ${{ failure() }} run: | - docker logs -t system-test-export-api-1 2>&1 + docker logs -t system-test-queue-1 2>&1 - - name: Dump orthanc-anon docker logs for debugging + - name: Dump postgres docker logs for debugging if: ${{ failure() }} run: | - docker logs -t system-test-orthanc-anon-1 2>&1 + docker logs -t system-test-postgres-1 2>&1 - name: Dump imaging-api docker logs for debugging if: ${{ failure() }} run: | docker logs -t system-test-imaging-api-1 2>&1 + - name: Dump orthanc-raw docker logs for debugging + if: ${{ failure() }} + run: | + docker logs -t system-test-orthanc-raw-1 2>&1 + + - name: Dump orthanc-anon docker logs for debugging + if: ${{ failure() }} + run: | + docker logs -t system-test-orthanc-anon-1 2>&1 + + - name: Dump hasher-api docker logs for debugging + if: ${{ failure() }} + run: | + docker logs -t system-test-hasher-api-1 2>&1 + + - name: Dump export-api docker logs for debugging + if: ${{ failure() }} + run: | + docker logs -t system-test-export-api-1 2>&1 + + - name: Dump VNA docker logs for debugging + if: ${{ failure() }} + run: | + docker logs -t system-test-vna-qr-1 2>&1 + + - name: Dump DICOMWeb docker logs for debugging + if: ${{ failure() }} + run: | + docker logs -t system-test-dicomweb-server-1 2>&1 + + - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v4.2.0 + uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e # v5.1.1 with: directory: test env: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cd91f08da..720ba6df4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ --- repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.3 + rev: v0.5.5 hooks: - id: ruff-format - id: ruff @@ -22,7 +22,7 @@ repos: - --fix - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.9.0 + rev: v1.11.1 hooks: - id: mypy entry: mypy . 
@@ -30,12 +30,21 @@ repos: pass_filenames: false additional_dependencies: [ - "mypy==1.9.0", + "mypy==1.11.1", "types-PyYAML", "types-requests", "types-python-slugify", "types-psycopg2", ] + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.28.6 + hooks: + # Schemas taken from https://www.schemastore.org/json/ + - id: check-jsonschema + name: "Validate GitHub issue templates" + files: ^\.github/ISSUE_TEMPLATE/.*\.yml$ + exclude: ^\.github/ISSUE_TEMPLATE/config\.yml$ + args: ["--verbose", "--schemafile", "schemas/github-issue-forms.json"] - repo: local hooks: diff --git a/.renovaterc.json5 b/.renovaterc.json5 new file mode 100644 index 000000000..ed970c527 --- /dev/null +++ b/.renovaterc.json5 @@ -0,0 +1,37 @@ +{ + $schema: "https://docs.renovatebot.com/renovate-schema.json", + extends: [ + "github>UCL-ARC/.github//renovate/default-config.json", + ":assignAndReview(team:arc-dev)", + "group:allNonMajor" + ], + customDatasources: { + dicomSpec: { + defaultRegistryUrlTemplate: "https://dicom.nema.org/medical/dicom", + format: "html", + }, + }, + customManagers: [ + { + customType: "regex", + description: "Update DICOM Spec edition used for validation", + fileMatch: [ + "orthanc/orthanc-anon/plugin/download_dicom_spec.py", + "pixl_dcmd/src/pixl_dcmd/main.py", + ], + matchStrings: [ + 'edition\\s?=\\s?"(?.*?)"\n', + '.*\\(edition\\s?=\\s?"(?.*?)"\\)\n', + ], + depNameTemplate: "dicomSpec", + datasourceTemplate: "custom.dicomSpec", + }, + ], + packageRules: [ + { + matchDatasources: ["custom.dicomSpec"], + extractVersion: "/medical/dicom/(?\\d{4}[a-z])/", + versioning: "loose", + } + ] +} diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..d6e177944 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,77 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. 
+ +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when +an individual is representing the project or its community in public spaces. +Examples of representing a project or community include using an official +project e-mail address, posting via an official social media account, or acting +as an appointed representative at an online or offline event. Representation of +a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team opening a [new issue](https://github.com/SAFEHR-data/PIXL/issues/new/choose). +All complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..3fd071858 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,78 @@ +# Contributing to `PIXL`. + +👍🎉 First off, thanks for taking the time to contribute! 🎉👍 + +When contributing to this repository, please first discuss the change you wish to make via issue, instant message in clara-agx chatbox, or any other method with the owners of this repository before making a change. +Please note we have a [code of conduct](CODE_OF_CONDUCT.md), please follow it in all your interactions with the project. + +## :octocat: Setting up project locally. +1. Generate your SSH keys as suggested [here](https://docs.github.com/en/github/authenticating-to-github/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent) +2. Clone the repository by typing (or copying) the following lines in a terminal +``` +git clone git@github.com:SAFEHR-data/PIXL.git +``` + +## New issues +* Open an issue (bug report, feature request, or something is not working): https://github.com/SAFEHR-data/PIXL/issues/new/choose +* Workflow for issue management +```mermaid + flowchart TD; + Z[Bug Reported] -->A[...]; + A[Bug resolution] -->B(Testing OK?); + B--Yes-->C[Prepare commit]; + B--No-->D[Reopen issue]; + D----> A[Bug resolution]; + C ----> E[Request Review]; + E ----> F[Commit changes]; + F --> H[Merge PR and close issue] +``` + +## Committing and pushing changes +The following commands are typed or copied via command line. +Altenatively, you can use the features of your integrated development environment (pycharm, code, vim, etc). + +1. 
Clone this repo
+```
+git clone git@github.com:SAFEHR-data/PIXL.git
+```
+2. Create new branch `{FEATURE_BRANCH_NAME}` using issue number `{ISSUE_NUMBER}`
+```
+git checkout -b ISSUE_NUMBER-FEATURE_BRANCH_NAME #(e.g. `git checkout -b 422-my-feature-branch`)
+```
+3. Commit changes and push to your branch
+```
+git add .
+git commit -m 'short message #ISSUE_NUMBER' #(e.g. git commit -m 'adding a message to my feature branch #422')
+git push origin ISSUE_NUMBER-FEATURE_BRANCH_NAME
+```
+4. Submit a Pull Request against the `main` branch.
+
+## Pull Request (PR) and merge to `main` branch
+1. Select the branch that contains your commits.
+2. Click `Compare and pull request` and create a PR for the associated branch.
+3. Type a title and description of your PR and create the PR.
+4. Please keep your PR in sync with the base branch.
+It is recommended that you use [Squashing and merging a long-running branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/incorporating-changes-from-a-pull-request/about-pull-request-merges#squashing-and-merging-a-long-running-branch).
+Otherwise, you have the option to rebase your `{ISSUE_NUMBER-FEATURE_BRANCH_NAME}` branch onto the base branch (e.g. `main`).
+```
+git checkout main
+git pull origin main
+git checkout ISSUE_NUMBER-FEATURE_BRANCH #(e.g. git checkout 422-my-feature-branch)
+git fetch
+git merge main
+git push --force origin FEATURE_BRANCH
+```
+5. Run `pre-commit run -a` to tidy up code and documentation (this is also tested in [CI](https://github.com/SAFEHR-data/PIXL/blob/main/.github/workflows/main.yml)).
+6. If you are developing on your local host, please check that your code is properly tested with `pytest` (this is also tested in [CI](https://github.com/SAFEHR-data/PIXL/blob/main/.github/workflows/main.yml)).
+7. Request a PR review.
+See [collaborating-with-pull-requests](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests) for further details.
+8. Once your PR has been approved, proceed to merge it to `main`. See [Merging a pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/incorporating-changes-from-a-pull-request/merging-a-pull-request)
+9. Delete and remove your merged branch
+9.1 You can navigate branches in https://github.com/SAFEHR-data/PIXL/branches and remove merged branches by clicking the :wastebasket: icon.
+9.2 Alternatively, you can delete your local and merged branches using the following commands: +``` +#Local git clear +git branch --merged | grep -v '\*\|master\|main\|develop' | xargs -n 1 git branch -d +#Remote git clear +git branch -r --merged | grep -v '\*\|master\|main\|develop' | sed 's/origin\///' | xargs -n 1 git push --delete origin +``` diff --git a/README.md b/README.md index 597e63b53..e9a30860d 100644 --- a/README.md +++ b/README.md @@ -1,42 +1,38 @@ -[![pixl-ci](https://github.com/UCLH-Foundry/PIXL/actions/workflows/main.yml/badge.svg)](https://github.com/UCLH-Foundry/PIXL/actions/workflows/main.yml) -[![codecov](https://codecov.io/gh/UCLH-Foundry/PIXL/graph/badge.svg?token=99CHF3ZCAW)](https://codecov.io/gh/UCLH-Foundry/PIXL) +[![pixl-ci](https://github.com/SAFEHR-data/PIXL/actions/workflows/main.yml/badge.svg)](https://github.com/SAFEHR-data/PIXL/actions/workflows/main.yml) +[![codecov](https://codecov.io/gh/SAFEHR-data/PIXL/graph/badge.svg?token=99CHF3ZCAW)](https://codecov.io/gh/SAFEHR-data/PIXL) # PIXL PIXL Image eXtraction Laboratory `PIXL` is a system for extracting, linking and de-identifying DICOM imaging data, structured EHR data and free-text data from radiology reports at UCLH. -Please see the [rolling-skeleton]([https://github.com/UCLH-Foundry/the-rolling-skeleton=](https://github.com/UCLH-Foundry/the-rolling-skeleton/blob/main/docs/design/100-day-design.md)) for more details. +Please see the [rolling-skeleton]([https://github.com/SAFEHR-data/the-rolling-skeleton=](https://github.com/SAFEHR-data/the-rolling-skeleton/blob/main/docs/design/100-day-design.md)) for more details. -PIXL is intended run on one of the [GAE](https://github.com/UCLH-Foundry/Book-of-FlowEHR/blob/main/glossary.md#gaes)s and comprises +PIXL is intended run on one of the [GAE (General Application Environments)](https://github.com/SAFEHR-data/Book-of-FlowEHR/blob/main/glossary.md#gaes)s and comprises several services orchestrated by [Docker Compose](https://docs.docker.com/compose/). -To get access to the GAE, [see the documentation on Slab](https://uclh.slab.com/posts/gae-access-7hkddxap) +To get access to the GAE, [see the documentation on Slab](https://uclh.slab.com/posts/gae-access-7hkddxap). +Please request access to Slab and add further details in a [new blank issue](https://github.com/SAFEHR-data/PIXL/issues/new). -## Development - -[Follow the developer setup instructions](./docs/setup/developer.md). +## Installation -Before raising a PR, make sure to **run the tests** for the PIXL module you have been working on . -In addition, make sure to [have `pre-commit` installed](/docs/setup/developer.md#linting) to -automatically check your code before committing. - -You can run all tests from the root of the repo with: +Install the PIXL Python modules by running the following commands from the top-level `PIXL/` directory: ```shell -pytest +python -m pip install -e pixl_core/ +python -m pip install -e cli/ ``` -The `pytest.ini` file in the root of the repo contains the configuration for running all tests at once. +Note, the CLI currently [needs to be installed in editable mode](https://github.com/SAFEHR-data/PIXL/issues/318). -We run [pre-commit](https://pre-commit.com/) as part of the GitHub Actions CI. To install and run it locally, do: +## Development -```shell -pip install pre-commit -pre-commit install -``` +[Follow the developer setup instructions](./docs/setup/developer.md). 
+ +Before raising a PR, make sure to **run the tests** for every PIXL module, not just the one you +have been working on. In addition, make sure to [have `pre-commit` installed](/docs/setup/developer.md#linting) +to automatically check your code before committing. -The configuration can be found in [`.pre-commit-config.yml`](./.pre-commit-config.yaml) ## Design @@ -83,7 +79,10 @@ HTTP API to export files (parquet and DICOM) from UCLH to endpoints. HTTP API to process messages from the `imaging` queue and populate the raw orthanc instance with images from PACS/VNA. -## Setup +## Setup `PIXL` in GAE + +
+ Click here to expand steps and configurations ### 0. [UCLH infrastructure setup](./docs/setup/uclh-infrastructure-setup.md) @@ -139,10 +138,12 @@ To configure a new project, follow these steps: >[!NOTE] > The project slug should match the [slugify](https://github.com/un33k/python-slugify)-ed project name in the `extract_summary.json` log file! -2. [Open a PR in PIXL](https://github.com/UCLH-Foundry/PIXL/compare) to merge the new project config into `main` +2. [Open a PR in PIXL](https://github.com/SAFEHR-data/PIXL/compare) to merge the new project config into `main` #### The config YAML file +
The configuration file defines: - Project name: the `` name of the Project @@ -174,13 +175,19 @@ The configuration file defines: [parquet files](./docs/file_types/parquet_files.md). We currently support the following endpoints: - `"none"`: no upload - `"ftps"`: a secure FTP server (for both _DICOM_ and _parquet_ files) + - `"dicomweb"`: a DICOMweb server (for _DICOM_ files only). + Requires the `DICOMWEB_*` environment variables to be set in `.env` + - `"xnat"`: an [XNAT](https://www.xnat.org/) instance (for _DICOM_ files only) - - +

+
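As noted above, the project slug should match the [slugify](https://github.com/un33k/python-slugify)-ed project name from the `extract_summary.json` log file. An illustrative way to preview the slug for a given project name, assuming the `python-slugify` package is available in your environment:

```shell
# Print the slugified form of a project name
python -c 'from slugify import slugify; print(slugify("My Project Name"))'
# prints: my-project-name
```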
#### Project secrets +
+ Any credentials required for uploading the project's results should be stored in an **Azure Key Vault** (set up instructions below). PIXL will query this key vault for the required secrets at runtime. This requires the following @@ -206,15 +213,22 @@ This kevyault is configured with the following environment variables: - `HASHER_API_AZ_KEY_VAULT_NAME` the name of the key vault, used to connect to the correct key vault See the [hasher documentation](./hasher/README.md) for more information. +
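Before running an export it can be worth checking that the expected secrets are actually present in the key vault. One way to do this (illustrative only; assumes the Azure CLI is installed and your account has access to the vault configured in `.env`):

```shell
# Illustrative check: list the secret names in the project key vault
# (use the vault name set as HASHER_API_AZ_KEY_VAULT_NAME in .env)
az keyvault secret list --vault-name <your-key-vault-name> --query "[].name" --output table
```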

+
+ +
-## Run
+## Run `PIXL` in GAE
+
+<details>
+<summary> Click here to view detailed steps </summary>

### Start

From the _PIXL_ directory:

-```bash
-bin/pixldc pixl_dev up
+```shell
+pixl dc up
```

Once the services are running, you can interact with the services using the [`pixl` CLI](./cli/README.md).
@@ -223,10 +237,13 @@ Once the services are running, you can interact with the services using the [`pi
### Stop

From the _PIXL_ directory:

-```bash
-bin/pixldc pixl_dev down
+```shell
+pixl dc down # --volumes to remove all data volumes
```
+
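Since `pixl dc` forwards any extra arguments on to `docker compose`, other compose subcommands can be run through it as well. A couple of illustrative examples (service names are taken from the compose setup and may differ in your deployment):

```shell
pixl dc ps                    # list the running PIXL services
pixl dc logs -f imaging-api   # follow the logs of a single service
```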
+ + ## Analysis The number of DICOM instances in the raw Orthanc instance can be accessed from @@ -292,3 +309,11 @@ be uploaded to the FTP server at the following path: FTPROOT/PROJECT_SLUG/EXTRACT_DATETIME/parquet/radiology/radiology.parquet ..............................................omop/public/*.parquet ``` + +## :octocat: Cloning repository +* Generate your SSH keys as suggested [here](https://docs.github.com/en/github/authenticating-to-github/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent) +* Clone the repository by typing (or copying) the following lines in a terminal +``` +git clone git@github.com:SAFEHR-data/PIXL.git +``` + diff --git a/bin/pixldc b/bin/pixldc deleted file mode 100755 index 578588071..000000000 --- a/bin/pixldc +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""A wrapper around docker compose that sets the correct directory and environment""" - -import argparse -import os -import sys -from pathlib import Path - -from loguru import logger - -ALLOWED_PROJECT_NAMES = ["pixl_dev", "pixl_test", "pixl_prod"] -parser = argparse.ArgumentParser(description="Wrapper around docker compose for pixl") -parser.add_argument("--debug", action="store_true", help="print debugging for this wrapper") -parser.add_argument( - "project", - choices=ALLOWED_PROJECT_NAMES, - help="Which project to run this docker compose command on", -) -parser.add_argument("command", help="Which docker compose command to run") -args, unknown_args = parser.parse_known_args() - -logging_level = "INFO" if not args.debug else "DEBUG" -logger.remove() # Remove all handlers -logger.add(sys.stderr, level=logging_level) - -BIN_DIR = Path(__file__).parent.absolute() -os.chdir(BIN_DIR) -PROJECT_DIR = BIN_DIR.parent.absolute() -COMPOSE_FILE = PROJECT_DIR / "docker-compose.yml" - -# The first arg is necessary even if it looks repetitive! Equivalent to bash's $0. -docker_args = [ - "docker", - "compose", - "--file", - COMPOSE_FILE, - "--project-name", - args.project, - args.command, -] - -# up gets these options for free -if args.command == "up": - docker_args.extend(["--remove-orphans", "--abort-on-container-exit", "--build"]) - -# add on the user's extra args -docker_args.extend(unknown_args) - -logger.debug("args = {}", args) -logger.debug("extra args = {}", unknown_args) -logger.debug("about to run with docker: {}", docker_args) - -os.execvp("docker", docker_args) # noqa: S606, S607 this is what the previous script was doing diff --git a/cli/README.md b/cli/README.md index 77847c3f8..374c358bd 100644 --- a/cli/README.md +++ b/cli/README.md @@ -1,40 +1,38 @@ # PIXL Driver + Command line interface The PIXL CLI driver provides functionality to populate a queue with messages containing information -required to run electronic health queries against the EMAP star database and the VNA image system. +required to run electronic health queries against the VNA image system. 
Once a set of queues are populated the consumers can be started, updated and the system extractions stopped cleanly. ## Prerequisites - -`PIXL CLI` requires Python version 3.10. - -Running the tests requires [docker](https://docs.docker.com/get-docker/) to be installed. - -## Installation - -We recommend installing in a project specific virtual environment created using a environment +* Python version 3.11 (matching python versions in [pixl-ci](../.github/workflows/main.yml) and [dev](../docs/setup/developer.md#installation-of-pixl-modules)). +* [Docker](https://docs.docker.com/get-docker/) with version `>=27.0.3` +* [Docker Compose](https://docs.docker.com/compose/install/#installation-scenarios) with version `>=v2.28.1-desktop.1` +* We recommend installing PIXL project in specific virtual environment using a environment management tool such as [conda](https://docs.conda.io/en/latest/) or -[virtualenv](https://virtualenv.pypa.io/en/latest/). - -Then install in editable mode by running +[virtualenv](https://virtualenv.pypa.io/en/latest/). +See detailed instructions [here](../docs/setup/developer.md#setting-up-python-virtual-environment) +## Installation +Activate your python virtual environment and install `PIXL` project in editable mode by running ```bash -pip install -e ../pixl_core/ -e . +python -m pip install -e ../pixl_core -e . ``` ## Usage -> **Note** The `rabbitmq`, `export-api` and `imaging-api` services must be started prior to using the CLI -> This is typically done by spinning up the necessary Docker containers through `docker compose`. -> For convenience, we provide the [`bin/pixldc`](../bin/pixldc) script to spin up the relevant -> services in production. +**Note** The `rabbitmq`, `export-api` and `imaging-api` services must be started prior to using the CLI +This is done by spinning up the necessary Docker containers through `docker compose`. +For convenience, we provide the `pixl dc` command, which acts as a wrapper for `docker compose`, +but takes care of some of the configuration for you. See the commands and subcommands with ```bash pixl --help ``` + ### Configuration The `rabbitmq` and `postgres` services are configured by setting the following environment variables @@ -65,7 +63,7 @@ where the `*_RATE` variables set the default querying rate for the message queue ### Running the pipeline -Populate queue for Imaging +Populate queue for Imaging using parquet files: ```bash pixl populate @@ -83,6 +81,16 @@ parquet_dir └── PROCEDURE_OCCURRENCE.parquet ``` +Alternatively, the queue can be populated based on records in CSV files: + +```bash +pixl populate +``` + +One advantage of using a CSV file is that multiple projects can be listed +for export in the file. Using the parquet format, in contrast, only supports +exporting a single project per call to `pixl populate`. + Extraction will start automatically after populating the queues. If granular customisation of the rate per queue is required or a queue should not be started then supply the argument `--no-start` and use `pixl start...` to launch @@ -101,43 +109,36 @@ Stop Imaging extraction pixl stop ``` -## Development +### High-priority messages -The CLI is created using [click](https://click.palletsprojects.com/en/8.0.x/), and curently provides -the following commands: +By default, messages will be sent to the queue with the lowest priority (1). -```sh -$ pixl --help -Usage: pixl [OPTIONS] COMMAND [ARGS]... - - PIXL command line interface - -Options: - --debug / --no-debug - --help Show this message and exit. 
- -Commands: - extract-radiology-reports Export processed radiology reports to... - kill Stop all the PIXL services - populate Populate a (set of) queue(s) from a parquet... - start Start consumers for a set of queues - status Get the status of the PIXL consumers - stop Stop extracting data - update Update one or a list of consumers with a... +To send to the queue with a different priority, you can use the `--priority` argument to +`populate`: + +```bash +pixl populate --priority 5 ``` -Install locally in editable mode with the development and testing dependencies by running +`priority` must be an integer between 1 and 5, with 5 being the highest priority. +## Development +### Help commands +The CLI is created using [click](https://click.palletsprojects.com/en/8.0.x/). To see which commands +are currently available, you can use the `pixl --help` command: + +### Local installation +Activate your python environment and install project locally in editable mode with the development and testing dependencies by running ```bash -pip install -e ../pixl_core/ -e .[test] +python -m pip install -e ../pixl_core -e ../pytest-pixl -e ".[test]" -e ".[dev]" ``` ### Running tests - The CLI tests require a running instance of the `rabbitmq` service, for which we provide a `docker-compose` [file](./tests/docker-compose.yml). The service is automatically started by the `run_containers` _pytest_ fixture. So to run the tests, run ```bash -pytest +pytest -vs tests #for all tests +pytest -vs tests/test_docker_commands.py #e.g., for particular tests ``` diff --git a/cli/pyproject.toml b/cli/pyproject.toml index 638daee06..4b7888a5d 100644 --- a/cli/pyproject.toml +++ b/cli/pyproject.toml @@ -1,26 +1,24 @@ [project] name = "pixl_cli" -version = "0.0.4" +version = "0.2.0rc0" authors = [{ name = "PIXL authors" }] description = "PIXL command line interface" readme = "README.md" requires-python = ">=3.10" classifiers = ["Programming Language :: Python :: 3"] dependencies = [ - "core", - "click==8.1.3", + "core==0.2.0rc0", + "click==8.1.7", + "tqdm==4.67.1", ] [project.optional-dependencies] test = [ - "pytest==7.4.*", - "pytest-mock==3.12.*", - "pytest-pixl" + "core[test]==0.2.0rc0", + "pytest-mock==3.14.*", ] dev = [ - "mypy", - "pre-commit", - "ruff", + "core[dev]==0.2.0rc0", ] [project.scripts] @@ -29,3 +27,19 @@ pixl = "pixl_cli.main:cli" [build-system] requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" + +[tool.coverage.report] +exclude_also = [ + "def __repr__", + "if self.debug:", + "if settings.DEBUG", + "except subprocess.CalledProcessError as exception:", + "raise AssertionError", + "raise NotImplementedError", + "if 0:", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", + "if typing.TYPE_CHECKING", + "class .*\\bProtocol\\):", + "@(abc\\.)?abstractmethod", +] diff --git a/cli/src/pixl_cli/_config.py b/cli/src/pixl_cli/_config.py index a5dab35ec..7a4f578cc 100644 --- a/cli/src/pixl_cli/_config.py +++ b/cli/src/pixl_cli/_config.py @@ -68,7 +68,11 @@ def base_url(self) -> str: def api_config_for_queue(queue_name: str) -> APIConfig: """Configuration for an API associated with a queue""" - api_name = f"{queue_name}_api" + api_name = { + "export": "export_api", + "imaging-primary": "imaging_api", + "imaging-secondary": "imaging_api", + }[queue_name] if api_name not in API_CONFIGS: msg = ( diff --git a/cli/src/pixl_cli/_database.py b/cli/src/pixl_cli/_database.py index cc8ecd0b7..4c9c6e197 100644 --- a/cli/src/pixl_cli/_database.py +++ b/cli/src/pixl_cli/_database.py @@ -16,9 +16,9 @@ from 
typing import cast +import pandas as pd from core.db.models import Extract, Image -from core.patient_queue.message import Message -from sqlalchemy import URL, create_engine +from sqlalchemy import URL, create_engine, not_, select from sqlalchemy.orm import Session, sessionmaker from pixl_cli._config import SERVICE_SETTINGS @@ -37,88 +37,123 @@ engine = create_engine(url) -def filter_exported_or_add_to_db(messages: list[Message], project_slug: str) -> list[Message]: +def filter_exported_or_add_to_db(messages_df: pd.DataFrame) -> pd.DataFrame: """ - Filter exported images for this project, and adds missing extract or images to database. + Filter exported images for multiple projects, and adds missing extract and images to database. :param messages: Initial messages to filter if they already exist - :param project_slug: project slug to query on - :return messages that have not been exported + :return DataFrame of messages that have not been exported """ PixlSession = sessionmaker(engine) with PixlSession() as pixl_session, pixl_session.begin(): - extract, extract_created = _get_or_create_project(project_slug, pixl_session) - - return _filter_exported_messages( - extract, messages, pixl_session, extract_created=extract_created - ) - + messages_dfs = [ + _filter_exported_or_add_to_db_for_project( + pixl_session, project_messages_df, project_slug + ) + for project_slug, project_messages_df in messages_df.groupby("project_name") + ] + return pd.concat(messages_dfs) + + +def _filter_exported_or_add_to_db_for_project( + session: Session, messages_df: pd.DataFrame, project_slug: str +) -> pd.DataFrame: + """ + Filter exported images for this project, and adds missing extract and images to database. -def _get_or_create_project(project_slug: str, session: Session) -> tuple[Extract, bool]: - existing_extract = session.query(Extract).filter(Extract.slug == project_slug).one_or_none() - if existing_extract: - return existing_extract, False - new_extract = Extract(slug=project_slug) - session.add(new_extract) - return new_extract, True + :param session: SQLAlchemy session + :param messages: Initial messages to filter if they already exist + :param project_slug: project slug to query on + :return DataFrame of messages that have not been exported for this project + """ + extract = session.query(Extract).filter(Extract.slug == project_slug).one_or_none() + if extract: + db_images_df = all_images_for_project(project_slug) + missing_images_df = _filter_existing_images(messages_df, db_images_df) + messages_df = _filter_exported_messages(messages_df, db_images_df) + else: + # We need to add the extract to the database and retrive it again so + # we can access extract.extract_id (needed by session.bulk_save_objects(images)) + session.add(Extract(slug=project_slug)) + extract = session.query(Extract).filter(Extract.slug == project_slug).one_or_none() + missing_images_df = messages_df + + _add_images_to_session(extract, missing_images_df, session) + + return messages_df + + +def _filter_existing_images( + messages_df: pd.DataFrame, + images_df: pd.DataFrame, +) -> pd.DataFrame: + # DataFrame indices must batch when using df.isin (or df.index.isin) + # So we re-index the DataFrames to match on the columns we want to compare + messages_df_reindexed = messages_df.set_index(["accession_number", "mrn", "study_date"]) + images_df_reindexed = images_df.set_index(["accession_number", "mrn", "study_date"]) + keep_indices = ~messages_df_reindexed.index.isin(images_df_reindexed.index) + return messages_df[keep_indices] def 
_filter_exported_messages( - extract: Extract, messages: list[Message], session: Session, *, extract_created: bool -) -> list[Message]: - output_messages = [] - for message in messages: - _, image_exported = _get_image_and_check_exported( - extract, message, session, extract_created=extract_created - ) - if not image_exported: - output_messages.append(message) - return output_messages - - -def _get_image_and_check_exported( - extract: Extract, message: Message, session: Session, *, extract_created: bool -) -> tuple[Image, bool]: - if extract_created: - new_image = _add_new_image_to_session(extract, message, session) - return new_image, False - - existing_image = ( - session.query(Image) - .filter( - Image.extract == extract, - Image.accession_number == message.accession_number, - Image.mrn == message.mrn, - Image.study_date == message.study_date, - ) - .one_or_none() + messages_df: pd.DataFrame, + images_df: pd.DataFrame, +) -> pd.DataFrame: + merged = messages_df.merge( + images_df, + on=["accession_number", "mrn", "study_date"], + how="left", + validate="one_to_one", + suffixes=(None, None), ) + keep_indices = merged["exported_at"].isna().to_numpy() + return merged[keep_indices][messages_df.columns] + + +def _add_images_to_session(extract: Extract, images_df: pd.DataFrame, session: Session) -> None: + images = [] + for _, row in images_df.iterrows(): + new_image = Image( + accession_number=row["accession_number"], + study_date=row["study_date"], + mrn=row["mrn"], + study_uid=row["study_uid"], + extract=extract, + extract_id=extract.extract_id, + pseudo_patient_id=row["pseudo_patient_id"], + ) + images.append(new_image) + session.bulk_save_objects(images) - if existing_image: - if existing_image.exported_at is not None: - return existing_image, True - return existing_image, False - - new_image = _add_new_image_to_session(extract, message, session) - return new_image, False +def all_images_for_project(project_slug: str) -> pd.DataFrame: + """Given a project, get all images in the DB for that project.""" + PixlSession = sessionmaker(engine) -def _add_new_image_to_session(extract: Extract, message: Message, session: Session) -> Image: - new_image = Image( - accession_number=message.accession_number, - study_date=message.study_date, - mrn=message.mrn, - extract=extract, + query = ( + select(Image.accession_number, Image.study_date, Image.mrn, Image.exported_at) + .join(Extract) + .where(Extract.slug == project_slug) ) - session.add(new_image) - return new_image + with PixlSession() as session: + return pd.read_sql( + sql=query, + con=session.bind, + ) -def images_for_project(project_slug: str) -> list[Image]: - """Given a project, get all images in the DB for that project.""" +def exported_images_for_project(project_slug: str) -> list[Image]: + """ + Given a project, get all images in the DB for that project + that have not yet been exported. 
+ """ PixlSession = sessionmaker(engine) with PixlSession() as session: return cast( list[Image], - session.query(Image).join(Extract).filter(Extract.slug == project_slug).all(), + session.query(Image) + .join(Extract) + .filter(Extract.slug == project_slug) + .filter(not_(Image.exported_at.is_(None))) + .all(), ) diff --git a/cli/src/pixl_cli/_docker_commands.py b/cli/src/pixl_cli/_docker_commands.py new file mode 100644 index 000000000..ba9406be1 --- /dev/null +++ b/cli/src/pixl_cli/_docker_commands.py @@ -0,0 +1,73 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import shutil +import subprocess +from pathlib import Path +from typing import Optional + +import click +from decouple import config +from loguru import logger + +PIXL_ROOT = Path(__file__).parents[3].resolve() + + +# Required to allow passing unkown options to docker-compose +# https://click.palletsprojects.com/en/8.1.x/advanced/#forwarding-unknown-options +@click.command(context_settings={"ignore_unknown_options": True}) +@click.argument("args", nargs=-1, type=click.UNPROCESSED) +def dc(args: tuple[str]) -> None: + """Wrapper around docker compose for PIXL""" + # Construct the docker-compose arguments based on subcommand + docker_args = list(args) + + if "up" in args: + docker_args = [*args, "--wait", "--build", "--remove-orphans"] + if "down" in args: + docker_args = _check_down_args(args) + + run_docker_compose(docker_args, working_dir=PIXL_ROOT) + + +def _check_down_args(args: tuple[str, ...]) -> list: + """Stop all the PIXL services""" + if config("ENV") == "prod" and "--volumes" in args: + click.secho("WARNING: Attempting to remove volumes in production.", fg="yellow") + if not click.confirm("Are you sure you want to remove the volumes?"): + click.secho("Running 'docker compose down' without removing volumes.", fg="blue") + return [arg for arg in args if arg != "--volumes"] + return list(args) + + +def run_docker_compose(args: list, working_dir: Optional[Path]) -> None: + """Wrapper to run docker-compose through the CLI.""" + docker_cmd = shutil.which("docker") + + if not docker_cmd: + err_msg = "docker not found in $PATH. Please make sure it's installed." 
+ raise FileNotFoundError(err_msg) + + pixl_env = config("ENV") + + docker_args = [ + docker_cmd, + "compose", + "--project-name", + f"pixl_{pixl_env}", + *args, + ] + logger.info("Running docker compose with: {}, from {}", docker_args, working_dir) + + subprocess.run(docker_args, check=True, cwd=working_dir) # noqa: S603 diff --git a/cli/src/pixl_cli/_io.py b/cli/src/pixl_cli/_io.py index 9336e9125..39bff743b 100644 --- a/cli/src/pixl_cli/_io.py +++ b/cli/src/pixl_cli/_io.py @@ -16,13 +16,14 @@ from __future__ import annotations import json -from datetime import UTC, datetime +from datetime import datetime +from enum import StrEnum, auto from pathlib import Path from typing import TYPE_CHECKING +import numpy as np import pandas as pd from core.exports import ParquetExport -from core.patient_queue.message import Message from loguru import logger if TYPE_CHECKING: @@ -53,104 +54,78 @@ def copy_parquet_return_logfile_fields(resources_path: Path) -> tuple[str, datet return project_name_slug, extract_generated_timestamp -def messages_from_csv(filepath: Path) -> list[Message]: +def read_patient_info(resources_path: Path) -> pd.DataFrame: + """ + Read patient information from a CSV file or parquet files within directory structure. + :param resources_path: Path for CSV file or parquet directory containing private and public + :return: DataFrame with patient information + """ + if resources_path.is_file() and resources_path.suffix == ".csv": + messages_df = _load_csv(resources_path) + else: + messages_df = _load_parquet(resources_path) + + messages_df = messages_df.sort_values(by=["project_name", "study_date"]) + messages_df = messages_df.drop_duplicates( + subset=["project_name", "mrn", "accession_number", "study_date"] + ) + + if len(messages_df) == 0: + msg = f"Failed to find any messages in {resources_path}" + raise ValueError(msg) + + logger.info("Created {} messages from {}", len(messages_df), resources_path) + + return messages_df + + +def _load_csv(filepath: Path) -> pd.DataFrame: """ Reads patient information from CSV and transforms that into messages. 
:param filepath: Path for CSV file to be read """ - expected_col_names = [ - "procedure_id", - "mrn", - "accession_number", - "project_name", - "extract_generated_timestamp", - "study_date", - ] - # First line is column names messages_df = pd.read_csv(filepath, header=0, dtype=str) + messages_df = _map_columns(messages_df, MAP_CSV_TO_MESSAGE_KEYS) + _raise_if_column_names_not_found(messages_df, [col.name for col in DF_COLUMNS]) + messages_df["pseudo_patient_id"] = messages_df["pseudo_patient_id"].replace(np.nan, None) + + # Parse non string columns + messages_df["procedure_occurrence_id"] = messages_df["procedure_occurrence_id"].astype(int) + messages_df["study_date"] = pd.to_datetime( + messages_df["study_date"], format="%Y-%m-%d", errors="raise" + ).dt.date + messages_df["extract_generated_timestamp"] = pd.to_datetime( + messages_df["extract_generated_timestamp"], + format="%Y-%m-%dT%H:%M:%SZ", + errors="raise", + utc=True, + ) - _raise_if_column_names_not_found(messages_df, expected_col_names) - - ( - procedure_id_col_name, - mrn_col_name, - acc_num_col_name, - project_col_name, - extract_col_name, - dt_col_name, - ) = expected_col_names - - messages = [] - for _, row in messages_df.iterrows(): - message = Message( - mrn=row[mrn_col_name], - accession_number=row[acc_num_col_name], - study_date=datetime.strptime(row[dt_col_name], "%d/%m/%Y %H:%M") - .replace(tzinfo=UTC) - .date(), - procedure_occurrence_id=row[procedure_id_col_name], - project_name=row[project_col_name], - extract_generated_timestamp=datetime.strptime( - row[extract_col_name], "%d/%m/%Y %H:%M" - ).replace(tzinfo=UTC), - ) - messages.append(message) - - if len(messages) == 0: - msg = f"Failed to find any messages in {filepath}" - raise ValueError(msg) - - logger.info("Created {} messages from {}", len(messages), filepath) - return messages + return messages_df -def messages_from_parquet( - dir_path: Path, project_name: str, extract_generated_timestamp: datetime -) -> list[Message]: +def _load_parquet( + dir_path: Path, +) -> pd.DataFrame: """ Reads patient information from parquet files within directory structure - and transforms that into messages. 
+ and transforms that into a DataFrame :param dir_path: Path for parquet directory containing private and public - :param project_name: Name of the project, should be a slug, so it can match the export directory - :param extract_generated_timestamp: Datetime that OMOP ES ran the extract - files """ public_dir = dir_path / "public" private_dir = dir_path / "private" - cohort_data = _check_and_parse_parquet(private_dir, public_dir) - cohort_data_mapped = _map_columns(cohort_data) + messages_df = _check_and_parse_parquet(private_dir, public_dir) + messages_df = _map_columns(messages_df, MAP_PARQUET_TO_MESSAGE_KEYS) - messages = [] - for _, row in cohort_data_mapped.iterrows(): - message = Message( - project_name=project_name, - extract_generated_timestamp=extract_generated_timestamp, - **{column: row[column] for column in MAP_PARQUET_TO_MESSAGE_KEYS.values()}, - ) - messages.append(message) - - if len(messages) == 0: - msg = f"Failed to find any messages in {dir_path}" - raise ValueError(msg) + project_name, extract_generated_timestamp = copy_parquet_return_logfile_fields(dir_path) + messages_df["project_name"] = project_name + messages_df["extract_generated_timestamp"] = extract_generated_timestamp + messages_df["pseudo_patient_id"] = None - logger.info("Created {} messages from {}", len(messages), dir_path) - return messages - - -MAP_PARQUET_TO_MESSAGE_KEYS = { - "PrimaryMrn": "mrn", - "AccessionNumber": "accession_number", - "procedure_date": "study_date", - "procedure_occurrence_id": "procedure_occurrence_id", -} - - -def _map_columns(input_df: pd.DataFrame) -> pd.DataFrame: - _raise_if_column_names_not_found(input_df, list(MAP_PARQUET_TO_MESSAGE_KEYS.keys())) - return input_df.rename(MAP_PARQUET_TO_MESSAGE_KEYS, axis=1) + return messages_df def _check_and_parse_parquet(private_dir: Path, public_dir: Path) -> pd.DataFrame: @@ -169,24 +144,42 @@ def _check_and_parse_parquet(private_dir: Path, public_dir: Path) -> pd.DataFram # joining data together people_procedures = people.merge(procedure, on="person_id") people_procedures_accessions = people_procedures.merge(accessions, on="procedure_occurrence_id") - return people_procedures_accessions[~people_procedures_accessions["AccessionNumber"].isna()] + # Filter out any rows where accession number is NA or an empty string + return people_procedures_accessions[ + ~people_procedures_accessions["AccessionNumber"].isna() + & (people_procedures_accessions["AccessionNumber"] != "") + ] -def make_radiology_linker_table(parquet_dir: Path, images: list[Image]) -> pd.DataFrame: - """ - Make a table linking the OMOP procedure_occurrence_id to the hashed image/study ID. 
- :param parquet_dir: location of OMOP extract - (this gives us: procedure_occurrence_id <-> mrn+accession mapping) - :param images: the images already processed by PIXL, from the DB - (this gives us: mrn+accession <-> hashed ID) - """ - public_dir = parquet_dir / "public" - private_dir = parquet_dir / "private" - people_procedures_accessions = _map_columns(_check_and_parse_parquet(private_dir, public_dir)) - images_df = pd.DataFrame.from_records([vars(im) for im in images]) - merged = people_procedures_accessions.merge(images_df, on=("mrn", "accession_number")) - return merged[["procedure_occurrence_id", "hashed_identifier"]] +class DF_COLUMNS(StrEnum): # noqa: N801 + procedure_occurrence_id = auto() + mrn = auto() + accession_number = auto() + project_name = auto() + extract_generated_timestamp = auto() + study_date = auto() + study_uid = auto() + pseudo_patient_id = auto() + + +MAP_CSV_TO_MESSAGE_KEYS = { + "procedure_id": "procedure_occurrence_id", + "participant_id": "pseudo_patient_id", +} + + +MAP_PARQUET_TO_MESSAGE_KEYS = { + "PrimaryMrn": "mrn", + "AccessionNumber": "accession_number", + "procedure_date": "study_date", + "StudyUid_X": "study_uid", +} + + +def _map_columns(input_df: pd.DataFrame, columns: dict) -> pd.DataFrame: + _raise_if_column_names_not_found(input_df, list(columns.keys())) + return input_df.rename(columns, axis=1) def _raise_if_column_names_not_found( @@ -202,3 +195,23 @@ def _raise_if_column_names_not_found( f"column names" ) raise ValueError(msg) + + +def make_radiology_linker_table(parquet_dir: Path, images: list[Image]) -> pd.DataFrame: + """ + Make a table linking the OMOP procedure_occurrence_id to the pseudo image/study ID. + :param parquet_dir: location of OMOP extract + (this gives us: procedure_occurrence_id <-> mrn+accession mapping) + :param images: the images already processed by PIXL, from the DB + (this gives us: mrn+accession <-> pseudo_study_uid) + """ + public_dir = parquet_dir / "public" + private_dir = parquet_dir / "private" + people_procedures_accessions = _map_columns( + _check_and_parse_parquet(private_dir, public_dir), + MAP_PARQUET_TO_MESSAGE_KEYS, + ) + + images_df = pd.DataFrame.from_records([vars(im) for im in images]) + merged = people_procedures_accessions.merge(images_df, on=("mrn", "accession_number")) + return merged[["procedure_occurrence_id", "pseudo_study_uid"]] diff --git a/cli/src/pixl_cli/_message_processing.py b/cli/src/pixl_cli/_message_processing.py new file mode 100644 index 000000000..254ae0c77 --- /dev/null +++ b/cli/src/pixl_cli/_message_processing.py @@ -0,0 +1,158 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Processing of messages and interaction with rabbitmq.""" + +from __future__ import annotations + +from time import sleep +from typing import TYPE_CHECKING + +import pandas as pd +import tqdm +from core.patient_queue._base import PixlBlockingInterface +from core.patient_queue.message import Message +from core.patient_queue.producer import PixlProducer +from decouple import config +from loguru import logger + +from pixl_cli._config import SERVICE_SETTINGS +from pixl_cli._database import exported_images_for_project, filter_exported_or_add_to_db + +if TYPE_CHECKING: + import pandas as pd + + +def messages_from_df( + df: pd.DataFrame, +) -> list[Message]: + """ + Reads patient information from a DataFrame and transforms that into messages. + + :param messages_df: DataFrame containing patient information + """ + messages = [] + for _, row in df.iterrows(): + message = Message( + mrn=row["mrn"], + accession_number=row["accession_number"], + study_uid=row["study_uid"], + study_date=row["study_date"], + procedure_occurrence_id=row["procedure_occurrence_id"], + project_name=row["project_name"], + extract_generated_timestamp=row["extract_generated_timestamp"].to_pydatetime(), + ) + messages.append(message) + + return messages + + +def retry_until_export_count_is_unchanged( + messages_df: pd.DataFrame, + num_retries: int, + queues_to_populate: list[str], + messages_priority: int, +) -> None: + """Retry populating messages until there is no change in the number of exported studies.""" + last_exported_count = 0 + + # wait PIXL_DICOM_TRANSFER_TIMEOUT seconds if CLI_RETRY_SECONDS is not defined + total_wait_seconds = config("PIXL_DICOM_TRANSFER_TIMEOUT", default=300, cast=int) + total_wait_seconds = config("CLI_RETRY_SECONDS", default=total_wait_seconds, cast=int) + wait_to_display = f"{total_wait_seconds //60} minutes" + if total_wait_seconds % 60: + wait_to_display = f"{total_wait_seconds //60} minutes & {total_wait_seconds % 60} seconds" + + logger.info( + "Retrying extraction every {} until no new extracts are found, max retries: {}", + wait_to_display, + num_retries, + ) + for i in range(1, num_retries + 1): + _wait_for_queues_to_empty(queues_to_populate) + logger.info("Waiting {} for new extracts to be found", wait_to_display) + for _ in tqdm.tqdm( + range(total_wait_seconds), desc="Waiting for series to be fully processed" + ): + sleep(1) + + images = ( + [ + exported_images_for_project(project_name) + for project_name in messages_df["project_name"].unique() + ] + if messages_df["project_name"].size + else [[]] + ) + new_last_exported_count = sum([len(project_images) for project_images in images]) + if new_last_exported_count == last_exported_count: + logger.info( + "{} studies exported, didn't change between retries", + new_last_exported_count, + ) + return + logger.info( + "{} studies exported, retrying extraction {}/{}", + new_last_exported_count - last_exported_count, + i, + num_retries, + ) + last_exported_count = new_last_exported_count + populate_queue_and_db(queues_to_populate, messages_df, messages_priority=messages_priority) + + +def _wait_for_queues_to_empty(queues_to_populate: list[str]) -> None: + logger.info("Waiting for rabbitmq queues to be empty") + message_count = _message_count(queues_to_populate) + while message_count != 0: + logger.debug(f"{message_count=}, sleeping for a minute") + sleep(60) + message_count = _message_count(queues_to_populate) + logger.info("Queues are empty") + + +def _message_count(queues_to_populate: list[str]) -> int: + # We don't want to modify the 
queues we're populating, but if we're populating imaging-primary + # we also need to wait for imaging-secondary to be empty + queues_to_count = set(queues_to_populate) + if "imaging-primary" in queues_to_populate: + queues_to_count.add("imaging-secondary") + + messages_in_queues = 0 + for queue in queues_to_count: + with PixlBlockingInterface(queue_name=queue, **SERVICE_SETTINGS["rabbitmq"]) as rabbitmq: + messages_in_queues += rabbitmq.message_count + + return messages_in_queues + + +def populate_queue_and_db( + queues: list[str], messages_df: pd.DataFrame, messages_priority: int +) -> list[Message]: + """ + Populate queues with messages, + for imaging queue update the database and filter out exported studies. + """ + output_messages = [] + for queue in queues: + # For imaging, we don't want to query again for images that have already been exported + if "imaging" in queue and len(messages_df): + logger.info("Filtering out exported images and uploading new ones to the database") + messages_df = filter_exported_or_add_to_db(messages_df) + + messages = messages_from_df(messages_df) + with PixlProducer(queue_name=queue, **SERVICE_SETTINGS["rabbitmq"]) as producer: + producer.publish(messages, priority=messages_priority) + output_messages.extend(messages) + + return output_messages diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index 62be7fb7b..97dbafdc3 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -18,7 +18,6 @@ import json import os import sys -from operator import attrgetter from pathlib import Path from typing import Any, Optional @@ -30,19 +29,24 @@ from loguru import logger from pixl_cli._config import SERVICE_SETTINGS, api_config_for_queue -from pixl_cli._database import filter_exported_or_add_to_db, images_for_project +from pixl_cli._database import exported_images_for_project +from pixl_cli._docker_commands import dc from pixl_cli._io import ( HOST_EXPORT_ROOT_DIR, - copy_parquet_return_logfile_fields, make_radiology_linker_table, - messages_from_csv, - messages_from_parquet, project_info, + read_patient_info, +) +from pixl_cli._message_processing import ( + populate_queue_and_db, + retry_until_export_count_is_unchanged, ) # localhost needs to be added to the NO_PROXY environment variables on GAEs os.environ["NO_PROXY"] = os.environ["no_proxy"] = "localhost" +PIXL_ROOT = Path(__file__).parents[3].resolve() + @click.group() @click.option("--debug/--no-debug", default=False) @@ -53,6 +57,9 @@ def cli(*, debug: bool) -> None: logger.add(sys.stderr, level=logging_level) +cli.add_command(dc) + + @cli.command() @click.option( "--error", @@ -68,10 +75,10 @@ def cli(*, debug: bool) -> None: type=click.Path(exists=True), help="Path to the sample env file", ) -def check_env(*, error: bool, sample_env_file: click.Path) -> None: +def check_env(*, error: bool, sample_env_file: Path) -> None: """Check that all variables from .env.sample are set either in .env or in environ""" if not sample_env_file: - sample_env_file = Path(__file__).parents[3] / ".env.sample" + sample_env_file = PIXL_ROOT / ".env.sample" sample_config = RepositoryEnv(sample_env_file) for key in sample_config.data: try: @@ -83,9 +90,12 @@ def check_env(*, error: bool, sample_env_file: click.Path) -> None: @cli.command() +@click.argument( + "parquet-path", required=True, type=click.Path(path_type=Path, exists=True, file_okay=True) +) @click.option( "--queues", - default="imaging", + default="imaging-primary", show_default=True, help="Comma seperated list of queues to populate with 
messages generated from the " "input file(s)", @@ -103,11 +113,29 @@ def check_env(*, error: bool, sample_env_file: click.Path) -> None: default=None, help="Rate at which to process items from a queue (in items per second).", ) -@click.argument( - "parquet-path", required=True, type=click.Path(path_type=Path, exists=True, file_okay=True) +@click.option( + "--num-retries", + "num_retries", + type=int, + show_default=True, + default=5, + help="Number of retries to attempt before giving up, 5 minute wait inbetween", +) +@click.option( + "--priority", + "priority", + show_default=True, + default=1, + help="Priority of the messages, from 1 (lowest) to 5 (highest)", ) -def populate( - parquet_path: Path, *, queues: str, start_processing: bool, rate: Optional[float] +def populate( # noqa: PLR0913 - too many args + parquet_path: Path, + *, + queues: str, + rate: Optional[float], + num_retries: int, + start_processing: bool, + priority: int, ) -> None: """ Populate a (set of) queue(s) from a parquet file directory @@ -123,26 +151,21 @@ def populate( │ └── PROCEDURE_OCCURRENCE.parquet └── extract_summary.json """ + queues_to_populate = queues.split(",") if start_processing: - _start_or_update_extract(queues=queues.split(","), rate=rate) - - logger.info("Populating queue(s) {} from {}", queues, parquet_path) - if parquet_path.is_file() and parquet_path.suffix == ".csv": - messages = messages_from_csv(parquet_path) + _start_or_update_extract(queues=queues_to_populate, rate=rate) else: - project_name, omop_es_datetime = copy_parquet_return_logfile_fields(parquet_path) - messages = messages_from_parquet(parquet_path, project_name, omop_es_datetime) + logger.info("Starting to process messages disabled, setting `--num-retries` to 0") + num_retries = 0 - for queue in queues.split(","): - sorted_messages = sorted(messages, key=attrgetter("study_date")) - # For imaging, we don't want to query again for images that have already been exported - if queue == "imaging" and messages: - logger.info("Filtering out exported images and uploading new ones to the database") - sorted_messages = filter_exported_or_add_to_db( - sorted_messages, messages[0].project_name - ) - with PixlProducer(queue_name=queue, **SERVICE_SETTINGS["rabbitmq"]) as producer: - producer.publish(sorted_messages) + logger.info("Populating queue(s) {} from {}", queues_to_populate, parquet_path) + messages_df = read_patient_info(parquet_path) + + populate_queue_and_db(queues_to_populate, messages_df, messages_priority=priority) + if num_retries != 0: + retry_until_export_count_is_unchanged( + messages_df, num_retries, queues_to_populate, messages_priority=priority + ) @cli.command() @@ -165,7 +188,7 @@ def export_patient_data(parquet_dir: Path, timeout: int) -> None: project_name_raw, omop_es_datetime = project_info(parquet_dir) export = ParquetExport(project_name_raw, omop_es_datetime, HOST_EXPORT_ROOT_DIR) - images = images_for_project(export.project_slug) + images = exported_images_for_project(export.project_slug) linker_data = make_radiology_linker_table(parquet_dir, images) export.export_radiology_linker(linker_data) @@ -189,7 +212,7 @@ def export_patient_data(parquet_dir: Path, timeout: int) -> None: @cli.command() @click.option( "--queues", - default="imaging", + default="imaging-primary", show_default=True, help="Comma seperated list of queues to start consuming from", ) @@ -211,7 +234,7 @@ def start(queues: str, rate: Optional[float]) -> None: @cli.command() @click.option( "--queues", - default="imaging", + default="imaging-primary", 
show_default=True, help="Comma seperated list of queues to update the consume rate of", ) @@ -264,7 +287,7 @@ def _update_extract_rate(queue_name: str, rate: Optional[float]) -> None: @cli.command() @click.option( "--queues", - default="imaging", + default="imaging-primary", show_default=True, help="Comma seperated list of queues to consume messages from", ) @@ -300,7 +323,7 @@ def kill() -> None: @cli.command() @click.option( "--queues", - default="imaging", + default="imaging-primary", show_default=True, help="Comma seperated list of queues to consume messages from", ) @@ -314,7 +337,7 @@ def _get_extract_rate(queue_name: str) -> str: """ Get the extraction rate in items per second from a queue - :param queue_name: Name of the queue to get the extract rate for (e.g. imaging) + :param queue_name: Name of the queue to get the extract rate for (e.g. imaging-primary) :return: The extract rate in items per seconds Throws a RuntimeError if the status code is not 200. diff --git a/cli/tests/conftest.py b/cli/tests/conftest.py index 53c192771..3218387ae 100644 --- a/cli/tests/conftest.py +++ b/cli/tests/conftest.py @@ -15,37 +15,56 @@ from __future__ import annotations +import datetime import os from pathlib import Path +from typing import TYPE_CHECKING +from zoneinfo import ZoneInfo +import pandas as pd import pytest from core.db.models import Base, Extract, Image +from core.patient_queue.message import Message +from core.patient_queue.producer import PixlProducer from sqlalchemy import Engine, create_engine from sqlalchemy.orm import Session, sessionmaker +if TYPE_CHECKING: + from collections.abc import Generator + from unittest.mock import Mock + + +# Load environment variables from test .env file +with (Path(__file__).parents[2] / "test/.env").open() as f: + for line in f.readlines(): + if "=" in line: + key, value = line.strip().split("=") + os.environ[key] = value + +# Set the remaining environment variables os.environ["PROJECT_CONFIGS_DIR"] = str(Path(__file__).parents[2] / "projects/configs") -# Set the necessary environment variables -os.environ["PIXL_EXPORT_API_HOST"] = "localhost" -os.environ["PIXL_EXPORT_API_PORT"] = "7006" +os.environ["EXPORT_AZ_CLIENT_ID"] = "export client id" +os.environ["EXPORT_AZ_CLIENT_PASSWORD"] = "export client password" +os.environ["EXPORT_AZ_TENANT_ID"] = "export tenant id" +os.environ["EXPORT_AZ_KEY_VAULT_NAME"] = "export key vault name" -os.environ["PIXL_IMAGING_API_HOST"] = "localhost" -os.environ["PIXL_IMAGING_API_RATE"] = "1" -os.environ["PIXL_IMAGING_API_PORT"] = "7007" +os.environ["HASHER_API_AZ_CLIENT_ID"] = "hasher client id" +os.environ["HASHER_API_AZ_CLIENT_PASSWORD"] = "hasher client password" +os.environ["HASHER_API_AZ_TENANT_ID"] = "hasher tenant id" +os.environ["HASHER_API_AZ_KEY_VAULT_NAME"] = "hasher key vault name" -os.environ["RABBITMQ_HOST"] = "localhost" -os.environ["RABBITMQ_USERNAME"] = "rabbitmq_username" -os.environ["RABBITMQ_PASSWORD"] = "rabbitmq_password" # noqa: S105 -os.environ["RABBITMQ_PORT"] = "7008" +os.environ["ORTHANC_RAW_JOB_HISTORY_SIZE"] = "100" +os.environ["ORTHANC_CONCURRENT_JOBS"] = "20" -os.environ["PIXL_DB_USER"] = "pixl_db_username" -os.environ["PIXL_DB_PASSWORD"] = "pixl_db_password" # noqa: S105 -os.environ["POSTGRES_HOST"] = "locahost" -os.environ["POSTGRES_PORT"] = "7001" -os.environ["PIXL_DB_NAME"] = "pixl" +os.environ["AZ_DICOM_ENDPOINT_NAME"] = "dicom endpoint name" +os.environ["AZ_DICOM_ENDPOINT_URL"] = "dicom endpoint url" +os.environ["AZ_DICOM_ENDPOINT_TOKEN"] = "dicom endpoint token" 
+os.environ["AZ_DICOM_ENDPOINT_CLIENT_ID"] = "dicom endpoint client id" +os.environ["AZ_DICOM_ENDPOINT_CLIENT_SECRET"] = "dicom endpoint client secret" +os.environ["AZ_DICOM_ENDPOINT_TENANT_ID"] = "dicom endpoint tenant id" -os.environ["ORTHANC_ANON_USERNAME"] = "orthanc" -os.environ["ORTHANC_ANON_PASSWORD"] = "orthanc" # noqa: S105, hardcoded password +os.environ["TZ"] = "Europe/London" @pytest.fixture(autouse=True) @@ -90,7 +109,7 @@ def db_engine(monkeymodule) -> Engine: @pytest.fixture() -def db_session(db_engine) -> Session: +def db_session(db_engine) -> Generator[Session]: """ Creates a session for interacting with an in memory database. @@ -106,3 +125,127 @@ def db_session(db_engine) -> Session: session.query(Extract).delete() yield session session.close() + + +STUDY_DATE = datetime.date.fromisoformat("2023-01-01") + + +def _make_message( + project_name: str, + accession_number: str, + mrn: str, + study_uid: str, +) -> Message: + return Message( + project_name=project_name, + accession_number=accession_number, + mrn=mrn, + study_uid=study_uid, + study_date=STUDY_DATE, + procedure_occurrence_id=1, + extract_generated_timestamp=datetime.datetime.now(tz=ZoneInfo(os.environ["TZ"])), + ) + + +@pytest.fixture() +def example_messages() -> list[Message]: + """Test input data.""" + return [ + _make_message( + project_name="i-am-a-project", accession_number="123", mrn="mrn", study_uid="1.2.3" + ), + _make_message( + project_name="i-am-a-project", accession_number="234", mrn="mrn", study_uid="2.3.4" + ), + _make_message( + project_name="i-am-a-project", accession_number="345", mrn="mrn", study_uid="3.4.5" + ), + ] + + +@pytest.fixture() +def example_messages_df(example_messages): + """Test input data in a DataFrame.""" + messages_df = pd.DataFrame.from_records([vars(im) for im in example_messages]) + messages_df["pseudo_patient_id"] = None + return messages_df + + +@pytest.fixture() +def example_messages_multiple_projects() -> list[Message]: + """Test input data.""" + return [ + _make_message( + project_name="i-am-a-project", accession_number="123", mrn="mrn", study_uid="1.2.3" + ), + _make_message( + project_name="i-am-a-project", accession_number="234", mrn="mrn", study_uid="2.3.4" + ), + _make_message( + project_name="i-am-a-project", accession_number="345", mrn="mrn", study_uid="3.4.5" + ), + _make_message( + project_name="i-am-another-project", + accession_number="123", + mrn="mrn", + study_uid="1.2.3", + ), + _make_message( + project_name="i-am-another-project", + accession_number="234", + mrn="mrn", + study_uid="2.3.4", + ), + _make_message( + project_name="i-am-another-project", + accession_number="345", + mrn="mrn", + study_uid="3.4.5", + ), + ] + + +@pytest.fixture() +def example_messages_multiple_projects_df(example_messages_multiple_projects) -> pd.DataFrame: + """Test input data.""" + messages_df = pd.DataFrame.from_records([vars(im) for im in example_messages_multiple_projects]) + messages_df["pseudo_patient_id"] = None + return messages_df + + +@pytest.fixture() +def rows_in_session(db_session) -> Session: + """Insert a test row for each table, returning the session for use in tests.""" + extract = Extract(slug="i-am-a-project") + + image_exported = Image( + accession_number="123", + study_date=STUDY_DATE, + mrn="mrn", + study_uid="1.2.3", + extract=extract, + extract_id=extract.extract_id, + exported_at=datetime.datetime.now(ZoneInfo(os.environ["TZ"])), + ) + image_not_exported = Image( + accession_number="234", + study_date=STUDY_DATE, + mrn="mrn", + study_uid="2.3.4", + 
extract=extract, + extract_id=extract.extract_id, + ) + with db_session: + db_session.add_all([extract, image_exported, image_not_exported]) + db_session.commit() + + return db_session + + +@pytest.fixture() +def mock_publisher(mocker) -> Generator[Mock, None, None]: + """Patched publisher that does nothing, returns MagicMock of the publish method.""" + mocker.patch.object(PixlProducer, "__init__", return_value=None) + mocker.patch.object(PixlProducer, "__enter__", return_value=PixlProducer) + mocker.patch.object(PixlProducer, "__exit__") + return mocker.patch.object(PixlProducer, "publish") diff --git a/cli/tests/test_check_env.py b/cli/tests/test_check_env.py index 057997f29..f4a8b44f1 100644 --- a/cli/tests/test_check_env.py +++ b/cli/tests/test_check_env.py @@ -15,7 +15,9 @@ from pathlib import Path +import pytest from click.testing import CliRunner +from decouple import UndefinedValueError from pixl_cli.main import check_env SAMPLE_ENV_FILE = Path(__file__).parents[2] / ".env.sample" @@ -28,7 +30,7 @@ def test_check_env(): - current test env file matches the sample env file """ runner = CliRunner() - result = runner.invoke(check_env) + result = runner.invoke(check_env, args=["--error"]) assert result.exit_code == 0 @@ -40,5 +42,9 @@ def test_check_env_fails(tmp_path): tmp_sample_env_file.write_text("NONEXISTENT_VARIABLE=") runner = CliRunner() - result = runner.invoke(check_env, str(tmp_sample_env_file)) - assert result.exit_code != 0 + with pytest.raises(UndefinedValueError, match="not found"): + runner.invoke( + check_env, + args=["--error", "--sample-env-file", tmp_sample_env_file], + catch_exceptions=False, + ) diff --git a/cli/tests/test_database.py b/cli/tests/test_database.py deleted file mode 100644 index 9e2fe3dcb..000000000 --- a/cli/tests/test_database.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Test database interaction methods for the cli.""" - -import datetime - -import pytest -from core.db.models import Extract, Image -from core.patient_queue.message import Message -from pixl_cli._database import filter_exported_or_add_to_db -from sqlalchemy.orm import Session - -STUDY_DATE = datetime.date.fromisoformat("2023-01-01") - - -def _make_message(project_name: str, accession_number: str, mrn: str) -> Message: - return Message( - project_name=project_name, - accession_number=accession_number, - mrn=mrn, - study_date=STUDY_DATE, - procedure_occurrence_id=1, - extract_generated_timestamp=datetime.datetime.now(tz=datetime.UTC), - ) - - -@pytest.fixture() -def example_messages(): - """Test input data.""" - return [ - _make_message(project_name="i-am-a-project", accession_number="123", mrn="mrn"), - _make_message(project_name="i-am-a-project", accession_number="234", mrn="mrn"), - _make_message(project_name="i-am-a-project", accession_number="345", mrn="mrn"), - ] - - -@pytest.fixture() -def rows_in_session(db_session) -> Session: - """Insert a test row for each table, returning the session for use in tests.""" - extract = Extract(slug="i-am-a-project") - - image_exported = Image( - accession_number="123", - study_date=STUDY_DATE, - mrn="mrn", - extract=extract, - exported_at=datetime.datetime.now(tz=datetime.UTC), - ) - image_not_exported = Image( - accession_number="234", - study_date=STUDY_DATE, - mrn="mrn", - extract=extract, - ) - with db_session: - db_session.add_all([extract, image_exported, image_not_exported]) - db_session.commit() - - return db_session - - -def test_project_doesnt_exist(example_messages, db_session): - """If project doesn't exist, no filtering and then project & messages saved to database""" - output = filter_exported_or_add_to_db(example_messages, "i-am-a-project") - assert output == example_messages - extract = db_session.query(Extract).one() - images = db_session.query(Image).filter(Image.extract == extract).all() - assert len(images) == len(example_messages) - - -def test_first_image_exported(example_messages, rows_in_session): - """ - GIVEN 3 messages, where one has been exported, the second has been saved to db but not exported - WHEN the messages are filtered - THEN the first message that has an exported_at value should not be in the filtered list - and all images should be saved to the database - """ - output = filter_exported_or_add_to_db(example_messages, "i-am-a-project") - assert len(output) == len(example_messages) - 1 - assert [x for x in output if x.accession_number == "123"] == [] - extract = rows_in_session.query(Extract).one() - images = rows_in_session.query(Image).filter(Image.extract == extract).all() - assert len(images) == len(example_messages) - - -def test_new_extract_with_overlapping_images(example_messages, rows_in_session): - """ - GIVEN messages from a new extract, two have been saved to the database with another extract - WHEN the messages are filtered - THEN all messages should be returned and all new images should be added - """ - new_project_name = "new-project" - for message in example_messages: - message.project_name = new_project_name - - output = filter_exported_or_add_to_db(example_messages, new_project_name) - - # none filtered out - assert len(output) == len(example_messages) - # all new batch of images saved - extract = rows_in_session.query(Extract).filter(Extract.slug == new_project_name).one() - images = rows_in_session.query(Image).filter(Image.extract == extract).all() - assert len(images) == 
len(example_messages) - # other extract and images still in database - assert len(rows_in_session.query(Extract).all()) > 1 - assert len(rows_in_session.query(Image).all()) > len(example_messages) diff --git a/cli/tests/test_database_cli_interaction.py b/cli/tests/test_database_cli_interaction.py new file mode 100644 index 000000000..efd4f147b --- /dev/null +++ b/cli/tests/test_database_cli_interaction.py @@ -0,0 +1,76 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test database interaction methods for the cli.""" + +from core.db.models import Extract, Image +from pandas.testing import assert_frame_equal +from pixl_cli._database import exported_images_for_project, filter_exported_or_add_to_db + + +def test_project_doesnt_exist(example_messages_df, db_session): + """If project doesn't exist, no filtering and then project & messages saved to database""" + output = filter_exported_or_add_to_db(example_messages_df) + assert_frame_equal(output, example_messages_df) + extract = db_session.query(Extract).one() + images = db_session.query(Image).filter(Image.extract == extract).all() + assert len(images) == len(example_messages_df) + + +def test_first_image_exported(example_messages_df, rows_in_session): + """ + GIVEN 3 messages, where one has been exported, the second has been saved to db but not exported + WHEN the messages are filtered + THEN the first message that has an exported_at value should not be in the filtered list + and all images should be saved to the database + """ + output = filter_exported_or_add_to_db(example_messages_df) + assert len(output) == len(example_messages_df) - 1 + assert "123" not in output.accession_number.to_numpy() + extract = rows_in_session.query(Extract).one() + images = rows_in_session.query(Image).filter(Image.extract == extract).all() + assert len(images) == len(example_messages_df) + + +def test_new_extract_with_overlapping_images(example_messages_df, rows_in_session): + """ + GIVEN messages from a new extract, two have been saved to the database with another extract + WHEN the messages are filtered + THEN all messages should be returned and all new images should be added + """ + new_project_name = "new-project" + example_messages_df["project_name"] = new_project_name + + output = filter_exported_or_add_to_db(example_messages_df) + + # none filtered out + assert len(output) == len(example_messages_df) + # all new batch of images saved + extract = rows_in_session.query(Extract).filter(Extract.slug == new_project_name).one() + images = rows_in_session.query(Image).filter(Image.extract == extract).all() + assert len(images) == len(example_messages_df) + # other extract and images still in database + assert len(rows_in_session.query(Extract).all()) > 1 + assert len(rows_in_session.query(Image).all()) > len(example_messages_df) + + +def test_processed_images_for_project(rows_in_session): + """ + GIVEN a project with 3 images in the database, only one of which is exported + WHEN the 
exported_images_for_project function is called +    THEN only the exported images are returned +    """ +    processed = exported_images_for_project("i-am-a-project") +    assert len(processed) == 1 +    assert processed[0].accession_number == "123" diff --git a/cli/tests/test_docker_commands.py b/cli/tests/test_docker_commands.py new file mode 100644 index 000000000..a9590fa8f --- /dev/null +++ b/cli/tests/test_docker_commands.py @@ -0,0 +1,61 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +#   http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Docker commands tests""" + +import pytest +from click.testing import CliRunner +from pixl_cli.main import PIXL_ROOT, cli + + +@pytest.fixture(autouse=True) +def _change_working_directory(monkeypatch) -> None: +    """ +    Change the working directory to the PIXL root directory. +    This is required to spin up the docker containers. +    """ +    monkeypatch.setenv("ENV", "test") +    monkeypatch.chdir(PIXL_ROOT) + + +@pytest.fixture() +def default_args() -> list[str]: +    """Default arguments for the docker commands.""" +    return ["--dry-run", "--env-file=test/.env"] + + +def test_pixl_up_works(default_args): +    """Test that pixl up works and attempts to spin up docker containers.""" +    runner = CliRunner() +    result = runner.invoke(cli, args=["dc", *default_args, "up"]) +    assert result.exit_code == 0 + + +def test_pixl_down_works(default_args): +    """Test that pixl down works and attempts to take down docker containers.""" +    runner = CliRunner() +    result = runner.invoke(cli, args=["dc", *default_args, "down"]) +    assert result.exit_code == 0 + + +def test_pixl_down_warns_on_volumes(monkeypatch, default_args): +    """Test that a warning is displayed when attempting to remove volumes in production.""" +    monkeypatch.setenv("ENV", "prod") + +    runner = CliRunner() +    result = runner.invoke(cli, args=["dc", *default_args, "down", "--volumes"]) + +    assert result.exit_code == 0 +    assert "WARNING: Attempting to remove volumes in production." in result.output +    assert "Are you sure you want to remove the volumes?" in result.output +    assert "Running 'docker compose down' without removing volumes."
in result.output diff --git a/cli/tests/test_io.py b/cli/tests/test_io.py index e02aeedcb..18b50b00d 100644 --- a/cli/tests/test_io.py +++ b/cli/tests/test_io.py @@ -19,17 +19,19 @@ import pytest from core.db.models import Extract, Image -from pixl_cli._io import make_radiology_linker_table, messages_from_csv +from pixl_cli._io import make_radiology_linker_table, read_patient_info +from pydicom.uid import generate_uid def test_message_from_csv_raises_for_malformed_input(tmpdir): """Test that messages_from_csv raises for malformed input.""" # Create a CSV file with the wrong column names csv_file = tmpdir.join("malformed.csv") - csv_file.write("procedure_id,mrn,accession_number,extract_generated_timestamp,study_date\n") - csv_file.write("1,123,1234,01/01/2021 00:00,01/01/2021\n") + with csv_file.open("w") as f: + f.write("procedure_id,mrn,accession_number,extract_generated_timestamp,study_date\n") + f.write("1,123,1234,01/01/2021 00:00,01/01/2021\n") with pytest.raises(ValueError, match=".*expected to have at least.*"): - messages_from_csv(csv_file) + read_patient_info(Path(csv_file)) def test_make_radiology_linker_table(omop_resources: Path): @@ -43,28 +45,28 @@ def test_make_radiology_linker_table(omop_resources: Path): accession_number="AA12345601", study_date=date(1, 1, 1), mrn="987654321", - hashed_identifier="test_hashed_id_1", + pseudo_study_uid=generate_uid(entropy_srcs=["test_pseudo_id_1"]), extract=extract, ), Image( accession_number="AA12345605", study_date=date(1, 1, 1), mrn="987654321", - hashed_identifier="test_hashed_id_2", + pseudo_study_uid=generate_uid(entropy_srcs=["test_pseudo_id_2"]), extract=extract, ), Image( accession_number="different_should_ignore", study_date=date(1, 1, 1), mrn="987654321", - hashed_identifier="should_never_see_1", + pseudo_study_uid=generate_uid(entropy_srcs=["should_never_see_1"]), extract=extract, ), Image( accession_number="AA12345605", study_date=date(1, 1, 1), mrn="different_should_ignore", - hashed_identifier="should_never_see_2", + pseudo_study_uid=generate_uid(entropy_srcs=["should_never_see_2"]), extract=extract, ), ] @@ -73,8 +75,8 @@ def test_make_radiology_linker_table(omop_resources: Path): po_col = linker_df["procedure_occurrence_id"] row_po_4 = linker_df[po_col == 4].iloc[0] row_po_5 = linker_df[po_col == 5].iloc[0] - assert row_po_4.hashed_identifier == "test_hashed_id_1" - assert row_po_5.hashed_identifier == "test_hashed_id_2" + assert row_po_4.pseudo_study_uid == generate_uid(entropy_srcs=["test_pseudo_id_1"]) + assert row_po_5.pseudo_study_uid == generate_uid(entropy_srcs=["test_pseudo_id_2"]) assert linker_df.shape[0] == 2 - assert set(linker_df.columns) == {"procedure_occurrence_id", "hashed_identifier"} + assert set(linker_df.columns) == {"procedure_occurrence_id", "pseudo_study_uid"} diff --git a/cli/tests/test_message_processing.py b/cli/tests/test_message_processing.py new file mode 100644 index 000000000..dbd473486 --- /dev/null +++ b/cli/tests/test_message_processing.py @@ -0,0 +1,100 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Test message processing module.""" + +import os +from collections.abc import Generator +from unittest.mock import Mock + +import pytest +from _pytest.monkeypatch import MonkeyPatch +from core.patient_queue.producer import PixlProducer +from pixl_cli._message_processing import retry_until_export_count_is_unchanged + + +@pytest.fixture() +def _zero_message_count(monkeypatch: MonkeyPatch) -> None: + """Ensure that message count is always zero, so that we don't have to deal with rabbitmq""" + monkeypatch.setattr("pixl_cli._message_processing._message_count", lambda _: 0) + + +@pytest.fixture() +def mock_publisher(mocker) -> Generator[Mock, None, None]: + """Patched publisher that does nothing, returns MagicMock of the publish method.""" + mocker.patch.object(PixlProducer, "__init__", return_value=None) + mocker.patch.object(PixlProducer, "__enter__", return_value=PixlProducer) + mocker.patch.object(PixlProducer, "__exit__") + return mocker.patch.object(PixlProducer, "publish") + + +@pytest.mark.usefixtures("_zero_message_count") +def test_no_retry_if_none_exported(example_messages_df, db_session, mock_publisher): + """ + GIVEN no images have been exported before starting, and num_retries set to 5 + WHEN rabbitmq messages set to zero and no messages are published to queue + THEN populate_queue_and_db should never be called + """ + os.environ["CLI_RETRY_SECONDS"] = "1" + + retry_until_export_count_is_unchanged( + example_messages_df, + num_retries=5, + queues_to_populate=["imaging-primary"], + messages_priority=1, + ) + + mock_publisher.assert_not_called() + + +@pytest.mark.usefixtures("_zero_message_count") +def test_retry_with_image_exported_and_no_change( + example_messages_df, rows_in_session, mock_publisher +): + """ + GIVEN one image already has been exported, and num_retries set to 5 + WHEN rabbitmq messages set to zero and no messages are published to queue + THEN populate_queue_and_db should be called once + """ + os.environ["CLI_RETRY_SECONDS"] = "1" + + retry_until_export_count_is_unchanged( + example_messages_df, + num_retries=5, + queues_to_populate=["imaging-primary"], + messages_priority=1, + ) + + mock_publisher.assert_called_once() + + +@pytest.mark.usefixtures("_zero_message_count") +def test_retry_with_image_exported_and_no_change_multiple_projects( + example_messages_multiple_projects_df, rows_in_session, mock_publisher +): + """ + GIVEN one image across two projects has been exported, and num_retries set to 5 + WHEN rabbitmq messages set to zero and no messages are published to queue + THEN populate_queue_and_db should be called once + """ + os.environ["CLI_RETRY_SECONDS"] = "1" + + retry_until_export_count_is_unchanged( + example_messages_multiple_projects_df, + num_retries=5, + queues_to_populate=["imaging-primary"], + messages_priority=1, + ) + + mock_publisher.assert_called_once() diff --git a/cli/tests/test_messages_from_files.py b/cli/tests/test_messages_from_files.py new file mode 100644 index 000000000..ca6c7339a --- /dev/null +++ b/cli/tests/test_messages_from_files.py @@ -0,0 +1,190 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Unit tests for reading cohorts from parquet files.""" + +from __future__ import annotations + +import datetime +from typing import TYPE_CHECKING + +from core.db.models import Image +from core.patient_queue.message import Message +from pixl_cli._io import read_patient_info +from pixl_cli._message_processing import messages_from_df, populate_queue_and_db + +if TYPE_CHECKING: + from pathlib import Path + + +def test_messages_from_csv(omop_resources: Path) -> None: + """ + Given a csv with a single dataset. + When the messages are generated from the directory + Then one message should be generated + """ + # Arrange + test_csv = omop_resources / "test.csv" + messages_df = read_patient_info(test_csv) + # Act + messages = messages_from_df(messages_df) + # Assert + assert all(isinstance(msg, Message) for msg in messages) + + expected_messages = [ + Message( + procedure_occurrence_id=0, + mrn="patient_identifier", + accession_number="123456789", + study_uid="1.2.3.4.5.6.7.8", + project_name="ms-pinpoint-test", + extract_generated_timestamp=datetime.datetime.fromisoformat("2023-01-01T00:01:00Z"), + study_date=datetime.date.fromisoformat("2022-01-01"), + ), + ] + assert messages == expected_messages + + +def test_messages_from_csv_multiple_projects( + omop_resources: Path, rows_in_session, mock_publisher +) -> None: + """ + GIVEN the database has a single Export entity, with one exported Image, one un-exported Image, + WHEN we parse a file with two projects, each with the same 3 images + where one project has already exported one of the images + and the other project has not exported any images + THEN the database should have 6 Images, with 5 messages returned. + """ + input_file = omop_resources / "multiple_projects.csv" + messages_df = read_patient_info(input_file) + messages = populate_queue_and_db(["imaging-primary"], messages_df, messages_priority=1) + + # Database has 6 rows now + images_in_db = rows_in_session.query(Image).all() + assert len(images_in_db) == 6 + # Exported image filtered out + assert len(messages) == 5 + + +def test_messages_from_parquet(omop_resources: Path) -> None: + """ + Given a valid OMOP ES extract with 4 procedures, two of which are x-rays. 
+    When the messages are generated from the directory and the output of logfile parsing +    Then two messages should be generated +    """ +    # Arrange +    omop_parquet_dir = omop_resources / "omop" +    messages_df = read_patient_info(omop_parquet_dir) +    # Act +    messages = messages_from_df(messages_df) +    # Assert +    assert all(isinstance(msg, Message) for msg in messages) + +    expected_messages = [ +        Message( +            mrn="987654321", +            accession_number="AA12345601", +            study_uid="1.3.6.1.4.1.14519.5.2.1.99.1071.12985477682660597455732044031486", +            study_date=datetime.date.fromisoformat("2020-05-23"), +            procedure_occurrence_id=4, +            project_name="test-extract-uclh-omop-cdm", +            extract_generated_timestamp=datetime.datetime.fromisoformat("2023-12-07T14:08:58"), +        ), +        Message( +            mrn="987654321", +            accession_number="AA12345605", +            study_uid="1.2.276.0.7230010.3.1.2.929116473.1.1710754859.579485", +            study_date=datetime.date.fromisoformat("2020-05-23"), +            procedure_occurrence_id=5, +            project_name="test-extract-uclh-omop-cdm", +            extract_generated_timestamp=datetime.datetime.fromisoformat("2023-12-07T14:08:58"), +        ), +    ] + +    assert messages == expected_messages + + +def test_batch_upload(omop_resources: Path, rows_in_session, mock_publisher) -> None: +    """ +    GIVEN the database has a single Export entity, with one exported Image, one unexported Image +    WHEN we parse a file with the two existing images and one new image with no participant_ids +    THEN the database should have 3 images, and the returned messages exclude the exported image. +    """ +    input_file = omop_resources / "batch_input.csv" +    messages_df = read_patient_info(input_file) +    messages = populate_queue_and_db(["imaging-primary"], messages_df, messages_priority=1) + +    # Database has 3 rows now +    images_in_db: list[Image] = rows_in_session.query(Image).all() +    assert len(images_in_db) == 3 +    # Exported image filtered out +    assert len(messages) == 2 + + +def test_duplicate_upload(omop_resources: Path, rows_in_session, mock_publisher) -> None: +    """ +    GIVEN the database has a single Export entity, with one exported Image, one un-exported Image +    WHEN we parse a file with duplicated entries of the two existing images and one new image +    THEN the database should have 3 Images, with two messages returned. +    """ +    input_file = omop_resources / "duplicate_input.csv" +    messages_df = read_patient_info(input_file) +    messages = populate_queue_and_db(["imaging-primary"], messages_df, messages_priority=1) + +    # Database has 3 rows now +    images_in_db = rows_in_session.query(Image).all() +    assert len(images_in_db) == 3 +    # Exported and duplicate messages filtered out +    assert len(messages) == 2 + + +def test_upload_with_participant_id(omop_resources: Path, db_session, mock_publisher) -> None: +    """ +    GIVEN the database is empty, +    WHEN we parse a file with the images that have participant_ids, +    THEN the database should have 3 images and the `pseudo_patient_id`s in the database should +    match the participant_ids in the CSV file.
+ """ + input_file = omop_resources / "participant_id.csv" + messages_df = read_patient_info(input_file) + messages = populate_queue_and_db(["imaging-primary"], messages_df, messages_priority=1) + + # Database has 3 rows now + images_in_db: list[Image] = db_session.query(Image).all() + assert len(images_in_db) == 3 + # A message per image + assert len(messages) == 3 + # Pseudo_patient_id for new image is same as participant_ids in CSV file + assert images_in_db[0].pseudo_patient_id == "AAA00" + assert images_in_db[1].pseudo_patient_id == "BBB11" + assert images_in_db[2].pseudo_patient_id == "CCC22" + + +def test_upload_with_no_participant_id(omop_resources: Path, db_session, mock_publisher) -> None: + """ + GIVEN the database is empty, + WHEN we parse a file with images that do not have participant_ids, + THEN the database should have 3 images and the `pseudo_patient_id`s should be `None` in the + database. + """ + input_file = omop_resources / "batch_input.csv" + messages_df = read_patient_info(input_file) + messages = populate_queue_and_db(["imaging-primary"], messages_df, messages_priority=1) + + # Database has 3 rows now + images_in_db: list[Image] = db_session.query(Image).all() + assert len(images_in_db) == 3 + # A message per image + assert len(messages) == 3 + # Pseudo_patient_id for new image is same as participant_ids in CSV file + assert all(image.pseudo_patient_id is None for image in images_in_db) diff --git a/cli/tests/test_messages_from_parquet.py b/cli/tests/test_messages_from_parquet.py deleted file mode 100644 index a56bd3941..000000000 --- a/cli/tests/test_messages_from_parquet.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Unit tests for reading cohorts from parquet files.""" - -from __future__ import annotations - -import datetime -from typing import TYPE_CHECKING - -from core.patient_queue.message import Message -from pixl_cli._io import ( - copy_parquet_return_logfile_fields, - messages_from_csv, - messages_from_parquet, -) - -if TYPE_CHECKING: - from pathlib import Path - - -def test_messages_from_csv(omop_resources: Path) -> None: - """ - Given a csv with a single dataset. - When the messages are generated from the directory - Then one message should be generated - """ - # Arrange - test_csv = omop_resources / "test.csv" - # Act - messages = messages_from_csv(test_csv) - # Assert - assert all(isinstance(msg, Message) for msg in messages) - - expected_messages = [ - Message( - mrn="patient_identifier", - accession_number="123456789", - study_date=datetime.date.fromisoformat("2022-01-01"), - procedure_occurrence_id="0", - project_name="ms-pinpoint-test", - extract_generated_timestamp=datetime.datetime(2022, 1, 1, 0, 1, tzinfo=datetime.UTC), - ), - ] - assert messages == expected_messages - - -def test_messages_from_parquet(omop_resources: Path) -> None: - """ - Given a valid OMOP ES extract with 4 procedures, two of which are x-rays. 
- When the messages are generated from the directory and the output of logfile parsing - Then two messages should be generated - """ - # Arrange - omop_parquet_dir = omop_resources / "omop" - project_name, omop_es_datetime = copy_parquet_return_logfile_fields(omop_parquet_dir) - # Act - messages = messages_from_parquet(omop_parquet_dir, project_name, omop_es_datetime) - # Assert - assert all(isinstance(msg, Message) for msg in messages) - - expected_messages = [ - Message( - mrn="987654321", - accession_number="AA12345601", - study_date=datetime.date.fromisoformat("2020-05-23"), - procedure_occurrence_id=4, - project_name="test-extract-uclh-omop-cdm", - extract_generated_timestamp=datetime.datetime.fromisoformat("2023-12-07T14:08:58"), - ), - Message( - mrn="987654321", - accession_number="AA12345605", - study_date=datetime.date.fromisoformat("2020-05-23"), - procedure_occurrence_id=5, - project_name="test-extract-uclh-omop-cdm", - extract_generated_timestamp=datetime.datetime.fromisoformat("2023-12-07T14:08:58"), - ), - ] - - assert messages == expected_messages diff --git a/cli/tests/test_populate.py b/cli/tests/test_populate.py index 3a279a043..86b48e364 100644 --- a/cli/tests/test_populate.py +++ b/cli/tests/test_populate.py @@ -13,11 +13,13 @@ # limitations under the License. """Patient queue tests""" +# ruff: noqa: SLF001 allow accessing of private members for mocking + from __future__ import annotations from typing import TYPE_CHECKING -import pixl_cli.main +import pixl_cli._message_processing from click.testing import CliRunner from core.patient_queue.producer import PixlProducer from pixl_cli.main import populate @@ -39,7 +41,7 @@ def __exit__(self, *args: object, **kwargs) -> None: """Context exit point.""" return - def publish(self, messages: list[Message]) -> None: # noqa: ARG002 don't access messages + def publish(self, messages: list[Message], priority: int) -> None: # noqa: ARG002 don't access messages or priority """Dummy method for publish.""" return @@ -51,9 +53,12 @@ def test_populate_queue_parquet( omop_parquet_dir = str(omop_resources / "omop") runner = CliRunner() - monkeypatch.setattr(pixl_cli.main, "PixlProducer", MockProducer) + monkeypatch.setattr(pixl_cli._message_processing, "PixlProducer", MockProducer) - result = runner.invoke(populate, args=[omop_parquet_dir, "--queues", queue_name, "--no-start"]) + result = runner.invoke( + populate, + args=[omop_parquet_dir, "--queues", queue_name, "--no-start", "--num-retries", "0"], + ) assert result.exit_code == 0 @@ -65,12 +70,17 @@ def test_populate_queue_and_start( runner = CliRunner() mocked_start = mocker.patch("pixl_cli.main._start_or_update_extract") - monkeypatch.setattr(pixl_cli.main, "PixlProducer", MockProducer) + monkeypatch.setattr(pixl_cli._message_processing, "PixlProducer", MockProducer) - result = runner.invoke(populate, args=[omop_parquet_dir, "--queues", queue_name, "--no-start"]) + result = runner.invoke( + populate, + args=[omop_parquet_dir, "--queues", queue_name, "--no-start", "--num-retries", "0"], + ) assert result.exit_code == 0 mocked_start.assert_not_called() - result = runner.invoke(populate, args=[omop_parquet_dir, "--queues", queue_name]) + result = runner.invoke( + populate, args=[omop_parquet_dir, "--queues", queue_name, "--num-retries", "0"] + ) assert result.exit_code == 0 mocked_start.assert_called_with(queues=queue_name.split(","), rate=None) diff --git a/docker-compose.yml b/docker-compose.yml index 21770d113..996853e28 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ 
-31,6 +31,7 @@ x-build-args-common: &build-args-common x-pixl-common-env: &pixl-common-env DEBUG: ${DEBUG} LOG_LEVEL: ${LOG_LEVEL} + TZ: ${TZ:-Europe/London} x-pixl-rabbit-mq: &pixl-rabbit-mq RABBITMQ_HOST: "queue" # Name of the queue service @@ -38,14 +39,6 @@ x-pixl-rabbit-mq: &pixl-rabbit-mq RABBITMQ_USERNAME: ${RABBITMQ_USERNAME} RABBITMQ_PASSWORD: ${RABBITMQ_PASSWORD} -x-emap-db: &emap-db - EMAP_UDS_HOST: ${EMAP_UDS_HOST} - EMAP_UDS_PORT: ${EMAP_UDS_PORT} - EMAP_UDS_NAME: ${EMAP_UDS_NAME} - EMAP_UDS_USER: ${EMAP_UDS_USER} - EMAP_UDS_PASSWORD: ${EMAP_UDS_PASSWORD} - EMAP_UDS_SCHEMA_NAME: ${EMAP_UDS_SCHEMA_NAME} - x-pixl-db: &pixl-db PIXL_DB_HOST: ${PIXL_DB_HOST} PIXL_DB_PORT: ${PIXL_DB_PORT} @@ -114,10 +107,12 @@ services: target: pixl_orthanc_anon args: <<: *build-args-common + ORTHANC_CONCURRENT_JOBS: ${ORTHANC_CONCURRENT_JOBS} platform: linux/amd64 command: /run/secrets + restart: always environment: - <<: [*proxy-common, *pixl-common-env, *azure-keyvault] + <<: [*pixl-db, *proxy-common, *pixl-common-env, *azure-keyvault] ORTHANC_NAME: "PIXL: Anon" ORTHANC_USERNAME: ${ORTHANC_ANON_USERNAME} ORTHANC_PASSWORD: ${ORTHANC_ANON_PASSWORD} @@ -126,19 +121,18 @@ services: ORTHANC_RAW_AE_TITLE: ${ORTHANC_RAW_AE_TITLE} ORTHANC_RAW_DICOM_PORT: "4242" ORTHANC_RAW_HOSTNAME: "orthanc-raw" + ORTHANC_RAW_URL: ${ORTHANC_RAW_URL} + ORTHANC_RAW_USERNAME: ${ORTHANC_RAW_USERNAME} + ORTHANC_RAW_PASSWORD: ${ORTHANC_RAW_PASSWORD} + PIXL_DICOM_TRANSFER_TIMEOUT: ${PIXL_DICOM_TRANSFER_TIMEOUT} + PIXL_MAX_MESSAGES_IN_FLIGHT: ${PIXL_MAX_MESSAGES_IN_FLIGHT} # For the export API ORTHANC_ANON_URL: "http://localhost:8042" ORTHANC_ANON_USERNAME: ${ORTHANC_ANON_USERNAME} ORTHANC_ANON_PASSWORD: ${ORTHANC_ANON_PASSWORD} - PIXL_DB_HOST: ${PIXL_DB_HOST} - PIXL_DB_PORT: ${PIXL_DB_PORT} - PIXL_DB_NAME: ${PIXL_DB_NAME} - PIXL_DB_USER: ${PIXL_DB_USER} - PIXL_DB_PASSWORD: ${PIXL_DB_PASSWORD} DICOM_WEB_PLUGIN_ENABLED: ${ENABLE_DICOM_WEB} HASHER_API_AZ_NAME: "hasher-api" HASHER_API_PORT: 8000 - HTTP_TIMEOUT: ${ORTHANC_ANON_HTTP_TIMEOUT} AZ_DICOM_ENDPOINT_NAME: ${AZ_DICOM_ENDPOINT_NAME} AZ_DICOM_ENDPOINT_URL: ${AZ_DICOM_ENDPOINT_URL} AZ_DICOM_ENDPOINT_TOKEN: ${AZ_DICOM_ENDPOINT_TOKEN} @@ -156,7 +150,6 @@ services: - type: volume source: orthanc-anon-data target: /var/lib/orthanc/db - - ${PWD}/orthanc/orthanc-anon/config:/run/secrets:ro - ${PWD}/projects/configs:/${PROJECT_CONFIGS_DIR:-/projects/configs}:ro networks: - pixl-net @@ -176,7 +169,6 @@ services: retries: 2 interval: 3s timeout: 2s - restart: "no" orthanc-raw: build: @@ -187,7 +179,8 @@ services: <<: *build-args-common ORTHANC_RAW_MAXIMUM_STORAGE_SIZE: ${ORTHANC_RAW_MAXIMUM_STORAGE_SIZE} ORTHANC_RAW_JOB_HISTORY_SIZE: ${ORTHANC_RAW_JOB_HISTORY_SIZE} - ORTHANC_RAW_CONCURRENT_JOBS: ${ORTHANC_RAW_CONCURRENT_JOBS} + ORTHANC_CONCURRENT_JOBS: ${ORTHANC_CONCURRENT_JOBS} + PIXL_DICOM_TRANSFER_TIMEOUT: ${PIXL_DICOM_TRANSFER_TIMEOUT} platform: linux/amd64 command: /run/secrets environment: @@ -196,12 +189,14 @@ services: ORTHANC_USERNAME: ${ORTHANC_RAW_USERNAME} ORTHANC_PASSWORD: ${ORTHANC_RAW_PASSWORD} ORTHANC_RAW_AE_TITLE: ${ORTHANC_RAW_AE_TITLE} - ORTHANC_AUTOROUTE_RAW_TO_ANON: ${ORTHANC_AUTOROUTE_RAW_TO_ANON} ORTHANC_RAW_RECORD_HEADERS: ${ORTHANC_RAW_RECORD_HEADERS} ORTHANC_RAW_HEADER_LOG_PATH: ${ORTHANC_RAW_HEADER_LOG_PATH} - VNAQR_AE_TITLE: ${VNAQR_AE_TITLE} - VNAQR_DICOM_PORT: ${VNAQR_DICOM_PORT} - VNAQR_IP_ADDR: ${VNAQR_IP_ADDR} + PRIMARY_DICOM_SOURCE_AE_TITLE: ${PRIMARY_DICOM_SOURCE_AE_TITLE} + PRIMARY_DICOM_SOURCE_PORT: ${PRIMARY_DICOM_SOURCE_PORT} + 
PRIMARY_DICOM_SOURCE_IP_ADDR: ${PRIMARY_DICOM_SOURCE_IP_ADDR} + SECONDARY_DICOM_SOURCE_AE_TITLE: ${SECONDARY_DICOM_SOURCE_AE_TITLE:-$PRIMARY_DICOM_SOURCE_AE_TITLE} + SECONDARY_DICOM_SOURCE_PORT: ${SECONDARY_DICOM_SOURCE_PORT:-$PRIMARY_DICOM_SOURCE_PORT} + SECONDARY_DICOM_SOURCE_IP_ADDR: ${SECONDARY_DICOM_SOURCE_IP_ADDR:-$PRIMARY_DICOM_SOURCE_IP_ADDR} ORTHANC_ANON_AE_TITLE: ${ORTHANC_ANON_AE_TITLE} ORTHANC_ANON_DICOM_PORT: "4242" ORTHANC_ANON_HOSTNAME: "orthanc-anon" @@ -231,15 +226,16 @@ services: retries: 10 interval: 3s timeout: 2s - restart: "no" + restart: "always" queue: - image: rabbitmq:3.12.9-management + image: rabbitmq:3.13.7-management@sha256:1c32767bb8f7afb93fe99b890c05a250936bc2836fa3fd0154058f3953207095 hostname: queue-host environment: RABBITMQ_DEFAULT_USER: ${RABBITMQ_USERNAME} RABBITMQ_DEFAULT_PASS: ${RABBITMQ_PASSWORD} RABBITMQ_NODENAME: "rabbit@queue-host" + TZ: ${TZ:-Europe/London} healthcheck: test: rabbitmq-diagnostics -q check_running interval: 30s @@ -265,7 +261,6 @@ services: <<: [ *pixl-db, - *emap-db, *proxy-common, *pixl-common-env, *pixl-rabbit-mq, @@ -276,6 +271,9 @@ services: ORTHANC_ANON_PASSWORD: ${ORTHANC_ANON_PASSWORD} PROJECT_CONFIGS_DIR: /${PROJECT_CONFIGS_DIR:-/projects/configs} PIXL_MAX_MESSAGES_IN_FLIGHT: ${PIXL_MAX_MESSAGES_IN_FLIGHT} + HTTP_TIMEOUT: ${PIXL_DICOM_TRANSFER_TIMEOUT} + XNAT_OVERWRITE: ${XNAT_OVERWRITE} + XNAT_DESTINATION: ${XNAT_DESTINATION} env_file: - ./docker/common.env depends_on: @@ -314,6 +312,8 @@ services: condition: service_healthy orthanc-raw: condition: service_healthy + orthanc-anon: + condition: service_healthy healthcheck: interval: 10s timeout: 30s @@ -326,7 +326,16 @@ services: ORTHANC_RAW_USERNAME: ${ORTHANC_RAW_USERNAME} ORTHANC_RAW_PASSWORD: ${ORTHANC_RAW_PASSWORD} ORTHANC_RAW_AE_TITLE: ${ORTHANC_RAW_AE_TITLE} - VNAQR_MODALITY: ${VNAQR_MODALITY} + ORTHANC_AUTOROUTE_RAW_TO_ANON: ${ORTHANC_AUTOROUTE_RAW_TO_ANON} + PRIMARY_DICOM_SOURCE_MODALITY: ${PRIMARY_DICOM_SOURCE_MODALITY} + PRIMARY_DICOM_SOURCE_AE_TITLE: ${PRIMARY_DICOM_SOURCE_AE_TITLE} + SECONDARY_DICOM_SOURCE_MODALITY: ${SECONDARY_DICOM_SOURCE_MODALITY} + SECONDARY_DICOM_SOURCE_AE_TITLE: ${SECONDARY_DICOM_SOURCE_AE_TITLE:-$PRIMARY_DICOM_SOURCE_AE_TITLE} + ORTHANC_ANON_URL: ${ORTHANC_ANON_URL} + ORTHANC_ANON_USERNAME: ${ORTHANC_ANON_USERNAME} + ORTHANC_ANON_PASSWORD: ${ORTHANC_ANON_PASSWORD} + ORTHANC_ANON_AE_TITLE: ${ORTHANC_ANON_AE_TITLE} + ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT: ${ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT} SKIP_ALEMBIC: ${SKIP_ALEMBIC} PIXL_DB_HOST: ${PIXL_DB_HOST} PIXL_DB_PORT: ${PIXL_DB_PORT} @@ -351,7 +360,7 @@ services: POSTGRES_USER: ${PIXL_DB_USER} POSTGRES_PASSWORD: ${PIXL_DB_PASSWORD} POSTGRES_DB: ${PIXL_DB_NAME} - PGTZ: Europe/London + PGTZ: ${TZ:-Europe/London} env_file: - ./docker/common.env command: postgres -c 'config_file=/etc/postgresql/postgresql.conf' @@ -362,7 +371,7 @@ services: ports: - "${POSTGRES_PORT}:5432" healthcheck: - test: ["CMD", "pg_isready", "-U", "${PIXL_DB_USER}"] + test: ["CMD", "pg_isready", "-U", "${PIXL_DB_USER}", "--dbname", "${PIXL_DB_NAME}"] interval: 10s timeout: 30s retries: 5 diff --git a/docker/orthanc/Dockerfile b/docker/orthanc/Dockerfile index 1d85f8e1b..a78503428 100644 --- a/docker/orthanc/Dockerfile +++ b/docker/orthanc/Dockerfile @@ -11,15 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-FROM orthancteam/orthanc:24.3.3 AS pixl_orthanc_base +FROM orthancteam/orthanc:24.7.3@sha256:57a3d037729897331027ddc00c12695b50f1effbbf805f855396f3d0248d2d5f AS pixl_orthanc_base SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"] - # Create a virtual environment, recommended since python 3.11 and Debian bookworm based images # where you get a warning when installing system-wide packages. RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && \ - apt-get install --yes --no-install-recommends python3-venv + apt-get install --yes --no-install-recommends python3-venv tzdata RUN python3 -m venv /.venv # Install curl for now, but try to remove this dependency @@ -48,11 +47,13 @@ COPY ./orthanc/orthanc-raw/plugin/pixl.py /etc/orthanc/pixl.py # Orthanc can't substitute environment veriables as integers so copy and replace before running ARG ORTHANC_RAW_MAXIMUM_STORAGE_SIZE ARG ORTHANC_RAW_JOB_HISTORY_SIZE -ARG ORTHANC_RAW_CONCURRENT_JOBS +ARG ORTHANC_CONCURRENT_JOBS +ARG PIXL_DICOM_TRANSFER_TIMEOUT COPY ./orthanc/orthanc-raw/config /run/secrets RUN sed -i "s/\${ORTHANC_RAW_MAXIMUM_STORAGE_SIZE}/${ORTHANC_RAW_MAXIMUM_STORAGE_SIZE:-0}/g" /run/secrets/orthanc.json RUN sed -i "s/\${ORTHANC_RAW_JOB_HISTORY_SIZE}/${ORTHANC_RAW_JOB_HISTORY_SIZE:-100}/g" /run/secrets/orthanc.json -RUN sed -i "s/\${ORTHANC_RAW_CONCURRENT_JOBS}/${ORTHANC_RAW_CONCURRENT_JOBS:-5}/g" /run/secrets/orthanc.json +RUN sed -i "s/\${ORTHANC_CONCURRENT_JOBS}/${ORTHANC_CONCURRENT_JOBS:-5}/g" /run/secrets/orthanc.json +RUN sed -i "s/\${ORTHANC_RAW_STABLE_SECONDS}/${PIXL_DICOM_TRANSFER_TIMEOUT:-600}/g" /run/secrets/orthanc.json ENV PYTHONPATH=/.venv/lib64/python3.11/site-packages/ @@ -60,3 +61,12 @@ FROM pixl_orthanc_base AS pixl_orthanc_anon COPY ./orthanc/orthanc-anon/plugin/pixl.py /etc/orthanc/pixl.py ENV PYTHONPATH=/.venv/lib64/python3.11/site-packages/ +COPY ./orthanc/orthanc-anon/plugin/download_dicom_spec.py /etc/orthanc/download_dicom_spec.py +RUN --mount=type=cache,target=/root/.cache \ + python3 /etc/orthanc/download_dicom_spec.py + +ARG ORTHANC_CONCURRENT_JOBS +COPY ./orthanc/orthanc-anon/config /run/secrets + +RUN sed -i "s/\${ORTHANC_CONCURRENT_JOBS}/${ORTHANC_CONCURRENT_JOBS:-5}/g" /run/secrets/orthanc.json + diff --git a/docker/pixl-python/Dockerfile b/docker/pixl-python/Dockerfile index f730168c3..84f188c57 100644 --- a/docker/pixl-python/Dockerfile +++ b/docker/pixl-python/Dockerfile @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -FROM python:3.11.7-slim-bullseye AS pixl_python_base +FROM python:3.12.4-slim-bullseye@sha256:26ce493641ad3b1c8a6202117c31340c7bbb2dc126f1aeee8ea3972730a81dc6 AS pixl_python_base SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"] ARG TEST="false" @@ -25,7 +25,9 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ libpq-dev \ curl \ gnupg \ - locales + locales \ + tzdata + RUN sed -i '/en_GB.UTF-8/s/^# //g' /etc/locale.gen && locale-gen RUN apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/* diff --git a/docker/postgres/Dockerfile b/docker/postgres/Dockerfile index 8888138cc..bf615f6e8 100644 --- a/docker/postgres/Dockerfile +++ b/docker/postgres/Dockerfile @@ -11,12 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-FROM postgres:16-bookworm +FROM postgres:16-bookworm@sha256:5620f242bbc0e17478556102327e7efcf60ab48de3607c9e0ea98800841785ec # OS setup RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && \ - apt-get install --yes --no-install-recommends procps ca-certificates locales python3 python3-pip python3.11-venv && \ + apt-get install --yes --no-install-recommends procps ca-certificates locales python3 python3-pip python3.11-venv tzdata && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* RUN sed -i '/en_GB.UTF-8/s/^# //g' /etc/locale.gen && locale-gen diff --git a/docs/design/bigger_picture.md b/docs/design/bigger_picture.md index 06e643d85..27750abbf 100644 --- a/docs/design/bigger_picture.md +++ b/docs/design/bigger_picture.md @@ -1,5 +1,7 @@ # The bigger picture -The [System design for linkage and export of imaging and EHR data](https://github.com/UCLH-Foundry/the-rolling-skeleton/blob/main/docs/design/system-design.md) +The [System design for linkage and export of imaging and EHR data](https://github.com/SAFEHR-data/the-rolling-skeleton/blob/main/docs/design/system-design.md) document in `the-rolling-skeleton` repository provides an overview of the overall system architecture and the components that make up the system that PIXL is part of. + +Please request access to the `the-rolling-skeleton` repository and add further details in a [new blank issue](https://github.com/SAFEHR-data/PIXL/issues/new). \ No newline at end of file diff --git a/docs/design/diagrams/pixl-multi-project-config.drawio b/docs/design/diagrams/pixl-multi-project-config.drawio index 62a6f07ac..1bdb84394 100644 --- a/docs/design/diagrams/pixl-multi-project-config.drawio +++ b/docs/design/diagrams/pixl-multi-project-config.drawio @@ -10,7 +10,7 @@ - + @@ -25,7 +25,7 @@ - + diff --git a/docs/file_types/parquet_files.md b/docs/file_types/parquet_files.md index 3fd9a7df5..8b56234e1 100644 --- a/docs/file_types/parquet_files.md +++ b/docs/file_types/parquet_files.md @@ -3,7 +3,7 @@ ## OMOP-ES files From -[OMOP-ES](https://github.com/UCLH-Foundry/the-rolling-skeleton/blob/main/docs/design/100-day-design.md#data-flow-through-components) +[OMOP-ES](https://github.com/SAFEHR-data/the-rolling-skeleton/blob/main/docs/design/100-day-design.md#data-flow-through-components) we receive parquet files defining the data we need to export. These input files appear as 2 groups: 1. **Public** parquet files: have had identifiers removed and replaced with a sequential ID for the @@ -22,7 +22,7 @@ parquet file. ## Radiology linker table An output parquet file named `IMAGE_LINKER.parquet` that defines the connection between the -OMOP procedure_occurrence_id for the current extract and the hashed image/study ID. +OMOP procedure_occurrence_id for the current extract and the pseudo image/study ID. The procedure_occurrence_id can get renumbered from extract to extract. See method `make_radiology_linker_table` for more. @@ -42,10 +42,8 @@ implemented and documented in [`pixl_core`](../../pixl_core/README.md#uploading- Various _parquet_ files are provided throughout the repo to enable unit and system testing: -- `cli/tests/resources/omop/` contains public and private parquet files together with an - `extract_summary.json` file to mimic the input received from OMOP-ES for the unit tests. (This directory is identical to that below and should be deleted at some point). 
- `test/resources/omop/` contains public and private parquet files together with an - `extract_summary.json` file to mimic the input received from OMOP-ES for the system tests + `extract_summary.json` file to mimic the input received from OMOP-ES for the system tests and cli unit tests
During the system test, a `radiology.parquet` file is generated and temporarily stored in `projects/exports/test-extract-uclh-omop-cdm/latest/radiology/radiology.parquet` to check the successful
diff --git a/docs/services/pixl_database.md b/docs/services/pixl_database.md index c30be5213..24058d9b5 100644
--- a/docs/services/pixl_database.md
+++ b/docs/services/pixl_database.md
@@ -2,7 +2,7 @@ PIXL uses a [postgres database](../../postgres/README.md) to
-- Add hashed identifiers along with the originals for DICOM images (in `pixl_dcmd`)
+- Add pseudo identifiers along with the originals for DICOM images (in `pixl_dcmd`)
- Keep track of the export status of imaging (in `core.uploader`) studies and the projects they are used in
Note that the pipeline will not process any studies for a project that have already been exported.
diff --git a/docs/setup/azure-keyvault.md b/docs/setup/azure-keyvault.md index 6ba6c180d..ed91884aa 100644
--- a/docs/setup/azure-keyvault.md
+++ b/docs/setup/azure-keyvault.md
@@ -1,6 +1,6 @@ # Azure Keyvault setup
-_This is done for the \_UCLH_DIF\_ `dev` tenancy, will need to be done once in the _UCLHAZ_ `prod`
+_This is done for the \_UCLH_DIF\_ `dev` tenancy, and will need to be done once in the __UCLHSDE__ `prod`
tenancy when ready to deploy to production._
This Key Vault and secret must persist any infrastructure changes so should be separate from disposable
diff --git a/docs/setup/developer.md b/docs/setup/developer.md index 3c4a15681..07f13c9c0 100644
--- a/docs/setup/developer.md
+++ b/docs/setup/developer.md
@@ -1,21 +1,111 @@ # Developer setup
+## Setting up `Python` Virtual Environment (VE)
+
+### Using conda
+```
+conda create -n "pixlVE" python=3.11 pip -c conda-forge --yes
+conda activate pixlVE
+conda list -n pixlVE #to check installed packages
+conda deactivate #to deactivate VE
+conda remove -n pixlVE --all #to remove pixlVE environment
+```
+
+### Using python virtual environment `venv`
+You require `python3-venv` to set up your `venv`. See further details [here](https://docs.python.org/3/library/venv.html).
+```
+# Create path for venv
+cd $HOME
+mkdir pixlVE
+cd pixlVE
+# Create virtual environment
+python3 -m venv pixlVE
+source pixlVE/bin/activate
+```
+
+## Docker requirements
+Most modules require `docker` and `docker-compose` to be installed to run tests.
+* [Docker](https://docs.docker.com/get-docker/) with version `>=27.0.3`
+* [Docker Compose](https://docs.docker.com/compose/install/#installation-scenarios) with version `>=v2.28.1-desktop.1`
+
+## Installation of `PIXL` modules
+
+You can install all PIXL Python modules by running the following commands from the `PIXL/` directory:
+
+```shell
+python -m pip install -e "pixl_core/[dev]"
+python -m pip install -e "pytest-pixl/[dev,test]"
+python -m pip install -e "pixl_core/[test]"
+python -m pip install -e "cli/[dev,test]"
+python -m pip install -e "pixl_imaging/[dev,test]"
+python -m pip install -e "pixl_dcmd/[dev,test]"
+python -m pip install -e "pixl_export/[dev,test]"
+python -m pip install -e "hasher/[dev,test]"
+```
+
 See each service's README for individual development and testing instructions.
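As a purely illustrative sanity check (not an official setup step, and assuming the editable installs above completed without errors), you can confirm that a couple of the PIXL packages import cleanly from the activated environment:

```shell
# Illustrative only: verify the editable installs are importable
python -c "import core, pixl_dcmd; print('PIXL modules importable')"
```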
-Most modules require [`docker`](https://docs.docker.com/desktop/) and `docker-compose` to be installed to run tests.
-For Python development we use [ruff](https://docs.astral.sh/ruff/) and [mypy](https://mypy.readthedocs.io/)
-alongside [pytest](https://www.pytest.org/).
-There is support (sometimes through plugins) for these tools in most IDEs & editors.
+## Testing
+
+### Module-level testing
+
+Once you have installed each module, you can run the tests for a module using the `pytest` command, e.g.
+
+```shell
+cd pixl_core/
+pytest
+```
+
+Alternatively, you can run most of the module-level tests from the root of the repo with:
+
+```shell
+pytest # runs all tests listed under `testpaths` in pytest.ini
+```
+
+The `pytest.ini` file in the root of the repo contains the configuration for running most of the module-level tests at once.
+However, `pixl_dcmd` and `hasher` have `conftest.py` files that clash, so only `pixl_dcmd` is included as a `testpath` in the
+top-level `pytest.ini`. You will therefore need to run tests for `hasher` from the `hasher` directory.
+
+
+#### Enabling default Docker socket for testing `pixl_core`
+
+We have tests in `pixl_core` for uploading DICOM to XNAT as an endpoint. These tests use
+[`xnat4tests`](https://github.com/Australian-Imaging-Service/xnat4tests) to spin up a docker container running XNAT.
+
+`xnat4tests` requires you to allow the Docker daemon to listen for Docker Engine API requests via the default
+socket. This is because `xnat4tests` sets up the XNAT Container Service for launching other containers that run
+analysis pipelines.
+
+If you are using Docker Desktop, you will need to enable Docker to listen on the default socket by going to
+`Settings > Advanced` and checking the box `Allow the default Docker socket to be used`.
+
+If you are running Docker Engine on Linux, listening on this socket should be
+[enabled by default](https://docs.docker.com/reference/cli/dockerd/#daemon-socket-option).
+
+
+### Integration tests
+
+There are also integration tests in the `PIXL/test/` directory that can be run using `PIXL/test/run-system-test.sh`. See the
+[integration test docs](test/README.md) for more info.
+
+
+### Workflow
 Before raising a PR, make sure to **run all tests** for each PIXL module
-and not just the component you have been working on as this will help us catch unintentional regressions without spending GH actions minutes :-)
+and not just the component you have been working on, as this will help us catch unintentional regressions without spending GH actions minutes.
+
 ## Linting
+For Python development we use [ruff](https://docs.astral.sh/ruff/) and [mypy](https://mypy.readthedocs.io/)
+alongside [pytest](https://www.pytest.org/).
+There is support (sometimes through plugins) for these tools in most IDEs & editors.
+
+
 We run [pre-commit](https://pre-commit.com/) as part of the GitHub Actions CI. To install and run it locally, do:
```shell
-pip install pre-commit
+python -m pip install pre-commit
pre-commit install
```
diff --git a/hasher/README.md b/hasher/README.md index 9a96754cd..3119a298f 100644
--- a/hasher/README.md
+++ b/hasher/README.md
@@ -27,7 +27,7 @@ It is assumed you have a Python virtual environment configured using a tool like
Install `hasher` locally with:
```shell
-pip install -e .
+python -m pip install -e .
``` ### Setup diff --git a/hasher/pyproject.toml b/hasher/pyproject.toml index 85773286d..51c5626e4 100644 --- a/hasher/pyproject.toml +++ b/hasher/pyproject.toml @@ -1,24 +1,28 @@ [project] name = "hasher" -version = "0.0.1" +version = "0.2.0rc0" authors = [{ name = "PIXL authors" }] description = "Service to securely hash identifiers" readme = "README.md" requires-python = ">=3.10" classifiers = ["Programming Language :: Python :: 3"] dependencies = [ - "core", - "azure-identity==1.12.0", + "core==0.2.0rc0", + "azure-identity==1.19.0", "azure-keyvault==4.2.0", - "fastapi==0.109.1", - "hypothesis==6.56.0", - "requests==2.31.0", - "uvicorn==0.23.2", + "fastapi==0.115.6", + "hypothesis==6.122.3", + "requests==2.32.3", + "uvicorn==0.32.1", ] [project.optional-dependencies] -test = ["pytest==7.4.*", "httpx==0.24.*"] -dev = ["mypy", "pre-commit", "ruff"] +test = [ + "core[test]==0.2.0rc0", +] +dev = [ + "core[dev]==0.2.0rc0", +] [build-system] requires = ["setuptools>=61.0"] @@ -30,3 +34,19 @@ extend = "../ruff.toml" [tool.ruff.lint.extend-per-file-ignores] "./tests/**" = ["D1"] "./src/hasher/endpoints.py" = ["D103"] + +[tool.coverage.report] +exclude_also = [ + "def __repr__", + "if self.debug:", + "if settings.DEBUG", + "except subprocess.CalledProcessError as exception:", + "raise AssertionError", + "raise NotImplementedError", + "if 0:", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", + "if typing.TYPE_CHECKING", + "class .*\\bProtocol\\):", + "@(abc\\.)?abstractmethod", +] diff --git a/hasher/tests/conftest.py b/hasher/tests/conftest.py index aa587ab32..e57f9d988 100644 --- a/hasher/tests/conftest.py +++ b/hasher/tests/conftest.py @@ -18,7 +18,7 @@ import pytest os.environ["LOG_LEVEL"] = "DEBUG" -os.environ["AZURE_KEY_VAULT_SECRET_NAME"] = "test-key" # noqa: S105, hardcoded secret +os.environ["AZURE_KEY_VAULT_SECRET_NAME"] = "test-key" os.environ["LOCAL_SALT_VALUE"] = "pixl_salt" diff --git a/orthanc/orthanc-anon/README.md b/orthanc/orthanc-anon/README.md index e336ee58b..bef1b3f58 100644 --- a/orthanc/orthanc-anon/README.md +++ b/orthanc/orthanc-anon/README.md @@ -37,6 +37,9 @@ secrets. Orthanc interprets all `.json` files in the `/run/secrets` mount as con - The anonymisation is carried out by an Orthanc Python plugin [pixl.py](./plugin/pixl.py). This plugin uses the [pixl_dcmd](../../pixl_dcmd/) package to apply the anonymisation scheme (and that in turn uses [Kitware Dicom Anonymizer](https://github.com/KitwareMedical/dicom-anonymizer)) +- Environmental variables: + - `PIXL_DICOM_TRANSFER_TIMEOUT` is used as the timeout for any REST API requests made from + orthanc-anon ### Step 1 diff --git a/orthanc/orthanc-anon/config/orthanc.json b/orthanc/orthanc-anon/config/orthanc.json index 8b85b47a3..e848006db 100644 --- a/orthanc/orthanc-anon/config/orthanc.json +++ b/orthanc/orthanc-anon/config/orthanc.json @@ -1,7 +1,4 @@ { - "Dictionary": { - "000d,1001": ["LO", "UCLHPIXLProjectName", 1, 1, "UCLH PIXL"] - }, "Name" : "${ORTHANC_NAME}", "RemoteAccessAllowed" : true, "RegisteredUsers": { @@ -13,14 +10,19 @@ // doubling them, or replaced by forward slashes "/". "StorageDirectory" : "/var/lib/orthanc/db", + // overwrite instances with the same UID if we get them for a second time + "OverwriteInstances" : true, + // Limit the maximum number of instances "MaximumPatientCount": 200, - + "MaximumStorageMode" : "Recycle", // Path to the directory that holds the SQLite index (if unset, the // value of StorageDirectory is used). 
This index could be stored on // a RAM-drive or a SSD device for performance reasons. "IndexDirectory" : "/var/lib/orthanc/db", + "ConcurrentJobs" : ${ORTHANC_CONCURRENT_JOBS}, // replaced in Dockerfile because its an integer + // To enable plugins: "Plugins" : [ "/usr/share/orthanc/plugins" ], "PythonScript" : "/etc/orthanc/pixl.py", diff --git a/cli/src/pixl_cli/_utils.py b/orthanc/orthanc-anon/plugin/download_dicom_spec.py similarity index 53% rename from cli/src/pixl_cli/_utils.py rename to orthanc/orthanc-anon/plugin/download_dicom_spec.py index d7b390f84..04e4f3f4d 100644 --- a/cli/src/pixl_cli/_utils.py +++ b/orthanc/orthanc-anon/plugin/download_dicom_spec.py @@ -1,4 +1,4 @@ -# Copyright (c) University College London Hospitals NHS Foundation Trust +# Copyright (c) 2022 University College London Hospitals NHS Foundation Trust # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,22 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import annotations +"""Download the DICOM spec with dicom-validator.""" from pathlib import Path +from dicom_validator.spec_reader.edition_reader import EditionReader -def clear_file(filepath: Path) -> None: - """Clear the contents of a file""" - filepath.open("w").close() - - -def string_is_non_empty(string: str) -> bool: - """Does a string have more than just spaces and newlines?""" - return len(string.split()) > 0 - - -def remove_file_if_it_exists(filepath: Path) -> None: - """If a file exists remove it""" - if filepath.exists(): - Path.unlink(filepath) +edition = "2024e" +download_path = str(Path.home() / "dicom-validator") +edition_reader = EditionReader(download_path) +destination = edition_reader.get_revision(edition, recreate_json=False) +json_path = Path(destination, "json") +EditionReader.load_dicom_info(json_path) diff --git a/orthanc/orthanc-anon/plugin/pixl.py b/orthanc/orthanc-anon/plugin/pixl.py index 13e66b200..a9d10c641 100644 --- a/orthanc/orthanc-anon/plugin/pixl.py +++ b/orthanc/orthanc-anon/plugin/pixl.py @@ -26,26 +26,39 @@ import sys import threading import traceback +from concurrent.futures import ThreadPoolExecutor from io import BytesIO from time import sleep from typing import TYPE_CHECKING, cast +from zipfile import ZipFile +import pydicom import requests -from core.exceptions import PixlDiscardError +from core.exceptions import PixlDiscardError, PixlSkipInstanceError +from core.project_config.pixl_config_model import load_project_config from decouple import config from loguru import logger from pydicom import dcmread import orthanc -from pixl_dcmd.main import anonymise_dicom, should_exclude_series, write_dataset_to_bytes +from pixl_dcmd.main import ( + anonymise_dicom_and_update_db, + write_dataset_to_bytes, +) if TYPE_CHECKING: from typing import Any + from core.project_config.pixl_config_model import PixlConfig + ORTHANC_USERNAME = config("ORTHANC_USERNAME") ORTHANC_PASSWORD = config("ORTHANC_PASSWORD") ORTHANC_URL = "http://localhost:8042" +ORTHANC_RAW_USERNAME = config("ORTHANC_RAW_USERNAME") +ORTHANC_RAW_PASSWORD = config("ORTHANC_RAW_PASSWORD") +ORTHANC_RAW_URL = "http://orthanc-raw:8042" + EXPORT_API_URL = "http://export-api:8000" # Set up logging as main entry point @@ -57,6 +70,12 @@ logger.warning("Running logging at level {}", logging_level) +# Set up a thread pool executor for 
non-blocking calls to Orthanc +max_workers = config("PIXL_MAX_MESSAGES_IN_FLIGHT", cast=int) +executor = ThreadPoolExecutor(max_workers=max_workers) + +logger.info("Using {} threads for processing", max_workers) + def AzureAccessToken() -> str: """ @@ -149,28 +168,7 @@ def AzureDICOMTokenRefresh() -> None: return None -def Send(study_id: str) -> None: - """ - Send the resource to the appropriate destination. - Throws an exception if the image has already been exported. - """ - msg = f"Sending {study_id}" - logger.debug(msg) - notify_export_api_of_readiness(study_id) - - -def notify_export_api_of_readiness(study_id: str): - """ - Tell export-api that our data is ready and it should download it from us and upload - as appropriate - """ - url = EXPORT_API_URL + "/export-dicom-from-orthanc" - payload = {"study_id": study_id} - response = requests.post(url, json=payload, timeout=30) - response.raise_for_status() - - -def should_auto_route() -> bool: +def should_export() -> bool: """ Checks whether ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT environment variable is set to true or false @@ -186,19 +184,14 @@ def _azure_available() -> bool: def OnChange(changeType, level, resource): # noqa: ARG001 """ - - If a study is stable and if should_auto_route returns true - then notify the export API that it should perform the upload of DICOM data. - - If orthanc has started then start a timer to refresh the Azure token every 30 seconds - - If orthanc has stopped then cancel the timer + - If `should_export` returns `false`, the do nothing + - Otherwise: + - If orthanc has started then start a timer to refresh the Azure token every 30 seconds + - If orthanc has stopped then cancel the timer """ - if not should_auto_route(): + if not should_export(): return - if changeType == orthanc.ChangeType.STABLE_STUDY: - msg = f"Stable study: {resource}" - logger.info(msg) - Send(resource) - if changeType == orthanc.ChangeType.ORTHANC_STARTED and _azure_available(): orthanc.LogWarning("Starting the scheduler") AzureDICOMTokenRefresh() @@ -214,42 +207,195 @@ def OnHeartBeat(output, uri, **request) -> Any: # noqa: ARG001 output.AnswerBuffer("OK\n", "text/plain") -def ReceivedInstanceCallback(receivedDicom: bytes, origin: str) -> Any: - """Modifies a DICOM instance received by Orthanc and applies anonymisation.""" - if origin == orthanc.InstanceOrigin.REST_API: - orthanc.LogWarning("DICOM instance received from the REST API") - elif origin == orthanc.InstanceOrigin.DICOM_PROTOCOL: - orthanc.LogWarning("DICOM instance received from the DICOM protocol") +def ImportStudiesFromRaw(output, uri, **request): # noqa: ARG001 + """ + Import studies from Orthanc Raw. + + Offload to a thread pool executor to avoid blocking the Orthanc main thread. + """ + payload = json.loads(request["body"]) + study_resource_ids = payload["ResourceIDs"] + study_uids = payload["StudyInstanceUIDs"] + project_name = payload["ProjectName"] - # It's important that as much code in this handler as possible is inside this "try" block. - # This ensures we discard the image if anything goes wrong in the anonymisation process. - # If the handler raises an exception the pre-anon image will be kept. 
- try: - return _process_dicom_instance(receivedDicom) - except Exception: # noqa: BLE001 - orthanc.LogError("Failed to anonymize instance due to\n" + traceback.format_exc()) - return orthanc.ReceivedInstanceAction.DISCARD, None + for study_resource_id, study_uid in zip(study_resource_ids, study_uids, strict=False): + executor.submit(_import_study_from_raw, study_resource_id, study_uid, project_name) + response = json.dumps({"Message": "Ok"}) + output.AnswerBuffer(response, "application/json") -def _process_dicom_instance(receivedDicom: bytes) -> tuple[orthanc.ReceivedInstanceAction, None]: - # Read the bytes as DICOM/ - dataset = dcmread(BytesIO(receivedDicom)) - # Do before anonymisation in case someone decides to delete the - # Series Description tag as part of anonymisation. - if should_exclude_series(dataset): - orthanc.LogWarning("DICOM instance discarded due to its series description") - return orthanc.ReceivedInstanceAction.DISCARD, None +def _import_study_from_raw(study_resource_id: str, study_uid: str, project_name: str) -> None: + """ + Import a study from Orthanc Raw. - # Attempt to anonymise and drop the study if any exceptions occur - try: - anonymise_dicom(dataset) - return orthanc.ReceivedInstanceAction.MODIFY, write_dataset_to_bytes(dataset) - except PixlDiscardError as error: - logger.debug("Skipping instance: {}", error) - return orthanc.ReceivedInstanceAction.DISCARD, None + Args: + study_resource_id: Resource ID of the study in Orthanc Raw + study_uid: Corresponding StudyInstanceUID + project_name: Name of the project + + - Pull a study from Orthanc Raw based on its resource ID + - Iterate over instances and anonymise them + - Re-upload the study via the dicom-web api + - Notify the PIXL export-api to send the study the to relevant endpoint for the project + + """ + zipped_study_bytes = get_study_zip_archive_from_raw(resource_id=study_resource_id) + + with ZipFile(zipped_study_bytes) as zipped_study: + try: + anonymised_instances_bytes, anonymised_study_uid = _anonymise_study_instances( + zipped_study=zipped_study, + study_uid=study_uid, + project_name=project_name, + ) + except PixlDiscardError as discard: + logger.warning("Failed to anonymize study {}: {}", study_uid, discard) + except Exception: # noqa: BLE001 + logger.exception("Failed to anonymize study: {} ", study_uid) + return + + _upload_instances(anonymised_instances_bytes) + + if not should_export(): + logger.debug("Not exporting study {} as auto-routing is disabled", anonymised_study_uid) + return + + anonymised_study_resource_id = _get_study_resource_id(anonymised_study_uid) + logger.debug( + "Notify export API to retrieve study resource. Original UID {} Anon UID: {}", + study_uid, + anonymised_study_uid, + ) + send_study(study_id=anonymised_study_resource_id, project_name=project_name) + + +def get_study_zip_archive_from_raw(resource_id: str) -> BytesIO: + """Download zip archive of study resource from Orthanc Raw.""" + query = f"{ORTHANC_RAW_URL}/studies/{resource_id}/archive" + response = requests.get( + query, + auth=(config("ORTHANC_RAW_USERNAME"), config("ORTHANC_RAW_PASSWORD")), + timeout=config("PIXL_DICOM_TRANSFER_TIMEOUT", default=180, cast=int), + ) + response.raise_for_status() + logger.debug("Downloaded data for resource {} from Orthanc Raw", resource_id) + return BytesIO(response.content) + + +def _get_study_resource_id(study_uid: str) -> str: + """ + Get the resource ID for an existing study based on its StudyInstanceUID. 
+ + Returns None if there are no resources with the given StudyInstanceUID. + Returns the resource ID if there is a single resource with the given StudyInstanceUID. + Returns None if there are multiple resources with the given StudyInstanceUID and deletes + the studies. + """ + data = json.dumps( + { + "Level": "Study", + "Query": { + "StudyInstanceUID": study_uid, + }, + } + ) + study_resource_ids = json.loads(orthanc.RestApiPost("/tools/find", data)) + if not study_resource_ids: + message = f"No study found with StudyInstanceUID {study_uid}" + raise ValueError(message) + if len(study_resource_ids) > 1: + message = f"Multiple studies found with StudyInstanceUID {study_uid}" + raise ValueError(message) + + return study_resource_ids[0] + + +def _anonymise_study_instances( + zipped_study: ZipFile, study_uid: str, project_name: str +) -> tuple[list[bytes], str]: + """ + Iterate over all instances and anonymise them. + + Skip an instance if a PixlSkipInstanceError is raised during anonymisation. + + Return a list of the bytes of anonymised instances, and the anonymised StudyInstanceUID. + """ + config = load_project_config(project_name) + anonymised_instances_bytes = [] + logger.debug("Zipped study infolist: {}", zipped_study.infolist()) + for file_info in zipped_study.infolist(): + with zipped_study.open(file_info) as file: + logger.debug("Reading file {}", file) + dataset = dcmread(file) + + logger.info("Anonymising file: {} for study: {}", file, study_uid) + try: + anonymised_instances_bytes.append(_anonymise_dicom_instance(dataset, config)) + except PixlSkipInstanceError as e: + logger.warning( + "Skipping instance {} for study {}: {}", + dataset[0x0008, 0x0018].value, + study_uid, + e, + ) + else: + anonymised_study_uid = dataset[0x0020, 0x000D].value + + if not anonymised_instances_bytes: + message = f"All instances have been skipped for study {study_uid}" + raise PixlDiscardError(message) + + logger.success("Finished anonymising file: {} for study: {}", file, study_uid) + return anonymised_instances_bytes, anonymised_study_uid + + +def _anonymise_dicom_instance(dataset: pydicom.Dataset, config: PixlConfig) -> bytes: + """Anonymise a DICOM instance.""" + anonymise_dicom_and_update_db(dataset, config=config) + return write_dataset_to_bytes(dataset) + + +def _upload_instances(instances_bytes: list[bytes]) -> None: + """Upload instances to Orthanc""" + files = [] + for index, dicom_bytes in enumerate(instances_bytes): + files.append(("file", (f"instance{index}.dcm", dicom_bytes, "application/dicom"))) + + # Using requests as doing: + # `upload_response = orthanc.RestApiPost(f"/instances", anonymised_files)` + # gives an error BadArgumentType error (orthanc.RestApiPost seems to only accept json) + upload_response = requests.post( + url=f"{ORTHANC_URL}/instances", + auth=(ORTHANC_USERNAME, ORTHANC_PASSWORD), + files=files, + timeout=config("PIXL_DICOM_TRANSFER_TIMEOUT", default=180, cast=int), + ) + upload_response.raise_for_status() + + +def send_study(study_id: str, project_name: str) -> None: + """ + Send the resource to the appropriate destination. + Throws an exception if the image has already been exported. 
+ """ + msg = f"Sending {study_id}" + logger.debug(msg) + notify_export_api_of_readiness(study_id, project_name) + + +def notify_export_api_of_readiness(study_id: str, project_name: str) -> None: + """ + Tell export-api that our data is ready and it should download it from us and upload + as appropriate + """ + url = EXPORT_API_URL + "/export-dicom-from-orthanc" + payload = {"study_id": study_id, "project_name": project_name} + timeout: float = config("HTTP_TIMEOUT", default=30, cast=float) + response = requests.post(url, json=payload, timeout=timeout) + response.raise_for_status() orthanc.RegisterOnChangeCallback(OnChange) -orthanc.RegisterReceivedInstanceCallback(ReceivedInstanceCallback) orthanc.RegisterRestCallback("/heart-beat", OnHeartBeat) +orthanc.RegisterRestCallback("/import-from-raw", ImportStudiesFromRaw) diff --git a/orthanc/orthanc-raw/README.md b/orthanc/orthanc-raw/README.md index 0b603f18f..eec29a5a0 100644 --- a/orthanc/orthanc-raw/README.md +++ b/orthanc/orthanc-raw/README.md @@ -16,8 +16,8 @@ The following assumptions are made: - There is a PostgreSQL database available (currently [defined in `pixl_core`](../../pixl_core/README.md)) to store the Index data within Orthanc (or it will become available shortly when the service is started). -- The IP, port and AE Title for the VNA Q/R target have been provided, and the reciprocal details -for this instance have been shared with the PACS team. +- The IPs, ports and AE Titles for the primary (VNA) and secondary (PACS) Q/R targets have been provided, + and the reciprocal details for this instance have been shared with the PACS team. - There is sufficient local storage for the `orthanc-raw-data` volume. ### Configuration @@ -26,8 +26,8 @@ for this instance have been shared with the PACS team. - `ORTHANC_RAW_MAXIMUM_STORAGE_SIZE` to limit the storage size - `ORTHANC_RAW_JOB_HISTORY_SIZE` has been increased so that while there is concurrent processing, the job should always exist for being able to query its status - - `ORTHANC_RAW_CONCURRENT_JOBS` has been increased to allow for more concurrent transfers from - the VNA to orthanc raw. + - `ORTHANC_CONCURRENT_JOBS` has been increased to allow for more concurrent transfers from + the VNA to orthanc raw. - All configuration is driven through customised JSON config files stored in the [config](./config/) directory. - The files are populated with values from environment variables and injected into the container as @@ -41,7 +41,7 @@ secrets. Orthanc interprets all `.json` files in the `/run/secrets` mount as con ### Step 1 -Save credentials `.env` for the PACS/VNA Q/R target, postgreSQL and 'Orthanc anon'. +Save credentials `.env` for the VNA (primary) and PACS (secondary) Q/R targets, postgreSQL and 'Orthanc anon'. 
``` # PIXL PostgreSQL instance PIXL_DB_HOST= @@ -62,10 +62,15 @@ ORTHANC_RAW_AE_TITLE= # PIXL Orthanc anon instance ORTHANC_ANON_AE_TITLE= -# UCVNAQR DICOM node information -VNAQR_AE_TITLE= -VNAQR_DICOM_PORT= -VNAQR_IP_ADDR= +# UCPRIMARYQR DICOM node information +PRIMARY_DICOM_SOURCE_AE_TITLE= +PRIMARY_DICOM_SOURCE_PORT= +PRIMARY_DICOM_SOURCE_IP_ADDR= + +# UCSECONDARYQR DICOM node information +SECONDARY_DICOM_SOURCE_AE_TITLE= +SECONDARY_DICOM_SOURCE_PORT= +SECONDARY_DICOM_SOURCE_IP_ADDR= ``` ### Step 2 diff --git a/orthanc/orthanc-raw/config/dicom.json b/orthanc/orthanc-raw/config/dicom.json index beb41a9c7..f1288a693 100644 --- a/orthanc/orthanc-raw/config/dicom.json +++ b/orthanc/orthanc-raw/config/dicom.json @@ -2,10 +2,10 @@ "DicomAet" : "${ORTHANC_RAW_AE_TITLE}", "DicomModalities" : { - "UCVNAQR" : { - "AET" : "${VNAQR_AE_TITLE}", - "Port" : "${VNAQR_DICOM_PORT}", - "Host" : "${VNAQR_IP_ADDR}", + "UCPRIMARYQR" : { + "AET" : "${PRIMARY_DICOM_SOURCE_AE_TITLE}", + "Port" : "${PRIMARY_DICOM_SOURCE_PORT}", + "Host" : "${PRIMARY_DICOM_SOURCE_IP_ADDR}", "Manufacturer" : "Generic", "AllowEcho" : true, "AllowFind" : false, @@ -16,7 +16,23 @@ "AllowStorageCommitment" : false, "AllowTranscoding" : true, "UseDicomTls" : false, - "Timeout" : 60 + "Timeout" : 120 + }, + "UCSECONDARYQR" : { + "AET" : "${SECONDARY_DICOM_SOURCE_AE_TITLE}", + "Port" : "${SECONDARY_DICOM_SOURCE_PORT}", + "Host" : "${SECONDARY_DICOM_SOURCE_IP_ADDR}", + "Manufacturer" : "Generic", + "AllowEcho" : true, + "AllowFind" : false, + "AllowFindWorklist" : false, + "AllowGet" : false, + "AllowMove" : false, + "AllowStore" : true, + "AllowStorageCommitment" : false, + "AllowTranscoding" : true, + "UseDicomTls" : false, + "Timeout" : 120 }, "PIXL-Anon" : { "AET" : "${ORTHANC_ANON_AE_TITLE}", @@ -32,7 +48,7 @@ "AllowStorageCommitment" : false, "AllowTranscoding" : true, "UseDicomTls" : false, - "Timeout" : 60 + "Timeout" : 120 } }, @@ -86,6 +102,11 @@ // modality initiating a DICOM connection (as listed in the // "DicomModalities" option above). If this option is set to // "false", Orthanc only checks the AET of the remote modality. - "DicomCheckModalityHost" : false + "DicomCheckModalityHost" : false, + + // Overwrite instances with the same UID + // This allows us to set the project name tag in-place without modifying the + //StudyInstanceUID, SeriesInstanceUID, and SOPInstanceUID + "OverwriteInstance": true } diff --git a/orthanc/orthanc-raw/config/orthanc.json b/orthanc/orthanc-raw/config/orthanc.json index 498f86de1..b53374e30 100644 --- a/orthanc/orthanc-raw/config/orthanc.json +++ b/orthanc/orthanc-raw/config/orthanc.json @@ -1,6 +1,5 @@ { "Dictionary": { - "000d,1001": ["LO", "UCLHPIXLProjectName", 1, 1, "UCLH PIXL"] }, "DefaultPrivateCreator" : "UCLH PIXL", "Name" : "${ORTHANC_NAME}", @@ -20,15 +19,46 @@ "MaximumStorageMode" : "Recycle", // Enable concurrency "JobsHistorySize": ${ORTHANC_RAW_JOB_HISTORY_SIZE}, // replaced in Dockerfile because its an integer - "ConcurrentJobs" : ${ORTHANC_RAW_CONCURRENT_JOBS}, // replaced in Dockerfile because its an integer + "ConcurrentJobs" : ${ORTHANC_CONCURRENT_JOBS}, // replaced in Dockerfile because its an integer // overwrite instances with the same UID if we get them for a second time "OverwriteInstances" : true, // Path to the directory that holds the SQLite index (if unset, the // value of StorageDirectory is used). This index could be stored on // a RAM-drive or a SSD device for performance reasons. 
//"IndexDirectory" : "/var/lib/orthanc/db" + "StableAge" : ${ORTHANC_RAW_STABLE_SECONDS}, + // Defines the number of threads that are used to execute each type of + // jobs (for the jobs that can be parallelized). + // A value of "0" indicates to use all the available CPU logical cores + "JobsEngineThreadsCount" : { + "ResourceModification": 1 // for /anonymize, /modify + }, + + // Whether to save the jobs into the Orthanc database. If this + // option is set to "true", the pending/running/completed jobs are + // automatically reloaded from the database if Orthanc is stopped + // then restarted (except if the "--no-jobs" command-line argument + // is specified). This option should be set to "false" if multiple + // Orthanc servers are using the same database (e.g. if PostgreSQL + // or MariaDB/MySQL is used). + "SaveJobs" : false, + + "DicomScpTimeout" : 120, + + // Main Dicom tags that are already stored. + // see https://orthanc.uclouvain.be/book/faq/main-dicom-tags.html + // (new in Orthanc 1.11.0) + // Sequences tags are not supported. +"ExtraMainDicomTags" : { + "Instance" : [ + "SOPClassUID" + ], + "Series" : [], + "Study": [], + "Patient": [] + }, - // To enable plugins: +// To enable plugins: "Plugins" : [ "/usr/share/orthanc/plugins" ], "PythonScript" : "/etc/orthanc/pixl.py", "PythonVerbose" : false, diff --git a/orthanc/orthanc-raw/plugin/pixl.py b/orthanc/orthanc-raw/plugin/pixl.py index c3b3e6936..22f8265ff 100644 --- a/orthanc/orthanc-raw/plugin/pixl.py +++ b/orthanc/orthanc-raw/plugin/pixl.py @@ -15,27 +15,19 @@ Facilitates routing of stable studies from orthanc-raw to orthanc-anon This module provides: --OnChange: route stable studies and if auto-routing enabled --should_auto_route: checks whether auto-routing is enabled -OnHeartBeat: extends the REST API """ from __future__ import annotations -import json import os import sys -import traceback -from io import BytesIO -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING -from core.dicom_tags import DICOM_TAG_PROJECT_NAME, add_private_tag from decouple import config from loguru import logger -from pydicom import dcmread import orthanc -from pixl_dcmd.main import write_dataset_to_bytes from pixl_dcmd.tagrecording import record_dicom_headers if TYPE_CHECKING: @@ -51,39 +43,17 @@ logger.warning("Running logging at level {}", logging_level) -def OnChange(changeType, level, resourceId): # noqa: ARG001 - """ - # Taken from: - # https://book.orthanc-server.com/plugins/python.html#auto-routing-studies - This routes any stable study to a modality named PIXL-Anon if - should_auto_route returns true - """ - if changeType == orthanc.ChangeType.STABLE_STUDY and should_auto_route(): - logger.debug("Sending study: {}", resourceId) - # Although this can throw, since we have nowhere to report errors - # back to (eg. an HTTP client), don't try to handle anything here. - # The client will have to detect that it hasn't happened and retry. 
- orthanc_anon_store_study(resourceId) - - -def orthanc_anon_store_study(resource_id): - """Call the API to send the specified resource (study) to the orthanc anon server.""" - # RestApiPost raises an orthanc.OrthancException if it fails - orthanc.RestApiPost("/modalities/PIXL-Anon/store", resource_id) - orthanc.LogInfo(f"Successfully sent study to anon modality: {resource_id}") - - def OnHeartBeat(output, uri, **request): # noqa: ARG001 """Extends the REST API by registering a new route in the REST API""" orthanc.LogWarning("OK") output.AnswerBuffer("OK\n", "text/plain") -def ReceivedInstanceCallback(receivedDicom: bytes, origin: str) -> Any: +def ReceivedInstanceCallback(receivedDicom: bytes, origin: str) -> Any: # noqa: ARG001 """Optionally record headers from the received DICOM instance.""" if should_record_headers(): record_dicom_headers(receivedDicom) - return modify_dicom_tags(receivedDicom, origin) + return orthanc.ReceivedInstanceAction.KEEP_AS_IS, None def should_record_headers() -> bool: @@ -94,99 +64,5 @@ def should_record_headers() -> bool: return os.environ.get("ORTHANC_RAW_RECORD_HEADERS", "false").lower() == "true" -def should_auto_route(): - """ - Checks whether ORTHANC_AUTOROUTE_RAW_TO_ANON environment variable is - set to true or false - """ - return os.environ.get("ORTHANC_AUTOROUTE_RAW_TO_ANON", "false").lower() == "true" - - -def modify_dicom_tags(receivedDicom: bytes, origin: str) -> Any: - """ - A new incoming DICOM file needs to have the project name private tag added here, so - that the API will later allow us to edit it. - However, we don't know its correct value at this point, so just create it with an obvious - placeholder value. - """ - if origin != orthanc.InstanceOrigin.DICOM_PROTOCOL: - # don't keep resetting the tag values if this was triggered by an API call! - logger.trace("doing nothing as change triggered by API") - return orthanc.ReceivedInstanceAction.KEEP_AS_IS, None - dataset = dcmread(BytesIO(receivedDicom)) - # See the orthanc.json config file for where this tag is given a nickname - # The private block is the first free block >= 0x10. - # We can't directly control it, but the orthanc config requires it to be - # hardcoded to 0x10 - # https://dicom.nema.org/dicom/2013/output/chtml/part05/sect_7.8.html - - # Add project name as private tag, at this point, the value is unknown - private_block = add_private_tag(dataset, DICOM_TAG_PROJECT_NAME) - - logger.debug("added new private block starting at 0x{:04x}", private_block.block_start) - return orthanc.ReceivedInstanceAction.MODIFY, write_dataset_to_bytes(dataset) - - -def log_and_return_http( - output, http_code: int, http_message: str, log_message: Optional[str] = None -): - """ - Log and make an HTTP response in case of success or failure. For failure, log - a stack/exception trace as well. - - :param output: the orthanc output object as given to the callback function - :param http_code: HTTP code to return - :param http_message: message to return in HTTP body - :param log_message: message to log, if different to http_message. 
- If None, do not log at all if success - """ - http_json_str = json.dumps({"Message": http_message}) - if http_code == 200: # noqa: PLR2004 - if log_message: - orthanc.LogInfo(log_message) - output.AnswerBuffer(http_json_str, "application/json") - else: - orthanc.LogWarning(f"{log_message or http_message}:\n{traceback.format_exc()}") - # length needed in bytes not chars - output.SendHttpStatus(http_code, http_json_str, len(http_json_str.encode())) - - -def SendResourceToAnon(output, uri, **request): # noqa: ARG001 - """Send an existing study to the anon modality""" - orthanc.LogWarning(f"Received request to send study to anon modality: {request}") - if not should_auto_route(): - log_and_return_http( - output, - 200, - "Auto-routing is not enabled", - f"Auto-routing is not enabled, dropping request {request}", - ) - return - try: - body = json.loads(request["body"]) - resource_id = body["ResourceId"] - except (json.decoder.JSONDecodeError, KeyError): - err_str = "Body needs to be JSON with key ResourceId" - log_and_return_http(output, 400, err_str) - except: - err_str = "Other error decoding request" - log_and_return_http(output, 500, err_str) - raise - - try: - orthanc_anon_store_study(resource_id) - except orthanc.OrthancException: - err_str = "Failed contacting downstream server" - log_and_return_http(output, 502, err_str) - except: - err_str = "Misc error sending study to anon" - log_and_return_http(output, 500, err_str) - raise - else: - log_and_return_http(output, 200, "OK") - - -orthanc.RegisterOnChangeCallback(OnChange) orthanc.RegisterReceivedInstanceCallback(ReceivedInstanceCallback) orthanc.RegisterRestCallback("/heart-beat", OnHeartBeat) -orthanc.RegisterRestCallback("/send-to-anon", SendResourceToAnon) diff --git a/pixl_core/README.md b/pixl_core/README.md index 25e803d78..8116ba49e 100644 --- a/pixl_core/README.md +++ b/pixl_core/README.md @@ -10,21 +10,25 @@ Specifically, it defines: - The [RabbitMQ queue](#patient-queue) implementation shared by the Imaging API and any other APIs - The PIXL `postgres` internal database for storing exported images and extracts from the messages processed by the CLI driver -- The [`ParquetExport`](./src/core/exports.py) class for exporting OMOP and EMAP extracts to +- The [`ParquetExport`](./src/core/exports.py) class for exporting OMOP extracts to parquet files -- Handling of [uploads over FTPS](./src/core/upload.py), used to transfer images and parquet files +- Pydantic models for [project configuration](./src/core/project_config/pixl_config_model.py) +- [Secrets management](./src/core/project_config/secrets.py) via an Azure Key Vault. +- Handling of [uploads over FTPS](./src/core/uploader/_ftps.py), used to transfer images and parquet files to the DSH (Data Safe Haven) +- [Uploading DICOM files to a DICOMWeb server](./src/core/uploader/_dicomweb.py) +- [Uploading DICOM files to XNAT](./src/core/uploader/_xnat.py) ## Installation ```bash -pip install -e . +python -m pip install -e . ``` ## Testing ```bash -pip install -e .[test] && pip install -e ../pytest-pixl +python -m pip install -e ../pytest-pixl -e ".[test]" pytest ``` @@ -59,9 +63,9 @@ is geared towards stability. The asynchronous mode of transferring messages is a it is based on the [asyncio event loop](https://docs.python.org/3/library/asyncio-eventloop.html). We set the maximum number of message which can be being processed at once using the `PIXL_MAX_MESSAGES_IN_FLIGHT` -variable in the `.env` file. 
Chest X-rays take about 5 seconds to return so the default of 100 allows for -a maximum of 20 messages per second. The VNA should be able to cope with 12-15 per second, so this allows -our rate limiting to fit within this range. +variable in the `.env` file. The VNA allows for 5 DICOM transfers at a single point in time, so the default is 5. +We recommend allowing more concurrent jobs using `ORTHANC_CONCURRENT_JOBS`, to allow for resource modification +and export of stable DICOM to orthanc-anon while still pulling from the VNA. ### OMOP ES files @@ -104,11 +108,11 @@ The `project_config` module provides the functionality to handle ## Uploading to an FTPS server The `core.uploader` module implements functionality to upload DICOM images and parquet files to -several destinations. This requires the following environment variables to be set: +several destinations. The `Uploader` abstract class provides a consistent interface for uploading files. Child classes such as the `FTPSUploader` implement the actual upload functionality. The credentials required for -uploading are queried from an **Azure Keyvault** instance (implemented in `_secrets.py`), for which +uploading are queried from an **Azure Keyvault** instance (implemented in `core.project_config.secrets`), for which the setup instructions are in the [top-level README](../README.md#project-secrets) When an extract is ready to be published to the DSH, the PIXL pipeline will upload the **Public** @@ -161,3 +165,72 @@ so we can have different (or no) endpoints for different projects. For [testing](../test/README.md) we set up an additional Orthanc server that acts as a DICOMweb server, using the vanilla Orthanc Docker image with the DICOMWeb plugin enabled. +## Uploading to an XNAT instance + +PIXL also supports sending DICOM images to an [XNAT](https://www.xnat.org/) instance. + +The `XNATUploader` class in `core.uploader._xnat` handles downloading anonymised images from Orthanc and +sending to XNAT. [XNATPy](https://xnat.readthedocs.io/en/latest/) is used to upload the +data to XNAT using the +[`DICOM-zip` Import Handler](https://wiki.xnat.org/xnat-api/image-session-import-service-api#ImageSessionImportServiceAPI-SelectAnImportHandler). + +To use XNAT as an endpoint, first: + +- a user will need to be created on the XNAT instance to perform the upload with PIXL +- a project will need to be created on the XNAT instance. It is assumed the user created for uploading + data does not have admin permissions to create new projects + +### XNAT endpoint Configuration + +Configuration for XNAT as an endpoint is done by storing the following secrets in an Azure Key Vault: + +```bash +"${az_prefix}--xnat--host" # hostname for the XNAT instance +"${az_prefix}--xnat--username" # username of user to perform upload +"${az_prefix}--xnat--password" # password of user to perform upload +"${az_prefix}--xnat--port" # port for connecting to the XNAT instance +``` + +where `az_prefix` is either the project slug or is defined in the [project configuration file](../template_config.yaml) +as `azure_kv_alias`. + +> Note +> +> The project name defined in the configuration file **must** match the +> [XNAT Project ID](https://wiki.xnat.org/documentation/creating-and-managing-projects). If the project name does +> not match the XNAT Project ID, the upload will fail. 
+ +The following environment variables must also be set to determine the XNAT destination and how to handle conflicts +with existing session and series data: + +`"XNAT_DESTINATION"`: + +- if `"/archive"`, will send data straight to the archive +- if `"/prearchive"`, will send data to the [prearchive](https://wiki.xnat.org/documentation/using-the-prearchive) + for manual review before archiving + +`"XNAT_OVERWRITE"`: + +- if `"none"`, will error if the session already exists. Upon error, data will be sent to the prearchive, + even if `XNAT_DESTINATION` is `/archive` +- if `"append"`, will append the data to an existing session or create a new one if it doesn't exist. + If there is a conflict with existing series, an error will be raised. +- if `"delete"`, will append the data to an existing session or create a new one if it doesn't exist. + If there is a conflict with existing series, the existing series will be overwritten. + +### XNAT testing setup + +For unit testing, we use [`xnat4tests`](https://github.com/Australian-Imaging-Service/xnat4tests) to spin up an XNAT +instance in a Docker container. + +Secrets are not used for these unit testing. Instead, the following environment variables are used to configure XNAT for testing: + +- `"XNAT_HOST"` +- `"XNAT_USER_NAME"` +- `"XNAT_PASSWORD"` +- `"XNAT_PORT"` + +Note, it can take several minutes for the server to start up. + +Once the server has started, you can log in by visiting `http://localhost:8080` with the username and password set +in the `XNAT_USER_NAME` and `XNAT_PASSWORD` environment variables. diff --git a/pixl_core/pyproject.toml b/pixl_core/pyproject.toml index 05f958918..e57aa6881 100644 --- a/pixl_core/pyproject.toml +++ b/pixl_core/pyproject.toml @@ -1,40 +1,47 @@ [project] name = "core" -version = "0.0.1" +version = "0.2.0rc0" authors = [{ name = "PIXL core functionality" }] description = "" readme = "README.md" requires-python = ">=3.9" classifiers = ["Programming Language :: Python :: 3"] dependencies = [ - "azure-identity==1.12.0", + "aio_pika==9.5.3", + "azure-identity==1.19.0", "azure-keyvault==4.2.0", - "fastapi==0.109.1", - "token-bucket==0.3.0", - "python-decouple==3.6", + "fastapi==0.115.6", + "jsonpickle==4.0.0", + "loguru==0.7.3", + "pandas==2.2.3", + "pika==1.3.2", + "psycopg2-binary==2.9.10", + "pyarrow==18.1.0", + "pydantic==2.10.3", + "python-decouple==3.8", "python-slugify==8.0.4", - "pika==1.3.1", - "aio_pika==8.2.4", - "requests==2.31.0", - "jsonpickle==3.0.2", - "loguru==0.7.2", - "sqlalchemy==2.0.24", - "psycopg2-binary==2.9.9", - "pandas==2.2.1", - "pyarrow==14.0.1", - "PyYAML==6.0.1", - "pydantic==2.6.3", + "PyYAML==6.0.2", + "requests==2.32.3", + "sqlalchemy==2.0.36", + "token-bucket==0.3.0", + "xnat==0.6.2", ] [project.optional-dependencies] test = [ - "pytest==7.4.2", - "pytest-cov==5.0.0", - "pytest-asyncio==0.21.1", - "httpx==0.24.*", - "pytest-pixl", + "httpx==0.28.*", + "pytest==8.3.4", + "pytest-asyncio==0.24.0", + "pytest-check==2.4.1", + "pytest-cov==6.0.0", + "pytest-pixl==0.2.0rc0", + "xnat4tests==0.3.12", +] +dev = [ + "mypy", + "pre-commit", + "ruff", ] -dev = ["mypy", "pre-commit", "ruff"] [build-system] requires = ["setuptools>=61.0"] @@ -61,6 +68,7 @@ exclude_also = [ "if 0:", "if __name__ == .__main__.:", "if TYPE_CHECKING:", + "if typing.TYPE_CHECKING", "class .*\\bProtocol\\):", "@(abc\\.)?abstractmethod", -] \ No newline at end of file +] diff --git a/pixl_core/src/core/db/models.py b/pixl_core/src/core/db/models.py index ed330b833..e45fc4e53 100644 --- 
a/pixl_core/src/core/db/models.py +++ b/pixl_core/src/core/db/models.py @@ -52,15 +52,17 @@ class Image(Base): accession_number: Mapped[str] study_date: Mapped[Date] = mapped_column(Date()) mrn: Mapped[str] - hashed_identifier: Mapped[Optional[str]] + study_uid: Mapped[Optional[str]] + pseudo_study_uid: Mapped[Optional[str]] exported_at: Mapped[DateTime] = mapped_column(DateTime(timezone=True), nullable=True) extract: Mapped[Extract] = relationship() extract_id: Mapped[int] = mapped_column(ForeignKey("extract.extract_id")) + pseudo_patient_id: Mapped[Optional[str]] def __repr__(self) -> str: """Nice representation for printing.""" return ( f"<{self.__class__.__name__} " - f"{self.image_id=} {self.accession_number=} {self.mrn=} " - f"{self.hashed_identifier} {self.extract_id}>" + f"{self.image_id=} {self.accession_number=} {self.mrn=} {self.study_uid=}" + f"{self.pseudo_study_uid} {self.extract_id}>" ).replace(" self.", " ") diff --git a/pixl_core/src/core/db/queries.py b/pixl_core/src/core/db/queries.py index 617ea4e4c..1801cb2a5 100644 --- a/pixl_core/src/core/db/queries.py +++ b/pixl_core/src/core/db/queries.py @@ -34,28 +34,28 @@ engine = create_engine(url) -def have_already_exported_image(image_hashed_id: str) -> bool: +def have_already_exported_image(pseudo_study_uid: str) -> bool: """Check if the given image has already been exported.""" PixlSession = sessionmaker(engine) with PixlSession() as pixl_session, pixl_session.begin(): - existing_image = _query_existing_image(pixl_session, image_hashed_id) + existing_image = _query_existing_image(pixl_session, pseudo_study_uid) return existing_image.exported_at is not None -def update_exported_at(hashed_value: str, date_time: datetime) -> None: +def update_exported_at(pseudo_study_uid: str, date_time: datetime) -> None: """Update the `exported_at` field for an image in the PIXL database""" PixlSession = sessionmaker(engine) with PixlSession() as pixl_session, pixl_session.begin(): - existing_image = _query_existing_image(pixl_session, hashed_value) + existing_image = _query_existing_image(pixl_session, pseudo_study_uid) existing_image.exported_at = date_time pixl_session.add(existing_image) -def _query_existing_image(pixl_session: Session, hashed_value: str) -> Image: +def _query_existing_image(pixl_session: Session, pseudo_study_uid: str) -> Image: existing_image: Image = ( pixl_session.query(Image) .filter( - Image.hashed_identifier == hashed_value, + Image.pseudo_study_uid == pseudo_study_uid, ) .one() ) diff --git a/pixl_core/src/core/dicom_tags.py b/pixl_core/src/core/dicom_tags.py index 6f370481a..8f6d6ca81 100644 --- a/pixl_core/src/core/dicom_tags.py +++ b/pixl_core/src/core/dicom_tags.py @@ -53,7 +53,7 @@ class PrivateDicomTag: # LO = Long string max 64 # https://dicom.nema.org/medical/dicom/current/output/chtml/part05/sect_6.2.html vr: str - unknown_value: Optional[str] = "__pixl_unknown_value__" + unknown_value: Optional[str | bytes] = "__pixl_unknown_value__" def acceptable_private_block(self, actual_private_block: int) -> bool: """ @@ -68,19 +68,8 @@ def acceptable_private_block(self, actual_private_block: int) -> bool: return self.required_private_block == actual_private_block -DICOM_TAG_PROJECT_NAME = PrivateDicomTag( - group_id=0x000D, - required_private_block=0x10, - offset_id=0x01, - creator_string="UCLH PIXL", - tag_nickname="UCLHPIXLProjectName", - vr="LO", # LO = Long string max 64 - unknown_value="__pixl_unknown_value__", -) - - def add_private_tag( - dataset: Dataset, private_tag: PrivateDicomTag, value: Optional[str] 
= None + dataset: Dataset, private_tag: PrivateDicomTag, value: Optional[str | bytes] = None ) -> PrivateBlock: """ Add a private tag to an existing DICOM dataset. diff --git a/pixl_core/src/core/exceptions.py b/pixl_core/src/core/exceptions.py index 2f4c38024..63b06b7e8 100644 --- a/pixl_core/src/core/exceptions.py +++ b/pixl_core/src/core/exceptions.py @@ -24,5 +24,17 @@ class PixlDiscardError(RuntimeError): """ +class PixlSkipInstanceError(RuntimeError): + """Study instance should be ignored.""" + + class PixlRequeueMessageError(RuntimeError): """Requeue PIXL message.""" + + +class PixlOutOfHoursError(Exception): + """Nack and requeue PIXL message.""" + + +class PixlStudyNotInPrimaryArchiveError(Exception): + """Study not in primary archive.""" diff --git a/pixl_core/src/core/patient_queue/_base.py b/pixl_core/src/core/patient_queue/_base.py index 9cc40b0dd..aa9c19922 100644 --- a/pixl_core/src/core/patient_queue/_base.py +++ b/pixl_core/src/core/patient_queue/_base.py @@ -21,7 +21,7 @@ class PixlQueueInterface: - def __init__( # noqa: PLR0913 + def __init__( self, queue_name: str, host: str = "localhost", @@ -65,7 +65,11 @@ def __enter__(self) -> Any: if self._channel is None or self._channel.is_closed: self._channel = self._connection.channel() - self._queue = self._channel.queue_declare(queue=self.queue_name, durable=True) + self._queue = self._channel.queue_declare( + queue=self.queue_name, + durable=True, + arguments={"x-max-priority": 5}, + ) logger.debug("Connected to {}", self.queue_name) return self diff --git a/pixl_core/src/core/patient_queue/message.py b/pixl_core/src/core/patient_queue/message.py index a6cdfc3be..f6d9d3f35 100644 --- a/pixl_core/src/core/patient_queue/message.py +++ b/pixl_core/src/core/patient_queue/message.py @@ -28,10 +28,11 @@ @dataclass class Message: - """Class to represent a message containing the relevant information for a study.""" + """Representation of a RabbitMQ message containing the information to identify a DICOM study.""" mrn: str accession_number: str + study_uid: str study_date: date procedure_occurrence_id: int project_name: str @@ -40,7 +41,9 @@ class Message: @property def identifier(self) -> str: """Identifier for message""" - return f"Message({self.mrn=} {self.accession_number=})".replace("self.", "") + return f"Message({self.mrn=} {self.accession_number=} {self.study_uid=})".replace( + "self.", "" + ) def serialise(self, *, deserialisable: bool = True) -> bytes: """ diff --git a/pixl_core/src/core/patient_queue/producer.py b/pixl_core/src/core/patient_queue/producer.py index 9e36d4912..f46c00774 100644 --- a/pixl_core/src/core/patient_queue/producer.py +++ b/pixl_core/src/core/patient_queue/producer.py @@ -30,10 +30,11 @@ class PixlProducer(PixlBlockingInterface): """Generic publisher for RabbitMQ""" - def publish(self, messages: list[Message]) -> None: + def publish(self, messages: list[Message], priority: int) -> None: """ Sends a list of serialised messages to a queue. 
:param messages: list of messages to be sent to queue + :param priority: priority of the messages, from 1 (lowest) to 5 (highest) """ logger.info("Publishing {} messages to queue: {}", len(messages), self.queue_name) if len(messages) > 0: @@ -43,9 +44,14 @@ def publish(self, messages: list[Message]) -> None: exchange="", routing_key=self.queue_name, body=serialised_msg, - properties=BasicProperties(delivery_mode=DeliveryMode.Persistent), + properties=BasicProperties( + delivery_mode=DeliveryMode.Persistent, + priority=priority, + ), + ) + logger.debug( + "Message {} published to queue {} with priority", msg, self.queue_name, priority ) - logger.debug("Message {} published to queue {}", msg, self.queue_name) else: logger.warning("List of messages is empty so nothing will be published to queue.") diff --git a/pixl_core/src/core/patient_queue/subscriber.py b/pixl_core/src/core/patient_queue/subscriber.py index 7be2141e0..dd43737c1 100644 --- a/pixl_core/src/core/patient_queue/subscriber.py +++ b/pixl_core/src/core/patient_queue/subscriber.py @@ -22,9 +22,15 @@ import aio_pika from decouple import config -from core.exceptions import PixlDiscardError, PixlRequeueMessageError +from core.exceptions import ( + PixlDiscardError, + PixlOutOfHoursError, + PixlRequeueMessageError, + PixlStudyNotInPrimaryArchiveError, +) from core.patient_queue._base import PixlQueueInterface from core.patient_queue.message import deserialise +from core.patient_queue.producer import PixlProducer if TYPE_CHECKING: from collections.abc import Awaitable, Callable @@ -45,6 +51,7 @@ def __init__( self, queue_name: str, token_bucket: TokenBucket, + token_bucket_key: str, callback: Callable[[Message], Awaitable[None]], ) -> None: """ @@ -53,6 +60,7 @@ def __init__( """ super().__init__(queue_name=queue_name) self.token_bucket = token_bucket + self.token_bucket_key = token_bucket_key self._callback = callback @property @@ -67,23 +75,50 @@ async def __aenter__(self) -> Self: max_in_flight = config("PIXL_MAX_MESSAGES_IN_FLIGHT", cast=int) logger.info("Pika will consume up to {} messages concurrently", max_in_flight) await self._channel.set_qos(prefetch_count=max_in_flight) - self._queue = await self._channel.declare_queue(self.queue_name, durable=True) + self._queue = await self._channel.declare_queue( + self.queue_name, + durable=True, + arguments={"x-max-priority": 5}, + ) return self async def _process_message(self, message: AbstractIncomingMessage) -> None: - if not self.token_bucket.has_token: + if not self.token_bucket.has_token(key=self.token_bucket_key): await asyncio.sleep(1) await message.reject(requeue=True) return pixl_message: Message = deserialise(message.body) - logger.info("Starting message {}", pixl_message.identifier) + logger.debug("Picked up from queue: {}", pixl_message.identifier) try: await self._callback(pixl_message) except PixlRequeueMessageError as requeue: logger.trace("Requeue message: {} from {}", pixl_message.identifier, requeue) await asyncio.sleep(1) await message.reject(requeue=True) + except PixlStudyNotInPrimaryArchiveError as discard: + logger.info( + "Discard message: {} from {}. 
Sending to secondary imaging queue with priority {}.", + pixl_message.identifier, + discard, + message.priority, + ) + await asyncio.sleep(1) + await message.reject(requeue=False) + with PixlProducer( + queue_name="imaging-secondary", + host=config("RABBITMQ_HOST"), + port=config("RABBITMQ_PORT", cast=int), + username=config("RABBITMQ_USERNAME"), + password=config("RABBITMQ_PASSWORD"), + ) as producer: + producer.publish([pixl_message], priority=message.priority) + except PixlOutOfHoursError as nack_requeue: + logger.trace( + "Nack and requeue message: {} from {}", pixl_message.identifier, nack_requeue + ) + await asyncio.sleep(10) + await message.nack(requeue=True) except PixlDiscardError as exception: logger.warning("Failed message {}: {}", pixl_message.identifier, exception) await ( diff --git a/pixl_core/src/core/project_config/pixl_config_model.py b/pixl_core/src/core/project_config/pixl_config_model.py index 2ae5a5305..407daee61 100644 --- a/pixl_core/src/core/project_config/pixl_config_model.py +++ b/pixl_core/src/core/project_config/pixl_config_model.py @@ -20,8 +20,8 @@ from pathlib import Path from typing import Any, Optional -import yaml # type: ignore [import-untyped] -from decouple import Config, RepositoryEmpty, RepositoryEnv # type: ignore [import-untyped] +import yaml +from decouple import Config, RepositoryEmpty, RepositoryEnv from loguru import logger from pydantic import BaseModel, field_validator @@ -38,19 +38,19 @@ def load_project_config(project_slug: str) -> PixlConfig | Any: Project needs to have a corresponding yaml file in the `$PROJECT_CONFIGS_DIR` directory. """ configpath = Path(config("PROJECT_CONFIGS_DIR")) / f"{project_slug}.yaml" - logger.debug("Loading config for {} from {}", project_slug, configpath) try: - return _load_and_validate(configpath) + return load_config_and_validate(configpath) except FileNotFoundError as error: msg = f"No config for {project_slug}. Please submit PR and redeploy." raise PixlDiscardError(msg) from error -def _load_and_validate(filename: Path) -> PixlConfig | Any: +def load_config_and_validate(filename: Path) -> PixlConfig | Any: """ Load configuration from a yaml file. :param filename: Path to the yaml file """ + logger.debug("Loading config from {}", filename) yaml_data = yaml.safe_load(filename.read_text()) return PixlConfig.model_validate(yaml_data) @@ -65,7 +65,7 @@ class TagOperationFiles(BaseModel): """Tag operations files for a project. 
At least a base file is required.""" base: list[Path] - manufacturer_overrides: Optional[Path] + manufacturer_overrides: Optional[list[Path]] @field_validator("base") @classmethod @@ -87,16 +87,24 @@ def _valid_tag_operations(cls, tag_ops_files: list[str]) -> list[Path]: @field_validator("manufacturer_overrides") @classmethod - def _valid_manufacturer_overrides(cls, tags_file: str) -> Optional[Path]: - if not tags_file: + def _valid_manufacturer_overrides(cls, tag_files: list[str]) -> Optional[list[Path]]: + if not tag_files: return None - tags_file_path = Path(config("PROJECT_CONFIGS_DIR")) / "tag-operations" / tags_file - # Pydantic does not appear to automatically check if the file exists - if not tags_file_path.exists(): - # For pydantic, you must raise a ValueError (or AssertionError) - raise ValueError from FileNotFoundError(tags_file_path) - return tags_file_path + tag_file_paths = [] + for tag_file in tag_files: + tag_file_path = ( + Path(config("PROJECT_CONFIGS_DIR")) + / "tag-operations" + / "manufacturer-overrides" + / tag_file + ) + # Pydantic does not appear to automatically check if the file exists + if not tag_file_path.exists(): + # For pydantic, you must raise a ValueError (or AssertionError) + raise ValueError from FileNotFoundError(tag_file_path) + tag_file_paths.append(tag_file_path) + return tag_file_paths class _DestinationEnum(str, Enum): @@ -105,6 +113,7 @@ class _DestinationEnum(str, Enum): none = "none" ftps = "ftps" dicomweb = "dicomweb" + xnat = "xnat" class _Destination(BaseModel): @@ -114,8 +123,8 @@ class _Destination(BaseModel): @field_validator("parquet") @classmethod def valid_parquet_destination(cls, v: str) -> str: - if v == "dicomweb": - msg = "Parquet destination cannot be dicomweb" + if v in ("dicomweb", "xnat"): + msg = f"Parquet destination cannot be {v}" raise ValueError(msg) return v diff --git a/pixl_core/src/core/project_config/tag_operations.py b/pixl_core/src/core/project_config/tag_operations.py index 6f7f3707e..d7ec56d60 100644 --- a/pixl_core/src/core/project_config/tag_operations.py +++ b/pixl_core/src/core/project_config/tag_operations.py @@ -40,12 +40,12 @@ def load_tag_operations(pixl_config: PixlConfig) -> TagOperations: :param pixl_config: Project configuration """ base = [_load_scheme(f) for f in pixl_config.tag_operation_files.base] - manufacturer_overrides = None + manufacturer_overrides = [] if pixl_config.tag_operation_files.manufacturer_overrides: - manufacturer_overrides = _load_scheme( - pixl_config.tag_operation_files.manufacturer_overrides - ) + for override_file in pixl_config.tag_operation_files.manufacturer_overrides: + override_dict = _load_scheme(override_file) + manufacturer_overrides.append(override_dict) return TagOperations(base=base, manufacturer_overrides=manufacturer_overrides) @@ -61,7 +61,7 @@ class TagOperations(BaseModel): """ base: list[list[dict]] - manufacturer_overrides: Optional[list[dict]] + manufacturer_overrides: Optional[list[list[dict]]] @field_validator("base") @classmethod @@ -77,19 +77,20 @@ def _valid_base_tags(cls, base_tags: list[list[dict]]) -> list[list[dict]]: @field_validator("manufacturer_overrides") @classmethod def _valid_manufacturer_overrides( - cls, manufacturer_overrides: Optional[list[dict]] - ) -> Optional[list[dict]]: + cls, manufacturer_overrides: Optional[list[list[dict]]] + ) -> Optional[list[list[dict]]]: if manufacturer_overrides is None: return None - if not isinstance(manufacturer_overrides, list): - msg = "Tags must be a list of dictionaries." 
- raise TypeError(msg) - for override in manufacturer_overrides: - if "manufacturer" not in override or "tags" not in override: - msg = "Manufacturer overrides must have 'manufacturer' and 'tags' keys." - raise ValueError(msg) - for tag in override["tags"]: - _check_tag_format(tag) + for manufacturer_override in manufacturer_overrides: + if not isinstance(manufacturer_override, list): + msg = "Tags must be a list of dictionaries." + raise TypeError(msg) + for override in manufacturer_override: + if "manufacturer" not in override or "tags" not in override: + msg = "Manufacturer overrides must have 'manufacturer' and 'tags' keys." + raise ValueError(msg) + for tag in override["tags"]: + _check_tag_format(tag) return manufacturer_overrides diff --git a/pixl_core/src/core/token_buffer/tokens.py b/pixl_core/src/core/token_buffer/tokens.py index d570d162c..027ea44d6 100644 --- a/pixl_core/src/core/token_buffer/tokens.py +++ b/pixl_core/src/core/token_buffer/tokens.py @@ -14,6 +14,8 @@ # limitations under the License. from __future__ import annotations +import typing + import token_bucket as tb @@ -22,19 +24,20 @@ class TokenBucket(tb.Limiter): Rate limitation is governed by the existence of tokens in a bucket, whereby the bucket is refilled every second. As long as a token can be retrieved, an item can be downloaded. Should there be no more tokens inside the bucket, the request is - added back into the queue. Note that the Limiter object can operate the rate on + added back into the queue. + + Note that the Limiter object can operate the rate on different "streams", which are specified by a string object, also called key. This - key has been hard coded here to "pixl" as we do not expect the token bucket to be - responsible for more than one stream at this point in time. + key has been hard coded here to accept one of two values: 'primary' or 'secondary', + representing two different streams. """ - key = b"pixl" + _keys: typing.ClassVar = ["primary", "secondary"] def __init__( self, rate: float = 5, capacity: int = 5, - storage: tb.StorageBase = None, ) -> None: """ Uses the token bucket implementation from `Falconry` @@ -46,18 +49,19 @@ def __init__( :param storage: Type of storage used to hold the tokens """ self._zero_rate = False - storage = tb.MemoryStorage() if rate == 0: rate = 1 # tb.Limiter does not allow zero rates, so keep track... 
self._zero_rate = True - super().__init__(rate=rate, capacity=capacity, storage=storage) + super().__init__(rate=rate, capacity=capacity, storage=tb.MemoryStorage()) - @property - def has_token(self) -> bool: - """Does this token bucket have a token?""" - return not self._zero_rate and bool(self.consume(self.key)) + def has_token(self, key: str) -> bool: + """Does this token bucket have a token for the given key?""" + if key not in self._keys: + message = f"Key must be one of {self._keys}, not '{key}'" + raise ValueError(message) + return not self._zero_rate and bool(self.consume(key)) @property def rate(self) -> float: diff --git a/pixl_core/src/core/uploader/__init__.py b/pixl_core/src/core/uploader/__init__.py index 6d4be3568..b87bc9d00 100644 --- a/pixl_core/src/core/uploader/__init__.py +++ b/pixl_core/src/core/uploader/__init__.py @@ -29,6 +29,7 @@ from ._dicomweb import DicomWebUploader from ._ftps import FTPSUploader +from ._xnat import XNATUploader if TYPE_CHECKING: from core.uploader.base import Uploader @@ -37,7 +38,11 @@ # Intenitonally defined in __init__.py to avoid circular imports def get_uploader(project_slug: str) -> Uploader: """Uploader Factory, returns uploader instance based on destination.""" - choices: dict[str, type[Uploader]] = {"ftps": FTPSUploader, "dicomweb": DicomWebUploader} + choices: dict[str, type[Uploader]] = { + "ftps": FTPSUploader, + "dicomweb": DicomWebUploader, + "xnat": XNATUploader, + } project_config = load_project_config(project_slug) destination = project_config.destination.dicom diff --git a/pixl_core/src/core/uploader/_dicomweb.py b/pixl_core/src/core/uploader/_dicomweb.py index 90e35e057..90603395d 100644 --- a/pixl_core/src/core/uploader/_dicomweb.py +++ b/pixl_core/src/core/uploader/_dicomweb.py @@ -17,7 +17,7 @@ from __future__ import annotations import json -from typing import Optional +from typing import TYPE_CHECKING, Optional import requests from decouple import config # type: ignore [import-untyped] @@ -25,6 +25,9 @@ from core.uploader.base import Uploader +if TYPE_CHECKING: + from core.uploader._orthanc import StudyTags + class DicomWebUploader(Uploader): """Upload strategy for a DicomWeb server.""" @@ -51,10 +54,11 @@ def _set_config(self) -> None: self.http_timeout = int(config("HTTP_TIMEOUT", default=30)) def _upload_dicom_image( - self, study_id: str, pseudo_anon_image_id: str, project_slug: str + self, + study_id: str, + study_tags: StudyTags, # noqa: ARG002 ) -> None: """Upload a Dicom resource to the DicomWeb server from within Orthanc.""" - logger.info("Starting DICOMweb upload of '{}' for {}", pseudo_anon_image_id, project_slug) if not self._check_dicomweb_server_exists(): logger.info("Creating new DICOMWeb credentials") self._setup_dicomweb_credentials() @@ -62,7 +66,7 @@ def _upload_dicom_image( self._validate_dicomweb_server() headers = {"content-type": "application/json", "accept": "application/dicom+json"} - payload = {"Resources": [study_id], "Synchronous": False} + payload = {"Resources": [study_id], "Synchronous": True} try: response = requests.post( @@ -76,7 +80,6 @@ def _upload_dicom_image( except requests.exceptions.RequestException: logger.error("Failed to send via stow") raise - logger.info("Finished DICOMweb upload of '{}'", pseudo_anon_image_id) def _check_dicomweb_server_exists(self) -> bool: """Checks if the dicomweb server exists.""" @@ -86,9 +89,7 @@ def _check_dicomweb_server_exists(self) -> bool: timeout=self.http_timeout, ) success_code = 200 - if response.status_code != success_code: - return False - 
return True + return response.status_code == success_code def _validate_dicomweb_server(self) -> None: """Check if the DICOMweb server is reachable from within the Orthanc instance.""" @@ -122,7 +123,6 @@ def _setup_dicomweb_credentials(self) -> None: "Username": self.endpoint_user, "Password": self.endpoint_password, "HasDelete": True, - "Timeout": self.http_timeout, } headers = {"content-type": "application/json"} diff --git a/pixl_core/src/core/uploader/_ftps.py b/pixl_core/src/core/uploader/_ftps.py index 00fc20517..2cac14d4d 100644 --- a/pixl_core/src/core/uploader/_ftps.py +++ b/pixl_core/src/core/uploader/_ftps.py @@ -30,6 +30,7 @@ from socket import socket from core.exports import ParquetExport + from core.uploader._orthanc import StudyTags from loguru import logger @@ -77,13 +78,17 @@ def _set_config(self) -> None: self.port = int(self.keyvault.fetch_secret(f"{az_prefix}--ftp--port")) def _upload_dicom_image( - self, study_id: str, pseudo_anon_image_id: str, project_slug: str + self, + study_id: str, + study_tags: StudyTags, ) -> None: """Upload a DICOM image to the FTPS server.""" - logger.info("Starting FTPS upload of '{}'", pseudo_anon_image_id) zip_content = get_study_zip_archive(study_id) - self.send_via_ftps(zip_content, pseudo_anon_image_id, remote_directory=project_slug) - logger.info("Finished FTPS upload of '{}'", pseudo_anon_image_id) + self.send_via_ftps( + zip_content, + study_tags.pseudo_anon_image_id, + remote_directory=self.project_slug, + ) def send_via_ftps( self, zip_content: BinaryIO, pseudo_anon_image_id: str, remote_directory: str diff --git a/pixl_core/src/core/uploader/_orthanc.py b/pixl_core/src/core/uploader/_orthanc.py index 05dfd5a3c..e00606e3e 100644 --- a/pixl_core/src/core/uploader/_orthanc.py +++ b/pixl_core/src/core/uploader/_orthanc.py @@ -16,6 +16,7 @@ from __future__ import annotations import json +from dataclasses import dataclass from io import BytesIO import requests @@ -34,20 +35,30 @@ def get_study_zip_archive(resourceId: str) -> BytesIO: return BytesIO(response_study.content) -def get_tags_by_study(study_id: str) -> tuple[str, str]: +@dataclass +class StudyTags: + """Tags for a study.""" + + pseudo_anon_image_id: str + patient_id: str + + +def get_tags_by_study(study_id: str) -> StudyTags: """ Queries the Orthanc server at the study level, returning the - PatientID and UCLHPIXLProjectName DICOM tags. - BEWARE: post-anonymisation, the PatientID is NOT - the patient ID, it's the pseudo-anonymised ID generated - from the hash of the concatenated Patient ID (MRN) and Accession Number fields. + Study Instance UID and PatientID DICOM tags. + BEWARE: post-anonymisation, the Study Instance UID is NOT + the original Study Instance UID; it is a pseudo-anonymised UID generated randomly.
""" query = f"{ORTHANC_ANON_URL}/studies/{study_id}/shared-tags?simplify=true" fail_msg = "Could not query study for resource '%s'" response_study = _query_orthanc_anon(study_id, query, fail_msg) json_response = json.loads(response_study.content.decode()) - return json_response["PatientID"], json_response["UCLHPIXLProjectName"] + return StudyTags( + pseudo_anon_image_id=json_response["StudyInstanceUID"], + patient_id=json_response["PatientID"], + ) def _query_orthanc_anon(resourceId: str, query: str, fail_msg: str) -> requests.Response: diff --git a/pixl_core/src/core/uploader/_xnat.py b/pixl_core/src/core/uploader/_xnat.py new file mode 100644 index 000000000..a8c345605 --- /dev/null +++ b/pixl_core/src/core/uploader/_xnat.py @@ -0,0 +1,103 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Uploader subclass for XNAT.""" + +from __future__ import annotations + +import os +from typing import TYPE_CHECKING, BinaryIO, Optional + +import xnat + +from core.uploader.base import Uploader + +from ._orthanc import get_study_zip_archive + +if TYPE_CHECKING: + from xnat.core import XNATBaseObject + + from core.exports import ParquetExport + from core.uploader._orthanc import StudyTags + + +class XNATUploader(Uploader): + """Upload strategy for an XNAT server.""" + + def __init__(self, project_slug: str, keyvault_alias: Optional[str]) -> None: + """Create instance of parent class""" + super().__init__(project_slug, keyvault_alias) + + def _set_config(self) -> None: + """ + Configure XNATUploader. + + "XNAT_DESTINATION": + - if "/archive", will send data straight to the archive + - if "/prearchive", will send data to the prearchive for manual review before archiving + + "XNAT_OVERWRITE": + - if 'none', will error if the session already exists. + - if 'append', will append the data to an existing session or create a new one if it + doesn't exist. + If there is a conflict with existing series, an error will be raised. + - if 'delete', will append the data to an existing session or create a new one if it + doesn't exist. + If there is a conflict with existing series, the existing series will be overwritten. 
+ """ + # Use the Azure KV alias as prefix if it exists, otherwise use the project name + az_prefix = self.keyvault_alias + az_prefix = az_prefix if az_prefix else self.project_slug + + self.host = self.keyvault.fetch_secret(f"{az_prefix}--xnat--host") + self.user = self.keyvault.fetch_secret(f"{az_prefix}--xnat--username") + self.password = self.keyvault.fetch_secret(f"{az_prefix}--xnat--password") + self.port = int(self.keyvault.fetch_secret(f"{az_prefix}--xnat--port")) + self.url = f"https://{self.host}:{self.port}" + self.destination = os.environ["XNAT_DESTINATION"] + self.overwrite = os.environ["XNAT_OVERWRITE"] + + def _upload_dicom_image( + self, + study_id: str, + study_tags: StudyTags, + ) -> None: + """Upload a DICOM image to the XNAT instance.""" + zip_content = get_study_zip_archive(study_id) + self.upload_to_xnat(zip_content, study_tags) + + def upload_to_xnat( + self, + zip_content: BinaryIO, + study_tags: StudyTags, + ) -> XNATBaseObject: + with xnat.connect( + server=self.url, + user=self.user, + password=self.password, + ) as session: + session.services.import_( + data=zip_content, + overwrite=self.overwrite, + destination=self.destination, + project=self.project_slug, + subject=study_tags.patient_id, + experiment=study_tags.pseudo_anon_image_id, + content_type="application/zip", + import_handler="DICOM-zip", + ) + + def upload_parquet_files(self, parquet_export: ParquetExport) -> None: # noqa: ARG002 + msg = "XNATUploader does not support parquet files" + raise NotImplementedError(msg) diff --git a/pixl_core/src/core/uploader/base.py b/pixl_core/src/core/uploader/base.py index ab44576ef..887776513 100644 --- a/pixl_core/src/core/uploader/base.py +++ b/pixl_core/src/core/uploader/base.py @@ -17,12 +17,17 @@ from abc import ABC, abstractmethod from datetime import datetime, timezone -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional + +from loguru import logger from core.db.queries import have_already_exported_image, update_exported_at from core.project_config.secrets import AzureKeyVault from core.uploader._orthanc import get_tags_by_study +if TYPE_CHECKING: + from core.uploader._orthanc import StudyTags + class Uploader(ABC): """Upload strategy interface.""" @@ -56,14 +61,29 @@ def upload_dicom_and_update_database(self, study_id: str) -> None: :param study_id: Orthanc Study ID :raise: if the image has already been exported """ - pseudo_anon_image_id, project_slug = self._get_tags_by_study(study_id) - self.check_already_exported(pseudo_anon_image_id) - self._upload_dicom_image(study_id, pseudo_anon_image_id, project_slug) - update_exported_at(pseudo_anon_image_id, datetime.now(tz=timezone.utc)) + study_tags = self._get_tags_by_study(study_id) + self.check_already_exported(study_tags.pseudo_anon_image_id) + + logger.info( + "Starting {} upload of '{}' for {}", + self.__class__.__name__.removesuffix("Uploader"), + study_tags.pseudo_anon_image_id, + self.project_slug, + ) + self._upload_dicom_image(study_id, study_tags) + logger.success( + "Finished {} upload of '{}'", + self.__class__.__name__.removesuffix("Uploader"), + study_tags.pseudo_anon_image_id, + ) + + update_exported_at(study_tags.pseudo_anon_image_id, datetime.now(tz=timezone.utc)) @abstractmethod def _upload_dicom_image( - self, study_id: str, pseudo_anon_image_id: str, project_slug: str + self, + study_id: str, + study_tags: StudyTags, ) -> None: """ Abstract method to upload DICOM images, should not be called directly. 
@@ -88,6 +108,6 @@ def check_already_exported(pseudo_anon_image_id: str) -> None: raise RuntimeError(msg) @staticmethod - def _get_tags_by_study(study_id: str) -> tuple[str, str]: + def _get_tags_by_study(study_id: str) -> StudyTags: """Helper method for getting tags by study ID, can be overriden for testing.""" return get_tags_by_study(study_id) diff --git a/pixl_core/tests/conftest.py b/pixl_core/tests/conftest.py index c6fb50bfd..77325b985 100644 --- a/pixl_core/tests/conftest.py +++ b/pixl_core/tests/conftest.py @@ -23,6 +23,8 @@ import pytest import requests from core.db.models import Base, Extract, Image +from core.patient_queue.message import Message +from pydicom.uid import generate_uid from pytest_pixl.helpers import run_subprocess from sqlalchemy import Engine, create_engine from sqlalchemy.orm import Session, sessionmaker @@ -38,19 +40,26 @@ os.environ["PIXL_MAX_MESSAGES_IN_FLIGHT"] = "10" os.environ["RABBITMQ_USERNAME"] = "guest" -os.environ["RABBITMQ_PASSWORD"] = "guest" # noqa: S105 Hardcoding password +os.environ["RABBITMQ_PASSWORD"] = "guest" os.environ["RABBITMQ_HOST"] = "localhost" os.environ["RABBITMQ_PORT"] = "25672" os.environ["PROJECT_CONFIGS_DIR"] = str(TEST_DIR.parents[1] / "projects/configs") os.environ["FTP_HOST"] = "localhost" os.environ["FTP_USER_NAME"] = "pixl" -os.environ["FTP_PASSWORD"] = "longpassword" # noqa: S105 Hardcoding password +os.environ["FTP_PASSWORD"] = "longpassword" os.environ["FTP_PORT"] = "20021" os.environ["ORTHANC_ANON_URL"] = "http://localhost:8043" os.environ["ORTHANC_ANON_USERNAME"] = "orthanc" -os.environ["ORTHANC_ANON_PASSWORD"] = "orthanc" # noqa: S105, hardcoded password +os.environ["ORTHANC_ANON_PASSWORD"] = "orthanc" + +os.environ["XNAT_HOST"] = "localhost" +os.environ["XNAT_USER_NAME"] = "pixl" +os.environ["XNAT_PASSWORD"] = "longpassword" +os.environ["XNAT_PORT"] = "8080" +os.environ["XNAT_DESTINATION"] = "/archive" +os.environ["XNAT_OVERWRITE"] = "none" @pytest.fixture(scope="package") @@ -152,16 +161,18 @@ def rows_in_session(db_session) -> Session: accession_number="123", study_date=STUDY_DATE, mrn="mrn", + study_uid="1.2.3", extract=extract, exported_at=datetime.datetime.now(tz=datetime.timezone.utc), - hashed_identifier="already_exported", + pseudo_study_uid=generate_uid(entropy_srcs=["already_exported"]), ) image_not_exported = Image( accession_number="234", study_date=STUDY_DATE, mrn="mrn", + study_uid="2.3.4", extract=extract, - hashed_identifier="not_yet_exported", + pseudo_study_uid=generate_uid(entropy_srcs=["not_yet_exported"]), ) with db_session: db_session.add_all([extract, image_exported, image_not_exported]) @@ -173,13 +184,21 @@ def rows_in_session(db_session) -> Session: @pytest.fixture() def not_yet_exported_dicom_image(rows_in_session) -> Image: """Return a DICOM image from the database.""" - return rows_in_session.query(Image).filter(Image.hashed_identifier == "not_yet_exported").one() + return ( + rows_in_session.query(Image) + .filter(Image.pseudo_study_uid == generate_uid(entropy_srcs=["not_yet_exported"])) + .one() + ) @pytest.fixture() def already_exported_dicom_image(rows_in_session) -> Image: """Return a DICOM image from the database.""" - return rows_in_session.query(Image).filter(Image.hashed_identifier == "already_exported").one() + return ( + rows_in_session.query(Image) + .filter(Image.pseudo_study_uid == generate_uid(entropy_srcs=["already_exported"])) + .one() + ) @pytest.fixture(autouse=True) @@ -188,3 +207,19 @@ def export_dir(tmp_path_factory: pytest.TempPathFactory) -> pathlib.Path: 
export_dir = tmp_path_factory.mktemp("export_base") / "exports" export_dir.mkdir() return export_dir + + +@pytest.fixture() +def mock_message() -> Message: + """An example Message used for testing""" + return Message( + mrn="111", + accession_number="123", + study_uid="1.2.3", + study_date=datetime.date.fromisoformat("2022-11-22"), + procedure_occurrence_id="234", + project_name="test project", + extract_generated_timestamp=datetime.datetime.strptime( + "Dec 7 2023 2:08PM", "%b %d %Y %I:%M%p" + ).replace(tzinfo=datetime.timezone.utc), + ) diff --git a/pixl_core/tests/data/dicom_series.zip b/pixl_core/tests/data/dicom_series.zip new file mode 100644 index 000000000..dee661c7e Binary files /dev/null and b/pixl_core/tests/data/dicom_series.zip differ diff --git a/pixl_core/tests/data/xnat_project.xml b/pixl_core/tests/data/xnat_project.xml new file mode 100644 index 000000000..26f07edac --- /dev/null +++ b/pixl_core/tests/data/xnat_project.xml @@ -0,0 +1,7 @@ + + + \ No newline at end of file diff --git a/pixl_core/tests/docker-compose.yml b/pixl_core/tests/docker-compose.yml index e6d81fa97..204039d4a 100644 --- a/pixl_core/tests/docker-compose.yml +++ b/pixl_core/tests/docker-compose.yml @@ -42,6 +42,7 @@ services: RAW_DICOM_PORT: "4242" RAW_IP_ADDR: "orthanc-raw" # aka. hostname DICOM_WEB_PLUGIN_ENABLED: true + TZ: "Europe/London" ports: - "127.0.0.1:4243:4242" - "127.0.0.1:8043:8042" @@ -66,6 +67,7 @@ services: RAW_DICOM_PORT: "4242" RAW_IP_ADDR: "dicom-web" # aka. hostname DICOM_WEB_PLUGIN_ENABLED: true + TZ: "Europe/London" ports: - "127.0.0.1:4244:4242" - "127.0.0.1:8044:8042" diff --git a/pixl_core/tests/patient_queue/test_message.py b/pixl_core/tests/patient_queue/test_message.py index f70ca2491..a4785c63f 100644 --- a/pixl_core/tests/patient_queue/test_message.py +++ b/pixl_core/tests/patient_queue/test_message.py @@ -13,27 +13,14 @@ # limitations under the License. 
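For orientation before the updated queue tests: a minimal sketch of building a message with the new `study_uid` field and publishing it with a priority, mirroring the `mock_message` fixture above. The queue name is a placeholder and a reachable RabbitMQ instance is assumed.

```python
import datetime

from core.patient_queue.message import Message
from core.patient_queue.producer import PixlProducer

message = Message(
    mrn="111",
    accession_number="123",
    study_uid="1.2.3",
    study_date=datetime.date(2022, 11, 22),
    procedure_occurrence_id=234,
    project_name="test project",
    extract_generated_timestamp=datetime.datetime.now(tz=datetime.timezone.utc),
)

with PixlProducer(queue_name="imaging-primary") as producer:  # queue name is illustrative
    producer.publish([message], priority=1)  # 1 = lowest priority, 5 = highest
```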
from __future__ import annotations -import datetime +from core.patient_queue.message import deserialise -from core.patient_queue.message import Message, deserialise -msg = Message( - mrn="111", - accession_number="123", - study_date=datetime.date.fromisoformat("2022-11-22"), - procedure_occurrence_id="234", - project_name="test project", - extract_generated_timestamp=datetime.datetime.strptime( - "Dec 7 2023 2:08PM", "%b %d %Y %I:%M%p" - ).replace(tzinfo=datetime.timezone.utc), -) - - -def test_serialise() -> None: +def test_serialise(mock_message) -> None: """Checks that messages can be correctly serialised""" - msg_body = msg.serialise(deserialisable=False) + msg_body = mock_message.serialise(deserialisable=False) assert ( - msg_body == b'{"mrn": "111", "accession_number": "123", ' + msg_body == b'{"mrn": "111", "accession_number": "123", "study_uid": "1.2.3", ' b'"study_date": "2022-11-22", ' b'"procedure_occurrence_id": "234", ' b'"project_name": "test project", ' @@ -41,7 +28,7 @@ def test_serialise() -> None: ) -def test_deserialise() -> None: +def test_deserialise(mock_message) -> None: """Checks if deserialised messages are the same as the original""" - serialised_msg = msg.serialise() - assert deserialise(serialised_msg) == msg + serialised_msg = mock_message.serialise() + assert deserialise(serialised_msg) == mock_message diff --git a/pixl_core/tests/patient_queue/test_producer.py b/pixl_core/tests/patient_queue/test_producer.py index 3a684ffe3..2df0ca3b6 100644 --- a/pixl_core/tests/patient_queue/test_producer.py +++ b/pixl_core/tests/patient_queue/test_producer.py @@ -14,18 +14,9 @@ from __future__ import annotations import pytest -from core.patient_queue.message import Message from core.patient_queue.producer import PixlProducer TEST_QUEUE = "test_publish" -TEST_MESSAGE = Message( - mrn="111", - accession_number="123", - study_date="2022-11-22T13:33:00+00:00", - procedure_occurrence_id="234", - project_name="test project", - extract_generated_timestamp="2023-12-07T14:08:00+00:00", -) @pytest.mark.usefixtures("run_containers") @@ -36,14 +27,14 @@ def test_create_pixl_producer() -> None: @pytest.mark.usefixtures("run_containers") -def test_publish() -> None: +def test_publish(mock_message) -> None: """ Checks that after publishing, there is one message in the queue. Will only work if nothing has been added to queue before. 
""" with PixlProducer(queue_name=TEST_QUEUE) as pp: pp.clear_queue() - pp.publish(messages=[TEST_MESSAGE]) + pp.publish(messages=[mock_message], priority=1) with PixlProducer(queue_name=TEST_QUEUE) as pp: assert pp.message_count == 1 diff --git a/pixl_core/tests/patient_queue/test_subscriber.py b/pixl_core/tests/patient_queue/test_subscriber.py index 57711b886..82c484fc9 100644 --- a/pixl_core/tests/patient_queue/test_subscriber.py +++ b/pixl_core/tests/patient_queue/test_subscriber.py @@ -17,20 +17,11 @@ from unittest.mock import AsyncMock import pytest -from core.patient_queue.message import Message from core.patient_queue.producer import PixlProducer from core.patient_queue.subscriber import PixlConsumer from core.token_buffer.tokens import TokenBucket TEST_QUEUE = "test_consume" -TEST_MESSAGE = Message( - mrn="111", - accession_number="123", - study_date="2022-11-22T13:33:00+00:00", - procedure_occurrence_id="234", - project_name="test project", - extract_generated_timestamp="2023-12-07T14:08:00+00:00", -) class ExpectedTestError(Exception): @@ -42,14 +33,17 @@ class ExpectedTestError(Exception): @pytest.mark.xfail( reason="Sanity check that async test gets run", strict=True, raises=ExpectedTestError ) -async def test_create() -> None: +async def test_create(mock_message) -> None: """Checks consume is working.""" with PixlProducer(queue_name=TEST_QUEUE) as producer: - producer.publish(messages=[TEST_MESSAGE]) + producer.publish(messages=[mock_message], priority=1) consume = AsyncMock() async with PixlConsumer( - queue_name=TEST_QUEUE, token_bucket=TokenBucket(), callback=consume + queue_name=TEST_QUEUE, + token_bucket=TokenBucket(), + token_bucket_key="primary", # noqa: S106 + callback=consume, ) as consumer: # Create a Task to run pc.run in the background task = asyncio.create_task(consumer.run()) diff --git a/pixl_core/tests/project_config/test_project_config.py b/pixl_core/tests/project_config/test_project_config.py index d15f79cf2..3a09ae6d5 100644 --- a/pixl_core/tests/project_config/test_project_config.py +++ b/pixl_core/tests/project_config/test_project_config.py @@ -42,7 +42,7 @@ def base_yaml_data(): "project": {"name": "myproject", "modalities": ["DX", "CR"]}, "tag_operation_files": { "base": ["test-extract-uclh-omop-cdm.yaml"], - "manufacturer_overrides": "manufacturer-overrides/mri-diffusion.yaml", + "manufacturer_overrides": ["mri-diffusion.yaml"], }, "destination": {"dicom": "ftps", "parquet": "ftps"}, } @@ -93,7 +93,8 @@ def ids_for_parameterised_test(val): ) def test_all_real_configs(yaml_file): """Test that all production configs are valid""" - load_project_config(yaml_file.stem) + config = load_project_config(yaml_file.stem) + assert config.project.name == yaml_file.stem def test_load_tag_operations(): @@ -121,7 +122,7 @@ def test_load_tag_operations_no_manufacturer_overrides(base_yaml_data): tag_operations = load_tag_operations(project_config) # Assert - assert tag_operations.manufacturer_overrides is None + assert tag_operations.manufacturer_overrides == [] @pytest.fixture() diff --git a/pixl_core/tests/token_buffer/test_tokens.py b/pixl_core/tests/token_buffer/test_tokens.py index 8e464ad40..c55e2ac66 100644 --- a/pixl_core/tests/token_buffer/test_tokens.py +++ b/pixl_core/tests/token_buffer/test_tokens.py @@ -13,28 +13,39 @@ # limitations under the License. 
from __future__ import annotations +import re import time +import pytest from core.token_buffer import TokenBucket def test_retrieve_token() -> None: """Checks whether token can be retrieved from created token bucket.""" bucket = TokenBucket() - assert bucket.has_token + assert bucket.has_token(key="primary") + assert bucket.has_token(key="secondary") + + +def test_invalid_token_key() -> None: + """Checks whether invalid key raises an exception.""" + bucket = TokenBucket() + match = re.escape("Key must be one of ['primary', 'secondary'], not 'invalid'") + with pytest.raises(ValueError, match=match): + bucket.has_token(key="invalid") def test_refill_tokens() -> None: """Checks whether the refill happens after one second for a bucket size of 1.""" bucket = TokenBucket(rate=1, capacity=1) - assert bucket.has_token + assert bucket.has_token(key="primary") # Interrogating the bucket within 1 second we find that it's empty - assert bucket.has_token is False + assert bucket.has_token(key="primary") is False # but will be refilled after 1 second time.sleep(1) - assert bucket.has_token + assert bucket.has_token(key="primary") def test_zero_rate() -> None: diff --git a/pixl_core/tests/uploader/test_base.py b/pixl_core/tests/uploader/test_base.py index 4595a8df1..e1806fe3a 100644 --- a/pixl_core/tests/uploader/test_base.py +++ b/pixl_core/tests/uploader/test_base.py @@ -16,6 +16,8 @@ import pytest import sqlalchemy from core.db.models import Image +from core.uploader import DicomWebUploader, FTPSUploader, XNATUploader, get_uploader +from core.uploader._orthanc import StudyTags from core.uploader.base import Uploader from loguru import logger from sqlalchemy.orm import sessionmaker @@ -28,22 +30,26 @@ class DumbUploader(Uploader): Allows testing of the database interaction at the top level call to uploader. 
""" - def __init__(self, hashed_identifier) -> None: + def __init__(self, pseudo_study_uid) -> None: """Initialise the mock uploader with hardcoded values for FTPS config.""" - self.hashed_identifier = hashed_identifier + self.project_slug = "project_slug" + self.pseudo_study_uid = pseudo_study_uid - def _get_tags_by_study(self, study_id: str) -> tuple[str, str]: - logger.info("Mocked getting tags for: {} to return {}", study_id, self.hashed_identifier) - return self.hashed_identifier, "project_slug" + def _get_tags_by_study(self, study_id: str) -> StudyTags: + logger.info("Mocked getting tags for: {} to return {}", study_id, self.pseudo_study_uid) + return StudyTags(self.pseudo_study_uid, "patient-id") def _upload_dicom_image( - self, study_id: str, pseudo_anon_image_id: str, project_slug: str + self, + study_id: str, + study_tags: StudyTags, ) -> None: logger.info( "Mocked uploader with no upload functionality for {}, {}, {}", study_id, - pseudo_anon_image_id, - project_slug, + study_tags.pseudo_anon_image_id, + self.project_slug, + study_tags.patient_id, ) def _set_config(self) -> None: @@ -65,7 +71,7 @@ def test_export_date_updated(db_engine, not_yet_exported_dicom_image) -> None: """ # ARRANGE study_id = "test-study-id" - uploader = DumbUploader(not_yet_exported_dicom_image.hashed_identifier) + uploader = DumbUploader(not_yet_exported_dicom_image.pseudo_study_uid) # ACT uploader.upload_dicom_and_update_database(study_id) @@ -74,7 +80,7 @@ def test_export_date_updated(db_engine, not_yet_exported_dicom_image) -> None: InMemorySession = sessionmaker(db_engine) with InMemorySession() as session: output = ( - session.query(Image).filter(Image.hashed_identifier == uploader.hashed_identifier).one() + session.query(Image).filter(Image.pseudo_study_uid == uploader.pseudo_study_uid).one() ) assert output.exported_at is not None @@ -99,7 +105,30 @@ def test_study_already_exported_raises(already_exported_dicom_image) -> None: THEN an exception is raised """ study_id = "test-study-id" - uploader = DumbUploader(already_exported_dicom_image.hashed_identifier) + uploader = DumbUploader(already_exported_dicom_image.pseudo_study_uid) with pytest.raises(RuntimeError, match="Image already exported"): uploader.upload_dicom_and_update_database(study_id) + + +@pytest.mark.parametrize( + ("project_slug", "expected_uploader_class"), + [ + ("test-extract-uclh-omop-cdm", FTPSUploader), + ("test-extract-uclh-omop-cdm-dicomweb", DicomWebUploader), + ("test-extract-uclh-omop-cdm-xnat", XNATUploader), + ], +) +def test_get_uploader(project_slug, expected_uploader_class, monkeypatch) -> None: + """Test the correct uploader class is returned.""" + with monkeypatch.context() as m: + # Mock the __init__ method so that we don't attempt to connect to AzureKeyVault. + # Otherwise AzureKeyVault._check_envvars will raise an exception for undefined + # environment variables. 
+ m.setattr( + "core.uploader.base.Uploader.__init__", + lambda self, project_slug, keyvault_alias: None, # noqa: ARG005 + ) + + uploader = get_uploader(project_slug) + assert isinstance(uploader, expected_uploader_class) diff --git a/pixl_core/tests/uploader/test_dicomweb.py b/pixl_core/tests/uploader/test_dicomweb.py index 103afc9c5..63342a2b5 100644 --- a/pixl_core/tests/uploader/test_dicomweb.py +++ b/pixl_core/tests/uploader/test_dicomweb.py @@ -20,6 +20,7 @@ import pytest import requests from core.uploader._dicomweb import DicomWebUploader +from core.uploader._orthanc import StudyTags from decouple import config # type ignore [import-untyped] ORTHANC_ANON_URL = config("ORTHANC_ANON_URL") @@ -27,7 +28,7 @@ ORTHANC_PASSWORD = config("ORTHANC_ANON_PASSWORD") DICOMWEB_USERNAME = "orthanc_dicomweb" -DICOMWEB_PASSWORD = "orthanc_dicomweb" # noqa: S105, hardcoded password +DICOMWEB_PASSWORD = "orthanc_dicomweb" LOCAL_DICOMWEB_URL = "http://localhost:8044" @@ -93,8 +94,13 @@ def test_upload_dicom_image( study_id, run_containers, dicomweb_uploader, not_yet_exported_dicom_image ) -> None: """Tests that DICOM image can be uploaded to a DICOMWeb server""" + study_tags = StudyTags( + pseudo_anon_image_id=not_yet_exported_dicom_image.pseudo_study_uid, + patient_id="patient", + ) dicomweb_uploader._upload_dicom_image( # noqa: SLF001 - study_id, not_yet_exported_dicom_image.hashed_identifier, "project" + study_id, + study_tags, ) # Check that the instance has arrived in the DICOMweb server @@ -109,7 +115,7 @@ def test_dicomweb_upload_fails_with_wrong_credentials( ) -> None: """Tests that the DICOMWeb uploader fails when given wrong credentials.""" dicomweb_uploader.endpoint_user = "wrong" - dicomweb_uploader.endpoint_password = "wrong" # noqa: S105, hardcoded password + dicomweb_uploader.endpoint_password = "wrong" with pytest.raises(requests.exceptions.ConnectionError): dicomweb_uploader._setup_dicomweb_credentials() # noqa: SLF001, private method diff --git a/pixl_core/tests/uploader/test_ftps.py b/pixl_core/tests/uploader/test_ftps.py index 6a8bc19b1..6a18f54c2 100644 --- a/pixl_core/tests/uploader/test_ftps.py +++ b/pixl_core/tests/uploader/test_ftps.py @@ -25,6 +25,7 @@ from core.db.queries import update_exported_at from core.exports import ParquetExport from core.uploader._ftps import FTPSUploader +from pydicom.uid import generate_uid from pytest_pixl.plugin import FtpHostAddress TEST_DIR = Path(__file__).parents[1] @@ -77,7 +78,7 @@ def test_send_via_ftps( """Tests that DICOM image can be uploaded to the correct location""" # ARRANGE # Get the pseudo identifier from the test image - pseudo_anon_id = not_yet_exported_dicom_image.hashed_identifier + pseudo_anon_id = not_yet_exported_dicom_image.pseudo_study_uid project_slug = "some-project-slug" expected_output_file = ftps_home_dir / project_slug / (pseudo_anon_id + ".zip") @@ -94,9 +95,11 @@ def test_update_exported_and_save(rows_in_session) -> None: expected_export_time = datetime.now(tz=timezone.utc) # ACT - update_exported_at("not_yet_exported", expected_export_time) + update_exported_at(generate_uid(entropy_srcs=["not_yet_exported"]), expected_export_time) new_row = ( - rows_in_session.query(Image).filter(Image.hashed_identifier == "not_yet_exported").one() + rows_in_session.query(Image) + .filter(Image.pseudo_study_uid == generate_uid(entropy_srcs=["not_yet_exported"])) + .one() ) actual_export_time = new_row.exported_at.replace(tzinfo=timezone.utc) diff --git a/pixl_core/tests/uploader/test_xnat.py b/pixl_core/tests/uploader/test_xnat.py 
new file mode 100644 index 000000000..2205a0c11 --- /dev/null +++ b/pixl_core/tests/uploader/test_xnat.py @@ -0,0 +1,199 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test functionality to upload files to an XNAT instance.""" + +import io +import os +from collections.abc import Generator +from pathlib import Path + +import pytest +import xnat +import xnat4tests +from core.uploader._orthanc import StudyTags +from core.uploader._xnat import XNATUploader + +TEST_DIR = Path(__file__).parents[1] + + +class MockXNATUploader(XNATUploader): + """Mock XNATUploader for testing.""" + + def __init__(self, project_slug: str) -> None: + """Initialise the mock uploader with hardcoded values for FTPS config.""" + self.host = os.environ["XNAT_HOST"] + self.user = os.environ["XNAT_USER_NAME"] + self.password = os.environ["XNAT_PASSWORD"] + self.port = os.environ["XNAT_PORT"] + self.url = f"http://{self.host}:{self.port}" + self.destination = os.environ["XNAT_DESTINATION"] + self.overwrite = os.environ["XNAT_OVERWRITE"] + self.project_slug = project_slug + + +@pytest.fixture(scope="session") +def xnat_project_slug() -> str: + """Name of the XNAT project""" + return "some-project-slug" + + +@pytest.fixture() +def xnat_uploader(xnat_project_slug) -> MockXNATUploader: + """Return a MockXNATUploader object.""" + return MockXNATUploader(project_slug=xnat_project_slug) + + +@pytest.fixture() +def zip_parquet() -> Generator: + """Directory containing parquet test data.""" + test_zip_file = TEST_DIR / "data" / "public.zip" + with test_zip_file.open("rb") as file_content: + yield file_content + + +@pytest.fixture() +def zip_dicoms() -> Generator: + """ + Zip file containing the test DICOMs for uploading to the XNAT instance. + + The zip file contains a single study with two DICOMs: + - 987654321_AA12345601_AP.dcm + - 987654321_AA12345601_include123.dcm + """ + test_zip_file = TEST_DIR / "data" / "dicom_series.zip" + with test_zip_file.open("rb") as file_content: + yield io.BytesIO(file_content.read()) + + +@pytest.fixture(scope="session") +def xnat_study_tags() -> StudyTags: + """Return a StudyTags object for the study to be uploaded to XNAT.""" + return StudyTags( + pseudo_anon_image_id="1.3.6.1.4.1.14519.5.2.1.99.1071.12985477682660597455732044031486", + patient_id="987654321", + ) + + +@pytest.fixture(scope="session") +def xnat_server(xnat_project_slug) -> Generator: + """ + Start the XNAT server. + + Note, it can take several minutes for the server to start up. + + Once the server has started, you can log in by visiting http://localhost:8080 + with the username and password set in the `XNAT_USER_NAME` and `XNAT_PASSWORD` + environment variables. 
+ """ + config = xnat4tests.Config( + xnat_port=os.environ["XNAT_PORT"], + docker_host=os.environ["XNAT_HOST"], + build_args={ + "xnat_version": "1.8.10.1", + }, + ) + xnat4tests.start_xnat(config) + + # Create the project as well as a non-admin user to perform the upload + with xnat.connect( + server=config.xnat_uri, + user="admin", + password="admin", # noqa: S106 + ) as session: + session.post( + path="/xapi/users/", + json=dict( # noqa: C408 + admin=False, + username=os.environ["XNAT_USER_NAME"], + password=os.environ["XNAT_PASSWORD"], + firstName="pixl", + lastName="uploader", + email="pixl-uploader@pixl", + verified=True, + enabled=True, + ), + headers={"Content-Type": "application/json"}, + accepted_status=[201, 409], + ) + + # XNAT requires project metadata to be uploaded as XML + with (TEST_DIR / "data" / "xnat_project.xml").open() as file: + project_xml = file.read() + session.skip_response_content_check = ( + True # so XNATPy doesn't raise an error if the project exists + ) + session.post( + path="/data/projects", + data=project_xml, + headers={"Content-Type": "application/xml"}, + accepted_status=[200, 409], + ) + session.put( + path=f"/data/projects/{xnat_project_slug}/users/Owners/pixl", + accepted_status=[200], + ) + + yield config.xnat_uri + + # If a test instance already exists, xnat4tests will reuse it for subsequent tests. + # This can save a lot of time when testing locally as it takes several minutes to start + # the XNAT server. + if os.environ.get("XNAT_DESTROY_INSTANCE", "False").lower() == "true": + xnat4tests.stop_xnat(config) + return + + # If we're keeping the instance, we need to remove the data before next test run. + # We do not delete the project itself as Project IDs cannot be reused. + with xnat.connect( + server=config.xnat_uri, + user="admin", + password="admin", # noqa: S106 + ) as session: + project = session.projects[xnat_project_slug] + for subject in project.subjects.values(): + session.delete( + path=f"/data/projects/{project.id}/subjects/{subject.label}", + query={"removeFiles": "True"}, + ) + + +@pytest.mark.usefixtures("xnat_server") +def test_upload_to_xnat(zip_dicoms, xnat_uploader, xnat_study_tags) -> None: + """Tests that DICOM image can be uploaded to the correct location""" + xnat_uploader.upload_to_xnat( + zip_content=zip_dicoms, + study_tags=xnat_study_tags, + ) + + with xnat.connect( + server=xnat_uploader.url, + user=xnat_uploader.user, + password=xnat_uploader.password, + ) as session: + assert xnat_uploader.project_slug in session.projects + project = session.projects[xnat_uploader.project_slug] + + assert xnat_study_tags.patient_id in project.subjects + subject = project.subjects[xnat_study_tags.patient_id] + + assert len(subject.experiments) == 1 + experiment = subject.experiments[0] + assert experiment.label == xnat_study_tags.pseudo_anon_image_id.replace(".", "_") + assert len(experiment.scans) == 2 + + +def test_parquet_export_not_implemented(xnat_uploader, zip_parquet) -> None: + """Tests that calling XNATUploader.upload_parquet_files raises an error.""" + with pytest.raises(NotImplementedError, match="XNATUploader does not support parquet files"): + xnat_uploader.upload_parquet_files(zip_parquet) diff --git a/pixl_dcmd/README.md b/pixl_dcmd/README.md index fc118e774..2c8b57682 100644 --- a/pixl_dcmd/README.md +++ b/pixl_dcmd/README.md @@ -3,20 +3,48 @@ The `pixl_dcmd` package provides helper functions for de-identifying DICOM data. 
It is currently only used by the [`orthanc-anon` plugin](../orthanc/orthanc-anon/plugin/pixl.py). -The reason for having this as a separate package instead of having the functionality in `pixl_core` -is because `orthanc` requires Python 3.9, whereas the rest of PIXL is on 3.10 or higher. +For external users, the `pixl_dcmd` package provides the following functionality: -Specifically, the `pixl_dcmd` package provides the following functionality: +- `anonymise_dicom()`: Applies the [anonymisation operations](#tag-scheme-anonymisation) + for the appropriate tag scheme using [Kitware Dicom Anonymizer](https://github.com/KitwareMedical/dicom-anonymizer) + and deletes any tags not mentioned in the tag scheme. The dataset is updated in place. + - Will throw a `PixlSkipInstanceError` for any series based on the project config file. Specifically, an error + will be thrown if: + - the series description matches any series in `series_filters` (usually to remove localiser series) + - the modality of the DICOM is not in `modalities` +- `anonymise_and_validate_dicom()`: Compares DICOM validation issues before and after calling `anonymise_dicom` + and returns a dictionary of the new issues -- `anonymise_dicom()`: Applies the [anonymisation operations](#tag-scheme-anonymisation) for the appropriate tag scheme using [Kitware Dicom Anonymizer](https://github.com/KitwareMedical/dicom-anonymizer)) and deletes any tags not mentioned in the tag scheme. -- `write_dataset_to_bytes()`: writes a DICOM dataset to a bytes object +```python +import os +import pathlib +import pydicom + +from core.project_config.pixl_config_model import load_config_and_validate +from pixl_dcmd import anonymise_and_validate_dicom + +config_dir = pathlib.Path().cwd().parents[2] / "projects" / "configs" +config_path = config_dir / "test-external-user.yaml" +os.environ["PROJECT_CONFIGS_DIR"] = config_dir.as_posix() # needed to validate config +config = load_config_and_validate(config_path) + +dataset_path = pydicom.data.get_testdata_file( + "MR-SIEMENS-DICOM-WithOverlays.dcm", download=True, +) +dataset = pydicom.dcmread(dataset_path) + +# the dataset is updated inplace +validation_issues = anonymise_and_validate_dicom(dataset, config=config) +assert validation_issues == {} +assert dataset != pydicom.dcmread(dataset_path) +``` ## Installation -Install the Python dependencies with +Install the Python dependencies from the `pixl_dcmd` directory: ```bash -pip install -e ../pixl_core/ -e .[test,dev] +python -m pip install -e ../pixl_core/ -e ".[test,dev]" ``` ## Test diff --git a/pixl_dcmd/pyproject.toml b/pixl_dcmd/pyproject.toml index 213f2f934..fa0168384 100644 --- a/pixl_dcmd/pyproject.toml +++ b/pixl_dcmd/pyproject.toml @@ -1,32 +1,54 @@ [project] name = "pixl_dcmd" -version = "0.0.2" +version = "0.2.0rc0" authors = [{ name = "PIXL authors" }] description = "DICOM header anonymisation functions" readme = "README.md" requires-python = ">=3.11" classifiers = ["Programming Language :: Python :: 3"] dependencies = [ - "arrow==1.2.3", + "core==0.2.0rc0", + "arrow==1.3.0", + "dicom-anonymizer==1.0.13.post1", + "dicom-validator==0.6.3", + "logger==1.4", "pydicom==2.4.4", "pydicom-data", - "logger==1.4", - "requests==2.31.0", - "python-decouple==3.6", + "python-decouple==3.8", + "requests==2.32.3", "types-requests~=2.28", - "dicom-anonymizer==1.0.12", ] [project.optional-dependencies] -test = ["dcm2niix==1.0.20220715", "nibabel==5.2.0", "pytest==7.4.2"] -dev = ["mypy", "pre-commit", "ruff"] +test = [ + "core[test]==0.2.0rc0", + 
"dcm2niix==1.0.20220715", + "nibabel==5.3.2", +] +dev = [ + "core[dev]==0.2.0rc0", +] [build-system] requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" -[tool.ruff] -extend = "./ruff.toml" +[tool.coverage.report] +exclude_also = [ + "def __repr__", + "if self.debug:", + "if settings.DEBUG", + "except subprocess.CalledProcessError as exception:", + "raise AssertionError", + "raise NotImplementedError", + "if 0:", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", + "if typing.TYPE_CHECKING", + "class .*\\bProtocol\\):", + "@(abc\\.)?abstractmethod", +] + [tool.ruff.lint.extend-per-file-ignores] "./tests/**" = ["D100"] diff --git a/pixl_dcmd/ruff.toml b/pixl_dcmd/ruff.toml deleted file mode 100644 index 45c40ded8..000000000 --- a/pixl_dcmd/ruff.toml +++ /dev/null @@ -1,2 +0,0 @@ -[lint.extend-per-file-ignores] -"src/pixl_dcmd/main.py" = ["PLR2004", "PLR0912", "PLR0915"] # Magic value used in comparison, Too many branches, Too many statements diff --git a/pixl_dcmd/src/pixl_dcmd/__init__.py b/pixl_dcmd/src/pixl_dcmd/__init__.py index 111140c05..368a2d821 100644 --- a/pixl_dcmd/src/pixl_dcmd/__init__.py +++ b/pixl_dcmd/src/pixl_dcmd/__init__.py @@ -16,7 +16,11 @@ from pixl_dcmd.main import ( anonymise_dicom, write_dataset_to_bytes, - should_exclude_series, + anonymise_and_validate_dicom, ) -__all__ = ["anonymise_dicom", "write_dataset_to_bytes", "should_exclude_series"] +__all__ = [ + "anonymise_dicom", + "write_dataset_to_bytes", + "anonymise_and_validate_dicom", +] diff --git a/pixl_dcmd/src/pixl_dcmd/_database.py b/pixl_dcmd/src/pixl_dcmd/_database.py index c4b0a3737..154816341 100644 --- a/pixl_dcmd/src/pixl_dcmd/_database.py +++ b/pixl_dcmd/src/pixl_dcmd/_database.py @@ -14,12 +14,17 @@ """Interaction with the PIXL database.""" -from typing import Optional from decouple import config # type: ignore [import-untyped] +import pydicom +from loguru import logger +from pydicom.uid import generate_uid, UID +from sqlalchemy.orm.session import Session from core.db.models import Image, Extract -from sqlalchemy import URL, create_engine -from sqlalchemy.orm import sessionmaker +from sqlalchemy import URL, create_engine, exists +from sqlalchemy.orm import sessionmaker, exc + +from pixl_dcmd._dicom_helpers import StudyInfo url = URL.create( drivername="postgresql+psycopg2", @@ -33,40 +38,117 @@ engine = create_engine(url) -def add_hashed_identifier_and_save_to_db( - existing_image: Image, hashed_value: str -) -> Optional[Image]: +def get_uniq_pseudo_study_uid_and_update_db( + project_slug: str, original_study_info: StudyInfo +) -> UID: + """ + Checks if record (by slug and study info) exists in the database, + gets the pseudo_study_uid if it is not None or records a new, unique one. + Returns the pseudo_study_uid. 
+ """ PixlSession = sessionmaker(engine) with PixlSession() as pixl_session, pixl_session.begin(): - existing_image.hashed_identifier = hashed_value - pixl_session.add(existing_image) - - updated_image: Optional[Image] = ( - pixl_session.query(Image) - .filter( - Image.accession_number == existing_image.accession_number, - Image.mrn == existing_image.mrn, - Image.hashed_identifier == hashed_value, - ) - .one_or_none() + existing_image = get_unexported_image( + project_slug, + original_study_info, + pixl_session, ) - - return updated_image + if existing_image.pseudo_study_uid is None: + pseudo_study_uid = generate_uid() + while not is_unique_pseudo_study_uid(pseudo_study_uid, pixl_session): + pseudo_study_uid = generate_uid() + add_pseudo_study_uid_to_db(existing_image, pseudo_study_uid, pixl_session) + else: + pseudo_study_uid = existing_image.pseudo_study_uid + return UID(pseudo_study_uid, validation_mode=pydicom.config.RAISE) -def query_db(project_slug: str, mrn: str, accession_number: str) -> Image: +def get_pseudo_patient_id_and_update_db( + project_slug: str, original_study_info: StudyInfo, pseudo_patient_id: str +) -> str: + """ + Checks if record (by slug and study info) exists in the database, + gets the pseudo_paitent_id if it is not None otherwise use the + patient ID from the DICOM dataset. + Returns the pseudo_paitent_id. + """ PixlSession = sessionmaker(engine) with PixlSession() as pixl_session, pixl_session.begin(): + existing_image = get_unexported_image( + project_slug, + original_study_info, + pixl_session, + ) + if existing_image.pseudo_patient_id is None: + logger.debug("Adding pseudo patient ID to image") + add_pseudo_patient_id_to_db(existing_image, pseudo_patient_id, pixl_session) + return pseudo_patient_id # type: ignore + return existing_image.pseudo_patient_id # type: ignore + + +def add_pseudo_study_uid_to_db( + existing_image: Image, pseudo_study_uid: str, pixl_session: Session +) -> None: + """ + Add a pseudo study UID generated during anonymisation to the database + for an existing image generated by populate command. + """ + existing_image.pseudo_study_uid = pseudo_study_uid + pixl_session.add(existing_image) + + +def add_pseudo_patient_id_to_db( + existing_image: Image, pseudo_patient_id: str, pixl_session: Session +) -> None: + """ + Add a pseudo patient ID from DICOM tags to the database + for an existing image generated by populate command. + """ + existing_image.pseudo_patient_id = pseudo_patient_id + pixl_session.add(existing_image) + + +def is_unique_pseudo_study_uid(pseudo_study_uid: str, pixl_session: Session) -> bool: + """ + Check that random uid generated is not already in the database. + """ + return not pixl_session.query( + exists().where(Image.pseudo_study_uid == pseudo_study_uid) + ).scalar() + + +def get_unexported_image( + project_slug: str, + study_info: StudyInfo, + pixl_session: Session, +) -> Image: + """ + Get an existing, non-exported (for this project) image record from the database + identified by the study UID. If no result is found, retry with querying on + MRN + accession number. If this fails as well, raise a NoResultFound. 
+ """ + try: existing_image: Image = ( pixl_session.query(Image) .join(Extract) .filter( Extract.slug == project_slug, - Image.accession_number == accession_number, - Image.mrn == mrn, + Image.study_uid == study_info.study_uid, Image.exported_at == None, # noqa: E711 ) .one() ) - - return existing_image + # If no image is found by study UID, try MRN + accession number + except exc.NoResultFound: + existing_image = ( + pixl_session.query(Image) + .join(Extract) + .filter( + Extract.slug == project_slug, + Image.mrn == study_info.mrn, + Image.accession_number == study_info.accession_number, + Image.exported_at == None, # noqa: E711 + ) + .one() + ) + return existing_image diff --git a/pixl_dcmd/src/pixl_dcmd/_dicom_helpers.py b/pixl_dcmd/src/pixl_dcmd/_dicom_helpers.py index 75e73799f..96cbe5de8 100644 --- a/pixl_dcmd/src/pixl_dcmd/_dicom_helpers.py +++ b/pixl_dcmd/src/pixl_dcmd/_dicom_helpers.py @@ -15,23 +15,73 @@ from __future__ import annotations +from dataclasses import dataclass +import logging +from pathlib import Path + +from dicom_validator.spec_reader.edition_reader import EditionReader +from dicom_validator.validator.iod_validator import IODValidator from pydicom import Dataset -from loguru import logger - -from core.dicom_tags import DICOM_TAG_PROJECT_NAME - - -def get_project_name_as_string(dataset: Dataset) -> str: - raw_slug = dataset.get_private_item( - DICOM_TAG_PROJECT_NAME.group_id, - DICOM_TAG_PROJECT_NAME.offset_id, - DICOM_TAG_PROJECT_NAME.creator_string, - ).value - # Get both strings and bytes, which is fun - if isinstance(raw_slug, bytes): - logger.debug(f"Bytes slug {raw_slug!r}") - slug = raw_slug.decode("utf-8").strip() - else: - logger.debug(f"String slug '{raw_slug}'") - slug = raw_slug - return slug + + +class DicomValidator: + def __init__(self, edition: str = "current"): + self.edition = edition + + # Default from dicom_validator but defining here to be explicit + standard_path = str(Path.home() / "dicom-validator") + edition_reader = EditionReader(standard_path) + destination = edition_reader.get_revision(self.edition, False) + json_path = Path(destination, "json") + self.dicom_info = EditionReader.load_dicom_info(json_path) + + def validate_original(self, dataset: Dataset) -> None: + self.original_errors = IODValidator( + dataset, + self.dicom_info, + log_level=logging.ERROR, + ).validate() + + def validate_anonymised(self, dataset: Dataset) -> dict: + # Check that the original dataset has been validated + try: + orig_errors = self.original_errors + except AttributeError: + raise ValueError("Original dataset not yet validated") + + self.anon_errors = IODValidator( + dataset, + self.dicom_info, + log_level=logging.ERROR, + ).validate() + self.diff_errors: dict = {} + + for key in self.anon_errors.keys(): + if key in self.original_errors.keys(): + # Keep only errors introduced after the anonymisation + # The keys of the dictionary containt the actual errors + diff = set(self.anon_errors[key].keys()) - set(orig_errors[key].keys()) + if diff: + self.diff_errors[key] = diff + else: + self.diff_errors[key] = self.anon_errors[key] + + return self.diff_errors + + +@dataclass +class StudyInfo: + """Identifiers used for an imaging study""" + + mrn: str + accession_number: str + study_uid: str + + +def get_study_info(dataset: Dataset) -> StudyInfo: + """Read study identifiers from dicom dataset.""" + return StudyInfo( + mrn=dataset[0x0010, 0x0020].value, + accession_number=dataset[0x0008, 0x0050].value, + study_uid=dataset[0x0020, 0x000D].value, + ) diff --git 
a/pixl_dcmd/src/pixl_dcmd/_tag_schemes.py b/pixl_dcmd/src/pixl_dcmd/_tag_schemes.py index d9aab56bb..c06f2a6c4 100644 --- a/pixl_dcmd/src/pixl_dcmd/_tag_schemes.py +++ b/pixl_dcmd/src/pixl_dcmd/_tag_schemes.py @@ -33,13 +33,14 @@ def merge_tag_schemes( all_tags.update(_scheme_list_to_dict(base_tags)) if tag_operations.manufacturer_overrides and manufacturer: - manufacturer_tags = [ - tag - for override in tag_operations.manufacturer_overrides - if re.search(override["manufacturer"], manufacturer, re.IGNORECASE) - for tag in override["tags"] - ] - all_tags.update(_scheme_list_to_dict(manufacturer_tags)) + for override_file in tag_operations.manufacturer_overrides: + manufacturer_tags = [ + tag + for override in override_file + if re.search(override["manufacturer"], manufacturer, re.IGNORECASE) + for tag in override["tags"] + ] + all_tags.update(_scheme_list_to_dict(manufacturer_tags)) return list(all_tags.values()) diff --git a/pixl_dcmd/src/pixl_dcmd/main.py b/pixl_dcmd/src/pixl_dcmd/main.py index 0845c620e..c69f73dee 100644 --- a/pixl_dcmd/src/pixl_dcmd/main.py +++ b/pixl_dcmd/src/pixl_dcmd/main.py @@ -13,28 +13,34 @@ # limitations under the License. from __future__ import annotations +import typing from io import BytesIO -from loguru import logger -from os import PathLike -from typing import Any, BinaryIO, Callable, Union - -from core.exceptions import PixlDiscardError -from core.project_config import load_project_config import requests +from core.exceptions import PixlSkipInstanceError from core.project_config import load_tag_operations from decouple import config -from pydicom import DataElement, Dataset, dcmwrite from dicomanonymizer.simpledicomanonymizer import ( - actions_map_name_functions, + ActionsMapNameFunctions, anonymize_dataset, ) +from loguru import logger +from pydicom import DataElement, Dataset, dcmwrite + +from core.project_config.pixl_config_model import PixlConfig +from pixl_dcmd._database import ( + get_uniq_pseudo_study_uid_and_update_db, + get_pseudo_patient_id_and_update_db, +) +from pixl_dcmd._dicom_helpers import ( + DicomValidator, + get_study_info, +) +from pixl_dcmd._tag_schemes import _scheme_list_to_dict, merge_tag_schemes -from pixl_dcmd._dicom_helpers import get_project_name_as_string -from pixl_dcmd._tag_schemes import merge_tag_schemes, _scheme_list_to_dict -from pixl_dcmd._database import add_hashed_identifier_and_save_to_db, query_db -DicomDataSetType = Union[Union[str, bytes, PathLike[Any]], BinaryIO] +if typing.TYPE_CHECKING: + from pixl_dcmd._dicom_helpers import StudyInfo def write_dataset_to_bytes(dataset: Dataset) -> bytes: @@ -50,69 +56,118 @@ def write_dataset_to_bytes(dataset: Dataset) -> bytes: return buffer.read() -def should_exclude_series(dataset: Dataset) -> bool: - slug = get_project_name_as_string(dataset) - +def _should_exclude_series(dataset: Dataset, cfg: PixlConfig) -> bool: series_description = dataset.get("SeriesDescription") - cfg = load_project_config(slug) if cfg.is_series_excluded(series_description): logger.info("FILTERING OUT series description: {}", series_description) return True return False -def anonymise_dicom(dataset: Dataset) -> None: +def anonymise_dicom_and_update_db( + dataset: Dataset, + *, + config: PixlConfig, +) -> dict: + """Anonymise and validate a DICOM dataset and update the PIXL database.""" + identifiable_study_info = get_study_info(dataset) + validation_errors = anonymise_and_validate_dicom(dataset, config=config) + _generate_pseudo_uids_and_synchronise_pixl_db( + dataset=dataset, + 
project_name=config.project.name, + identifiable_study_info=identifiable_study_info, + ) + return validation_errors + + +def anonymise_and_validate_dicom( + dataset: Dataset, + *, + config: PixlConfig, +) -> dict: + """ + Anonymise dataset using allow list and compare DICOM validation errors before + and after anonymising. + + :param dataset: DICOM dataset to be anonymised, updated in place + :param config: Project config to use for anonymisation + :return: dictionary of validation errors + """ + # Set up Dicom validator and validate the original dataset + dicom_validator = DicomValidator(edition="2024e") + dicom_validator.validate_original(dataset) + + anonymise_dicom(dataset, config=config) + + # Validate the anonymised dataset + validation_errors = dicom_validator.validate_anonymised(dataset) + if validation_errors: + logger.warning( + "The anonymisation introduced the following validation errors:\n{}", + _parse_validation_results(validation_errors), + ) + return validation_errors + + +def anonymise_dicom( + dataset: Dataset, + config: PixlConfig, +) -> None: """ Anonymises a DICOM dataset as Received by Orthanc in place. Finds appropriate configuration based on project name and anonymises by - dropping datasets of the wrong modality - recursively applying tag operations based on the config file - deleting any tags not in the tag scheme recursively + + :param dataset: DICOM dataset to be anonymised, updated in place + :param config: Project config to use for anonymisation """ - project_slug = get_project_name_as_string(dataset) - project_config = load_project_config(project_slug) - logger.debug(f"Received instance for project {project_slug}") - if dataset.Modality not in project_config.project.modalities: + study_info = get_study_info(dataset) + logger.debug( + f"Processing instance for project {config.project.name}: {study_info}" + ) + + # Do before anonymisation in case someone decides to delete the + # Series Description tag as part of anonymisation. 
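As a usage sketch (not part of the change itself), the new keyword-only `config` argument means callers load a project config first and pass it in explicitly; the project slug and file path below are illustrative.

```python
from core.project_config import load_project_config
from pydicom import dcmread

from pixl_dcmd.main import anonymise_and_validate_dicom

# Illustrative inputs: substitute a real project slug and DICOM file.
project_config = load_project_config("test-extract-uclh-omop-cdm")
dataset = dcmread("instance_001.dcm")

errors = anonymise_and_validate_dicom(dataset, config=project_config)  # mutates dataset in place
if errors:
    print(f"anonymisation introduced validation errors in {len(errors)} modules")
```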
+ if _should_exclude_series(dataset, config): + msg = "DICOM instance discarded due to its series description" + raise PixlSkipInstanceError(msg) + if dataset.Modality not in config.project.modalities: msg = f"Dropping DICOM Modality: {dataset.Modality}" - raise PixlDiscardError(msg) + raise PixlSkipInstanceError(msg) - logger.info("Anonymising received instance") + logger.debug("Anonymising instance for: {}", study_info) # Merge tag schemes - tag_operations = load_tag_operations(project_config) + tag_operations = load_tag_operations(config) tag_scheme = merge_tag_schemes(tag_operations, manufacturer=dataset.Manufacturer) - modalities = project_config.project.modalities - - logger.info( - f"Applying DICOM tag anonymisation according to {project_config.tag_operation_files}" + logger.debug( + f"Applying DICOM tag anonymisation according to {config.tag_operation_files}" ) logger.trace(f"Tag scheme: {tag_scheme}") - if (0x0008, 0x0060) in dataset and dataset.Modality not in modalities: - msg = f"Dropping DICOM Modality: {dataset.Modality}" - raise PixlDiscardError(msg) - - logger.info("Anonymising received instance") - - _anonymise_dicom_from_scheme(dataset, project_slug, tag_scheme) - - enforce_whitelist(dataset, tag_scheme, recursive=True) + _enforce_allowlist(dataset, tag_scheme, recursive=True) + _anonymise_dicom_from_scheme(dataset, config.project.name, tag_scheme) def _anonymise_dicom_from_scheme( - dataset: Dataset, project_slug: str, tag_scheme: list[dict] + dataset: Dataset, + project_slug: str, + tag_scheme: list[dict], ) -> None: """ Converts tag scheme to tag actions and calls _anonymise_recursively. """ tag_actions = _convert_schema_to_actions(dataset, project_slug, tag_scheme) + _anonymise_recursively(dataset, tag_actions) def _anonymise_recursively( - dataset: Dataset, tag_actions: dict[tuple, Callable] + dataset: Dataset, tag_actions: dict[tuple, typing.Callable] ) -> None: """ Anonymises a DICOM dataset recursively (for items in sequences) in place. @@ -126,33 +181,31 @@ def _anonymise_recursively( def _convert_schema_to_actions( dataset: Dataset, project_slug: str, tags_list: list[dict] -) -> dict[tuple, Callable]: +) -> dict[tuple, typing.Callable]: """ - Convert the tag schema to actions (funcitons) for the anonymiser. + Convert the tag schema to actions (functions) for the anonymiser. See https://github.com/KitwareMedical/dicom-anonymizer for more details. Added custom function secure-hash for linking purposes. This function needs the MRN and Accession Number, hence why the dataset is passed in as well. """ - # Get the MRN, Accession Number before we've anonymised them - mrn = dataset[0x0010, 0x0020].value # Patient ID - accession_number = dataset[0x0008, 0x0050].value # Accession Number - tag_actions = {} for tag in tags_list: group_el = (tag["group"], tag["element"]) if tag["op"] == "secure-hash": tag_actions[group_el] = lambda _dataset, _tag: _secure_hash( - _dataset, project_slug, _tag, mrn, accession_number + _dataset, project_slug, _tag ) continue - tag_actions[group_el] = actions_map_name_functions[tag["op"]] + tag_actions[group_el] = ActionsMapNameFunctions[tag["op"]].value.function return tag_actions def _secure_hash( - dataset: Dataset, project_slug: str, tag: tuple, mrn: str, accession_number: str + dataset: Dataset, + project_slug: str, + tag: tuple, ) -> None: """ Use the hasher API to consistently but securely hash ids later used for linking. 
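The hashing request itself happens in `_hash_values` via the hasher API; the sketch below only illustrates the general shape of such a call - the host, route and parameter names are hypothetical rather than the project's actual hasher endpoints.

```python
import requests

HASHER_API_URL = "http://hasher-api:8000"  # hypothetical host/port


def hash_for_linking(value: str, project_slug: str, hash_len: int = 64) -> str:
    """Ask a hashing service for a keyed, fixed-length hash of ``value`` (illustrative endpoint)."""
    response = requests.get(
        f"{HASHER_API_URL}/hash",  # hypothetical route
        params={"message": value, "project": project_slug, "length": hash_len},
        timeout=10,
    )
    response.raise_for_status()
    return response.text
```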
@@ -163,24 +216,16 @@ def _secure_hash( if tag in dataset: message = f"Securely hashing: (0x{grp:04x},0x{el:04x})" logger.debug(f"\t{message}") - if grp == 0x0010 and el == 0x0020: # Patient ID - pat_value = mrn + accession_number - - hashed_value = _hash_values(pat_value, project_slug) - # Query PIXL database - existing_image = query_db(project_slug, mrn, accession_number) - # Insert the hashed_value into the PIXL database - add_hashed_identifier_and_save_to_db(existing_image, hashed_value) - elif dataset[grp, el].VR == "SH": + if dataset[grp, el].VR == "LO": pat_value = str(dataset[grp, el].value) - hashed_value = _hash_values(pat_value, project_slug, hash_len=16) + hashed_value = _hash_values(pat_value, project_slug, hash_len=64) + else: + # This is because we currently only hash patient id specifically. + # Other types can be added easily if needed. + raise PixlSkipInstanceError(f"Tag {tag} is not an LO VR type, cannot hash.") dataset[grp, el].value = hashed_value - else: - message = f"Missing linking variable (0x{grp:04x},0x{el:04x})" - logger.warning(f"\t{message}") - def _hash_values(pat_value: str, project_slug: str, hash_len: int = 0) -> str: """ @@ -201,16 +246,16 @@ def _hash_values(pat_value: str, project_slug: str, hash_len: int = 0) -> str: return response.text -def enforce_whitelist( +def _enforce_allowlist( dataset: Dataset, tag_scheme: list[dict], recursive: bool ) -> None: """ - Enforce the whitelist on the dataset. + Enforce the allowlist on the dataset. """ - dataset.walk(lambda ds, de: _whitelist_tag(ds, de, tag_scheme), recursive) + dataset.walk(lambda ds, de: _allowlist_tag(ds, de, tag_scheme), recursive) -def _whitelist_tag(dataset: Dataset, de: DataElement, tag_scheme: list[dict]) -> None: +def _allowlist_tag(dataset: Dataset, de: DataElement, tag_scheme: list[dict]) -> None: """Delete element if it is not in the tagging schemе.""" tag_dict = _scheme_list_to_dict(tag_scheme) if (de.tag.group, de.tag.element) in tag_dict and tag_dict[ @@ -218,3 +263,39 @@ def _whitelist_tag(dataset: Dataset, de: DataElement, tag_scheme: list[dict]) -> ]["op"] != "delete": return del dataset[de.tag] + + +def _parse_validation_results(results: dict) -> str: + """Parse the validation results into a human-readable string.""" + res_str = "" + for key, value in results.items(): + res_str += f"{key}: {value}\n" + return res_str + + +def _generate_pseudo_uids_and_synchronise_pixl_db( + dataset: Dataset, + project_name: str, + identifiable_study_info: StudyInfo, +) -> None: + """ + Synchronise the anonymisation with the pixl database. + + If the pixl database has a value for set for the pseudo identifier, then update the + DICOM data with the value, otherwise save the anonymised data from the DICOM dataset + to the pixl database. 
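For reference, `_enforce_allowlist` above relies on pydicom's `Dataset.walk`; a simplified stand-alone sketch of that pattern, using a plain set of (group, element) pairs instead of the full tag scheme, is shown below.

```python
from pydicom import Dataset
from pydicom.dataelem import DataElement


def drop_unlisted_tags(dataset: Dataset, allowed: set[tuple[int, int]]) -> None:
    """Delete every element whose (group, element) pair is not in the allow-list."""

    def _callback(ds: Dataset, element: DataElement) -> None:
        if (element.tag.group, element.tag.element) not in allowed:
            del ds[element.tag]

    dataset.walk(_callback, recursive=True)


ds = Dataset()
ds.PatientID = "12345"
ds.PatientName = "Foo^Bar"
drop_unlisted_tags(ds, allowed={(0x0010, 0x0020)})  # keep only PatientID
assert "PatientID" in ds and "PatientName" not in ds
```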
+ + - pseudo_study_uid -> DICOM study uid tag + - pseudo_patient_id -> DICOM patient identifier tag + """ + dataset[0x0020, 0x000D].value = get_uniq_pseudo_study_uid_and_update_db( + project_name, + identifiable_study_info, + ) + + anonymised_study_info = get_study_info(dataset) + dataset[0x0010, 0x0020].value = get_pseudo_patient_id_and_update_db( + project_name, + identifiable_study_info, + anonymised_study_info.mrn, + ) diff --git a/pixl_dcmd/src/pixl_dcmd/tagrecording.py b/pixl_dcmd/src/pixl_dcmd/tagrecording.py index ca1287874..b3d9f7ab9 100644 --- a/pixl_dcmd/src/pixl_dcmd/tagrecording.py +++ b/pixl_dcmd/src/pixl_dcmd/tagrecording.py @@ -24,9 +24,11 @@ def record_dicom_headers(receivedDicom: bytes) -> None: - with importlib.resources.files("pixl_dcmd").joinpath( - "resources/recorded-headers.yml" - ).open() as f: + with ( + importlib.resources.files("pixl_dcmd") + .joinpath("resources/recorded-headers.yml") + .open() as f + ): recording_config = yaml.safe_load(f) dataset = dcmread(BytesIO(receivedDicom)) with _header_log_path().open("a") as f: diff --git a/pixl_dcmd/tests/conftest.py b/pixl_dcmd/tests/conftest.py index 198bdd913..15011f94a 100644 --- a/pixl_dcmd/tests/conftest.py +++ b/pixl_dcmd/tests/conftest.py @@ -11,21 +11,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""CLI testing fixtures.""" +"""DCMD testing fixtures.""" from __future__ import annotations -from collections.abc import Generator import datetime import os import pathlib import tempfile +import typing +from collections.abc import Generator from typing import Optional +from pixl_dcmd._dicom_helpers import get_study_info +from core.project_config import load_project_config import pytest import pytest_pixl.dicom import requests from core.db.models import Base, Extract, Image +from pydicom import Dataset, dcmread +from pytest_pixl.dicom import generate_dicom_dataset from sqlalchemy import Engine, create_engine from sqlalchemy.orm import Session, sessionmaker @@ -36,30 +41,59 @@ os.environ["PROJECT_CONFIGS_DIR"] = str( pathlib.Path(__file__).parents[2] / "projects/configs" ) +from pathlib import Path +from decouple import config + + +if typing.TYPE_CHECKING: + from core.project_config.pixl_config_model import PixlConfig + + +PROJECT_CONFIGS_DIR = Path(config("PROJECT_CONFIGS_DIR")) STUDY_DATE = datetime.date.fromisoformat("2023-01-01") TEST_PROJECT_SLUG = "test-extract-uclh-omop-cdm" -EXPORTED_MRN = "987654321" -EXPORTED_ACCESSION_NUMBER = "AA12345601" + +@pytest.fixture() +def exported_dicom_dataset() -> Dataset: + exported_dicom_file = ( + pathlib.Path(__file__).parents[2] / "test/resources/Dicom1.dcm" + ) + return dcmread(exported_dicom_file) @pytest.fixture() -def rows_in_session(db_session) -> Session: +def not_exported_dicom_dataset() -> Dataset: + exported_dicom_file = ( + pathlib.Path(__file__).parents[2] / "test/resources/Dicom2.dcm" + ) + return dcmread(exported_dicom_file) + + +@pytest.fixture() +def rows_in_session( + db_session, exported_dicom_dataset, not_exported_dicom_dataset +) -> Session: """Insert a test row for each table, returning the session for use in tests.""" extract = Extract(slug=TEST_PROJECT_SLUG) + exported_study_info = get_study_info(exported_dicom_dataset) + not_exported_study_info = get_study_info(not_exported_dicom_dataset) + image_exported = Image( - accession_number=EXPORTED_ACCESSION_NUMBER, + mrn=exported_study_info.mrn, + 
accession_number=exported_study_info.accession_number, + study_uid=exported_study_info.study_uid, study_date=STUDY_DATE, - mrn=EXPORTED_MRN, extract=extract, exported_at=datetime.datetime.now(tz=datetime.timezone.utc), ) image_not_exported = Image( - accession_number="AA12345605", + mrn=not_exported_study_info.mrn, + accession_number=not_exported_study_info.accession_number, + study_uid=not_exported_study_info.study_uid, study_date=STUDY_DATE, - mrn="987654321", extract=extract, ) with db_session: @@ -70,14 +104,80 @@ def rows_in_session(db_session) -> Session: @pytest.fixture() -def row_for_dicom_testing(db_session) -> Session: - """Insert a test row for each table, returning the session for use in tests.""" +def row_for_testing_image_with_pseudo_patient_id( + db_session, not_exported_dicom_dataset +) -> Session: + """Insert a test row for the image with a pseudo patient ID.""" + extract = Extract(slug=TEST_PROJECT_SLUG) + + study_info = get_study_info(not_exported_dicom_dataset) + image = Image( + mrn=study_info.mrn, + accession_number=study_info.accession_number, + study_uid=study_info.study_uid, + study_date=STUDY_DATE, + extract=extract, + pseudo_patient_id="AAA00", + ) + with db_session: + db_session.add_all([extract, image]) + db_session.commit() + + return db_session + + +def ids_for_parameterised_test(val: pathlib.Path) -> str: + """Generate test ID for parameterised tests""" + return str(val.stem) + + +@pytest.fixture() +@pytest.mark.parametrize( + ("yaml_file"), PROJECT_CONFIGS_DIR.glob("*.yaml"), ids=ids_for_parameterised_test +) +def row_for_dicom_testing(db_session, yaml_file) -> Session: + """ + Insert a test row for the fake DICOM dataset generated by + pytest_pixl.dicom.generate_dicom_dataset. + """ + + config = load_project_config(yaml_file.stem) + modality = config.project.modalities[0] + + extract = Extract(slug=config.project.name) + ds = pytest_pixl.dicom.generate_dicom_dataset(Modality=modality) + study_info = get_study_info(ds) + + image_not_exported = Image( + mrn=study_info.mrn, + accession_number=study_info.accession_number, + study_uid=study_info.study_uid, + study_date=STUDY_DATE, + extract=extract, + ) + with db_session: + db_session.add_all([extract, image_not_exported]) + db_session.commit() + + return db_session + + +@pytest.fixture() +def row_for_single_dicom_testing(db_session) -> Session: + """ + Insert a test row for the fake DICOM dataset generated by + pytest_pixl.dicom.generate_dicom_dataset. + """ + extract = Extract(slug=TEST_PROJECT_SLUG) + ds = pytest_pixl.dicom.generate_dicom_dataset() + study_info = get_study_info(ds) image_not_exported = Image( - accession_number="BB01234567", + mrn=study_info.mrn, + accession_number=study_info.accession_number, + study_uid=study_info.study_uid, study_date=STUDY_DATE, - mrn="ID123456", extract=extract, ) with db_session: @@ -176,3 +276,44 @@ def mock_get(key, default) -> Optional[str]: return os.environ.get(key, default) monkeypatch.setattr(os.environ, "get", mock_get) + + +@pytest.fixture() +def vanilla_dicom_image_DX(row_for_dicom_testing) -> Dataset: + """ + A DICOM image with diffusion data to test the anonymisation process. + Private tags were added to match the tag operations defined in the project config, so we can + test whether the anonymisation process works as expected when defining overrides. + The row_for_mri_dicom_testing dependency is to make sure the database is populated with the + project slug, which is used to anonymise the DICOM image. 
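The `ids_for_parameterised_test` helper above exists to give readable test IDs when parametrising over every project config YAML; a minimal sketch of that pattern, assuming a `projects/configs` directory of YAML files, follows.

```python
from pathlib import Path

import pytest

CONFIG_DIR = Path("projects/configs")  # mirrors PROJECT_CONFIGS_DIR in the fixtures above


def ids_for_parameterised_test(val: Path) -> str:
    """Use the file stem (the project slug) as the test ID."""
    return str(val.stem)


@pytest.mark.parametrize("yaml_file", CONFIG_DIR.glob("*.yaml"), ids=ids_for_parameterised_test)
def test_each_config_is_a_file(yaml_file: Path) -> None:
    assert yaml_file.is_file()
```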
+ """ + return generate_dicom_dataset(Modality="DX") + + +@pytest.fixture() +def vanilla_single_dicom_image_DX(row_for_single_dicom_testing) -> Dataset: + """ + A DICOM image with diffusion data to test the anonymisation process. + Private tags were added to match the tag operations defined in the project config, so we can + test whether the anonymisation process works as expected when defining overrides. + The row_for_single_dicom_testing dependency is to make sure the database is populated with the + project slug, which is used to anonymise the DICOM image. + """ + return generate_dicom_dataset(Modality="DX") + + +@pytest.fixture() +def vanilla_dicom_image_MR(row_for_dicom_testing) -> Dataset: + """ + A DICOM image with MX data to test the anonymisation process. + Private tags were added to match the tag operations defined in the project config, so we can + test whether the anonymisation process works as expected when defining overrides. + The row_for_mri_dicom_testing dependency is to make sure the database is populated with the + project slug, which is used to anonymise the DICOM image. + """ + return generate_dicom_dataset(Modality="MR") + + +@pytest.fixture(scope="module") +def test_project_config() -> PixlConfig: + return load_project_config(TEST_PROJECT_SLUG) diff --git a/pixl_dcmd/tests/test_database.py b/pixl_dcmd/tests/test_database.py new file mode 100644 index 000000000..5c5a1b455 --- /dev/null +++ b/pixl_dcmd/tests/test_database.py @@ -0,0 +1,116 @@ +# Copyright (c) 2022 University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import datetime + +import pytest +from core.db.models import Extract, Image +from pixl_dcmd._database import ( + get_unexported_image, + get_uniq_pseudo_study_uid_and_update_db, + get_pseudo_patient_id_and_update_db, +) +from pixl_dcmd._dicom_helpers import StudyInfo +from sqlalchemy.orm import Session + +STUDY_DATE = datetime.date.fromisoformat("2023-01-01") +TEST_PROJECT_SLUG = "test-extract-uclh-omop-cdm" +TEST_STUDY_INFO = StudyInfo( + mrn="123456", accession_number="abcde", study_uid="1.2.3.4.5" +) +TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID = StudyInfo( + mrn="234567", accession_number="bcdef", study_uid="2.3.4.5.6" +) + + +@pytest.fixture() +def rows_for_database_testing(db_session) -> Session: + """ + Insert a test row for the fake DICOM dataset generated by + pytest_pixl.dicom.generate_dicom_dataset. + """ + extract = Extract(slug=TEST_PROJECT_SLUG) + + existing_image = Image( + mrn=TEST_STUDY_INFO.mrn, + accession_number=TEST_STUDY_INFO.accession_number, + study_uid=TEST_STUDY_INFO.study_uid, + study_date=STUDY_DATE, + extract=extract, + ) + + existing_image_with_pseudo_study_uid = Image( + mrn=TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.mrn, + accession_number=TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.accession_number, + study_uid=TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.study_uid, + study_date=STUDY_DATE, + extract=extract, + # This should be a valid VR UI value! 
+ # https://dicom.nema.org/medical/dicom/current/output/html/part05.html#table_6.2-1 + pseudo_study_uid="0.0.0.0.0.0", + pseudo_patient_id=TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.mrn, + ) + + with db_session: + db_session.add_all( + [extract, existing_image, existing_image_with_pseudo_study_uid] + ) + db_session.commit() + + return db_session + + +def test_get_uniq_pseudo_study_uid_and_update_db(rows_for_database_testing, db_session): + """ + GIVEN an existing image that already has a pseudo_study_uid + WHEN we query the database for that image + THEN the function should return the existing pseudo_study_uid. + """ + pseudo_study_uid = get_uniq_pseudo_study_uid_and_update_db( + TEST_PROJECT_SLUG, TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID + ) + assert pseudo_study_uid == "0.0.0.0.0.0" + + +def test_get_pseudo_patient_id_and_update_db(rows_for_database_testing, db_session): + """ + GIVEN an existing image that already has pseudo_patient_id in the database + WHEN we query the dataset for that image + THEN the function should return the existing pseudo_patient_id + """ + get_pseudo_patient_id_and_update_db( + TEST_PROJECT_SLUG, + TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID, + TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.mrn, + ) + result = get_unexported_image( + TEST_PROJECT_SLUG, TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID, db_session + ) + assert result.pseudo_patient_id == TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.mrn + + +def test_get_unexported_image_fallback(rows_for_database_testing, db_session): + """ + GIVEN a database entry with a non-exported image + WHEN we query that image with a non-matching study UID + THEN the querying should fall back to querying on MRN and accession number and return the image. + """ + wrong_uid_info = StudyInfo( + mrn="123456", + accession_number="abcde", + study_uid="nope", + ) + result = get_unexported_image(TEST_PROJECT_SLUG, wrong_uid_info, db_session) + assert result.study_uid == TEST_STUDY_INFO.study_uid diff --git a/pixl_dcmd/tests/test_dicom_validator.py b/pixl_dcmd/tests/test_dicom_validator.py new file mode 100644 index 000000000..fe58adf54 --- /dev/null +++ b/pixl_dcmd/tests/test_dicom_validator.py @@ -0,0 +1,87 @@ +# Copyright (c) 2022 University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
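Ahead of the validator tests in the next file, here is a sketch of driving `dicom-validator` directly, mirroring the calls that the `DicomValidator` wrapper makes; the DICOM file path is illustrative and the standard edition is cached under the library's default location on first use.

```python
import logging
from pathlib import Path

from dicom_validator.spec_reader.edition_reader import EditionReader
from dicom_validator.validator.iod_validator import IODValidator
from pydicom import dcmread

# Same setup as DicomValidator.__init__ above: default standard path, explicit edition.
standard_path = str(Path.home() / "dicom-validator")
edition_reader = EditionReader(standard_path)
revision_dir = edition_reader.get_revision("2024e", False)
dicom_info = EditionReader.load_dicom_info(Path(revision_dir, "json"))

dataset = dcmread("instance_001.dcm")  # illustrative path
errors = IODValidator(dataset, dicom_info, log_level=logging.ERROR).validate()
print(errors or "no IOD validation errors")
```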
+ +from __future__ import annotations + +import pytest +from pixl_dcmd._dicom_helpers import DicomValidator +from pixl_dcmd.main import anonymise_dicom +from pydicom import Dataset + + +def test_validation_check_works(vanilla_single_dicom_image_DX: Dataset) -> None: + """ + GIVEN a DICOM dataset + WHEN the dataset is validated against itself (withouth anonymisation) + THEN no errors should be raised + """ + validator = DicomValidator() + validator.validate_original(vanilla_single_dicom_image_DX) + assert not validator.validate_anonymised(vanilla_single_dicom_image_DX) + + +def test_validation_after_anonymisation_works( + vanilla_single_dicom_image_DX: Dataset, + test_project_config, +) -> None: + """ + GIVEN a DICOM dataset + WHEN the dataset is validated after anonymisation + THEN no errors should be raised + """ + validator = DicomValidator() + validator.validate_original(vanilla_single_dicom_image_DX) + anonymise_dicom(vanilla_single_dicom_image_DX, config=test_project_config) + + assert not validator.validate_anonymised(vanilla_single_dicom_image_DX) + + +@pytest.fixture() +def non_compliant_dicom_image(vanilla_single_dicom_image_DX: Dataset) -> Dataset: + """A DICOM dataset that is not compliant with the DICOM standard.""" + del vanilla_single_dicom_image_DX.PatientName + return vanilla_single_dicom_image_DX + + +def test_validation_passes_for_non_compliant_dicom(non_compliant_dicom_image) -> None: + """ + GIVEN a DICOM dataset that is not compliant with the DICOM standard + WHEN the dataset is validated after anonymisation + THEN no errors should be raised + """ + validator = DicomValidator() + validator.validate_original(non_compliant_dicom_image) + assert not validator.validate_anonymised(non_compliant_dicom_image) + + +def test_validation_fails_after_invalid_tag_modification( + vanilla_single_dicom_image_DX, +) -> None: + """ + GIVEN a DICOM dataset + WHEN an invalid tag operation is performed (e.g. 
deleting a required tag) + THEN validation should return a non-empty list of errors + """ + validator = DicomValidator() + validator.validate_original(vanilla_single_dicom_image_DX) + del vanilla_single_dicom_image_DX.PatientName + validation_result = validator.validate_anonymised(vanilla_single_dicom_image_DX) + + assert len(validation_result) == 1 + assert "Patient" in validation_result.keys() + assert len(validation_result["Patient"]) == 1 + assert ( + "Tag (0010,0010) (Patient's Name) is missing" + in validation_result["Patient"].keys() + ) diff --git a/pixl_dcmd/tests/test_main.py b/pixl_dcmd/tests/test_main.py index 880dd074e..951c5870d 100644 --- a/pixl_dcmd/tests/test_main.py +++ b/pixl_dcmd/tests/test_main.py @@ -16,6 +16,8 @@ import pathlib import re from pathlib import Path +import logging +import typing import nibabel import numpy as np @@ -24,49 +26,56 @@ import sqlalchemy from core.db.models import Image from core.dicom_tags import ( - DICOM_TAG_PROJECT_NAME, PrivateDicomTag, add_private_tag, create_private_tag, ) from core.project_config import load_project_config, load_tag_operations +from core.project_config.pixl_config_model import load_config_and_validate from decouple import config -from pydicom.data import get_testdata_file - -from pydicom.dataset import Dataset - -from pytest_pixl.dicom import generate_dicom_dataset -from pytest_pixl.helpers import run_subprocess +from pixl_dcmd._dicom_helpers import get_study_info from pixl_dcmd.main import ( + anonymise_dicom_and_update_db, _anonymise_dicom_from_scheme, - enforce_whitelist, + anonymise_and_validate_dicom, anonymise_dicom, - should_exclude_series, + _enforce_allowlist, + _should_exclude_series, ) +from pytest_pixl.dicom import generate_dicom_dataset +from pytest_pixl.helpers import run_subprocess +from conftest import ids_for_parameterised_test + +if typing.TYPE_CHECKING: + from core.project_config.pixl_config_model import PixlConfig PROJECT_CONFIGS_DIR = Path(config("PROJECT_CONFIGS_DIR")) TEST_PROJECT_SLUG = "test-extract-uclh-omop-cdm" @pytest.fixture(scope="module") -def tag_scheme() -> list[dict]: +def tag_scheme(test_project_config: PixlConfig) -> list[dict]: """Base tag scheme for testing.""" - tag_ops = load_tag_operations(load_project_config(TEST_PROJECT_SLUG)) + tag_ops = load_tag_operations(test_project_config) return tag_ops.base[0] -def _mri_diffusion_tags(manufacturer: str = "Philips") -> list[PrivateDicomTag]: +def _get_mri_diffusion_tags( + config: PixlConfig, + manufacturer: str, +) -> list[PrivateDicomTag]: """ Private DICOM tags for testing the anonymisation process. These tags from `/projects/configs/tag-operations/manufacturer-overrides/mri-diffusion.yaml` so we can test whether the manufacturer overrides work during anonymisation """ - project_config = load_project_config(TEST_PROJECT_SLUG) - tag_ops = load_tag_operations(project_config) + tag_ops = load_tag_operations(config) + mri_diffusion_overrides = tag_ops.manufacturer_overrides[0] + manufacturer_overrides = [ override - for override in tag_ops.manufacturer_overrides + for override in mri_diffusion_overrides if re.search(override["manufacturer"], manufacturer, re.IGNORECASE) ][0] @@ -77,27 +86,7 @@ def _mri_diffusion_tags(manufacturer: str = "Philips") -> list[PrivateDicomTag]: @pytest.fixture() -def vanilla_dicom_image() -> Dataset: - """ - A DICOM image with diffusion data to test the anonymisation process. 
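A brief aside on the manufacturer overrides used by `_get_mri_diffusion_tags` and `merge_tag_schemes`: overrides are selected with a case-insensitive regex search against the dataset's `Manufacturer`, roughly as in this sketch with made-up override entries.

```python
import re

# Made-up override entries, shaped like the manufacturer-override YAML files.
overrides = [
    {"manufacturer": "philips", "tags": [{"name": "tag1", "group": 0x2001, "element": 0x1003, "op": "keep"}]},
    {"manufacturer": "siemens", "tags": []},
]

manufacturer = "Philips Medical Systems"
matching = [
    override
    for override in overrides
    if re.search(override["manufacturer"], manufacturer, re.IGNORECASE)
]
assert [override["manufacturer"] for override in matching] == ["philips"]
```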
- Private tags were added to match the tag operations defined in the project config, so we can - test whether the anonymisation process works as expected when defining overrides. - """ - ds = generate_dicom_dataset(Modality="DX") - - # Make sure the project name tag is added for anonymisation to work - add_private_tag(ds, DICOM_TAG_PROJECT_NAME) - # Update the project name tag to a known value - block = ds.private_block( - DICOM_TAG_PROJECT_NAME.group_id, DICOM_TAG_PROJECT_NAME.creator_string - ) - ds[block.get_tag(DICOM_TAG_PROJECT_NAME.offset_id)].value = TEST_PROJECT_SLUG - - return ds - - -@pytest.fixture() -def mri_diffusion_dicom_image() -> Dataset: +def mri_diffusion_dicom_image(test_project_config: PixlConfig) -> pydicom.Dataset: """ A DICOM image with diffusion data to test the anonymisation process. Private tags were added to match the tag operations defined in the project config, so we can @@ -105,52 +94,131 @@ def mri_diffusion_dicom_image() -> Dataset: """ manufacturer = "Philips" ds = generate_dicom_dataset(Manufacturer=manufacturer, Modality="DX") - tags = _mri_diffusion_tags(manufacturer) + tags = _get_mri_diffusion_tags( + config=test_project_config, manufacturer=manufacturer + ) for tag in tags: add_private_tag(ds, tag) - # Make sure the project name tag is added for anonymisation to work - add_private_tag(ds, DICOM_TAG_PROJECT_NAME) - # Update the project name tag to a known value - block = ds.private_block( - DICOM_TAG_PROJECT_NAME.group_id, DICOM_TAG_PROJECT_NAME.creator_string - ) - ds[block.get_tag(DICOM_TAG_PROJECT_NAME.offset_id)].value = TEST_PROJECT_SLUG - return ds -def test_enforce_whitelist_removes_overlay_plane() -> None: +def test_enforce_allowlist_removes_overlay_plane() -> None: """Checks that overlay planes are removed.""" - ds = get_testdata_file( + ds = pydicom.data.get_testdata_file( "MR-SIEMENS-DICOM-WithOverlays.dcm", read=True, download=True ) assert (0x6000, 0x3000) in ds - enforce_whitelist(ds, {}, recursive=True) + _enforce_allowlist(ds, {}, recursive=True) assert (0x6000, 0x3000) not in ds -def test_anonymisation(row_for_dicom_testing, vanilla_dicom_image: Dataset) -> None: +def test_anonymisation( + vanilla_single_dicom_image_DX: pydicom.Dataset, + test_project_config: PixlConfig, +) -> None: """ Test whether anonymisation works as expected on a vanilla DICOM dataset """ - orig_patient_id = vanilla_dicom_image.PatientID - orig_patient_name = vanilla_dicom_image.PatientName + orig_patient_id = vanilla_single_dicom_image_DX.PatientID + orig_patient_name = vanilla_single_dicom_image_DX.PatientName + orig_study_date = vanilla_single_dicom_image_DX.StudyDate + + anonymise_dicom(vanilla_single_dicom_image_DX, config=test_project_config) + + assert vanilla_single_dicom_image_DX.PatientID != orig_patient_id + assert vanilla_single_dicom_image_DX.PatientName != orig_patient_name + assert vanilla_single_dicom_image_DX.StudyDate != orig_study_date + + +def test_anonymise_unimplemented_tag( + vanilla_single_dicom_image_DX: pydicom.Dataset, + test_project_config: PixlConfig, +) -> None: + """ + GIVEN DICOM data with an OB data type tag within a sequence + WHEN anonymise_dicom is run that has "replace" tag operation on the sequence, but not the OB element + THEN the sequence should exist, but not the OB element + + VR OB is not implemented by the dicom anonymisation library, so this + is testing that we can still successfully de-identify data with this data type + """ + nested_ds = pydicom.Dataset() + nested_block = nested_ds.private_block(0x0013, "VR OB 
CREATOR", create=True) + nested_block.add_new(0x0011, "OB", b"") + + # create private sequence tag with the nested dataset + block = vanilla_single_dicom_image_DX.private_block( + 0x0013, "VR OB CREATOR", create=True + ) + block.add_new(0x0010, "SQ", [nested_ds]) + + anonymise_dicom(vanilla_single_dicom_image_DX, config=test_project_config) + + assert (0x0013, 0x0010) in vanilla_single_dicom_image_DX + assert (0x0013, 0x1010) in vanilla_single_dicom_image_DX + sequence = vanilla_single_dicom_image_DX[(0x0013, 0x1010)] + assert (0x0013, 0x1011) not in sequence[0] + + +def test_anonymise_and_validate_as_external_user( + test_project_config: PixlConfig, +) -> None: + """ + GIVEN an example MR dataset and configuration to anonymise this + WHEN the anonymisation and validation is called not using PIXL infrastructure + THEN the dataset is anonymised inplace + + Note: If we update this test, make sure to update the documentation stub. + Or if we end up building docs, then convert this test to a doctest + """ + dataset_path = pydicom.data.get_testdata_file( + "MR-SIEMENS-DICOM-WithOverlays.dcm", download=True + ) + dataset = pydicom.dcmread(dataset_path) - # Sanity check: study date should be present before anonymisation - assert "StudyDate" in vanilla_dicom_image + config_path = ( + pathlib.Path(__file__).parents[2] / "projects/configs/test-external-user.yaml" + ) + config = load_config_and_validate(config_path) - anonymise_dicom(vanilla_dicom_image) + validation_issues = anonymise_and_validate_dicom(dataset, config=config) + + assert validation_issues == {} + assert dataset != pydicom.dcmread(dataset_path) + + +@pytest.mark.parametrize( + ("yaml_file"), PROJECT_CONFIGS_DIR.glob("*.yaml"), ids=ids_for_parameterised_test +) +def test_anonymise_and_validate_dicom(caplog, request, yaml_file) -> None: + """ + Test whether anonymisation and validation works as expected on a vanilla DICOM dataset + GIVEN a project configuration with tag operations that creates a DICOM dataset + WHEN the anonymisation and validation process is run + THEN the dataset should be anonymised and validated without any warnings or errors + """ + caplog.clear() + caplog.set_level(logging.WARNING) + config = load_project_config(yaml_file.stem) + modality = config.project.modalities[0] + dicom_image = request.getfixturevalue(f"vanilla_dicom_image_{modality}") + + validation_errors = anonymise_and_validate_dicom( + dicom_image, + config=config, + ) - assert vanilla_dicom_image.PatientID != orig_patient_id - assert vanilla_dicom_image.PatientName != orig_patient_name - assert "StudyDate" not in vanilla_dicom_image + assert "WARNING" not in [record.levelname for record in caplog.records] + assert not validation_errors +@pytest.mark.usefixtures("row_for_single_dicom_testing") def test_anonymisation_with_overrides( - row_for_dicom_testing, mri_diffusion_dicom_image: Dataset + mri_diffusion_dicom_image: pydicom.Dataset, + test_project_config: PixlConfig, ) -> None: """ Test that the anonymisation process works with manufacturer overrides. 
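The sequence tests above lean on pydicom's private-block helpers; a tiny stand-alone illustration of that API, reusing the fixtures' "UCLH PIXL" creator string, is shown below.

```python
from pydicom import Dataset

ds = Dataset()
# Reserve a private block for the creator and add one element inside it.
block = ds.private_block(0x0011, "UCLH PIXL", create=True)
block.add_new(0x0001, "LO", "example private value")

# The creator label lands at (0011,0010); the element is offset into the block at (0011,1001).
assert ds[0x0011, 0x0010].value == "UCLH PIXL"
assert ds.get_private_item(0x0011, 0x0001, "UCLH PIXL").value == "example private value"
```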
@@ -160,12 +228,12 @@ def test_anonymisation_with_overrides( """ # Sanity check - # (0x2001, 0x1003) is one of the tags whitelisted by the overrides for Philips manufacturer + # (0x2001, 0x1003) is one of the tags allow-listed by the overrides for Philips manufacturer assert (0x2001, 0x1003) in mri_diffusion_dicom_image original_patient_id = mri_diffusion_dicom_image.PatientID original_private_tag = mri_diffusion_dicom_image[(0x2001, 0x1003)] - anonymise_dicom(mri_diffusion_dicom_image) + anonymise_dicom(mri_diffusion_dicom_image, config=test_project_config) # Whitelisted tags should still be present assert (0x0010, 0x0020) in mri_diffusion_dicom_image @@ -174,55 +242,146 @@ def test_anonymisation_with_overrides( assert mri_diffusion_dicom_image[(0x2001, 0x1003)] == original_private_tag -def test_image_already_exported_throws(rows_in_session): +@pytest.mark.usefixtures("rows_in_session") +def test_image_already_exported_throws(test_project_config, exported_dicom_dataset): """ GIVEN a dicom image which has no un-exported rows in the pipeline database WHEN the dicom tag scheme is applied THEN an exception will be thrown as """ - exported_dicom = pathlib.Path(__file__).parents[2] / "test/resources/Dicom1.dcm" - input_dataset = pydicom.dcmread(exported_dicom) - - # Make sure the project name tag is added for anonymisation to work - add_private_tag(input_dataset, DICOM_TAG_PROJECT_NAME) - # Update the project name tag to a known value - block = input_dataset.private_block( - DICOM_TAG_PROJECT_NAME.group_id, DICOM_TAG_PROJECT_NAME.creator_string - ) - input_dataset[ - block.get_tag(DICOM_TAG_PROJECT_NAME.offset_id) - ].value = TEST_PROJECT_SLUG with pytest.raises(sqlalchemy.exc.NoResultFound): - anonymise_dicom(input_dataset) + anonymise_dicom_and_update_db( + exported_dicom_dataset, + config=test_project_config, + ) -def test_pseudo_identifier_processing(rows_in_session): +def test_pseudo_identifier_processing( + rows_in_session, + monkeypatch, + exported_dicom_dataset, + not_exported_dicom_dataset, + test_project_config, +): """ GIVEN a dicom image that hasn't been exported in the pipeline db WHEN the dicom tag scheme is applied - THEN the patient identifier tag should be the mrn and accession hashed - and the pipeline db row should now have the fake hash + THEN the patient identifier tag should be the mrn hashed + the study instance uid should be replaced with a new uid + and the db should have the pseudo study id """ - exported_dicom = pathlib.Path(__file__).parents[2] / "test/resources/Dicom2.dcm" - dataset = pydicom.dcmread(exported_dicom) + exported_study_info = get_study_info(exported_dicom_dataset) + not_exported_study_info = get_study_info(not_exported_dicom_dataset) + + class FakeUID: + i = 1 + + @classmethod + def fake_uid(cls): + uid = f"2.25.{cls.i}" + cls.i += 1 + return pydicom.uid.UID(uid) + + monkeypatch.setattr("pixl_dcmd._database.generate_uid", FakeUID.fake_uid) + other_image = ( + rows_in_session.query(Image) + .filter(Image.accession_number == exported_study_info.accession_number) + .one() + ) + other_image.pseudo_study_uid = "2.25.1" + rows_in_session.add(other_image) + rows_in_session.commit() - accession_number = "AA12345605" - mrn = "987654321" - fake_hash = "-".join(list(f"{mrn}{accession_number}")) + mrn = exported_study_info.mrn + fake_hash = "-".join(list(mrn)) print("fake_hash = ", fake_hash) - anonymise_dicom(dataset) + + anonymise_dicom_and_update_db( + not_exported_dicom_dataset, config=test_project_config + ) + image = ( rows_in_session.query(Image) - 
.filter(Image.accession_number == "AA12345605") + .filter(Image.accession_number == not_exported_study_info.accession_number) .one() ) print("after tags applied") - assert dataset[0x0010, 0x0020].value == fake_hash - assert image.hashed_identifier == fake_hash + assert not_exported_dicom_dataset[0x0010, 0x0020].value == fake_hash + assert image.pseudo_study_uid == not_exported_dicom_dataset[0x0020, 0x000D].value + assert image.pseudo_study_uid == "2.25.2" # 2nd image in the db + + +def test_pseudo_patient_id_processing( + row_for_testing_image_with_pseudo_patient_id, + not_exported_dicom_dataset, + test_project_config, +): + """ + GIVEN an `Image` entity in the database which has a `pseudo_patient_id` set + WHEN the matching DICOM data is anonymised + THEN the DICOM's patient ID tag should be the original `pseudo_study_id` + value from the database, the database's `pseudo_patient_id` shouldn't have changed + """ + study_info = get_study_info(not_exported_dicom_dataset) + original_image: Image = ( + row_for_testing_image_with_pseudo_patient_id.query(Image) + .filter(Image.accession_number == study_info.accession_number) + .one() + ) + assert ( + not_exported_dicom_dataset[0x0010, 0x0020].value + != original_image.pseudo_patient_id + ) + + anonymise_dicom_and_update_db( + not_exported_dicom_dataset, config=test_project_config + ) + + anonymised_image: Image = ( + row_for_testing_image_with_pseudo_patient_id.query(Image) + .filter(Image.accession_number == study_info.accession_number) + .one() + ) + + assert original_image.pseudo_patient_id == anonymised_image.pseudo_patient_id + assert ( + not_exported_dicom_dataset[0x0010, 0x0020].value + == original_image.pseudo_patient_id + ) + + +def test_no_pseudo_patient_id_processing( + rows_in_session, + not_exported_dicom_dataset, + test_project_config, +): + """ + GIVEN an `Image` entity in the database which doesn't have a `pseudo_patient_id` set + WHEN the matching DICOM data is anonymised + THEN database's `pseudo_patient_id` field should be set, and match the value from the + DICOM's patient identifier tag at the end of anonymisation + """ + study_info = get_study_info(not_exported_dicom_dataset) + + anonymise_dicom_and_update_db( + not_exported_dicom_dataset, config=test_project_config + ) + + anonymised_image: Image = ( + rows_in_session.query(Image) + .filter(Image.accession_number == study_info.accession_number) + .one() + ) + + assert anonymised_image.pseudo_patient_id is not None + assert ( + anonymised_image.pseudo_patient_id + == not_exported_dicom_dataset[0x0010, 0x0020].value + ) @pytest.fixture() -def dicom_series_to_keep() -> list[Dataset]: +def dicom_series_to_keep() -> list[pydicom.Dataset]: series = [ "", "whatever", @@ -231,7 +390,7 @@ def dicom_series_to_keep() -> list[Dataset]: @pytest.fixture() -def dicom_series_to_exclude() -> list[Dataset]: +def dicom_series_to_exclude() -> list[pydicom.Dataset]: series = [ "positioning", "foo_barpositioning", @@ -245,21 +404,20 @@ def dicom_series_to_exclude() -> list[Dataset]: return [_make_dicom(s) for s in series] -def _make_dicom(series_description) -> Dataset: - ds = generate_dicom_dataset(SeriesDescription=series_description) - add_private_tag(ds, DICOM_TAG_PROJECT_NAME, "test-extract-uclh-omop-cdm") - return ds +def _make_dicom(series_description) -> pydicom.Dataset: + return generate_dicom_dataset(SeriesDescription=series_description) def test_should_exclude_series(dicom_series_to_exclude, dicom_series_to_keep): + config = load_project_config(TEST_PROJECT_SLUG) for s in 
dicom_series_to_keep: - assert not should_exclude_series(s) + assert not _should_exclude_series(s, config) for s in dicom_series_to_exclude: - assert should_exclude_series(s) + assert _should_exclude_series(s, config) def test_can_nifti_convert_post_anonymisation( - row_for_dicom_testing, tmp_path, directory_of_mri_dicoms, tag_scheme + row_for_single_dicom_testing, tmp_path, directory_of_mri_dicoms, tag_scheme ): """Can a DICOM image that has passed through our tag processing be converted to NIFTI""" # Create a directory to store anonymised DICOM files @@ -285,7 +443,6 @@ def test_can_nifti_convert_post_anonymisation( "op": "keep", }, ] - # Get test DICOMs from the fixture, anonymise and save for dcm_path in directory_of_mri_dicoms.glob("*.dcm"): dcm = pydicom.dcmread(dcm_path) @@ -323,7 +480,7 @@ def test_can_nifti_convert_post_anonymisation( @pytest.fixture -def sequenced_dicom(): +def sequenced_dicom_mock_db(monkeypatch): """ Create a DICOM dataset with a private sequence tag @@ -332,12 +489,14 @@ def sequenced_dicom(): (group=0x0011, offset=0x0011, creator="UCLH PIXL", VR="SH", value="nested_priv_tag) and a public child tag (group=0x0010, element=0x0020), VR="LO", value="987654321"). + + Also mock db functions """ # Create a test DICOM with a sequence tag exported_dicom = pathlib.Path(__file__).parents[2] / "test/resources/Dicom1.dcm" dataset = pydicom.dcmread(exported_dicom) # create nested dataset to put into sequence - nested_ds = Dataset() + nested_ds = pydicom.Dataset() # nested public tag nested_ds.add_new((0x0010, 0x0020), "LO", "987654321") # nested private tag @@ -348,10 +507,14 @@ def sequenced_dicom(): block = dataset.private_block(0x0011, "UCLH PIXL", create=True) block.add_new(0x0010, "SQ", [nested_ds]) + # Mock the database functions + monkeypatch.setattr( + "pixl_dcmd.main.get_uniq_pseudo_study_uid_and_update_db", lambda *args: None + ) return dataset -def test_del_tag_keep_sq(sequenced_dicom): +def test_del_tag_keep_sq(sequenced_dicom_mock_db): """ GIVEN a dicom image that has a private sequence tag marked to be kept with - a private child tag that is marked to be deleted @@ -362,22 +525,22 @@ def test_del_tag_keep_sq(sequenced_dicom): - the child tags should be deleted/replaced """ ## ARRANGE (or rather check arrangement is as expected) - assert (0x0011, 0x0010) in sequenced_dicom - assert (0x0011, 0x1010) in sequenced_dicom - assert (0x0011, 0x1011) in sequenced_dicom.get_private_item( + assert (0x0011, 0x0010) in sequenced_dicom_mock_db + assert (0x0011, 0x1010) in sequenced_dicom_mock_db + assert (0x0011, 0x1011) in sequenced_dicom_mock_db.get_private_item( 0x0011, 0x0010, "UCLH PIXL" )[0] assert ( - sequenced_dicom.get_private_item(0x0011, 0x0010, "UCLH PIXL")[0] + sequenced_dicom_mock_db.get_private_item(0x0011, 0x0010, "UCLH PIXL")[0] .get_private_item(0x0011, 0x0011, "UCLH PIXL") .value == "nested_priv_tag" ) - assert (0x0010, 0x0020) in sequenced_dicom.get_private_item( + assert (0x0010, 0x0020) in sequenced_dicom_mock_db.get_private_item( 0x0011, 0x0010, "UCLH PIXL" )[0] assert ( - sequenced_dicom.get_private_item(0x0011, 0x0010, "UCLH PIXL")[0] + sequenced_dicom_mock_db.get_private_item(0x0011, 0x0010, "UCLH PIXL")[0] .get_item((0x0010, 0x0020)) .value == "987654321" @@ -408,26 +571,26 @@ def test_del_tag_keep_sq(sequenced_dicom): ] ## ACT - _anonymise_dicom_from_scheme(sequenced_dicom, TEST_PROJECT_SLUG, tag_scheme) + _anonymise_dicom_from_scheme(sequenced_dicom_mock_db, TEST_PROJECT_SLUG, tag_scheme) ## ASSERT # Check that the sequence tag has been kept - 
assert (0x0011, 0x0010) in sequenced_dicom - assert (0x0011, 0x1010) in sequenced_dicom + assert (0x0011, 0x0010) in sequenced_dicom_mock_db + assert (0x0011, 0x1010) in sequenced_dicom_mock_db # check private tag is deleted - assert (0x0011, 0x1011) not in sequenced_dicom.get_private_item( + assert (0x0011, 0x1011) not in sequenced_dicom_mock_db.get_private_item( 0x0011, 0x0010, "UCLH PIXL" )[0] # check public tag is replaced assert ( - sequenced_dicom.get_private_item(0x0011, 0x0010, "UCLH PIXL")[0] + sequenced_dicom_mock_db.get_private_item(0x0011, 0x0010, "UCLH PIXL")[0] .get_item((0x0010, 0x0020)) .value != "987654321" ) -def test_keep_tag_del_sq(sequenced_dicom): +def test_keep_tag_del_sq(sequenced_dicom_mock_db): """ GIVEN a dicom image that has a private sequence tag marked to be deleted with a private child tag that is marked to be kept @@ -435,8 +598,8 @@ def test_keep_tag_del_sq(sequenced_dicom): THEN the sequence tag should be deleted """ ## ARRANGE (or rather check arrangement is as expected) - assert (0x0011, 0x0010) in sequenced_dicom - assert (0x0011, 0x1010) in sequenced_dicom + assert (0x0011, 0x0010) in sequenced_dicom_mock_db + assert (0x0011, 0x1010) in sequenced_dicom_mock_db # Create a tag scheme that deletes the sequence tag, but keeps the nested tags tag_scheme = [ @@ -456,18 +619,17 @@ def test_keep_tag_del_sq(sequenced_dicom): "op": "replace", }, ] - ## ACT - _anonymise_dicom_from_scheme(sequenced_dicom, TEST_PROJECT_SLUG, tag_scheme) + _anonymise_dicom_from_scheme(sequenced_dicom_mock_db, TEST_PROJECT_SLUG, tag_scheme) ## ASSERT # Check that the sequence tag has been deleted - assert (0x0011, 0x1010) not in sequenced_dicom + assert (0x0011, 0x1010) not in sequenced_dicom_mock_db with pytest.raises(KeyError): - sequenced_dicom.get_private_item(0x0011, 0x0010, "UCLH PIXL") + sequenced_dicom_mock_db.get_private_item(0x0011, 0x0010, "UCLH PIXL") -def test_whitelist_child_elements_deleted(sequenced_dicom): +def test_allowlist_child_elements_deleted(sequenced_dicom_mock_db): """ GIVEN a dicom image that has a public and private sequence tags WHEN the dicom tag scheme is applied @@ -475,13 +637,15 @@ def test_whitelist_child_elements_deleted(sequenced_dicom): """ ## ARRANGE (or rather check arrangement is as expected) # check that the sequence tag is present - assert (0x0011, 0x0010) in sequenced_dicom - assert (0x0011, 0x1010) in sequenced_dicom + assert (0x0011, 0x0010) in sequenced_dicom_mock_db + assert (0x0011, 0x1010) in sequenced_dicom_mock_db # check that the children are present - assert (0x0011, 0x1011) in sequenced_dicom[(0x0011, 0x1010)][0] - sequenced_dicom[(0x0011, 0x1010)][0][(0x0011, 0x1011)].value == "nested_priv_tag" - assert (0x0010, 0x0020) in sequenced_dicom[(0x0011, 0x1010)][0] - sequenced_dicom[(0x0011, 0x1010)][0][(0x0010, 0x0020)].value == "987654321" + assert (0x0011, 0x1011) in sequenced_dicom_mock_db[(0x0011, 0x1010)][0] + sequenced_dicom_mock_db[(0x0011, 0x1010)][0][ + (0x0011, 0x1011) + ].value == "nested_priv_tag" + assert (0x0010, 0x0020) in sequenced_dicom_mock_db[(0x0011, 0x1010)][0] + sequenced_dicom_mock_db[(0x0011, 0x1010)][0][(0x0010, 0x0020)].value == "987654321" # set tag scheme to keep sequence tag_scheme = [ @@ -497,17 +661,17 @@ def test_whitelist_child_elements_deleted(sequenced_dicom): }, ] # Whitelist - enforce_whitelist(sequenced_dicom, tag_scheme, recursive=True) + _enforce_allowlist(sequenced_dicom_mock_db, tag_scheme, recursive=True) # Check that the sequence tag is kept - assert (0x0011, 0x0010) in sequenced_dicom - 
assert (0x0011, 0x1010) in sequenced_dicom + assert (0x0011, 0x0010) in sequenced_dicom_mock_db + assert (0x0011, 0x1010) in sequenced_dicom_mock_db # Check that children are deleted - assert (0x0011, 0x1011) not in sequenced_dicom[(0x0011, 0x1010)][0] - assert (0x0010, 0x0020) not in sequenced_dicom[(0x0011, 0x1010)][0] + assert (0x0011, 0x1011) not in sequenced_dicom_mock_db[(0x0011, 0x1010)][0] + assert (0x0010, 0x0020) not in sequenced_dicom_mock_db[(0x0011, 0x1010)][0] with pytest.raises(KeyError): - sequenced_dicom[(0x0011, 0x1010)][0].get_private_item( + sequenced_dicom_mock_db[(0x0011, 0x1010)][0].get_private_item( 0x0011, 0x0011, "UCLH PIXL" ) with pytest.raises(KeyError): - sequenced_dicom[(0x0011, 0x1010)][0][0x0010, 0x0020] + sequenced_dicom_mock_db[(0x0011, 0x1010)][0][0x0010, 0x0020] diff --git a/pixl_dcmd/tests/test_tag_schemes.py b/pixl_dcmd/tests/test_tag_schemes.py index 87fdcac46..108bb1e39 100644 --- a/pixl_dcmd/tests/test_tag_schemes.py +++ b/pixl_dcmd/tests/test_tag_schemes.py @@ -39,7 +39,20 @@ def test_merge_base_only_tags(base_only_tag_scheme): THEN the result should be the same as the base file """ tags = merge_tag_schemes(base_only_tag_scheme) - expected = base_only_tag_scheme.base[0] + expected = [*base_only_tag_scheme.base[0], *base_only_tag_scheme.base[1]] + count_tags = dict() + for tag in expected: + key = f"{tag['group']:04x},{tag['element']:04x}" + if key in count_tags: + count_tags[key] += 1 + else: + count_tags[key] = 1 + + for key, values in count_tags.items(): + assert ( + values == 1 + ), f"{key} is replicated please check config files to remove it" + assert tags == expected @@ -68,24 +81,34 @@ def tag_ops_with_manufacturer_overrides(tmp_path_factory): {"name": "tag3", "group": 0x003, "element": 0x1002, "op": "delete"}, ] manufacturer_overrides_tags = [ - { - "manufacturer": "manufacturer_1", - "tags": [ - # Override tag1 for manufacturer 1 - {"name": "tag1", "group": 0x001, "element": 0x1000, "op": "keep"}, - {"name": "tag4", "group": 0x004, "element": 0x1011, "op": "delete"}, - {"name": "tag5", "group": 0x005, "element": 0x1012, "op": "delete"}, - ], - }, - { - "manufacturer": "manufacturer_2", - "tags": [ - {"name": "tag6", "group": 0x006, "element": 0x1100, "op": "keep"}, - {"name": "tag7", "group": 0x007, "element": 0x1111, "op": "delete"}, - # Override tag3 for manufacturer 2 - {"name": "tag3", "group": 0x003, "element": 0x1002, "op": "keep"}, - ], - }, + [ + { + "manufacturer": "manufacturer_1", + "tags": [ + # Override tag1 for manufacturer 1 + {"name": "tag1", "group": 0x001, "element": 0x1000, "op": "keep"}, + {"name": "tag4", "group": 0x004, "element": 0x1011, "op": "delete"}, + ], + }, + { + "manufacturer": "manufacturer_2", + "tags": [ + {"name": "tag6", "group": 0x006, "element": 0x1100, "op": "keep"}, + {"name": "tag7", "group": 0x007, "element": 0x1111, "op": "delete"}, + # Override tag3 for manufacturer 2 + {"name": "tag3", "group": 0x003, "element": 0x1002, "op": "keep"}, + ], + }, + ], + [ + { + "manufacturer": "manufacturer_1", + "tags": [ + # Override tag1 for manufacturer 1 + {"name": "tag5", "group": 0x005, "element": 0x1012, "op": "delete"}, + ], + }, + ], ] return TagOperations( diff --git a/pixl_export/README.md b/pixl_export/README.md index 10141c7aa..9e34f283e 100644 --- a/pixl_export/README.md +++ b/pixl_export/README.md @@ -26,13 +26,13 @@ On Windows, follow [these instructions](https://www.postgresqltutorial.com/postg Then install the Python dependencies with ```bash -pip install -e ../pixl_core/ -e . 
+python -m pip install -e ../pixl_core/ -e . ``` ## Test ```bash -pip install -e ../pixl_core/ -e .[test] +python -m pip install -e ../pixl_core/ -e ".[test]" pytest ``` diff --git a/pixl_export/pyproject.toml b/pixl_export/pyproject.toml index 2d3e8d998..16a5b74a9 100644 --- a/pixl_export/pyproject.toml +++ b/pixl_export/pyproject.toml @@ -1,38 +1,45 @@ [project] name = "pixl_export" -version = "0.0.2" +version = "0.2.0rc0" authors = [{ name = "PIXL authors" }] description = "PIXL electronic health record extractor" readme = "README.md" requires-python = ">=3.10" classifiers = ["Programming Language :: Python :: 3"] dependencies = [ - "core", - "uvicorn==0.23.2", - "python-decouple==3.6", - "psycopg2-binary==2.9.9", - "azure-identity==1.12.0", - "azure-storage-blob==12.14.1", - "pyarrow==14.0.1", + "core==0.2.0rc0", + "uvicorn==0.32.1", + "azure-storage-blob==12.24.0", ] [project.optional-dependencies] test = [ - "pytest==7.4.2", - "pytest-asyncio==0.21.1", - "pytest-pixl", - "httpx==0.24.*", + "core[test]==0.2.0rc0", ] dev = [ - "mypy", - "pre-commit", - "ruff", + "core[dev]==0.2.0rc0", ] [build-system] requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" +[tool.coverage.report] +exclude_also = [ + "def __repr__", + "if self.debug:", + "if settings.DEBUG", + "except subprocess.CalledProcessError as exception:", + "raise AssertionError", + "raise NotImplementedError", + "if 0:", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", + "if typing.TYPE_CHECKING", + "class .*\\bProtocol\\):", + "@(abc\\.)?abstractmethod", +] + [tool.pytest.ini_options] markers = ["processing"] @@ -44,3 +51,4 @@ extend = "../ruff.toml" [tool.setuptools.package-data] pixl_export = ["sql/*.sql", "report_deid/*.txt"] + diff --git a/pixl_export/src/pixl_export/_databases.py b/pixl_export/src/pixl_export/_databases.py index c446e3d7d..a32998065 100644 --- a/pixl_export/src/pixl_export/_databases.py +++ b/pixl_export/src/pixl_export/_databases.py @@ -26,7 +26,7 @@ class Database: """Fake database wrapper""" - def __init__( # noqa: PLR0913 Too many arguments in function definition + def __init__( self, db_name: Optional[str] = None, username: Optional[str] = None, diff --git a/pixl_export/src/pixl_export/main.py b/pixl_export/src/pixl_export/main.py index 8a5e5d653..1333df0ac 100644 --- a/pixl_export/src/pixl_export/main.py +++ b/pixl_export/src/pixl_export/main.py @@ -21,13 +21,13 @@ datetime, # noqa: TCH003, always import datetime otherwise pydantic throws error ) from pathlib import Path +from typing import Annotated from core.exports import ParquetExport from core.rest_api.router import router from core.uploader import get_uploader -from core.uploader._orthanc import get_tags_by_study from decouple import config # type: ignore [import-untyped] -from fastapi import FastAPI, HTTPException +from fastapi import Body, FastAPI, HTTPException from fastapi.responses import JSONResponse from loguru import logger from pydantic import BaseModel @@ -61,12 +61,6 @@ class ExportPatientData(BaseModel): output_dir: Path = EXPORT_API_EXPORT_ROOT_DIR -class StudyData(BaseModel): - """Uniquely identify a study when talking to the API""" - - study_id: str - - @app.post( "/export-patient-data", summary="Copy all matching radiology reports in the PIXL DB to a parquet file \ @@ -97,16 +91,16 @@ def export_patient_data(export_params: ExportPatientData) -> None: "/export-dicom-from-orthanc", summary="Download a zipped up study from orthanc anon and upload it via the appropriate route", ) -def 
export_dicom_from_orthanc(study_data: StudyData) -> None: +def export_dicom_from_orthanc( + study_id: Annotated[str, Body()], + project_name: Annotated[str, Body()], +) -> None: """ Download zipped up study data from orthanc anon and route it appropriately. Intended only for orthanc-anon to call, as only it knows when its data is ready for download. - Because we're post-anonymisation, the "PatientID" tag returned is actually - the hashed image ID (MRN + Accession number). + Because we're post-anonymisation, the "StudyInstanceUID" tag returned is actually + the Pseudo Study UID (a randomly selected, but consistent UID). """ - study_id = study_data.study_id - _, project_slug = get_tags_by_study(study_id) - - uploader = get_uploader(project_slug) + uploader = get_uploader(project_name) logger.debug("Sending {} via '{}'", study_id, type(uploader).__name__) uploader.upload_dicom_and_update_database(study_id) diff --git a/pixl_export/tests/conftest.py b/pixl_export/tests/conftest.py index b35cb06e6..fe0c9efe6 100644 --- a/pixl_export/tests/conftest.py +++ b/pixl_export/tests/conftest.py @@ -30,17 +30,11 @@ os.environ["PIXL_DB_PORT"] = "35432" os.environ["PIXL_DB_NAME"] = "pixl" os.environ["PIXL_DB_USER"] = "postgres" -os.environ["PIXL_DB_PASSWORD"] = "postgres" # noqa: S105 -os.environ["EMAP_UDS_HOST"] = "localhost" -os.environ["EMAP_UDS_PORT"] = "35433" -os.environ["EMAP_UDS_NAME"] = "emap" -os.environ["EMAP_UDS_USER"] = "postgres" -os.environ["EMAP_UDS_PASSWORD"] = "postgres" # noqa: S105 -os.environ["EMAP_UDS_SCHEMA_NAME"] = "star" +os.environ["PIXL_DB_PASSWORD"] = "postgres" os.environ["PROJECT_CONFIGS_DIR"] = str(Path(__file__).parents[2] / "projects/configs") os.environ["ORTHANC_ANON_USERNAME"] = "orthanc_anon_username" -os.environ["ORTHANC_ANON_PASSWORD"] = "orthanc_anon_password" # noqa: S105 password used in test only +os.environ["ORTHANC_ANON_PASSWORD"] = "orthanc_anon_password" os.environ["ORTHANC_ANON_URL"] = "http://orthanc-anon:8042" TEST_DIR = Path(__file__).parent diff --git a/pixl_export/tests/test_app.py b/pixl_export/tests/test_app.py index 1946dfb59..ff46b5894 100644 --- a/pixl_export/tests/test_app.py +++ b/pixl_export/tests/test_app.py @@ -29,4 +29,5 @@ def test_heartbeat_response_is_200() -> None: def test_initial_state_has_no_token() -> None: - assert not AppState().token_bucket.has_token + assert not AppState().token_bucket.has_token(key="primary") + assert not AppState().token_bucket.has_token(key="secondary") diff --git a/pixl_imaging/README.md b/pixl_imaging/README.md index d23fa0c66..9f3419db8 100644 --- a/pixl_imaging/README.md +++ b/pixl_imaging/README.md @@ -1,20 +1,41 @@ # PIXL Imaging API -The PIXL imaging API processes messages from the imaging queue created by the [CLI](../cli/README.md) -to query images from the [VNA](https://en.wikipedia.org/wiki/Vendor_Neutral_Archive) and transfers them to the [`orthanc-raw` instance](../orthanc/orthanc-raw/README.md). +The PIXL imaging API processes messages created by the [CLI](../cli/README.md) and sent to imaging queues +to query images from a dicom server and transfer them to the [`orthanc-raw` instance](../orthanc/orthanc-raw/README.md). -It exposes a single HTTP endpoint that expects a JSON-formatted message structured as defined by the -[`Message`](../pixl_core/src/core/patient_queue/message.py) class in `pixl_core/patient_queue`. -On arrival of the input message it will issue a DICOMWeb request to `orthanc-raw`, which then queries the VNA -for the requested imaging study, if it didn't already exist. 
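For illustration, a minimal sketch of constructing such a queue message is shown below; the field names follow the `Message` class (and the test fixtures later in this diff), while the values themselves are hypothetical:

```python
import datetime

from core.patient_queue.message import Message

# Hypothetical example values; a real message is built by the CLI from the extract data.
example_message = Message(
    mrn="mrn-123",                      # patient MRN
    accession_number="acc-456",         # accession number for the study
    study_uid="1.2.3.4.5.6.7.8",        # may be an empty string if the StudyInstanceUID is unknown
    study_date=datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc),
    procedure_occurrence_id=1,
    project_name="test project",
    extract_generated_timestamp=datetime.datetime.now(tz=datetime.timezone.utc),
)
```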
+The imaging API has two queues:
+
+- `imaging-primary`, for querying the VNA
+- `imaging-secondary`, for querying PACS
+
+The imaging API consumes JSON-formatted messages from RabbitMQ, structured as defined by the
+[`Message`](../pixl_core/src/core/patient_queue/message.py) class in `pixl_core/patient_queue`.
+
+Users should send messages to the `imaging-primary` queue only. On arrival of the input message, the imaging API
+will query the VNA for the requested study. If the study does not exist in the VNA, the input message will be sent
+to the `imaging-secondary` queue for processing. When the `imaging-secondary` queue processes a message, if the
+study does not exist in PACS an error is raised.
+
+If the study has been identified in the VNA or PACS, a query to `orthanc-raw` is made to check if the study already
+exists locally. If it does, a check is made to ensure all instances exist locally, and any missing
+instances are retrieved. If the study does not exist locally, the entire study is retrieved from the archive.
+
+Once the study and all its instances are in `orthanc-raw`, the study is sent to `orthanc-anon` via a C-STORE
+operation.
+
+>[!NOTE]
+> When querying the archives, if we do not know the `StudyInstanceUID` we will query by MRN and Accession Number.
+> This may result in multiple studies being found in the archives. In this case, all studies returned by the
+> query will be retrieved and sent to Orthanc Anon for anonymisation. In Orthanc Anon, the studies will be combined
+> into a single study as they share the same MRN and Accession Number.

## Installation

```bash
python -m pip install --upgrade pip
-pip install -e pytest-pixl/
-pip install -e pixl_core/[test]
-pip install -e pixl_imaging/[test]
+python -m pip install -e pytest-pixl/
+python -m pip install -e "pixl_core/[test]"
+python -m pip install -e "pixl_imaging/[test]"
```

## Test
@@ -34,8 +55,8 @@ Usage should be from the CLI driver, which interacts with the endpoint.

The database tables are updated using alembic, see the [alembic](alembic) dir for more details.

-The `SKIP_ALEMBIC` environmental variable is used to control whether migrations are applied to the database.
+The `SKIP_ALEMBIC` environment variable is used to control whether migrations are applied to the database (see the variable in `.env.sample` and `test/.env`).

-- Tests that don't use the database use `SKIP_ALEMBIC=true`, but otherwise you probably want to run this.
+- `SKIP_ALEMBIC=true` is set for tests that do not use the database; otherwise you probably want to run the migrations.
- If you wanted to test out new migrations from a test/dev deployment on the GAE with data in, then you
  can redeploy just the `imaging-api` container while keeping the `postgres` container up.

diff --git a/pixl_imaging/alembic/versions/83dcb3812628_add_study_uid_column_to_image_table.py b/pixl_imaging/alembic/versions/83dcb3812628_add_study_uid_column_to_image_table.py
new file mode 100644
index 000000000..5876f45a1
--- /dev/null
+++ b/pixl_imaging/alembic/versions/83dcb3812628_add_study_uid_column_to_image_table.py
@@ -0,0 +1,46 @@
+# Copyright (c) University College London Hospitals NHS Foundation Trust
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Add study UID column to Image table + +Revision ID: 83dcb3812628 +Revises: cb5ee12a6e20 +Create Date: 2024-08-01 16:14:47.995586 + +""" + +from collections.abc import Sequence +from typing import Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "83dcb3812628" +down_revision: Union[str, None] = "cb5ee12a6e20" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("image", sa.Column("study_uid", sa.String(), nullable=True), schema="pipeline") + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("image", "study_uid", schema="pipeline") + # ### end Alembic commands ### diff --git a/pixl_imaging/alembic/versions/cb5ee12a6e20_replace_hashed_id_with_pseudo_study_uid.py b/pixl_imaging/alembic/versions/cb5ee12a6e20_replace_hashed_id_with_pseudo_study_uid.py new file mode 100644 index 000000000..eba9fc341 --- /dev/null +++ b/pixl_imaging/alembic/versions/cb5ee12a6e20_replace_hashed_id_with_pseudo_study_uid.py @@ -0,0 +1,43 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +replace_hashed_id_with_pseudo_study_uid +Revision ID: cb5ee12a6e20 +Revises: bcaef54e2bfe +Create Date: 2024-05-01 20:31:31.670512 + +""" + +from collections.abc import Sequence +from typing import Union + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "cb5ee12a6e20" +down_revision: Union[str, None] = "bcaef54e2bfe" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.alter_column( + "image", "hashed_identifier", new_column_name="pseudo_study_uid", schema="pipeline" + ) + + +def downgrade() -> None: + op.alter_column( + "image", "pseudo_study_uid", new_column_name="hashed_identifier", schema="pipeline" + ) diff --git a/pixl_imaging/alembic/versions/d947cc715eb1_add_pseudo_patient_id_column_to_image_table.py b/pixl_imaging/alembic/versions/d947cc715eb1_add_pseudo_patient_id_column_to_image_table.py new file mode 100644 index 000000000..c75d2b1ac --- /dev/null +++ b/pixl_imaging/alembic/versions/d947cc715eb1_add_pseudo_patient_id_column_to_image_table.py @@ -0,0 +1,50 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Add pseudo patient ID to image table + +Revision ID: d947cc715eb1 +Revises: 83dcb3812628 +Create Date: 2024-08-21 11:42:57.946914 + +""" + +from collections.abc import Sequence +from typing import Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "d947cc715eb1" +down_revision: Union[str, None] = "83dcb3812628" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + "image", + sa.Column("pseudo_patient_id", sa.String(length=255), nullable=True), + schema="pipeline", + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column("image", "pseudo_patient_id", schema="pipeline") + # ### end Alembic commands ### diff --git a/pixl_imaging/pyproject.toml b/pixl_imaging/pyproject.toml index 82994dbf7..55d6e3992 100644 --- a/pixl_imaging/pyproject.toml +++ b/pixl_imaging/pyproject.toml @@ -1,38 +1,47 @@ [project] name = "pixl_imaging" -version = "0.0.2" -authors = [ - { name="PIXL authors" }, -] +version = "0.2.0rc0" +authors = [{ name = "PIXL authors" }] description = "PIXL image extractor" readme = "README.md" requires-python = ">=3.10" -classifiers = [ - "Programming Language :: Python :: 3" -] +classifiers = ["Programming Language :: Python :: 3"] dependencies = [ - "aiohttp==3.9.3", - "alembic==1.13.1", + "core==0.2.0rc0", + "aiohttp==3.10.11", + "alembic==1.14.0", "pydicom==2.4.4", - "python-decouple==3.6", - "uvicorn==0.23.2", + "uvicorn==0.32.1", ] [project.optional-dependencies] test = [ - "pytest==7.4.2", - "pytest-asyncio==0.21.1" + "core[test]==0.2.0rc0", ] dev = [ - "mypy", - "pre-commit", - "ruff", + "core[dev]==0.2.0rc0", ] [build-system] requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" +[tool.coverage.report] +exclude_also = [ + "def __repr__", + "if self.debug:", + "if settings.DEBUG", + "except subprocess.CalledProcessError as exception:", + "raise AssertionError", + "raise NotImplementedError", + "if 0:", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", + "if typing.TYPE_CHECKING", + "class .*\\bProtocol\\):", + "@(abc\\.)?abstractmethod", +] + [tool.pytest.ini_options] markers = ["processing"] @@ -41,11 +50,11 @@ extend = "../ruff.toml" [tool.ruff.lint.extend-per-file-ignores] "./tests/**" = [ - "D1", # Documentation of methods - "S105", # Hardcoded passwords - "SLF001" # Access private members of a class + "D1", # Documentation of methods + "S105", # Hardcoded passwords + "SLF001", # Access private members of a class ] "./alembic/**" = [ - "INP001", # is part of an implicit namespace package. Add an `__init__.py` - "D103", # Missing docstring in public function + "INP001", # is part of an implicit namespace package. Add an `__init__.py` + "D103", # Missing docstring in public function ] diff --git a/pixl_imaging/src/pixl_imaging/_orthanc.py b/pixl_imaging/src/pixl_imaging/_orthanc.py index c838f177b..08132036d 100644 --- a/pixl_imaging/src/pixl_imaging/_orthanc.py +++ b/pixl_imaging/src/pixl_imaging/_orthanc.py @@ -13,7 +13,6 @@ # limitations under the License. 
from __future__ import annotations -from abc import ABC, abstractmethod from asyncio import sleep from time import time from typing import Any, Optional @@ -24,35 +23,58 @@ from loguru import logger -class Orthanc(ABC): - def __init__(self, url: str, username: str, password: str) -> None: +class Orthanc: + def __init__( # noqa: PLR0913 + self, + url: str, + username: str, + password: str, + http_timeout: int, + dicom_timeout: int, + aet: str, + ) -> None: if not url: msg = "URL for orthanc is required" raise ValueError(msg) self._url = url.rstrip("/") + self._aet = aet self._username = username self._password = password self._auth = aiohttp.BasicAuth(login=username, password=password) + self.http_timeout = http_timeout + self.dicom_timeout = dicom_timeout @property - @abstractmethod def aet(self) -> str: """Application entity title (AET) of this Orthanc instance""" + return self._aet @property async def modalities(self) -> Any: """Accessible modalities from this Orthanc instance""" return await self._get("/modalities") - async def get_jobs(self) -> Any: - """Get expanded details for all jobs.""" - return await self._get("/jobs?expand") - async def query_local(self, data: dict) -> Any: """Query local Orthanc instance for resourceId.""" + logger.debug("Running query on local Orthanc with {}", data) return await self._post("/tools/find", data=data) + async def get_local_study(self, study_id: str) -> Any: + """Query local Orthanc instance for study.""" + return await self._get(f"/studies/{study_id}") + + async def get_local_study_statistics(self, study_id: str) -> Any: + """Query local Orthanc instance for study statistics.""" + return await self._get(f"/studies/{study_id}/statistics") + + async def get_local_study_instances(self, study_id: str) -> Any: + """Get the instances of a study.""" + return await self._get( + f"/studies/{study_id}/instances?short=true", + timeout=self.dicom_timeout, # this API call can sometimes take several minutes + ) + async def query_remote(self, data: dict, modality: str) -> Optional[str]: """Query a particular modality, available from this node""" logger.debug("Running query on modality: {} with {}", modality, data) @@ -60,46 +82,59 @@ async def query_remote(self, data: dict, modality: str) -> Optional[str]: response = await self._post( f"/modalities/{modality}/query", data=data, - timeout=config("PIXL_QUERY_TIMEOUT", default=10, cast=float), ) logger.debug("Query response: {}", response) - query_answers = await self._get(f"/queries/{response['ID']}/answers") + query_answers = await self.get_remote_query_answers(response["ID"]) if len(query_answers) > 0: return str(response["ID"]) return None - async def modify_private_tags_by_study( - self, - *, - study_id: str, - private_creator: str, - tag_replacement: dict, - ) -> Any: - # According to the docs, you can't modify tags for an instance using the instance API - # (the best you can do is download a modified version), so do it via the studies API. 
-        # KeepSource=false needed to stop it making a copy
-        # https://orthanc.uclouvain.be/api/index.html#tag/Studies/paths/~1studies~1{id}~1modify/post
-        return await self._post(
-            f"/studies/{study_id}/modify",
-            {
-                "PrivateCreator": private_creator,
-                "Permissive": False,
-                "KeepSource": False,
-                "Replace": tag_replacement,
-            },
+    async def get_remote_query_answers(self, query_id: str) -> Any:
+        """Get the answers to a query"""
+        return await self._get(f"/queries/{query_id}/answers")
+
+    async def get_remote_query_answer_content(self, query_id: str, answer_id: str) -> Any:
+        """Get the content of a query answer"""
+        return await self._get(f"/queries/{query_id}/answers/{answer_id}/content")
+
+    async def get_remote_query_answer_instances(self, query_id: str, answer_id: str) -> Any:
+        """Get the instances of a query answer, using DICOM timeout as can take a while"""
+        response = await self._post(
+            f"/queries/{query_id}/answers/{answer_id}/query-instances",
+            data={"Query": {}},
+            timeout=self.dicom_timeout,
        )
+        return response["ID"]

-    async def retrieve_from_remote(self, query_id: str) -> str:
+    async def retrieve_study_from_remote(self, query_id: str) -> str:
        response = await self._post(
            f"/queries/{query_id}/retrieve",
            data={"TargetAet": self.aet, "Synchronous": False},
        )
        return str(response["ID"])

-    async def wait_for_job_success_or_raise(self, query_id: str, timeout: float) -> None:
+    async def retrieve_instances_from_remote(
+        self, modality: str, missing_instances: list[dict[str, str]]
+    ) -> str:
+        """Retrieve missing instances from remote modality in a single C-MOVE query."""
+        response = await self._post(
+            f"/modalities/{modality}/move",
+            data={
+                "Level": "Instance",
+                "TargetAet": self.aet,
+                "Synchronous": False,
+                "Resources": missing_instances,
+            },
+        )
+        return str(response["ID"])
+
+    async def get_jobs(self) -> Any:
+        """Get expanded details for all jobs."""
+        return await self._get("/jobs?expand")
+
+    async def wait_for_job_success_or_raise(self, job_id: str, job_type: str, timeout: int) -> None:
        """Wait for job to complete successfully, or raise exception if fails or exceeds timeout."""
-        job_id = await self.retrieve_from_remote(query_id=query_id)  # C-Move
        job_info = {"State": "Pending"}
        start_time = time()
@@ -110,12 +145,15 @@ async def wait_for_job_success_or_raise(self, query_id: str, timeout: float) ->
                    f"Error code={job_info['ErrorCode']} Cause={job_info['ErrorDescription']}"
                )
                raise PixlDiscardError(msg)
-
+            if job_type == "modify":
+                logger.debug("Modify job: {}", job_info)
            if (time() - start_time) > timeout:
-                msg = f"Failed to transfer {job_id} in {timeout} seconds"
+                msg = f"Failed to finish {job_type} job {job_id} in {timeout} seconds"
+                await sleep(10)
                raise PixlDiscardError(msg)
-
-            await sleep(1)
+            await sleep(10)
+            if job_info["State"] == "Pending":
+                start_time = time()
            job_info = await self.job_state(job_id=job_id)

    async def job_state(self, job_id: str) -> Any:
@@ -123,26 +161,36 @@ async def job_state(self, job_id: str) -> Any:
        # See: https://book.orthanc-server.com/users/advanced-rest.html#jobs-monitoring
        return await self._get(f"/jobs/{job_id}")

-    async def _get(self, path: str) -> Any:
+    async def _get(self, path: str, timeout: int | None = None) -> Any:
+        # Optionally override default http timeout
+        http_timeout = timeout or self.http_timeout
        async with (
            aiohttp.ClientSession() as session,
-            session.get(f"{self._url}{path}", auth=self._auth, timeout=10) as response,
+            session.get(
+                f"{self._url}{path}",
+                auth=self._auth,
+                timeout=http_timeout,
+            )
as response, ): return await _deserialise(response) - async def _post(self, path: str, data: dict, timeout: Optional[float] = None) -> Any: + async def _post(self, path: str, data: dict, timeout: int | None = None) -> Any: + # Optionally override default http timeout + http_timeout = timeout or self.http_timeout async with ( aiohttp.ClientSession() as session, session.post( - f"{self._url}{path}", json=data, auth=self._auth, timeout=timeout + f"{self._url}{path}", json=data, auth=self._auth, timeout=http_timeout ) as response, ): return await _deserialise(response) - async def delete(self, path: str, timeout: Optional[float] = 10) -> None: + async def delete(self, path: str) -> None: async with ( aiohttp.ClientSession() as session, - session.delete(f"{self._url}{path}", auth=self._auth, timeout=timeout) as response, + session.delete( + f"{self._url}{path}", auth=self._auth, timeout=self.http_timeout + ) as response, ): await _deserialise(response) @@ -161,8 +209,13 @@ def __init__(self) -> None: url=config("ORTHANC_RAW_URL"), username=config("ORTHANC_RAW_USERNAME"), password=config("ORTHANC_RAW_PASSWORD"), + http_timeout=config("PIXL_QUERY_TIMEOUT", default=10, cast=int), + dicom_timeout=config("PIXL_DICOM_TRANSFER_TIMEOUT", default=240, cast=int), + aet=config("ORTHANC_RAW_AE_TITLE"), ) + self.autoroute_to_anon = config("ORTHANC_AUTOROUTE_RAW_TO_ANON", default=False, cast=bool) + async def raise_if_pending_jobs(self) -> None: """ Raise PixlRequeueMessageError if there are pending jobs on the server. @@ -171,15 +224,72 @@ async def raise_if_pending_jobs(self) -> None: PixlRequeueMessageError will cause the rabbitmq message to be requeued """ jobs = await self.get_jobs() + unfinished_jobs = [x for x in jobs if x["State"] not in ("Success", "Failure")] + for job in unfinished_jobs: + logger.trace( + "{}, {}, {}, {}, {}", + job["State"], + job.get("CreationTime"), + job.get("ID"), + job.get("Type"), + job.get("EffectiveRuntime"), + ) for job in jobs: if job["State"] == "Pending": msg = "Pending messages in orthanc raw" raise PixlRequeueMessageError(msg) - @property - def aet(self) -> str: - return str(config("ORTHANC_RAW_AE_TITLE")) - - async def send_existing_study_to_anon(self, resource_id: str) -> Any: + async def send_study_to_anon(self, resource_id: str) -> Any: """Send study to orthanc anon.""" - return await self._post("/send-to-anon", data={"ResourceId": resource_id}) + response = await self._post( + "/modalities/PIXL-Anon/store", + data={ + "Resources": [resource_id], + "Asynchronous": True, + }, + ) + + logger.debug("Successfully triggered c-store of study to anon modality: {}", resource_id) + return str(response["ID"]) + + +class PIXLAnonOrthanc(Orthanc): + """Orthanc Anon connection.""" + + def __init__(self) -> None: + super().__init__( + url=config("ORTHANC_ANON_URL"), + username=config("ORTHANC_ANON_USERNAME"), + password=config("ORTHANC_ANON_PASSWORD"), + http_timeout=config("PIXL_QUERY_TIMEOUT", default=10, cast=int), + dicom_timeout=config("PIXL_DICOM_TRANSFER_TIMEOUT", default=240, cast=int), + aet=config("ORTHANC_ANON_AE_TITLE"), + ) + + self.autoroute_to_endpoint = config( + "ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT", default=False, cast=bool + ) + + async def notify_anon_to_retrieve_study_resources( + self, + orthanc_raw: PIXLRawOrthanc, + resource_ids: list[str], + project_name: str, + ) -> Any: + """Notify Orthanc Anon of study resources to retrieve from Orthanc Raw.""" + resources_info = [ + await orthanc_raw.get_local_study(study_id=resource_id) for resource_id in 
resource_ids + ] + study_uids = [ + resource_info["MainDicomTags"]["StudyInstanceUID"] for resource_info in resources_info + ] + logger.debug("Notify Orthanc Anon to import resources {} from Orthanc Raw", resource_ids) + + await self._post( + path="/import-from-raw", + data={ + "ResourceIDs": resource_ids, + "StudyInstanceUIDs": study_uids, + "ProjectName": project_name, + }, + ) diff --git a/pixl_imaging/src/pixl_imaging/_processing.py b/pixl_imaging/src/pixl_imaging/_processing.py index 4d845f54b..a1a92274d 100644 --- a/pixl_imaging/src/pixl_imaging/_processing.py +++ b/pixl_imaging/src/pixl_imaging/_processing.py @@ -13,14 +13,16 @@ # limitations under the License. from __future__ import annotations +import datetime from dataclasses import dataclass -from typing import TYPE_CHECKING, Any +from enum import StrEnum +from typing import TYPE_CHECKING, Any, Optional +from zoneinfo import ZoneInfo -from core.dicom_tags import DICOM_TAG_PROJECT_NAME -from core.exceptions import PixlDiscardError +from core.exceptions import PixlDiscardError, PixlOutOfHoursError, PixlStudyNotInPrimaryArchiveError from decouple import config -from pixl_imaging._orthanc import Orthanc, PIXLRawOrthanc +from pixl_imaging._orthanc import Orthanc, PIXLAnonOrthanc, PIXLRawOrthanc if TYPE_CHECKING: from core.patient_queue.message import Message @@ -28,125 +30,315 @@ from loguru import logger -async def process_message(message: Message) -> None: +class DicomModality(StrEnum): + primary = config("PRIMARY_DICOM_SOURCE_MODALITY") + secondary = config("SECONDARY_DICOM_SOURCE_MODALITY") + + +async def process_message(message: Message, archive: DicomModality) -> None: """ Process message from queue by retrieving a study with the given Patient and Accession Number. We may receive multiple messages with same Patient + Acc Num, either as retries or because they are needed for multiple projects. """ - logger.debug("Processing: {}", message.identifier) + logger.trace("Processing: {}. Querying {} archive.", message.identifier, archive.name) study = ImagingStudy.from_message(message) orthanc_raw = PIXLRawOrthanc() - try: - await _process_message(study, orthanc_raw) - except PixlDiscardError: - # if a message has failed mid-transfer, can have partial study in orthanc-raw - # delete this so that it doesn't become stable and is exported - studies_to_delete = await orthanc_raw.query_local(study.orthanc_query_dict) - for study_to_delete in studies_to_delete: - logger.info( - "Deleting study '{}' from message {}", study_to_delete, study.message.identifier - ) - await orthanc_raw.delete(f"/studies/{study_to_delete}") - raise + orthanc_anon = PIXLAnonOrthanc() + await _process_message( + study=study, + orthanc_raw=orthanc_raw, + archive=archive, + orthanc_anon=orthanc_anon, + ) -async def _process_message(study: ImagingStudy, orthanc_raw: PIXLRawOrthanc) -> None: +async def _process_message( + study: ImagingStudy, + orthanc_raw: PIXLRawOrthanc, + archive: DicomModality, + orthanc_anon: PIXLAnonOrthanc, +) -> None: + """ + Retrieve a study from the archives and send it to Orthanc Anon. 
+ + Querying the archives: + If 'archive' is 'secondary' and it's during working hours: + - raise a PixlOutOfHoursError to have the message requeued + If the study doesn't exist and 'archive' is primary: + - publish the message to the secondary imaging queue + - raise a PixlDiscardError + If the study doesn't exist and 'archive' is secondary: + - raise a PixlDiscardError + + Querying Orthanc Raw: + If the study already exists in Orthanc Raw: + - query the archive to determine whether any instances are missing + - retrieve any missing instances + If it doesn't already exist in Orthanc Raw: + - query the archive for the study + - retrieve the study from the VNA / PACS + + Then: + - send the study to Orthanc Anon if ORTHANC_AUTOROUTE_RAW_TO_ANON is True + - if the C-STORE operation to Orthanc Anon is successful, and + ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT is True, send the study to the appropriate destination + """ await orthanc_raw.raise_if_pending_jobs() - study_exists = await _update_or_resend_existing_study_( - study.message.project_name, orthanc_raw, study + if archive.name == "secondary" and (_is_daytime() or _is_weekend()): + msg = "Not querying secondary archive during the daytime or on the weekend." + raise PixlOutOfHoursError(msg) + + logger.info("Processing: {}. Querying {} archive.", study.message.identifier, archive.name) + + study_query_id = await _find_study_in_archive_or_raise( + orthanc_raw=orthanc_raw, + study=study, + archive=archive, ) - if study_exists: + + existing_local_resources = await _get_study_resources( + orthanc_raw=orthanc_raw, + study=study, + ) + + if not existing_local_resources: + await _retrieve_study( + orthanc_raw=orthanc_raw, + study_query_id=study_query_id, + ) + else: + await _retrieve_missing_instances( + resources=existing_local_resources, + orthanc_raw=orthanc_raw, + study=study, + study_query_id=study_query_id, + modality=archive.value, + ) + + resources = await _get_study_resources( + orthanc_raw=orthanc_raw, + study=study, + ) + + if not orthanc_raw.autoroute_to_anon: + logger.debug("Auto-routing to Orthanc Anon is not enabled. Not sending study {}", resources) return - query_id = await _find_study_in_vna_or_raise(orthanc_raw, study) - timeout: float = config("PIXL_DICOM_TRANSFER_TIMEOUT", cast=float) - await orthanc_raw.wait_for_job_success_or_raise(query_id, timeout) + await orthanc_anon.notify_anon_to_retrieve_study_resources( + orthanc_raw=orthanc_raw, + resource_ids=resources, + project_name=study.message.project_name, + ) - # Now that instance has arrived in orthanc raw, we can set its project name tag via the API - studies = await orthanc_raw.query_local(study.orthanc_query_dict) - logger.debug("Local instances for study: {}", studies) - await _add_project_to_study(study.message.project_name, orthanc_raw, studies) - return +async def _get_study_resources( + orthanc_raw: PIXLRawOrthanc, + study: ImagingStudy, +) -> list[str]: + """Get a list of existing resources for a study in Orthanc Raw.""" + existing_resources: list[str] = await study.query_local(orthanc_raw) + logger.debug( + 'Found {} existing resources for study "{}"', + existing_resources, + study, + ) -async def _update_or_resend_existing_study_( - project_name: str, orthanc_raw: PIXLRawOrthanc, study: ImagingStudy -) -> bool: + return existing_resources + + +async def _find_study_in_archive_or_raise( + orthanc_raw: Orthanc, + study: ImagingStudy, + archive: DicomModality, +) -> str: """ - If study does not yet exist in orthanc raw, do nothing. 
-    If study exists in orthanc raw and has the wrong project name, update it.
-    If study exists in orthanc raw and has the correct project name, send to orthanc anon.
+    Query an archive for a study.
+
+    If 'archive' is 'secondary' and it's during working hours:
+    - raise a PixlOutOfHoursError to have the message requeued
+    If the study doesn't exist, and 'archive' is primary:
+    - raise a PixlStudyNotInPrimaryArchiveError if a secondary archive is defined
+    - raise a PixlDiscardError if a secondary archive is not defined
+    If the study doesn't exist and 'archive' is secondary:
+    - raise a PixlDiscardError
+
+    When querying an archive, the study is first queried using its UID if it's available.
+    If the UID is an empty string, or if the study is not found, the study is queried using
+    the MRN and accession number.

-    Return True if study exists in orthanc raw, otherwise False.
    """
-    existing_resources = await study.query_local(orthanc_raw, project_tag=True)
-    if len(existing_resources) == 0:
-        return False
-
-    # Check whether study already has the correct project name
-    different_project: list[str] = []
-
-    if len(existing_resources) > 1:
-        # Only keep one study, the one which has the largest number of series
-        sorted_resources = sorted(existing_resources, key=lambda x: len(x["LastUpdate"]))
-        logger.debug(
-            "Found more than one resource for study, only keeping the last updated resource: {}",
-            sorted_resources,
+    query_id = await _find_study_in_archive(
+        orthanc_raw=orthanc_raw,
+        study=study,
+        modality=archive.value,
+    )
+
+    if query_id is not None:
+        return query_id
+
+    if archive.name == "secondary":
+        msg = f"Failed to find study {study.message.identifier} in primary or secondary archive."
+        raise PixlDiscardError(msg)
+
+    if config("SECONDARY_DICOM_SOURCE_AE_TITLE") == config("PRIMARY_DICOM_SOURCE_AE_TITLE"):
+        msg = (
+            f"Failed to find study {study.message.identifier} in primary archive "
+            "and SECONDARY_DICOM_SOURCE_AE_TITLE is the same as PRIMARY_DICOM_SOURCE_AE_TITLE."
        )
-        existing_resources = [sorted_resources.pop(-1)]
-        for delete_resource in sorted_resources:
-            await orthanc_raw.delete(f"/studies/{delete_resource['ID']}")
-
-    for resource in existing_resources:
-        project_tags = (
-            resource["RequestedTags"].get(DICOM_TAG_PROJECT_NAME.tag_nickname),
-            resource["RequestedTags"].get(
-                "Unknown Tag & Data"
-            ),  # Fallback for testing where we're not using the entire plugin, remains undefined
+        raise PixlDiscardError(msg)
+
+    msg = (
+        f"Failed to find study {study.message.identifier} in primary archive, "
+        "sending message to secondary imaging queue."
+    )
+    raise PixlStudyNotInPrimaryArchiveError(msg)
+
+
+async def _find_study_in_archive(
+    orthanc_raw: Orthanc,
+    study: ImagingStudy,
+    modality: str,
+) -> Optional[str]:
+    """
+    Query the archive for the study using its UID.
+    If UID is not available, query on MRN and accession number.
+ """ + query_response = None + if study.message.study_uid: + query_response = await orthanc_raw.query_remote( + data=study.orthanc_uid_query_dict, + modality=modality, ) - if project_name not in project_tags: - different_project.append(resource["ID"]) + if query_response is not None: + return query_response + + logger.debug( + "No study found in modality {} with UID '{}', trying MRN and accession number", + modality, + study.message.study_uid, + ) + return await orthanc_raw.query_remote( + study.orthanc_query_dict, + modality=modality, + ) + + +def _is_daytime() -> bool: + """Check if the current time is between 8 am and 8 pm.""" + timezone = ZoneInfo(config("TZ")) + after_8am = datetime.time(8, 00) <= datetime.datetime.now(tz=timezone).time() + before_8pm = datetime.datetime.now(tz=timezone).time() <= datetime.time(20, 00) + return after_8am and before_8pm + + +def _is_weekend() -> bool: + """Check if it's the weekend.""" + timezone = ZoneInfo(config("TZ")) + saturday = 5 + sunday = 6 + return datetime.datetime.now(tz=timezone).weekday() in (saturday, sunday) - if different_project: - await _add_project_to_study(project_name, orthanc_raw, different_project) - return True - await orthanc_raw.send_existing_study_to_anon(existing_resources[0]["ID"]) - return True + +async def _retrieve_study(orthanc_raw: Orthanc, study_query_id: str) -> None: + """Retrieve all instances for a study from the VNA / PACS.""" + job_id = await orthanc_raw.retrieve_study_from_remote(query_id=study_query_id) # C-Move + await orthanc_raw.wait_for_job_success_or_raise( + job_id, "c-move", timeout=orthanc_raw.dicom_timeout + ) -async def _add_project_to_study( - project_name: str, orthanc_raw: PIXLRawOrthanc, studies: list[str] +async def _retrieve_missing_instances( + resources: list[str], + orthanc_raw: Orthanc, + study: ImagingStudy, + study_query_id: str, + modality: str, ) -> None: - if len(studies) > 1: - logger.error( - "Got {} studies with matching accession number and patient ID, expected 1", - studies, + """Retrieve missing instances for a study from the VNA / PACS.""" + missing_instance_uids = await _get_missing_instances( + orthanc_raw=orthanc_raw, study=study, resources=resources, study_query_id=study_query_id + ) + if not missing_instance_uids: + return + logger.debug( + "Retrieving {} missing instances for study {}", + len(missing_instance_uids), + study.message.identifier, + ) + job_id = await orthanc_raw.retrieve_instances_from_remote(modality, missing_instance_uids) + await orthanc_raw.wait_for_job_success_or_raise( + job_id, "c-move for missing instances", timeout=orthanc_raw.dicom_timeout + ) + + +async def _get_missing_instances( + orthanc_raw: Orthanc, study: ImagingStudy, resources: list[str], study_query_id: str +) -> list[dict[str, str]]: + """ + Check if any study instances are missing from Orthanc Raw. 
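+
+    The instance count reported by the archive's query answers is compared with the number of
+    instances already stored locally; if they differ, the Study/Series/SOPInstanceUIDs of the
+    instances missing from Orthanc Raw are collected so the caller can retrieve them with an
+    instance-level C-MOVE.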
+ + Return a list of missing instance UIDs (empty if none missing) + """ + missing_instances: list[dict[str, str]] = [] + + # First query the VNA / PACS for the study instances + study_query_answers = await orthanc_raw.get_remote_query_answers(study_query_id) + instances_queries_and_answers = [] + for answer_id in study_query_answers: + instances_query_id = await orthanc_raw.get_remote_query_answer_instances( + query_id=study_query_id, answer_id=answer_id ) - for study in studies: - logger.debug("Adding private tag to study ID {}", study) - await orthanc_raw.modify_private_tags_by_study( - study_id=study, - private_creator=DICOM_TAG_PROJECT_NAME.creator_string, - tag_replacement={ - # The tag here needs to be defined in orthanc's dictionary - DICOM_TAG_PROJECT_NAME.tag_nickname: project_name, - }, + instances_query_answers = await orthanc_raw.get_remote_query_answers(instances_query_id) + instances_queries_and_answers.extend( + [(instances_query_id, answer) for answer in instances_query_answers] + ) + num_remote_instances = len(instances_queries_and_answers) + + num_local_instances = 0 + for resource in resources: + study_statistics = await orthanc_raw.get_local_study_statistics(study_id=resource) + num_local_instances += int(study_statistics["CountInstances"]) + + if num_remote_instances == num_local_instances: + logger.debug("No missing instances for study {}", study.message.study_uid) + return missing_instances + + # Get all SOPInstanceUIDs for the study that are in Orthanc Raw + orthanc_raw_sop_instance_uids = [] + for resource in resources: + study_instances = await orthanc_raw.get_local_study_instances(study_id=resource) + orthanc_raw_sop_instance_uids.extend( + [instance["MainDicomTags"]["0008,0018"] for instance in study_instances] ) + # If the SOPInstanceUID is not in the list of instances in Orthanc Raw + # retrieve the instance from the VNA / PACS + query_tags = ["0020,000d", "0020,000e", "0008,0018"] + for instances_query_id, instance_query_answer in instances_queries_and_answers: + instance_query_answer_content = await orthanc_raw.get_remote_query_answer_content( + query_id=instances_query_id, + answer_id=instance_query_answer, + ) + uids_for_query = { + instance_query_answer_content[x]["Name"]: instance_query_answer_content[x]["Value"] + for x in query_tags + } + sop_instance_uid = uids_for_query["SOPInstanceUID"] + if sop_instance_uid in orthanc_raw_sop_instance_uids: + continue + + logger.trace( + "Instance {} is missing from study {}", + sop_instance_uid, + study.message.study_uid, + ) + missing_instances.append(uids_for_query) -async def _find_study_in_vna_or_raise(orthanc_raw: Orthanc, study: ImagingStudy) -> str: - """Query the VNA for the study, raise exception if it doesn't exist""" - query_id = await orthanc_raw.query_remote( - study.orthanc_query_dict, modality=config("VNAQR_MODALITY") - ) - if query_id is None: - msg = "Failed to find in the VNA" - raise PixlDiscardError(msg) - return query_id + return missing_instances @dataclass @@ -161,29 +353,45 @@ def from_message(cls, message: Message) -> ImagingStudy: return ImagingStudy(message=message) @property - def orthanc_query_dict(self) -> dict: - """Build a dictionary to query a study.""" + def orthanc_uid_query_dict(self) -> dict: + """Build a dictionary to query a study with a study UID.""" return { "Level": "Study", "Query": { - "PatientID": self.message.mrn, - "AccessionNumber": self.message.accession_number, + "StudyInstanceUID": self.message.study_uid, }, } @property - def orthanc_dict_with_project_name(self) 
-> dict: - """Dictionary to query a study, returning the PIXL_PROJECT tags for each study.""" + def orthanc_query_dict(self) -> dict: + """Build a dictionary to query a study on MRN and accession number.""" return { - **self.orthanc_query_dict, - "RequestedTags": [DICOM_TAG_PROJECT_NAME.tag_nickname], - "Expand": True, + "Level": "Study", + "Query": { + "PatientID": self.message.mrn, + "AccessionNumber": self.message.accession_number, + }, } - async def query_local(self, node: Orthanc, *, project_tag: bool = False) -> Any: - """Does this study exist in an Orthanc instance/node, optionally query for project tag.""" - query_dict = self.orthanc_query_dict - if project_tag: - query_dict = self.orthanc_dict_with_project_name + async def query_local(self, node: Orthanc) -> Any: + """Does this study exist in an Orthanc instance/node.""" + if self.message.study_uid: + uid_query = self.orthanc_uid_query_dict + + query_response = await node.query_local(uid_query) + if query_response: + return query_response + + logger.trace( + "No study found locally with UID, trying MRN and accession number. {}", + self.orthanc_query_dict, + ) + else: + logger.trace( + "study_uid is empty, trying MRN and accession number. {}", + self.orthanc_query_dict, + ) + + mrn_accession_query = self.orthanc_query_dict - return await node.query_local(query_dict) + return await node.query_local(mrn_accession_query) diff --git a/pixl_imaging/src/pixl_imaging/main.py b/pixl_imaging/src/pixl_imaging/main.py index 94f3ac62d..7e612ecfc 100644 --- a/pixl_imaging/src/pixl_imaging/main.py +++ b/pixl_imaging/src/pixl_imaging/main.py @@ -26,9 +26,10 @@ from fastapi.responses import JSONResponse from loguru import logger -from ._processing import process_message +from ._processing import DicomModality, process_message -QUEUE_NAME = "imaging" +QUEUE_NAME = "imaging-primary" +SECONDARY_QUEUE_NAME = "imaging-secondary" app = FastAPI( title="imaging-api", @@ -57,10 +58,24 @@ async def startup_event() -> None: the task is consumer.run and the callback is _processing.process_message """ background_tasks = set() - async with PixlConsumer( - QUEUE_NAME, token_bucket=state.token_bucket, callback=process_message - ) as consumer: - task = asyncio.create_task(consumer.run()) + async with ( + PixlConsumer( + QUEUE_NAME, + token_bucket=state.token_bucket, + token_bucket_key="primary", # noqa: S106 + callback=lambda message: process_message(message, archive=DicomModality.primary), + ) as primary_consumer, + PixlConsumer( + SECONDARY_QUEUE_NAME, + token_bucket=state.token_bucket, + token_bucket_key="secondary", # noqa: S106 + callback=lambda message: process_message(message, archive=DicomModality.secondary), + ) as secondary_consumer, + ): + task = asyncio.create_task(primary_consumer.run()) + background_tasks.add(task) + task.add_done_callback(background_tasks.discard) + task = asyncio.create_task(secondary_consumer.run()) background_tasks.add(task) task.add_done_callback(background_tasks.discard) diff --git a/pixl_imaging/tests/conftest.py b/pixl_imaging/tests/conftest.py index 1f025b8e4..ed1c38071 100644 --- a/pixl_imaging/tests/conftest.py +++ b/pixl_imaging/tests/conftest.py @@ -16,12 +16,9 @@ import os import shlex import subprocess -from collections.abc import Generator from pathlib import Path import pytest -from _pytest.monkeypatch import MonkeyPatch -from loguru import logger from pytest_pixl.helpers import run_subprocess os.environ["TEST"] = "true" @@ -30,32 +27,30 @@ os.environ["RABBITMQ_USERNAME"] = "guest" os.environ["RABBITMQ_HOST"] = 
"queue" os.environ["RABBITMQ_PORT"] = "5672" +os.environ["ORTHANC_ANON_URL"] = "unused" +os.environ["ORTHANC_ANON_USERNAME"] = "unused" +os.environ["ORTHANC_ANON_PASSWORD"] = "unused" +os.environ["ORTHANC_ANON_AE_TITLE"] = "PIXLRAW" os.environ["ORTHANC_RAW_URL"] = "http://localhost:8044" os.environ["ORTHANC_RAW_USERNAME"] = "orthanc" os.environ["ORTHANC_RAW_PASSWORD"] = "orthanc" os.environ["ORTHANC_RAW_AE_TITLE"] = "PIXLRAW" +os.environ["ORTHANC_AUTOROUTE_RAW_TO_ANON"] = "False" os.environ["ORTHANC_VNA_URL"] = "http://localhost:8043" os.environ["ORTHANC_VNA_USERNAME"] = "orthanc" os.environ["ORTHANC_VNA_PASSWORD"] = "orthanc" -os.environ["ORTHANC_VNA_AE_TITLE"] = "VNAQR" -os.environ["VNAQR_MODALITY"] = "UCVNAQR" +os.environ["ORTHANC_PACS_URL"] = "http://localhost:8045" +os.environ["ORTHANC_PACS_USERNAME"] = "orthanc" +os.environ["ORTHANC_PACS_PASSWORD"] = "orthanc" +os.environ["PRIMARY_DICOM_SOURCE_MODALITY"] = "UCPRIMARYQR" +os.environ["PRIMARY_DICOM_SOURCE_AE_TITLE"] = "PRIMARYQR" +os.environ["SECONDARY_DICOM_SOURCE_MODALITY"] = "UCSECONDARYQR" +os.environ["SECONDARY_DICOM_SOURCE_AE_TITLE"] = "SECONDARYQR" +os.environ["PIXL_QUERY_TIMEOUT"] = "10" os.environ["PIXL_DICOM_TRANSFER_TIMEOUT"] = "30" os.environ["SKIP_ALEMBIC"] = "true" os.environ["PIXL_MAX_MESSAGES_IN_FLIGHT"] = "20" -os.environ["ORTHANC_AUTOROUTE_RAW_TO_ANON"] = "false" - - -@pytest.fixture(autouse=True) -def _patch_send_existing_study_to_anon(monkeypatch: Generator[MonkeyPatch, None, None]) -> None: - """Patch send_existing_study_to_anon in Orthanc as orthanc raw doesn't use the pixl plugin.""" - - async def patched_send(self, resource_id: str) -> None: - """Replaces send_existing_study_to_anon.""" - logger.info("Intercepted request to send '{}' to anon", resource_id) - - monkeypatch.setattr( - "pixl_imaging._orthanc.PIXLRawOrthanc.send_existing_study_to_anon", patched_send - ) +os.environ["TZ"] = "Europe/London" TEST_DIR = Path(__file__).parent @@ -67,7 +62,7 @@ def run_containers() -> subprocess.CompletedProcess[bytes]: yield run_subprocess( shlex.split("docker compose up --build --wait"), TEST_DIR, - timeout=180, + timeout=240, ) run_subprocess( shlex.split("docker compose down --volumes"), diff --git a/pixl_imaging/tests/docker-compose.yml b/pixl_imaging/tests/docker-compose.yml index bf318b794..3aded6e04 100644 --- a/pixl_imaging/tests/docker-compose.yml +++ b/pixl_imaging/tests/docker-compose.yml @@ -33,38 +33,61 @@ services: vna-qr: image: orthancteam/orthanc:24.3.3 environment: - ORTHANC_NAME: "VNAQR" + ORTHANC_NAME: "PRIMARYQR" ORTHANC_USERNAME: "orthanc" ORTHANC_PASSWORD: "orthanc" - ORTHANC_AE_TITLE: "VNAQR" + ORTHANC_AE_TITLE: "PRIMARYQR" RAW_AE_TITLE: "PIXLRAW" RAW_DICOM_PORT: "4242" RAW_IP_ADDR: "orthanc-raw" # aka. hostname + TZ: ${TZ:-Europe/London} ports: - "127.0.0.1:4243:4242" - "127.0.0.1:8043:8042" volumes: - ../../test/vna_config/:/run/secrets:ro <<: *orthanc-healthcheck + pacs-qr: + image: orthancteam/orthanc:24.3.3 + environment: + ORTHANC_NAME: "SECONDARYQR" + ORTHANC_USERNAME: "orthanc" + ORTHANC_PASSWORD: "orthanc" + ORTHANC_AE_TITLE: "SECONDARYQR" + RAW_AE_TITLE: "PIXLRAW" + RAW_DICOM_PORT: "4242" + RAW_IP_ADDR: "orthanc-raw" # aka. 
hostname + TZ: ${TZ:-Europe/London} + ports: + - "127.0.0.1:4245:4242" + - "127.0.0.1:8045:8042" + volumes: + - ../../test/vna_config/:/run/secrets:ro + <<: *orthanc-healthcheck orthanc-raw: build: context: ../../ dockerfile: ./docker/orthanc/Dockerfile target: pixl_orthanc_raw + args: + PIXL_DICOM_TRANSFER_TIMEOUT: 30 environment: ORTHANC_NAME: "PIXL: Raw" ORTHANC_USERNAME: "orthanc" ORTHANC_PASSWORD: "orthanc" ORTHANC_AE_TITLE: "PIXLRAW" - VNAQR_AE_TITLE: "VNAQR" - VNAQR_DICOM_PORT: "4242" - VNAQR_IP_ADDR: "vna-qr" + PRIMARY_DICOM_SOURCE_AE_TITLE: "PRIMARYQR" + PRIMARY_DICOM_SOURCE_PORT: "4242" + PRIMARY_DICOM_SOURCE_IP_ADDR: "vna-qr" + SECONDARY_DICOM_SOURCE_AE_TITLE: "SECONDARYQR" + SECONDARY_DICOM_SOURCE_PORT: "4242" + SECONDARY_DICOM_SOURCE_IP_ADDR: "pacs-qr" ORTHANC_ANON_AE_TITLE: "unused" ORTHANC_ANON_DICOM_PORT: "4242" ORTHANC_ANON_HOSTNAME: "orthanc-anon" - ORTHANC_AUTOROUTE_RAW_TO_ANON: "false" PROJECT_CONFIGS_DIR: "/projects/configs" + TZ: "Europe/London" ports: - "127.0.0.1:4244:4242" - "127.0.0.1:8044:8042" diff --git a/pixl_imaging/tests/orthanc_raw_config/dicom.json b/pixl_imaging/tests/orthanc_raw_config/dicom.json index 4e3b20af9..75d89a507 100644 --- a/pixl_imaging/tests/orthanc_raw_config/dicom.json +++ b/pixl_imaging/tests/orthanc_raw_config/dicom.json @@ -1,19 +1,96 @@ { "DicomAet" : "${ORTHANC_AE_TITLE}", - "DicomAlwaysAllowEcho" : true, - "DicomCheckModalityHost" : true, + "DicomModalities" : { - "UCVNAQR" : { - "AET" : "${VNAQR_AE_TITLE}", - "Port" : "${VNAQR_DICOM_PORT}", - "Host" : "${VNAQR_IP_ADDR}", - "Manufacturer" : "Generic" + "UCPRIMARYQR" : { + "AET" : "${PRIMARY_DICOM_SOURCE_AE_TITLE}", + "Port" : "${PRIMARY_DICOM_SOURCE_PORT}", + "Host" : "${PRIMARY_DICOM_SOURCE_IP_ADDR}", + "Manufacturer" : "Generic", + "AllowEcho" : true, + "AllowFind" : false, + "AllowFindWorklist" : false, + "AllowGet" : false, + "AllowMove" : false, + "AllowStore" : true, + "AllowStorageCommitment" : false, + "AllowTranscoding" : true, + "UseDicomTls" : false, + "Timeout" : 60 }, - "PIXLR" : { - "AET" : "${ORTHANC_AE_TITLE}", - "Port" : "4242", - "Host" : "127.0.0.1", - "Manufacturer" : "Generic" + "UCSECONDARYQR" : { + "AET" : "${SECONDARY_DICOM_SOURCE_AE_TITLE}", + "Port" : "${SECONDARY_DICOM_SOURCE_PORT}", + "Host" : "${SECONDARY_DICOM_SOURCE_IP_ADDR}", + "Manufacturer" : "Generic", + "AllowEcho" : true, + "AllowFind" : false, + "AllowFindWorklist" : false, + "AllowGet" : false, + "AllowMove" : false, + "AllowStore" : true, + "AllowStorageCommitment" : false, + "AllowTranscoding" : true, + "UseDicomTls" : false, + "Timeout" : 60 } - } + }, + + // Check whether the called AET corresponds to the AET of Orthanc + // during an incoming DICOM SCU request + "DicomCheckCalledAet" : true, + + // Whether Orthanc accepts to act as C-STORE SCP for unknown storage + // SOP classes (aka. "promiscuous mode") + "UnknownSopClassAccepted" : true, + + // Whether the Orthanc SCP allows incoming C-ECHO requests, even + // from SCU modalities it does not know about (i.e. that are not + // listed in the "DicomModalities" option above). Orthanc 1.3.0 + // is the only version to behave as if this argument were set to "false". + "DicomAlwaysAllowEcho" : true, + + // Whether the Orthanc SCP allows incoming C-STORE requests, even + // from SCU modalities it does not know about (i.e. that are not + // listed in the "DicomModalities" option above) + "DicomAlwaysAllowStore" : true, + + // Whether the Orthanc SCP allows incoming C-FIND requests, even + // from SCU modalities it does not know about (i.e. 
that are not + // listed in the "DicomModalities" option above). Setting this + // option to "true" implies security risks. (new in Orthanc 1.9.0) + // Note: From Orthanc 1.10.0, this option only applies to C-FIND + // requests for patients/studies/series/instances. Use option + // "DicomAlwaysAllowFindWorklist" for worklists. + "DicomAlwaysAllowFind" : false, + + // Whether the Orthanc SCP allows incoming C-FIND requests for worklists, + // even from SCU modalities it does not know about (i.e. that are not + // listed in the "DicomModalities" option above). Setting this + // option to "true" implies security risks. (new in Orthanc 1.10.0) + "DicomAlwaysAllowFindWorklist" : false, + + // Whether the Orthanc SCP allows incoming C-GET requests, even + // from SCU modalities it does not know about (i.e. that are not + // listed in the "DicomModalities" option above). Setting this + // option to "true" implies security risks. (new in Orthanc 1.9.0) + "DicomAlwaysAllowGet" : false, + + // Whether the Orthanc SCP allows incoming C-MOVE requests, even + // from SCU modalities it does not know about (i.e. that are not + // listed in the "DicomModalities" option above). Setting this + // option to "true" implies security risks. (new in Orthanc 1.9.7) + "DicomAlwaysAllowMove" : false, + + // Whether Orthanc checks the IP/hostname address of the remote + // modality initiating a DICOM connection (as listed in the + // "DicomModalities" option above). If this option is set to + // "false", Orthanc only checks the AET of the remote modality. + "DicomCheckModalityHost" : false, + + // Overwrite instances with the same UID + // This allows us to set the project name tag in-place without modifying the + //StudyInstanceUID, SeriesInstanceUID, and SOPInstanceUID + "OverwriteInstance": true + } diff --git a/pixl_imaging/tests/orthanc_raw_config/orthanc.json b/pixl_imaging/tests/orthanc_raw_config/orthanc.json index e2c6a767e..00cc51049 100644 --- a/pixl_imaging/tests/orthanc_raw_config/orthanc.json +++ b/pixl_imaging/tests/orthanc_raw_config/orthanc.json @@ -1,11 +1,31 @@ { - "Dictionary": { - "000d,1001": ["LO", "UCLHPIXLProjectName", 1, 1, "UCLH PIXL"] - }, "DefaultPrivateCreator" : "UCLH PIXL", "Name" : "${ORTHANC_NAME}", "RemoteAccessAllowed" : true, "RegisteredUsers": { "${ORTHANC_USERNAME}": "${ORTHANC_PASSWORD}" - } + }, + +// Path to the directory that holds the heavyweight files (i.e. the + // raw DICOM instances). Backslashes must be either escaped by + // doubling them, or replaced by forward slashes "/". + "StorageDirectory" : "/var/lib/orthanc/db", + + // Limit the maximum storage size + "MaximumPatientCount" : 0, // no limit + "MaximumStorageSize" : 100, // MB + "MaximumStorageMode" : "Recycle", + // Enable concurrency + "JobsHistorySize": 100, + "ConcurrentJobs" : 20, + // overwrite instances with the same UID if we get them for a second time + "OverwriteInstances" : true, + "StableAge" : 30, + // Defines the number of threads that are used to execute each type of + // jobs (for the jobs that can be parallelized). 
+ // A value of "0" indicates to use all the available CPU logical cores + "JobsEngineThreadsCount" : { + "ResourceModification": 5 // for /anonymize, /modify + } + } \ No newline at end of file diff --git a/pixl_imaging/tests/test_imaging_processing.py b/pixl_imaging/tests/test_imaging_processing.py index 24645e95d..b4821960e 100644 --- a/pixl_imaging/tests/test_imaging_processing.py +++ b/pixl_imaging/tests/test_imaging_processing.py @@ -16,38 +16,135 @@ from __future__ import annotations import datetime +import os import pathlib import shlex +from typing import TYPE_CHECKING import pytest +from core.exceptions import PixlDiscardError, PixlOutOfHoursError, PixlStudyNotInPrimaryArchiveError from core.patient_queue.message import Message from decouple import config from pixl_imaging._orthanc import Orthanc, PIXLRawOrthanc -from pixl_imaging._processing import ImagingStudy, process_message +from pixl_imaging._processing import DicomModality, ImagingStudy, process_message from pydicom import dcmread from pydicom.data import get_testdata_file +from pydicom.uid import generate_uid +from pytest_check import check from pytest_pixl.helpers import run_subprocess +if TYPE_CHECKING: + from collections.abc import Generator + + pytest_plugins = ("pytest_asyncio",) ACCESSION_NUMBER = "abc" PATIENT_ID = "a_patient" -message = Message( - mrn=PATIENT_ID, - accession_number=ACCESSION_NUMBER, - study_date=datetime.datetime.strptime("01/01/1234 01:23:45", "%d/%m/%Y %H:%M:%S").replace( - tzinfo=datetime.timezone.utc - ), - procedure_occurrence_id=234, - project_name="test project", - extract_generated_timestamp=datetime.datetime.fromisoformat("1234-01-01 00:00:00"), -) +STUDY_UID = generate_uid(entropy_srcs=["12345678"]) +SERIES_UID = generate_uid(entropy_srcs=["12345678.1"]) +SOP_INSTANCE_UID = "1.1.1.1.1.1.1111.1.1.1.1.1.11111111111111.11111" +SOP_INSTANCE_UID_2 = "2.2.2.2.2.2.2222.2.2.2.2.2.22222222222222.22222" + +PACS_ACCESSION_NUMBER = "def" +PACS_PATIENT_ID = "another_patient" +PACS_STUDY_UID = "87654321" + +MISSING_ACCESSION_NUMBER = "ghi" +MISSING_PATIENT_ID = "missing_patient" +MISSING_STUDY_UID = "00000000" + + +@pytest.fixture(scope="module") +def message() -> Message: + """A Message with a valid study_uid.""" + return Message( + mrn=PATIENT_ID, + accession_number=ACCESSION_NUMBER, + study_uid=STUDY_UID, + study_date=datetime.datetime.strptime("01/01/1234 01:23:45", "%d/%m/%Y %H:%M:%S").replace( + tzinfo=datetime.timezone.utc + ), + procedure_occurrence_id=234, + project_name="test project", + extract_generated_timestamp=datetime.datetime.fromisoformat("1234-01-01 00:00:00"), + ) + + +@pytest.fixture(scope="module") +def no_uid_message() -> Message: + """A Message with a valid study_uid.""" + return Message( + mrn=PATIENT_ID, + accession_number=ACCESSION_NUMBER, + study_uid="", + study_date=datetime.datetime.strptime("01/01/1234 01:23:45", "%d/%m/%Y %H:%M:%S").replace( + tzinfo=datetime.timezone.utc + ), + procedure_occurrence_id=234, + project_name="test project", + extract_generated_timestamp=datetime.datetime.fromisoformat("1234-01-01 00:00:00"), + ) + + +@pytest.fixture(scope="module") +def pacs_message() -> Message: + """A Message with a valid study_uid for a study that exists in PACS but not VNA.""" + return Message( + mrn=PACS_PATIENT_ID, + accession_number=PACS_ACCESSION_NUMBER, + study_uid=PACS_STUDY_UID, + study_date=datetime.datetime.strptime("01/01/1234 01:23:45", "%d/%m/%Y %H:%M:%S").replace( + tzinfo=datetime.timezone.utc + ), + procedure_occurrence_id=234, + project_name="test 
project", + extract_generated_timestamp=datetime.datetime.fromisoformat("1234-01-01 00:00:00"), + ) + + +@pytest.fixture(scope="module") +def pacs_no_uid_message() -> Message: + """A Message without a valid study_uid for a study that exists in PACS but not the VNA.""" + return Message( + mrn=PACS_PATIENT_ID, + accession_number=PACS_ACCESSION_NUMBER, + study_uid="ialsodontexist", + study_date=datetime.datetime.strptime("01/01/1234 01:23:45", "%d/%m/%Y %H:%M:%S").replace( + tzinfo=datetime.timezone.utc + ), + procedure_occurrence_id=234, + project_name="test project", + extract_generated_timestamp=datetime.datetime.fromisoformat("1234-01-01 00:00:00"), + ) + + +@pytest.fixture(scope="module") +def missing_message() -> Message: + """A Message for a study that does not exist in PACS nor the VNA.""" + return Message( + mrn=MISSING_PATIENT_ID, + accession_number=MISSING_ACCESSION_NUMBER, + study_uid=MISSING_STUDY_UID, + study_date=datetime.datetime.strptime("01/01/1234 01:23:45", "%d/%m/%Y %H:%M:%S").replace( + tzinfo=datetime.timezone.utc + ), + procedure_occurrence_id=345, + project_name="test project", + extract_generated_timestamp=datetime.datetime.fromisoformat("1234-01-01 00:00:00"), + ) class WritableOrthanc(Orthanc): - @property - def aet(self) -> str: - return "VNAQR" + def __init__(self, url: str, username: str, password: str, aet: str) -> None: + super().__init__( + url=url, + username=username, + password=password, + http_timeout=config("PIXL_QUERY_TIMEOUT", cast=int), + dicom_timeout=config("PIXL_DICOM_TRANSFER_TIMEOUT", cast=int), + aet=aet, + ) def upload(self, filename: str) -> None: run_subprocess( @@ -59,21 +156,61 @@ def upload(self, filename: str) -> None: @pytest.fixture(scope="module") -def _add_image_to_fake_vna(run_containers) -> None: +def _add_image_to_fake_vna(run_containers) -> Generator[None]: """Add single fake image to VNA.""" + vna = WritableOrthanc( + aet="PRIMARYQR", + url=config("ORTHANC_VNA_URL"), + username=config("ORTHANC_VNA_USERNAME"), + password=config("ORTHANC_VNA_PASSWORD"), + ) + image_filename = "test.dcm" - path = get_testdata_file("CT_small.dcm") + path = str(get_testdata_file("CT_small.dcm")) ds = dcmread(path) ds.AccessionNumber = ACCESSION_NUMBER ds.PatientID = PATIENT_ID + ds.StudyInstanceUID = STUDY_UID + ds.SeriesInstanceUID = SERIES_UID + ds.SOPInstanceUID = SOP_INSTANCE_UID ds.save_as(image_filename) vna = WritableOrthanc( + aet="PRIMARYQR", url=config("ORTHANC_VNA_URL"), username=config("ORTHANC_VNA_USERNAME"), password=config("ORTHANC_VNA_PASSWORD"), ) vna.upload(image_filename) + + instance_2_image_filename = "test_2.dcm" + ds.SOPInstanceUID = SOP_INSTANCE_UID_2 + ds.save_as(instance_2_image_filename) + vna.upload(instance_2_image_filename) + + yield + pathlib.Path(image_filename).unlink(missing_ok=True) + pathlib.Path(instance_2_image_filename).unlink(missing_ok=True) + + +@pytest.fixture(scope="module") +def _add_image_to_fake_pacs(run_containers) -> Generator[None]: + """Add single fake image to PACS.""" + image_filename = "test-mr.dcm" + path = str(get_testdata_file("MR_small.dcm")) + ds = dcmread(path) + ds.AccessionNumber = PACS_ACCESSION_NUMBER + ds.PatientID = PACS_PATIENT_ID + ds.StudyInstanceUID = PACS_STUDY_UID + ds.save_as(image_filename) + + pacs = WritableOrthanc( + aet="SECONDARYQR", + url=config("ORTHANC_PACS_URL"), + username=config("ORTHANC_PACS_USERNAME"), + password=config("ORTHANC_PACS_PASSWORD"), + ) + pacs.upload(image_filename) yield pathlib.Path(image_filename).unlink(missing_ok=True) @@ -93,7 +230,7 @@ async def 
orthanc_raw(run_containers) -> PIXLRawOrthanc: @pytest.mark.processing() @pytest.mark.asyncio() @pytest.mark.usefixtures("_add_image_to_fake_vna") -async def test_image_saved(orthanc_raw) -> None: +async def test_image_saved(orthanc_raw, message: Message) -> None: """ Given the VNA has images, and orthanc raw has no images When we run process_message @@ -104,14 +241,75 @@ async def test_image_saved(orthanc_raw) -> None: orthanc = await orthanc_raw assert not await study.query_local(orthanc) - await process_message(message) + await process_message(message, archive=DicomModality.primary) + + studies = await study.query_local(orthanc) + assert len(studies) == 1 + + study_info = await orthanc._get(f"/studies/{studies[0]}") + with check: + assert study_info["MainDicomTags"]["AccessionNumber"] == ACCESSION_NUMBER + assert study_info["PatientMainDicomTags"]["PatientID"] == PATIENT_ID + assert study_info["MainDicomTags"]["StudyInstanceUID"] == STUDY_UID + + series_info = await orthanc._get(f"/series/{study_info['Series'][0]}") + with check: + assert series_info["MainDicomTags"]["SeriesInstanceUID"] == SERIES_UID + + instance_info = await orthanc._get(f"/instances/{series_info['Instances'][0]}") + with check: + assert instance_info["MainDicomTags"]["SOPInstanceUID"] in ( + SOP_INSTANCE_UID, + SOP_INSTANCE_UID_2, + ) + + +@pytest.mark.processing() +@pytest.mark.asyncio() +@pytest.mark.usefixtures("_add_image_to_fake_vna") +async def test_partial_retrieve(orthanc_raw, message: Message, caplog) -> None: + """ + Given the VNA has a single study with 2 instances, and orthanc raw has the same study with + 1 instance + When we run process_message + Then orthanc raw will contain both instances after retrieving only the missing instance + """ + study = ImagingStudy.from_message(message) + + orthanc = await orthanc_raw + + assert not await study.query_local(orthanc) + await process_message(message, archive=DicomModality.primary) assert await study.query_local(orthanc) + all_instances = await orthanc._get("/instances") + assert len(all_instances) == 2 + + instance_info = {} + + with check: + for instance in all_instances: + instance_info = await orthanc._get(f"/instances/{instance}") + sop_instance_uid = instance_info["MainDicomTags"]["SOPInstanceUID"] + assert sop_instance_uid in (SOP_INSTANCE_UID, SOP_INSTANCE_UID_2) + + await orthanc.delete(f"/instances/{instance_info['ID']}") + + await process_message(message, archive=DicomModality.primary) + all_instances = await orthanc._get("/instances") + assert len(all_instances) == 2 + + expected_msg = ( + f"Instance {instance_info['MainDicomTags']['SOPInstanceUID']}" + f" is missing from study {STUDY_UID}" + ) + assert expected_msg in caplog.text + @pytest.mark.processing() @pytest.mark.asyncio() @pytest.mark.usefixtures("_add_image_to_fake_vna") -async def test_existing_message_sent_twice(orthanc_raw) -> None: +async def test_existing_message_sent_twice(orthanc_raw, message: Message) -> None: """ Given the VNA has images, and orthanc raw has no images When we run process_message on the same message twice @@ -120,16 +318,237 @@ async def test_existing_message_sent_twice(orthanc_raw) -> None: study = ImagingStudy.from_message(message) orthanc = await orthanc_raw - await process_message(message) + await process_message(message, archive=DicomModality.primary) assert await study.query_local(orthanc) query_for_update_time = {**study.orthanc_query_dict, "Expand": True} first_processing_resource = await orthanc.query_local(query_for_update_time) assert 
len(first_processing_resource) == 1 - await process_message(message) + await process_message(message, archive=DicomModality.primary) second_processing_resource = await orthanc.query_local(query_for_update_time) assert len(second_processing_resource) == 1 # Check update time hasn't changed assert first_processing_resource[0]["LastUpdate"] == second_processing_resource[0]["LastUpdate"] + + studies = await study.query_local(orthanc) + assert len(studies) == 1 + + study_info = await orthanc._get(f"/studies/{studies[0]}") + with check: + assert study_info["MainDicomTags"]["AccessionNumber"] == ACCESSION_NUMBER + assert study_info["PatientMainDicomTags"]["PatientID"] == PATIENT_ID + assert study_info["MainDicomTags"]["StudyInstanceUID"] == STUDY_UID + + series_info = await orthanc._get(f"/series/{study_info['Series'][0]}") + with check: + assert series_info["MainDicomTags"]["SeriesInstanceUID"] == SERIES_UID + + instance_info = await orthanc._get(f"/instances/{series_info['Instances'][0]}") + with check: + assert instance_info["MainDicomTags"]["SOPInstanceUID"] in ( + SOP_INSTANCE_UID, + SOP_INSTANCE_UID_2, + ) + + +@pytest.mark.processing() +@pytest.mark.asyncio() +@pytest.mark.usefixtures("_add_image_to_fake_vna") +async def test_querying_without_uid(orthanc_raw, caplog, no_uid_message: Message) -> None: + """ + Given a message with non-existent study_uid + When we query the VNA + Then the querying falls back to using the MRN and accession number + """ + study = ImagingStudy.from_message(no_uid_message) + orthanc = await orthanc_raw + + assert not await study.query_local(orthanc) + await process_message(no_uid_message, archive=DicomModality.primary) + assert await study.query_local(orthanc) + + expected_msg = ( + f"No study found in modality UCPRIMARYQR with UID '{study.message.study_uid}', " + "trying MRN and accession number" + ) + assert expected_msg in caplog.text + + +class Monday2AM(datetime.datetime): + @classmethod + def now(cls, tz=None) -> datetime.datetime: + return cls(2024, 1, 1, 2, 0, tzinfo=tz) + + +class Monday11AM(datetime.datetime): + @classmethod + def now(cls, tz=None) -> datetime.datetime: + return cls(2024, 1, 1, 11, 0, tzinfo=tz) + + +class Saturday2AM(datetime.datetime): + @classmethod + def now(cls, tz=None) -> datetime.datetime: + return datetime.datetime(2024, 1, 6, 2, 0, tzinfo=tz) + + +@pytest.mark.processing() +@pytest.mark.asyncio() +@pytest.mark.usefixtures("_add_image_to_fake_pacs") +async def test_querying_pacs_with_uid( + orthanc_raw, caplog, monkeypatch, pacs_message: Message +) -> None: + """ + Given a message with study_uid exists in PACS but not VNA, + When we query the archives + Then the querying finds the study in PACS with the study_uid + """ + study = ImagingStudy.from_message(pacs_message) + orthanc = await orthanc_raw + + assert not await study.query_local(orthanc) + + # PACS is not queried during the daytime nor at the weekend. + # Set today to be a Monday at 2 am. + with monkeypatch.context() as mp: + mp.setattr(datetime, "datetime", Monday2AM) + match = "sending message to secondary imaging queue." 
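+        # The study only exists in the fake PACS, so querying the primary archive raises
+        # and the message is handed over to the secondary imaging queue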
+ with pytest.raises(PixlStudyNotInPrimaryArchiveError, match=match): + await process_message(pacs_message, archive=DicomModality.primary) + await process_message(pacs_message, archive=DicomModality.secondary) + + assert await study.query_local(orthanc) + + expected_msg = ( + f"No study found in modality UCPRIMARYQR with UID '{study.message.study_uid}', " + "trying MRN and accession number" + ) + assert expected_msg in caplog.text + + unexpected_msg = ( + f"No study found in modality UCSECONDARYQR with UID '{study.message.study_uid}', " + "trying MRN and accession number" + ) + assert unexpected_msg not in caplog.text + + +@pytest.mark.processing() +@pytest.mark.asyncio() +@pytest.mark.usefixtures("_add_image_to_fake_pacs") +async def test_querying_pacs_without_uid( + orthanc_raw, caplog, monkeypatch, pacs_no_uid_message: Message +) -> None: + """ + Given a message with non-existent study_uid exists in PACS but not VNA, + When we query the archives + Then the querying falls back to using the MRN and accession number and finds the study in PACS + """ + study = ImagingStudy.from_message(pacs_no_uid_message) + orthanc = await orthanc_raw + + assert not await study.query_local(orthanc) + + # PACS is not queried during the daytime nor at the weekend. + # Set today to be a Monday at 2 am. + with monkeypatch.context() as mp: + mp.setattr(datetime, "datetime", Monday2AM) + match = "sending message to secondary imaging queue." + with pytest.raises(PixlStudyNotInPrimaryArchiveError, match=match): + await process_message(pacs_no_uid_message, archive=DicomModality.primary) + await process_message(pacs_no_uid_message, archive=DicomModality.secondary) + + assert await study.query_local(orthanc) + + expected_msg = "No study found in modality UCPRIMARYQR with UID" + assert expected_msg in caplog.text + + expected_msg = ( + f"No study found in modality UCSECONDARYQR with UID '{study.message.study_uid}', " + "trying MRN and accession number" + ) + assert expected_msg in caplog.text + + +@pytest.mark.processing() +@pytest.mark.asyncio() +async def test_querying_missing_image(orthanc_raw, monkeypatch, missing_message: Message) -> None: + """ + Given a message for a study that is missing in both the VNA and PACS, + When we query the archives within the window of Monday-Friday 8pm to 8am, + Then the querying tries both the VNA and PACS and raises a PixlDiscardError + """ + study = ImagingStudy.from_message(missing_message) + orthanc = await orthanc_raw + + assert not await study.query_local(orthanc) + + # PACS is not queried during the daytime nor at the weekend. + # Set today to be a Monday at 2 am. + primary_match = "sending message to secondary imaging queue." + secondary_match = "Failed to find study .* in primary or secondary archive." 
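+    # The study exists in neither archive: the primary query defers to the secondary queue,
+    # and the secondary query then raises a discard error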
+ with ( # noqa: PT012 + monkeypatch.context() as mp, + pytest.raises(PixlStudyNotInPrimaryArchiveError, match=primary_match), + pytest.raises(PixlDiscardError, match=secondary_match), + ): + mp.setattr(datetime, "datetime", Monday2AM) + await process_message(missing_message, archive=DicomModality.primary) + await process_message(missing_message, archive=DicomModality.secondary) + + +@pytest.mark.processing() +@pytest.mark.asyncio() +@pytest.mark.parametrize( + "query_date", + [ + (Monday11AM), + (Saturday2AM), + ], +) +async def test_querying_pacs_during_working_hours( + orthanc_raw, query_date, monkeypatch, missing_message: Message +) -> None: + """ + Given a message for a study that is missing in both the VNA and PACS, + When we query the archives outside of Monday-Friday 8pm-8am, + Then the querying tries only the VNA and raises a PixlDiscardError + """ + study = ImagingStudy.from_message(missing_message) + orthanc = await orthanc_raw + + assert not await study.query_local(orthanc) + + match = "Not querying secondary archive during the daytime or on the weekend." + with monkeypatch.context() as mp, pytest.raises(PixlOutOfHoursError, match=match): # noqa: PT012 + mp.setattr(datetime, "datetime", query_date) + await process_message(missing_message, archive=DicomModality.secondary) + + +@pytest.mark.processing() +@pytest.mark.asyncio() +async def test_querying_pacs_not_defined( + orthanc_raw, monkeypatch, missing_message: Message +) -> None: + """ + Given a message for a study that is missing in the VNA and the SECONDARY_DICOM_SOURCE_AE_TITLE + is the same as the PRIMARY_DICOM_SOURCE_AE_TITLE + When we query the archive, + Then the querying tries the VNA and then raises a PixlDiscardError + """ + study = ImagingStudy.from_message(missing_message) + orthanc = await orthanc_raw + + assert not await study.query_local(orthanc) + + match = ( + "Failed to find study .* in primary archive " + "and SECONDARY_DICOM_SOURCE_AE_TITLE is the same as PRIMARY_DICOM_SOURCE_AE_TITLE." + ) + with ( # noqa: PT012 + monkeypatch.context() as mp, + pytest.raises(PixlDiscardError, match=match), + ): + mp.setenv("SECONDARY_DICOM_SOURCE_AE_TITLE", os.environ["PRIMARY_DICOM_SOURCE_AE_TITLE"]) + await process_message(missing_message, archive=DicomModality.primary) diff --git a/projects/configs/ms-pinpoint-internal-only.yaml b/projects/configs/ms-pinpoint.yaml similarity index 87% rename from projects/configs/ms-pinpoint-internal-only.yaml rename to projects/configs/ms-pinpoint.yaml index 67247be86..a68258adf 100644 --- a/projects/configs/ms-pinpoint-internal-only.yaml +++ b/projects/configs/ms-pinpoint.yaml @@ -13,14 +13,15 @@ # limitations under the License. project: - name: "ms-pinpoint-internal-only" + name: "ms-pinpoint" modalities: ["MR"] tag_operation_files: base: + - "base.yaml" #Expected base config file for any project - "mri.yaml" - "ms-pinpoint.yaml" - manufacturer_overrides: null + manufacturer_overrides: ["mri.yaml"] series_filters: - "localizer" diff --git a/projects/configs/prognosis-ai.yaml b/projects/configs/prognosis-ai.yaml new file mode 100644 index 000000000..f1292856e --- /dev/null +++ b/projects/configs/prognosis-ai.yaml @@ -0,0 +1,34 @@ +# Copyright (c) 2024 University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +project: + name: "prognosis-ai" + modalities: ["MR"] + +tag_operation_files: + base: + - "base.yaml" #Expected base config file for any project + - "mri.yaml" + - "ion-neuro-db.yaml" + manufacturer_overrides: ["mri.yaml"] + +series_filters: + - "localizer" + - "localiser" + - "scout" + - "positioning" + +destination: + dicom: "none" + parquet: "none" diff --git a/projects/configs/tag-operations/base.yaml b/projects/configs/tag-operations/base.yaml new file mode 100644 index 000000000..d5271ab15 --- /dev/null +++ b/projects/configs/tag-operations/base.yaml @@ -0,0 +1,375 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default `base.yaml` configuration file with commented links to their respective image modalities +# `base.yaml` configuration file is expected to be part of all projects configuration files + +#################################### 0008 Group ################################### +# +# +- name: "Specific Character Set" + group: 0x0008 + element: 0x0005 + op: "keep" +#CT, MR, US, RT +- name: "Image Type" + group: 0x0008 + element: 0x0008 + op: "keep" +#CT, MR, US, RT +- name: "Instance Creator UID" + group: 0x0008 + element: 0x0014 + op: "replace_UID" +#CT, MR, US, RT +- name: "SOP Class UID" + group: 0x0008 + element: 0x0016 + op: "keep" +#CT, MR, US, RT +- name: "SOP Instance UID" + group: 0x0008 + element: 0x0018 + op: "replace_UID" +#CT, MR, US, RT +- name: "Accession Number" + group: 0x0008 + element: 0x0050 + op: "replace" +- name: "Study Date" + group: 0x0008 + element: 0x0020 + op: "replace" +- name: "General Study" + group: 0x0008 + element: 0x0030 + op: "replace" +- name: "Modality" + group: 0x0008 + element: 0x0060 + op: "keep" +- name: "Modalities In Study" + group: 0x0008 + element: 0x0061 + op: "keep" +- name: "Manufacturer" + group: 0x0008 + element: 0x0070 + op: "keep" +- name: "Referring Physician's Name" + group: 0x0008 + element: 0x0090 + op: "replace" +- name: "Study Description" + group: 0x0008 + element: 0x1030 + op: "keep" +- name: "Series Description" + group: 0x0008 + element: 0x103e + op: "keep" +- name: "Manufacturers Model Name" + group: 0x0008 + element: 0x1090 + op: "keep" +- name: "Referenced Series Sequence" + group: 0x0008 + element: 0x1155 + op: "replace_UID" +- name: "Referenced Frame Number" + group: 0x0008 + element: 0x1160 + op: "keep" +- name: "Pixel Presentation" + group: 0x0008 + element: 0x9205 + op: "keep" +#################################### 0010 Group ################################### +# +# +- name: "Patients Name" + group: 0x0010 + element: 
0x0010 + op: "replace" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Patient ID" + group: 0x0010 + element: 0x0020 + op: "secure-hash" +- name: "Patient's Birth Date" + group: 0x0010 + element: 0x0030 + op: "replace" +- name: "Patient's Sex" + group: 0x0010 + element: 0x0040 + op: "keep" +#################################### 0013 Group ################################### +- name: "VR OB Creator" + group: 0x0013 + element: 0x0010 + op: "keep" +- name: "VR OB sequence" + group: 0x0013 + element: 0x1010 + op: "replace" +#################################### 0018 Group ################################### +# +# +- name: "Scan Options Attribute" + group: 0x0018 + element: 0x0022 + op: "replace" +#CT, MR, X-Ray +- name: "Software Version" + group: 0x0018 + element: 0x1020 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Focal Spot" + group: 0x0018 + element: 0x1190 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Patient Position" + group: 0x0018 + element: 0x5100 + op: "replace" +#CT, MR, PET, US, X-Ray, and RT Images +#################################### 0020 Group ################################### +# +# +- name: "Study Instance UID" + group: 0x0020 + element: 0x000d + op: "replace_UID" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Series Instance UID" + group: 0x0020 + element: 0x000e + op: "replace_UID" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Study ID" + group: 0x0020 + element: 0x0010 + op: "replace" +#CT, MR, US, and RT Images +- name: "Series Number" + group: 0x0020 + element: 0x0011 + op: "keep" +#RT Image +- name: "Acquisition Number" + group: 0x0020 + element: 0x0012 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Instance Number" + group: 0x0020 + element: 0x0013 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Patient Orientation" + group: 0x0020 + element: 0x0020 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Image Position (Patient)" + group: 0x0020 + element: 0x0032 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Image Orientation (Patient)" + group: 0x0020 + element: 0x0037 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Image Laterality" + group: 0x0020 + element: 0x0062 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Frame of Reference UID" + group: 0x0020 + element: 0x0052 + op: "replace_UID" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Synchronization Frame of Reference UID" + group: 0x0020 + element: 0x0200 + op: "replace_UID" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Position Reference Indicator" + group: 0x0020 + element: 0x1040 + op: "replace" +#CT, MR, PET, US, X-Ray, and RT Images +#################################### 0028 Group ################################### +# +# +- name: "Samples Per Pixel" + group: 0x0028 + element: 0x0002 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Photometric Interpretation" + group: 0x0028 + element: 0x0004 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Planar Configuration" + group: 0x0028 + element: 0x0006 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Rows" + group: 0x0028 + element: 0x0010 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Columns" + group: 0x0028 + element: 0x0011 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Pixel Spacing" + group: 0x0028 + element: 0x0030 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Bits Allocated" + group: 0x0028 + element: 0x0100 + op: "keep" +#CT, MR, PET, 
US, X-Ray, and RT Images +- name: "Bits Stored" + group: 0x0028 + element: 0x0101 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "High Bit" + group: 0x0028 + element: 0x0102 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Pixel Representation" + group: 0x0028 + element: 0x0103 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Burned In Annotation" + group: 0x0028 + element: 0x0301 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Recognizable Visual Features" + group: 0x0028 + element: 0x0302 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Window Center" + group: 0x0028 + element: 0x1050 + op: "keep" +#CT, MR, US, RT, PET, X-ray +- name: "Window Width" + group: 0x0028 + element: 0x1051 + op: "keep" +#CT, MR, US, RT, PET, X-ray +- name: "Rescale Intercept" + group: 0x0028 + element: 0x1052 + op: "keep" +#CT, MR, US, RT, PET, X-ray +- name: "Rescale Slope" + group: 0x0028 + element: 0x1053 + op: "keep" +#CT, MR, US, RT, PET, X-ray +- name: "Rescale Type" + group: 0x0028 + element: 0x1054 + op: "keep" +#CT, MR, US, RT, X-ray +- name: "Window Center And Width Explanation" + group: 0x0028 + element: 0x1055 + op: "keep" +#CT, MR, US, RT, PET, X-ray +- name: "Lossy Image Compression" + group: 0x0028 + element: 0x2110 + op: "keep" +#CT, MR, US, RT, PET, X-ray +- name: "VOI LUT Sequence" + group: 0x0028 + element: 0x3010 + op: "keep" +#CT, MR, US, RT, PET, X-ray +#################################### 0040 Group ################################### +# +# +- name: "UID Attribute" + group: 0x0040 + element: 0xA124 + op: "replace_UID" +#CT, MR, PET, US, X-Ray, and RT Images +- name: "Real World Value Intercept" + group: 0x0040 + element: 0x9224 + op: "keep" +#CT, MR, US, RT, PET, X-ray +- name: "Real World Value Slope" + group: 0x0040 + element: 0x9225 + op: "keep" +#CT, MR, US, RT, PET, X-ray +#################################### 0054 Group ################################### +# +# +- name: "View Code Sequence" + group: 0x0054 + element: 0x0220 + op: "keep" +#CT, MR, PET, US, X-Ray, and RT Images +#################################### 0088 Group ################################### +# +# +- name: "Storage Media File-set UID" + group: 0x0088 + element: 0x0140 + op: "replace_UID" +#CT, MR, PET, US, X-Ray, and RT Images +#################################### 3006 Group ################################### +# +# +- name: "Referenced Frame of Reference UID" + group: 0x3006 + element: 0x0024 + op: "replace_UID" +#RT +- name: "Related Frame of Reference UID" + group: 0x3006 + element: 0x00C2 + op: "replace_UID" +#NOTE cannot find any imaging for this one +#################################### 7FE0 Group ################################### +# +# +- name: "Pixel Data" + group: 0x7fe0 + element: 0x0010 + op: "keep" +#CT, US, and RT Images \ No newline at end of file diff --git a/projects/configs/tag-operations/diffusion-weighted-mri.yaml b/projects/configs/tag-operations/diffusion-weighted-mri.yaml index dbf64a449..ca80aabe4 100644 --- a/projects/configs/tag-operations/diffusion-weighted-mri.yaml +++ b/projects/configs/tag-operations/diffusion-weighted-mri.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# Extra common public tags for diffusion weighted MRI +# Common public tags for diffusion weighted MRI - name: "DiffusionGradientOrientation" group: 0x0018 diff --git a/pixl_dcmd/bin/run-tests.sh b/projects/configs/tag-operations/ion-neuro-db.yaml old mode 100755 new mode 100644 similarity index 80% rename from pixl_dcmd/bin/run-tests.sh rename to projects/configs/tag-operations/ion-neuro-db.yaml index 770867555..ad15c76cb --- a/pixl_dcmd/bin/run-tests.sh +++ b/projects/configs/tag-operations/ion-neuro-db.yaml @@ -1,4 +1,3 @@ -#!/usr/bin/env bash # Copyright (c) University College London Hospitals NHS Foundation Trust # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,10 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -set -euxo pipefail -BIN_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -PACKAGE_DIR="${BIN_DIR%/*}" -cd "$PACKAGE_DIR" +# Specific tag operations for ion-neuro-db project + +- name: "Acquisition Date" + group: 0x0008 + element: 0x0022 + op: "keep" + -pytest tests diff --git a/projects/configs/tag-operations/manufacturer-overrides/mri-diffusion.yaml b/projects/configs/tag-operations/manufacturer-overrides/mri-diffusion.yaml index 89bc1e2db..76d850e19 100644 --- a/projects/configs/tag-operations/manufacturer-overrides/mri-diffusion.yaml +++ b/projects/configs/tag-operations/manufacturer-overrides/mri-diffusion.yaml @@ -13,28 +13,45 @@ # limitations under the License. # Example tags obtained from https://github.com/rordenlab/dcm2niix/blob/master/Philips/README.md#diffusion-direction +# https://www.documents.philips.com/doclib/enc/fetch/8554818/DICOM_Conformance_Statement_MR_Applications_on_IntelliSpace_Portal_V4.0.pdf - manufacturer: ^philips tags: - - group: 0x2001 + - name: "Private Creator Group 2001" + group: 0x2001 + element: 0x0010 + op: "keep" + - name: "Private Creator Group 2001 (90)" + group: 0x2001 + element: 0x0090 + op: "keep" + - name: "Diffusion B-Factor" + group: 0x2001 element: 0x1003 op: "keep" - - name: "ScaleSlope" # may need to have this for all MRs rather than just DW? 
+ - name: "Private Creator Group 2005" group: 0x2005 - element: 0x100e + element: 0x0010 op: "keep" - - group: 0x2005 + - name: "Diffusion Direction RL" + group: 0x2005 element: 0x10b0 op: "keep" - - group: 0x2005 + - name: "Diffusion Direction AP" + group: 0x2005 element: 0x10b1 op: "keep" - - group: 0x2005 + - name: "Diffusion Direction FH" + group: 0x2005 element: 0x10b2 op: "keep" # Example tags obtained from https://github.com/rordenlab/dcm2niix/blob/master/Siemens/README.md#siemens-x-series - manufacturer: ^siemens tags: + - name: "SIEMENS MR HEADER private creator Group" + group: 0x0019 + element: 0x0010 + op: "keep" - name: "SiemensDiffusionBValue" group: 0x0019 element: 0x100c @@ -43,6 +60,14 @@ group: 0x0019 element: 0x100e op: "keep" + - name: "SIEMENS CSA HEADER private creator" + group: 0x0029 + element: 0x0010 + op: "keep" + - name: "SIEMENS MEDCOM HEADER2 private creator" + group: 0x0029 + element: 0x0011 + op: "keep" - name: "CSA Image Header Info" group: 0x0029 element: 0x1010 diff --git a/projects/configs/tag-operations/manufacturer-overrides/mri.yaml b/projects/configs/tag-operations/manufacturer-overrides/mri.yaml new file mode 100644 index 000000000..d3ad9c4d0 --- /dev/null +++ b/projects/configs/tag-operations/manufacturer-overrides/mri.yaml @@ -0,0 +1,41 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# https://www.documents.philips.com/doclib/enc/fetch/8554818/DICOM_Conformance_Statement_MR_Applications_on_IntelliSpace_Portal_V4.0.pdf +- manufacturer: ^philips + tags: + - name: "Private Creator Group 2001" + group: 0x2001 + element: 0x0010 + op: "keep" + - name: "Private Creator Group 2001 (90)" + group: 0x2001 + element: 0x0090 + op: "keep" + - name: "Flip angle" # public version can have rounding issues + group: 0x2001 + element: 0x1023 + op: "keep" + - name: "Private Creator Group 2005" + group: 0x2005 + element: 0x0010 + op: "keep" + - name: "Scale Slope" + group: 0x2005 + element: 0x100e + op: "keep" + - name: "Repetition Time" # public version can have rounding issues + group: 0x2005 + element: 0x1030 + op: "keep" \ No newline at end of file diff --git a/projects/configs/tag-operations/mr-spectroscopy.yaml b/projects/configs/tag-operations/mr-spectroscopy.yaml new file mode 100644 index 000000000..5042d9979 --- /dev/null +++ b/projects/configs/tag-operations/mr-spectroscopy.yaml @@ -0,0 +1,190 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +- name: Device Serial Number + group: 0x0018 + element: 0x1000 + op: keep +- name: Content Qualification + group: 0x0018 + element: 0x9004 + op: keep +- name: Pulse Sequence Name + group: 0x0018 + element: 0x9005 + op: keep +- name: Echo Pulse Sequence + group: 0x0018 + element: 0x9008 + op: keep +- name: Multi-planar Excitation + group: 0x0018 + element: 0x9012 + op: keep +- name: Steady State Pulse Sequence + group: 0x0018 + element: 0x9017 + op: keep +- name: Echo Planar Pulse Sequence + group: 0x0018 + element: 0x9018 + op: keep +- name: Spectrally Selected Suppression + group: 0x0018 + element: 0x9025 + op: keep +- name: Geometry of k-Space Traversal + group: 0x0018 + element: 0x9032 + op: keep +- name: Segmented k-Space Traversal + group: 0x0018 + element: 0x9033 + op: keep +- name: Spectral Width + group: 0x0018 + element: 0x9052 + op: keep +- name: Chemical Shift Reference + group: 0x0018 + element: 0x9053 + op: keep +- name: Volume Localization Technique + group: 0x0018 + element: 0x9054 + op: keep +- name: De-coupling + group: 0x0018 + element: 0x9059 + op: keep +- name: k-space Filtering + group: 0x0018 + element: 0x9064 + op: keep +- name: Time Domain Filtering + group: 0x0018 + element: 0x9065 + op: keep +- name: Number of Zero Fills + group: 0x0018 + element: 0x9066 + op: keep +- name: Baseline Correction + group: 0x0018 + element: 0x9067 + op: keep +- name: Acquisition Duration + group: 0x0018 + element: 0x9073 + op: keep +- name: Number of k-Space Trajectories + group: 0x0018 + element: 0x9093 + op: keep +- name: Transmitter Frequency + group: 0x0018 + element: 0x9098 + op: keep +- name: Resonant Nucleus + group: 0x0018 + element: 0x9100 + op: keep +- name: Frequency Correction + group: 0x0018 + element: 0x9101 + op: keep +- name: Slab Thickness + group: 0x0018 + element: 0x9104 + op: keep +- name: Slab Orientation + group: 0x0018 + element: 0x9105 + op: keep +- name: Mid Slab Position + group: 0x0018 + element: 0x9106 + op: keep +- name: Multiple Spin Echo + group: 0x0018 + element: 0x9011 + op: keep +- name: Volume Localization Sequence + group: 0x0018 + element: 0x9126 + op: keep +- name: Applicable Safety Standard Agency + group: 0x0018 + element: 0x9174 + op: keep +- name: First Order Phase Correction + group: 0x0018 + element: 0x9198 + op: keep +- name: Water Referenced Phase Correction + group: 0x0018 + element: 0x9199 + op: keep +- name: MR Spectroscopy Acquisition Type + group: 0x0018 + element: 0x9200 + op: keep +- name: Dimension Organization UID + group: 0x0020 + element: 0x9164 + op: keep +- name: Dimension Index Pointer + group: 0x0020 + element: 0x9165 + op: keep +- name: Dimension Organization Sequence + group: 0x0020 + element: 0x9221 + op: keep +- name: Dimension Index Sequence + group: 0x0020 + element: 0x9222 + op: keep +- name: Number of Frames + group: 0x0028 + element: 0x0008 + op: keep +- name: Data Point Rows + group: 0x0028 + element: 0x9001 + op: keep +- name: Data Point Columns + group: 0x0028 + element: 0x9002 + op: keep +- name: Signal Domain Columns + group: 0x0028 + element: 0x9003 + op: keep +- name: Data Representation + group: 0x0028 + element: 0x9108 + op: keep +- name: Acquisition Context Sequence + group: 0x0040 + element: 0x0555 + op: keep +- name: Shared Functional Groups Sequence + group: 0x5200 + element: 0x9229 + op: keep +- name: Spectroscopy Data + group: 0x5600 + element: 0x0020 + op: keep diff --git 
a/projects/configs/tag-operations/mri.yaml b/projects/configs/tag-operations/mri.yaml index d5f3b7140..77415f15f 100644 --- a/projects/configs/tag-operations/mri.yaml +++ b/projects/configs/tag-operations/mri.yaml @@ -12,81 +12,31 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Default configuration for Standard MR Image +# Default configuration for Standard MR Image with commented links of resepctive tags +# See further details at: +# https://dicom.innolitics.com/ciods/mr-image +# https://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.4.3.html#table_A.4-1 -- name: "Private Creator Data Element" - group: 0x000d - element: 0x0010 - op: "keep" -- name: "UCLH Project Name" - group: 0x000d - element: 0x1001 - op: "keep" -- name: "Specific Character Set" - group: 0x0008 - element: 0x0005 - op: "keep" -- name: "Image Type" - group: 0x0008 - element: 0x0008 - op: "keep" -- name: "Instance Creator UID" - group: 0x0008 - element: 0x0014 - op: "replace_UID" -- name: "SOP Class UID" - group: 0x0008 - element: 0x0016 - op: "keep" -- name: "SOP Instance UID" - group: 0x0008 - element: 0x0018 - op: "replace_UID" -- name: "Accession Number" - group: 0x0008 - element: 0x0050 - op: "replace" -- name: "Modality" - group: 0x0008 - element: 0x0060 - op: "keep" -- name: "Modalities In Study" - group: 0x0008 - element: 0x0061 - op: "keep" -- name: "Manufacturer" - group: 0x0008 - element: 0x0070 - op: "keep" -- name: "Study Description" - group: 0x0008 - element: 0x1030 - op: "keep" -- name: "Series Description" - group: 0x0008 - element: 0x103e - op: "keep" -- name: "Manufacturers Model Name" - group: 0x0008 - element: 0x1090 - op: "keep" -- name: "Referenced Series Sequence" +################################### 0008 Group ################################### +# +# +- name: Acquisition DateTime group: 0x0008 - element: 0x1155 - op: "replace_UID" -- name: "Referenced Frame Number" + element: 0x002A + op: replace +- name: Content Date group: 0x0008 - element: 0x1160 - op: "keep" -- name: "Pixel Presentation" + element: 0x0023 + op: replace +- name: Content Time group: 0x0008 - element: 0x9205 - op: "keep" + element: 0x0033 + op: replace - name: "Volumetric Properties" group: 0x0008 element: 0x9206 op: "keep" -- name: "Volume Based Calculation Technique Attribute" +- name: "Volume Based Calculation Technique" group: 0x0008 element: 0x9207 op: "keep" @@ -98,14 +48,9 @@ group: 0x0008 element: 0x9209 op: "keep" -- name: "Patients Name" - group: 0x0010 - element: 0x0010 - op: "replace" -- name: "Patient ID" - group: 0x0010 - element: 0x0020 - op: "secure-hash" +#################################### 0018 Group ################################### +# +# - name: "Scanning Sequence" group: 0x0018 element: 0x0020 @@ -118,6 +63,9 @@ group: 0x0018 element: 0x0023 op: "keep" +#https://dicom.innolitics.com/ciods/mr-image/mr-image/00180023 +#https://dicom.innolitics.com/ciods/enhanced-mr-image/mr-pulse-sequence/00180023 +#https://dicom.innolitics.com/ciods/enhanced-mr-color-image/mr-pulse-sequence/00180023 - name: "Sequence Name" group: 0x0018 element: 0x0024 @@ -130,6 +78,8 @@ group: 0x0018 element: 0x0080 op: "keep" +#https://dicom.innolitics.com/ciods/enhanced-mr-image/enhanced-mr-image-multi-frame-functional-groups/52009229/00189112/00180080 +#https://dicom.innolitics.com/ciods/enhanced-mr-color-image/enhanced-mr-color-image-multi-frame-functional-groups/52009229/00189112/00180080 - name: "Echo Time" group: 0x0018 element: 0x0081 @@ -142,30 +92,42 
@@ group: 0x0018 element: 0x0083 op: "keep" +#https://dicom.innolitics.com/ciods/mr-image/mr-image/00180083 +#https://dicom.innolitics.com/ciods/mr-spectroscopy/mr-spectroscopy-multi-frame-functional-groups/52009229/00189119/00180083 +#https://dicom.innolitics.com/ciods/enhanced-mr-image/enhanced-mr-image-multi-frame-functional-groups/52009230/00189119/00180083 +#https://dicom.innolitics.com/ciods/enhanced-mr-color-image/enhanced-mr-color-image-multi-frame-functional-groups/52009229/00189119/00180083 - name: "Magnetic Field Strength" group: 0x0018 element: 0x0087 op: "keep" +#https://dicom.innolitics.com/ciods/mr-image/mr-image/00180087 +#https://dicom.innolitics.com/ciods/enhanced-mr-image/enhanced-mr-image/00180087 +#https://dicom.innolitics.com/ciods/enhanced-mr-color-image/enhanced-mr-image/00180087 +#https://dicom.innolitics.com/ciods/legacy-converted-enhanced-mr-image/enhanced-mr-image/00180087 - name: "Spacing Between Slices" group: 0x0018 element: 0x0088 op: "keep" +#https://dicom.innolitics.com/ciods/ct-image/image-plane/00180088 +#https://dicom.innolitics.com/ciods/mr-image/image-plane/00180088 +#https://dicom.innolitics.com/ciods/x-ray-3d-angiographic-image/x-ray-3d-angiographic-image-multi-frame-functional-groups/52009229/00289110/00180088 +#https://dicom.innolitics.com/ciods/x-ray-3d-craniofacial-image/x-ray-3d-craniofacial-image-multi-frame-functional-groups/52009229/00289110/00180088 - name: "Echo train length" group: 0x0018 element: 0x0091 op: "keep" +#https://dicom.innolitics.com/ciods/mr-image/mr-image/00180091 +#https://dicom.innolitics.com/ciods/mr-spectroscopy/mr-spectroscopy-multi-frame-functional-groups/52009229/00189112/00180091 +#https://dicom.innolitics.com/ciods/enhanced-mr-image/enhanced-mr-image-multi-frame-functional-groups/52009229/00189112/00180091 +#https://dicom.innolitics.com/ciods/enhanced-mr-color-image/enhanced-mr-color-image-multi-frame-functional-groups/52009229/00189112/00180091 - name: "Percent Phase Field of View" group: 0x0018 element: 0x0094 op: "keep" -- name: "Software Version" - group: 0x0018 - element: 0x1020 - op: "keep" -- name: "Protocol Name" - group: 0x0018 - element: 0x1030 - op: "delete" +#https://dicom.innolitics.com/ciods/mr-image/mr-image/00180094 +#https://dicom.innolitics.com/ciods/mr-spectroscopy/mr-spectroscopy-multi-frame-functional-groups/52009229/00189103/00180094 +#https://dicom.innolitics.com/ciods/enhanced-mr-image/enhanced-mr-image-multi-frame-functional-groups/52009229/00189125/00180094 +#https://dicom.innolitics.com/ciods/enhanced-mr-color-image/enhanced-mr-color-image-multi-frame-functional-groups/52009229/00189125/00180094 - name: "Field Of View Dimension" group: 0x0018 element: 0x1149 @@ -178,207 +140,36 @@ group: 0x0018 element: 0x1166 op: "keep" -- name: "Focal Spot" - group: 0x0018 - element: 0x1190 - op: "keep" - name: "Receive Coil Name" group: 0x0018 element: 0x1250 op: "keep" +#https://dicom.innolitics.com/ciods/mr-image/mr-image/00181250 +#https://dicom.innolitics.com/ciods/mr-spectroscopy/mr-spectroscopy-multi-frame-functional-groups/52009229/00189042/00181250 +#https://dicom.innolitics.com/ciods/enhanced-mr-image/enhanced-mr-image-multi-frame-functional-groups/52009230/00189042/00181250 +#https://dicom.innolitics.com/ciods/enhanced-mr-color-image/enhanced-mr-color-image-multi-frame-functional-groups/52009229/00189042/00181250 - name: "Transmit Coil Name" group: 0x0018 element: 0x1251 op: "keep" +#https://dicom.innolitics.com/ciods/mr-image/mr-image/00181251 
+#https://dicom.innolitics.com/ciods/mr-spectroscopy/mr-spectroscopy-multi-frame-functional-groups/52009229/00189049/00181251 +#https://dicom.innolitics.com/ciods/enhanced-mr-image/enhanced-mr-image-multi-frame-functional-groups/52009230/00189049/00181251 +#https://dicom.innolitics.com/ciods/enhanced-mr-color-image/enhanced-mr-color-image-multi-frame-functional-groups/52009229/00189049/00181251 - name: "Flip Angle" group: 0x0018 element: 0x1314 op: "keep" -- name: "SAR" - group: 0x0018 - element: 0x1316 - op: "keep" -- name: "Acquisition Device Processing Description" - group: 0x0018 - element: 0x1400 - op: "keep" +#https://dicom.innolitics.com/ciods/mr-image/mr-image/00181314 +#https://dicom.innolitics.com/ciods/enhanced-mr-image/enhanced-mr-image-multi-frame-functional-groups/52009230/00189112/00181314 +#https://dicom.innolitics.com/ciods/enhanced-mr-color-image/enhanced-mr-color-image-multi-frame-functional-groups/52009229/00189112/00181314 - name: "MR Acquisition Frequency Encoding Steps" group: 0x0018 element: 0x9058 op: "keep" +#https://dicom.innolitics.com/ciods/enhanced-mr-image/enhanced-mr-image-multi-frame-functional-groups/52009230/00189125/00189058 +#https://dicom.innolitics.com/ciods/enhanced-mr-color-image/enhanced-mr-color-image-multi-frame-functional-groups/52009229/00189125/00189058 - name: "MR Acquisition Phase Encoding Steps in-plane" group: 0x0018 element: 0x9231 op: "keep" -- name: "Study Instance UID" - group: 0x0020 - element: 0x000d - op: "replace_UID" -- name: "Series Instance UID" - group: 0x0020 - element: 0x000e - op: "replace_UID" -- name: "Study ID" - group: 0x0020 - element: 0x0010 - op: "replace" -- name: "Series Number" - group: 0x0020 - element: 0x0011 - op: "keep" -- name: "Acquisition Number" - group: 0x0020 - element: 0x0012 - op: "keep" -- name: "Instance Number" - group: 0x0020 - element: 0x0013 - op: "keep" -- name: "Patient Orientation" - group: 0x0020 - element: 0x0020 - op: "keep" -- name: "Image Position (Patient)" - group: 0x0020 - element: 0x0032 - op: "keep" -- name: "Image Orientation (Patient)" - group: 0x0020 - element: 0x0037 - op: "keep" -- name: "Image Laterality" - group: 0x0020 - element: 0x0062 - op: "keep" -- name: "Frame of Reference UID" - group: 0x0020 - element: 0x0052 - op: "replace_UID" -- name: "Synchronization Frame of Reference UID" - group: 0x0020 - element: 0x0200 - op: "replace_UID" -- name: "Samples Per Pixel" - group: 0x0028 - element: 0x0002 - op: "keep" -- name: "Photometric Interpretation" - group: 0x0028 - element: 0x0004 - op: "keep" -- name: "Planar Configuration" - group: 0x0028 - element: 0x0006 - op: "keep" -- name: "Rows" - group: 0x0028 - element: 0x0010 - op: "keep" -- name: "Columns" - group: 0x0028 - element: 0x0011 - op: "keep" -- name: "Pixel Spacing" - group: 0x0028 - element: 0x0030 - op: "keep" -- name: "Bits Stored" - group: 0x0028 - element: 0x0101 - op: "keep" -- name: "High Bit" - group: 0x0028 - element: 0x0102 - op: "keep" -- name: "Pixel Representation" - group: 0x0028 - element: 0x0103 - op: "keep" -- name: "Burned In Annotation" - group: 0x0028 - element: 0x0301 - op: "keep" -- name: "Pixel Spacing Calibration Type" - group: 0x0028 - element: 0x0a02 - op: "keep" -- name: "Pixel Spacing Calibration Description" - group: 0x0028 - element: 0x0a04 - op: "keep" -- name: "Pixel Intensity Relationship" - group: 0x0028 - element: 0x1040 - op: "keep" -- name: "Pixel Intensity Relationship Sign" - group: 0x0028 - element: 0x1041 - op: "keep" -- name: "Window Center" - group: 0x0028 - element: 0x1050 - 
op: "keep" -- name: "Window Width" - group: 0x0028 - element: 0x1051 - op: "keep" -- name: "Rescale Intercept" - group: 0x0028 - element: 0x1052 - op: "keep" -- name: "Rescale Slope" - group: 0x0028 - element: 0x1053 - op: "keep" -- name: "Rescale Type" - group: 0x0028 - element: 0x1054 - op: "keep" -- name: "Window Center And Width Explanation" - group: 0x0028 - element: 0x1055 - op: "keep" -- name: "Lossy Image Compression" - group: 0x0028 - element: 0x2110 - op: "keep" -- name: "VOI LUT Sequence" - group: 0x0028 - element: 0x3010 - op: "keep" -- name: "Current Patient Location" - group: 0x0038 - element: 0x0300 - op: "delete" -- name: "UID" - group: 0x0040 - element: 0xA124 - op: "replace_UID" -- name: "Real World Value Intercept" - group: 0x0040 - element: 0x9924 - op: "replace_UID" -- name: "Real World Value Slope" - group: 0x0040 - element: 0x9925 - op: "replace_UID" -- name: "View Code Sequence" - group: 0x0054 - element: 0x0220 - op: "keep" -- name: "Storage Media File-set UID" - group: 0x0088 - element: 0x0140 - op: "replace_UID" -- name: "Referenced Frame of Reference UID" - group: 0x3006 - element: 0x0024 - op: "replace_UID" -- name: "Related Frame of Reference UID" - group: 0x3006 - element: 0x00C2 - op: "replace_UID" -- name: "Pixel Data" - group: 0x7fe0 - element: 0x0010 - op: "keep" diff --git a/projects/configs/tag-operations/ms-pinpoint.yaml b/projects/configs/tag-operations/ms-pinpoint.yaml index d69a84eeb..a64ee7e13 100644 --- a/projects/configs/tag-operations/ms-pinpoint.yaml +++ b/projects/configs/tag-operations/ms-pinpoint.yaml @@ -13,6 +13,7 @@ # limitations under the License. # Specific tag operations for ms-pinpoint project + - name: "Acquisition Date" group: 0x0008 element: 0x0022 diff --git a/projects/configs/tag-operations/test-external-user.yaml b/projects/configs/tag-operations/test-external-user.yaml new file mode 100644 index 000000000..b979a95f8 --- /dev/null +++ b/projects/configs/tag-operations/test-external-user.yaml @@ -0,0 +1,408 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Configuration of Standard MR Image for external user + +- name: "Specific Character Set" + group: 0x0008 + element: 0x0005 + op: "keep" +- name: "Image Type" + group: 0x0008 + element: 0x0008 + op: "keep" +- name: "Instance Creator UID" + group: 0x0008 + element: 0x0014 + op: "replace_UID" +- name: "SOP Class UID" + group: 0x0008 + element: 0x0016 + op: "keep" +- name: "SOP Instance UID" + group: 0x0008 + element: 0x0018 + op: "replace_UID" +- name: "Study Date" + group: 0x0008 + element: 0x0020 + op: "replace" +- name: "Study Time" + group: 0x0008 + element: 0x0030 + op: "replace" +- name: "Accession Number" + group: 0x0008 + element: 0x0050 + op: "replace" +- name: "Modality" + group: 0x0008 + element: 0x0060 + op: "keep" +- name: "Modalities In Study" + group: 0x0008 + element: 0x0061 + op: "keep" +- name: "Manufacturer" + group: 0x0008 + element: 0x0070 + op: "keep" +- name: "Referring Clinician's Name" + group: 0x0008 + element: 0x0090 + op: "replace" +- name: "Study Description" + group: 0x0008 + element: 0x1030 + op: "keep" +- name: "Series Description" + group: 0x0008 + element: 0x103e + op: "keep" +- name: "Manufacturers Model Name" + group: 0x0008 + element: 0x1090 + op: "keep" +- name: "Referenced Series Sequence" + group: 0x0008 + element: 0x1155 + op: "replace_UID" +- name: "Referenced Frame Number" + group: 0x0008 + element: 0x1160 + op: "keep" +- name: "Pixel Presentation" + group: 0x0008 + element: 0x9205 + op: "keep" +- name: "Volumetric Properties" + group: 0x0008 + element: 0x9206 + op: "keep" +- name: "Volume Based Calculation Technique" + group: 0x0008 + element: 0x9207 + op: "keep" +- name: "Complex Image Component" + group: 0x0008 + element: 0x9208 + op: "keep" +- name: "Acquisition Contrast" + group: 0x0008 + element: 0x9209 + op: "keep" +- name: "Patients Name" + group: 0x0010 + element: 0x0010 + op: "replace" +- name: "Patient ID" + group: 0x0010 + element: 0x0020 + op: "replace" +- name: "Patient's Birth Date" + group: 0x0010 + element: 0x0030 + op: "replace" +- name: "Patient's Sex" + group: 0x0010 + element: 0x0040 + op: "keep" +- name: "Scanning Sequence" + group: 0x0018 + element: 0x0020 + op: "keep" +- name: "Sequence Variant" + group: 0x0018 + element: 0x0021 + op: "keep" +- name: "Scan Options" + group: 0x0018 + element: 0x0022 + op: "keep" +- name: "MR Acquisition Type" + group: 0x0018 + element: 0x0023 + op: "keep" +- name: "Sequence Name" + group: 0x0018 + element: 0x0024 + op: "keep" +- name: "Slice Thickness" + group: 0x0018 + element: 0x0050 + op: "keep" +- name: "Repetition Time" + group: 0x0018 + element: 0x0080 + op: "keep" +- name: "Echo Time" + group: 0x0018 + element: 0x0081 + op: "keep" +- name: "Inversion Time" + group: 0x0018 + element: 0x0082 + op: "keep" +- name: "Number of Averages" + group: 0x0018 + element: 0x0083 + op: "keep" +- name: "Magnetic Field Strength" + group: 0x0018 + element: 0x0087 + op: "keep" +- name: "Spacing Between Slices" + group: 0x0018 + element: 0x0088 + op: "keep" +- name: "Echo train length" + group: 0x0018 + element: 0x0091 + op: "keep" +- name: "Percent Phase Field of View" + group: 0x0018 + element: 0x0094 + op: "keep" +- name: "Patient Position" + group: 0x0018 + element: 0x5100 + op: "keep" +- name: "Software Version" + group: 0x0018 + element: 0x1020 + op: "keep" +- name: "Field Of View Dimension" + group: 0x0018 + element: 0x1149 + op: "keep" +- name: "Imager Pixel Spacing" + group: 0x0018 + element: 0x1164 + op: "keep" +- name: "Grid" + group: 0x0018 + element: 0x1166 + op: "keep" +- name: "Focal 
Spot" + group: 0x0018 + element: 0x1190 + op: "keep" +- name: "Receive Coil Name" + group: 0x0018 + element: 0x1250 + op: "keep" +- name: "Transmit Coil Name" + group: 0x0018 + element: 0x1251 + op: "keep" +- name: "Flip Angle" + group: 0x0018 + element: 0x1314 + op: "keep" +- name: "SAR" + group: 0x0018 + element: 0x1316 + op: "keep" +- name: "Acquisition Device Processing Description" + group: 0x0018 + element: 0x1400 + op: "keep" +- name: "MR Acquisition Frequency Encoding Steps" + group: 0x0018 + element: 0x9058 + op: "keep" +- name: "MR Acquisition Phase Encoding Steps in-plane" + group: 0x0018 + element: 0x9231 + op: "keep" +- name: "Study Instance UID" + group: 0x0020 + element: 0x000d + op: "replace_UID" +- name: "Series Instance UID" + group: 0x0020 + element: 0x000e + op: "replace_UID" +- name: "Study ID" + group: 0x0020 + element: 0x0010 + op: "replace" +- name: "Series Number" + group: 0x0020 + element: 0x0011 + op: "keep" +- name: "Acquisition Number" + group: 0x0020 + element: 0x0012 + op: "keep" +- name: "Instance Number" + group: 0x0020 + element: 0x0013 + op: "keep" +- name: "Patient Orientation" + group: 0x0020 + element: 0x0020 + op: "keep" +- name: "Image Position (Patient)" + group: 0x0020 + element: 0x0032 + op: "keep" +- name: "Image Orientation (Patient)" + group: 0x0020 + element: 0x0037 + op: "keep" +- name: "Image Laterality" + group: 0x0020 + element: 0x0062 + op: "keep" +- name: "Frame of Reference UID" + group: 0x0020 + element: 0x0052 + op: "replace_UID" +- name: "Position Reference Indicator" + group: 0x0020 + element: 0x1040 + op: "keep" +- name: "Synchronization Frame of Reference UID" + group: 0x0020 + element: 0x0200 + op: "replace_UID" +- name: "Samples Per Pixel" + group: 0x0028 + element: 0x0002 + op: "keep" +- name: "Photometric Interpretation" + group: 0x0028 + element: 0x0004 + op: "keep" +- name: "Planar Configuration" + group: 0x0028 + element: 0x0006 + op: "keep" +- name: "Rows" + group: 0x0028 + element: 0x0010 + op: "keep" +- name: "Columns" + group: 0x0028 + element: 0x0011 + op: "keep" +- name: "Pixel Spacing" + group: 0x0028 + element: 0x0030 + op: "keep" +- name: "Bits Allocated" + group: 0x0028 + element: 0x0100 + op: "keep" +- name: "Bits Stored" + group: 0x0028 + element: 0x0101 + op: "keep" +- name: "High Bit" + group: 0x0028 + element: 0x0102 + op: "keep" +- name: "Pixel Representation" + group: 0x0028 + element: 0x0103 + op: "keep" +- name: "Burned In Annotation" + group: 0x0028 + element: 0x0301 + op: "keep" +- name: "LUTDescriptor" + group: 0x0028 + element: 0x0302 + op: "keep" +- name: "Pixel Spacing Calibration Type" + group: 0x0028 + element: 0x0a02 + op: "keep" +- name: "Pixel Spacing Calibration Description" + group: 0x0028 + element: 0x0a04 + op: "keep" +- name: "Pixel Intensity Relationship" + group: 0x0028 + element: 0x1040 + op: "keep" +- name: "Pixel Intensity Relationship Sign" + group: 0x0028 + element: 0x1041 + op: "keep" +- name: "Window Center" + group: 0x0028 + element: 0x1050 + op: "keep" +- name: "Window Width" + group: 0x0028 + element: 0x1051 + op: "keep" +- name: "Rescale Intercept" + group: 0x0028 + element: 0x1052 + op: "keep" +- name: "Rescale Slope" + group: 0x0028 + element: 0x1053 + op: "keep" +- name: "Rescale Type" + group: 0x0028 + element: 0x1054 + op: "keep" +- name: "Window Center And Width Explanation" + group: 0x0028 + element: 0x1055 + op: "keep" +- name: "Lossy Image Compression" + group: 0x0028 + element: 0x2110 + op: "keep" +- name: "VOI LUT Sequence" + group: 0x0028 + element: 0x3010 + op: 
"keep" +- name: "UID" + group: 0x0040 + element: 0xA124 + op: "replace_UID" +- name: "Real World Value Intercept" + group: 0x0040 + element: 0x9924 + op: "replace_UID" +- name: "Real World Value Slope" + group: 0x0040 + element: 0x9925 + op: "replace_UID" +- name: "View Code Sequence" + group: 0x0054 + element: 0x0220 + op: "keep" +- name: "Storage Media File-set UID" + group: 0x0088 + element: 0x0140 + op: "replace_UID" +- name: "Referenced Frame of Reference UID" + group: 0x3006 + element: 0x0024 + op: "replace_UID" +- name: "Related Frame of Reference UID" + group: 0x3006 + element: 0x00C2 + op: "replace_UID" +- name: "Pixel Data" + group: 0x7fe0 + element: 0x0010 + op: "keep" diff --git a/projects/configs/tag-operations/test-extract-uclh-omop-cdm.yaml b/projects/configs/tag-operations/test-extract-uclh-omop-cdm.yaml index 54aed8f2f..f31e7af13 100644 --- a/projects/configs/tag-operations/test-extract-uclh-omop-cdm.yaml +++ b/projects/configs/tag-operations/test-extract-uclh-omop-cdm.yaml @@ -11,591 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -- name: "Specific Character Set" - group: 0x0008 - element: 0x0005 - op: "keep" -- name: "Image Type" - group: 0x0008 - element: 0x0008 - op: "keep" -- name: "SOP Class UID" - group: 0x0008 - element: 0x0016 - op: "keep" -- name: "SOP Instance UID" - group: 0x0008 - element: 0x0018 - op: "replace_UID" -- name: "Study Date" - group: 0x0008 - element: 0x0020 - op: "delete" -- name: "Series Date" - group: 0x0008 - element: 0x0021 - op: "delete" -- name: "Acquisition Date" - group: 0x0008 - element: 0x0022 - op: "delete" -- name: "Image Date" - group: 0x0008 - element: 0x0023 - op: "delete" -- name: "Acquisition Date Time" - group: 0x0008 - element: 0x002a - op: "delete" -- name: "Study Time" - group: 0x0008 - element: 0x0030 - op: "delete" -- name: "Series Time" - group: 0x0008 - element: 0x0031 - op: "delete" -- name: "Acquisition Time" - group: 0x0008 - element: 0x0032 - op: "delete" -- name: "Image Time" - group: 0x0008 - element: 0x0033 - op: "delete" -- name: "Accession Number" - group: 0x0008 - element: 0x0050 - op: "replace" -- name: "Modality" - group: 0x0008 - element: 0x0060 - op: "keep" -- name: "Modalities In Study" - group: 0x0008 - element: 0x0061 - op: "keep" -- name: "Presentation Intent Type" - group: 0x0008 - element: 0x0068 - op: "delete" -- name: "Manufacturer" - group: 0x0008 - element: 0x0070 - op: "keep" -- name: "Institution Name" - group: 0x0008 - element: 0x0080 - op: "delete" -- name: "Institution Address" - group: 0x0008 - element: 0x0081 - op: "delete" -- name: "Referring Physicians Name" - group: 0x0008 - element: 0x0090 - op: "delete" -- name: "Station Name" - group: 0x0008 - element: 0x1010 - op: "delete" -- name: "Study Description" - group: 0x0008 - element: 0x1030 - op: "keep" -- name: "Series Description" - group: 0x0008 - element: 0x103e - op: "keep" -- name: "Institutional Department Name" - group: 0x0008 - element: 0x1040 - op: "delete" -- name: "Performing Physicians Name" - group: 0x0008 - element: 0x1050 - op: "delete" -- name: "Operators Name" - group: 0x0008 - element: 0x1070 - op: "delete" -- name: "Manufacturers Model Name" - group: 0x0008 - element: 0x1090 - op: "keep" -- name: "Referenced Study Sequence" - group: 0x0008 - element: 0x1110 - op: "delete" -- name: "Referenced Patient Sequence" - group: 0x0008 - element: 0x1120 - op: "delete" -- name: "Source Image 
Sequence" - group: 0x0008 - element: 0x2112 - op: "delete" -- name: "Anatomic Region Sequence" - group: 0x0008 - element: 0x2218 - op: "delete" -- name: "Irradiation Event UID" - group: 0x0008 - element: 0x3010 - op: "delete" -- name: "Patients Name" - group: 0x0010 + +# Configuration for extracting data from UCLH OMOP CDM DICOM files + +- name: "VR OB Creator" + group: 0x0013 element: 0x0010 - op: "replace" -- name: "Patient ID" - group: 0x0010 - element: 0x0020 - op: "secure-hash" -- name: "Issuer Of Patient ID" - group: 0x0010 - element: 0x0021 - op: "delete" -- name: "Patients Birth Date" - group: 0x0010 - element: 0x0030 - op: "delete" -- name: "Patients Birth Time" - group: 0x0010 - element: 0x0032 - op: "delete" -- name: "Patients Sex" - group: 0x0010 - element: 0x0040 - op: "delete" -- name: "Other Patient IDs" - group: 0x0010 - element: 0x1000 - op: "delete" -- name: "Other Patient Names" - group: 0x0010 - element: 0x1001 - op: "delete" -- name: "Patients Age" - group: 0x0010 - element: 0x1010 - op: "delete" -- name: "Patients Size" - group: 0x0010 - element: 0x1020 - op: "keep" -- name: "Patients Weight" - group: 0x0010 - element: 0x1030 - op: "keep" -- name: "Patients Address" - group: 0x0010 - element: 0x1040 - op: "delete" -- name: "Medical Alerts" - group: 0x0010 - element: 0x2000 - op: "delete" -- name: "Contrast Allergies" - group: 0x0010 - element: 0x2110 - op: "delete" -- name: "Patient Comments" - group: 0x0010 - element: 0x4000 - op: "delete" -- name: "Private Creator Data Element" - group: 0x0011 - element: 0x0010 - op: "delete" -- name: "Body Part Examined" - group: 0x0018 - element: 0x0015 - op: "keep" -- name: "kVp" - group: 0x0018 - element: 0x0060 - op: "keep" -- name: "Software Version" - group: 0x0018 - element: 0x1020 - op: "keep" -- name: "Protocol Name" - group: 0x0018 - element: 0x1030 - op: "delete" -- name: "Field Of View Dimension" - group: 0x0018 - element: 0x1149 - op: "keep" -- name: "Exposure Time" - group: 0x0018 - element: 0x1150 - op: "keep" -- name: "X Ray Tube Current" - group: 0x0018 - element: 0x1151 - op: "keep" -- name: "Exposure" - group: 0x0018 - element: 0x1152 - op: "keep" -- name: "Exposure In Uas" - group: 0x0018 - element: 0x1153 - op: "keep" -- name: "Image Area Dose Product" - group: 0x0018 - element: 0x115e - op: "keep" -- name: "Imager Pixel Spacing" - group: 0x0018 - element: 0x1164 - op: "keep" -- name: "Grid" - group: 0x0018 - element: 0x1166 - op: "keep" -- name: "Focal Spot" - group: 0x0018 - element: 0x1190 - op: "keep" -- name: "Acquisition Device Processing Description" - group: 0x0018 - element: 0x1400 - op: "keep" -- name: "Exposure Index" - group: 0x0018 - element: 0x1411 - op: "keep" -- name: "Target Exposure Index" - group: 0x0018 - element: 0x1412 - op: "keep" -- name: "Deviation Index" - group: 0x0018 - element: 0x1413 - op: "keep" -- name: "Positioner Type" - group: 0x0018 - element: 0x1508 - op: "keep" -- name: "Collemator Shape" - group: 0x0018 - element: 0x1700 - op: "keep" -- name: "Vertices Of The Polygonal Collimator" - group: 0x0018 - element: 0x1720 - op: "keep" -- name: "View Position" - group: 0x0018 - element: 0x5101 - op: "keep" -- name: "Sensitivity" - group: 0x0018 - element: 0x6000 - op: "keep" -- name: "Detector Temperature" - group: 0x0018 - element: 0x7001 - op: "keep" -- name: "Detector Type" - group: 0x0018 - element: 0x7004 - op: "keep" -- name: "Detector Configuration" - group: 0x0018 - element: 0x7005 - op: "keep" -- name: "Detector ID" - group: 0x0018 - element: 0x700a - op: "keep" -- name: 
"Detector Binning" - group: 0x0018 - element: 0x701a - op: "keep" -- name: "Detector Element Physical Size" - group: 0x0018 - element: 0x7020 - op: "keep" -- name: "Detector Element Spacing" - group: 0x0018 - element: 0x7022 - op: "keep" -- name: "Detector Active Shape" - group: 0x0018 - element: 0x7024 op: "keep" -- name: "Detector Active Dimensions" - group: 0x0018 - element: 0x7026 - op: "keep" -- name: "Field Of View Origin" - group: 0x0018 - element: 0x7030 - op: "keep" -- name: "Field Of View Rotation" - group: 0x0018 - element: 0x7032 - op: "keep" -- name: "Field Of View Horizontal Flip" - group: 0x0018 - element: 0x7034 - op: "keep" -- name: "Grid Focal Distance" - group: 0x0018 - element: 0x704c - op: "keep" -- name: "Exposure Control Mode" - group: 0x0018 - element: 0x7060 - op: "keep" -- name: "Study Instance UID" - group: 0x0020 - element: 0x000d - op: "replace_UID" -- name: "Series Instance UID" - group: 0x0020 - element: 0x000e - op: "replace_UID" -- name: "Study ID" - group: 0x0020 - element: 0x0010 +- name: "VR OB sequence" + group: 0x0013 + element: 0x1010 op: "replace" -- name: "Series Number" - group: 0x0020 - element: 0x0011 - op: "keep" -- name: "Image Number" - group: 0x0020 - element: 0x0013 - op: "keep" -- name: "Patient Orientation" - group: 0x0020 - element: 0x0020 - op: "keep" -- name: "Image Laterality" - group: 0x0020 - element: 0x0062 - op: "keep" -- name: "Number Of Study Related Images" - group: 0x0020 - element: 0x1208 - op: "delete" -- name: "Samples Per Pixel" - group: 0x0028 - element: 0x0002 - op: "keep" -- name: "Photometric Interpretation" - group: 0x0028 - element: 0x0004 - op: "keep" -- name: "Rows" - group: 0x0028 - element: 0x0010 - op: "keep" -- name: "Columns" - group: 0x0028 - element: 0x0011 - op: "keep" -- name: "Pixel Spacing" - group: 0x0028 - element: 0x0030 - op: "keep" -- name: "Bits Allocated" - group: 0x0028 - element: 0x0100 - op: "keep" -- name: "Bits Stored" - group: 0x0028 - element: 0x0101 - op: "keep" -- name: "High Bit" - group: 0x0028 - element: 0x0102 - op: "keep" -- name: "Pixel Representation" - group: 0x0028 - element: 0x0103 - op: "keep" -- name: "Quality Control Image" - group: 0x0028 - element: 0x0300 - op: "keep" -- name: "Burned In Annotation" - group: 0x0028 - element: 0x0301 - op: "keep" -- name: "Pixel Spacing Calibration Type" - group: 0x0028 - element: 0x0a02 - op: "keep" -- name: "Pixel Spacing Calibration Description" - group: 0x0028 - element: 0x0a04 - op: "keep" -- name: "Pixel Intensity Relationship" - group: 0x0028 - element: 0x1040 - op: "keep" -- name: "Pixel Intensity Relationship Sign" - group: 0x0028 - element: 0x1041 - op: "keep" -- name: "Window Center" - group: 0x0028 - element: 0x1050 - op: "keep" -- name: "Window Width" - group: 0x0028 - element: 0x1051 - op: "keep" -- name: "Rescale Intercept" - group: 0x0028 - element: 0x1052 - op: "keep" -- name: "Rescale Slope" - group: 0x0028 - element: 0x1053 - op: "keep" -- name: "Rescale Type" - group: 0x0028 - element: 0x1054 - op: "keep" -- name: "Window Center And Width Explanation" - group: 0x0028 - element: 0x1055 - op: "keep" -- name: "Lossy Image Compression" - group: 0x0028 - element: 0x2110 - op: "keep" -- name: "VOI LUT Sequence" - group: 0x0028 - element: 0x3010 - op: "keep" -- name: "Current Patient Location" - group: 0x0038 - element: 0x0300 - op: "delete" -- name: "Patient State" - group: 0x0038 - element: 0x0500 - op: "delete" -- name: "Performed Procedure Start Date" - group: 0x0040 - element: 0x0244 - op: "delete" -- name: "Performed 
Procedure Start Time" - group: 0x0040 - element: 0x0245 - op: "delete" -- name: "Performed Procedure Step ID" - group: 0x0040 - element: 0x0253 - op: "delete" -- name: "Performed Procedure Step Description" - group: 0x0040 - element: 0x0254 - op: "delete" -- name: "Performed Action Item Sequence" - group: 0x0040 - element: 0x0260 - op: "delete" -- name: "Request Attributes Sequence" - group: 0x0040 - element: 0x0275 - op: "delete" -- name: "Acquisition Context Sequence" - group: 0x0040 - element: 0x0555 - op: "delete" -- name: "Confidentiality Code" - group: 0x0040 - element: 0x1008 - op: "delete" -- name: "Private Creator Data Element" - group: 0x0045 - element: 0x0010 - op: "delete" -- name: "View Code Sequence" - group: 0x0054 - element: 0x0220 - op: "keep" -- name: "Image Comments" - group: 0x0020 - element: 0x4000 - op: "delete" -- name: "Instance Creator UID" - group: 0x0008 - element: 0x0014 - op: "replace_UID" -- name: "Referenced SOP Instance UID" - group: 0x0008 - element: 0x1155 - op: "replace_UID" -- name: "Frame of Reference UID" - group: 0x0020 - element: 0x0052 - op: "replace_UID" -- name: "Synchronization Frame of Reference UID" - group: 0x0020 - element: 0x0200 - op: "replace_UID" -- name: "Storage Media File-set UID" - group: 0x0088 - element: 0x0140 - op: "replace_UID" -- name: "Referenced Frame of Reference UID" - group: 0x3006 - element: 0x0024 - op: "replace_UID" -- name: "Related Frame of Reference UID" - group: 0x3006 - element: 0x00C2 - op: "replace_UID" -- name: "UID" - group: 0x0040 - element: 0xA124 - op: "replace_UID" -- name: "Study Comments [Retired]" - group: 0x0032 - element: 0x4000 - op: "delete" -- name: "Ethnic Group" - group: 0x0010 - element: 0x2160 - op: "delete" -- name: "Physicians Of Record" - group: 0x0008 - element: 0x1048 - op: "delete" -- name: "Name Of Physicians Reading Study" - group: 0x0008 - element: 0x1060 - op: "delete" -- name: "Device Serial Number" - group: 0x0018 - element: 0x1000 - op: "delete" -- name: "Additional Patient History" - group: 0x0010 - element: 0x21b0 - op: "delete" -- name: "Pregnancy Status" - group: 0x0010 - element: 0x21c0 - op: "delete" -- name: "Pixel Data" - group: 0x7fe0 - element: 0x0010 - op: "keep" -- name: "Private Creator Data Element" - group: 0x000d - element: 0x0010 - op: "keep" -- name: "UCLH Project Name" - group: 0x000d - element: 0x1001 - op: "keep" diff --git a/projects/configs/tag-operations/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml b/projects/configs/tag-operations/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml index 54aed8f2f..718d2fecf 100644 --- a/projects/configs/tag-operations/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml +++ b/projects/configs/tag-operations/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml @@ -11,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +# Configuration for extracting data from UCLH Nasogastric Tube Project DICOM files + - name: "Specific Character Set" group: 0x0008 element: 0x0005 @@ -30,39 +33,15 @@ - name: "Study Date" group: 0x0008 element: 0x0020 - op: "delete" -- name: "Series Date" - group: 0x0008 - element: 0x0021 - op: "delete" -- name: "Acquisition Date" - group: 0x0008 - element: 0x0022 - op: "delete" -- name: "Image Date" - group: 0x0008 - element: 0x0023 - op: "delete" -- name: "Acquisition Date Time" + op: "replace" +- name: "Instance Creator UID" group: 0x0008 - element: 0x002a - op: "delete" + element: 0x0014 + op: "replace_UID" - name: "Study Time" group: 0x0008 element: 0x0030 - op: "delete" -- name: "Series Time" - group: 0x0008 - element: 0x0031 - op: "delete" -- name: "Acquisition Time" - group: 0x0008 - element: 0x0032 - op: "delete" -- name: "Image Time" - group: 0x0008 - element: 0x0033 - op: "delete" + op: "replace" - name: "Accession Number" group: 0x0008 element: 0x0050 @@ -75,30 +54,14 @@ group: 0x0008 element: 0x0061 op: "keep" -- name: "Presentation Intent Type" - group: 0x0008 - element: 0x0068 - op: "delete" - name: "Manufacturer" group: 0x0008 element: 0x0070 op: "keep" -- name: "Institution Name" - group: 0x0008 - element: 0x0080 - op: "delete" -- name: "Institution Address" - group: 0x0008 - element: 0x0081 - op: "delete" - name: "Referring Physicians Name" group: 0x0008 element: 0x0090 - op: "delete" -- name: "Station Name" - group: 0x0008 - element: 0x1010 - op: "delete" + op: "replace" - name: "Study Description" group: 0x0008 element: 0x1030 @@ -107,42 +70,14 @@ group: 0x0008 element: 0x103e op: "keep" -- name: "Institutional Department Name" - group: 0x0008 - element: 0x1040 - op: "delete" -- name: "Performing Physicians Name" - group: 0x0008 - element: 0x1050 - op: "delete" -- name: "Operators Name" - group: 0x0008 - element: 0x1070 - op: "delete" - name: "Manufacturers Model Name" group: 0x0008 element: 0x1090 op: "keep" -- name: "Referenced Study Sequence" - group: 0x0008 - element: 0x1110 - op: "delete" -- name: "Referenced Patient Sequence" - group: 0x0008 - element: 0x1120 - op: "delete" -- name: "Source Image Sequence" - group: 0x0008 - element: 0x2112 - op: "delete" -- name: "Anatomic Region Sequence" - group: 0x0008 - element: 0x2218 - op: "delete" -- name: "Irradiation Event UID" +- name: "Referenced SOP Instance UID" group: 0x0008 - element: 0x3010 - op: "delete" + element: 0x1155 + op: "replace_UID" - name: "Patients Name" group: 0x0010 element: 0x0010 @@ -151,34 +86,14 @@ group: 0x0010 element: 0x0020 op: "secure-hash" -- name: "Issuer Of Patient ID" - group: 0x0010 - element: 0x0021 - op: "delete" - name: "Patients Birth Date" group: 0x0010 element: 0x0030 - op: "delete" -- name: "Patients Birth Time" - group: 0x0010 - element: 0x0032 - op: "delete" + op: "replace" - name: "Patients Sex" group: 0x0010 element: 0x0040 - op: "delete" -- name: "Other Patient IDs" - group: 0x0010 - element: 0x1000 - op: "delete" -- name: "Other Patient Names" - group: 0x0010 - element: 0x1001 - op: "delete" -- name: "Patients Age" - group: 0x0010 - element: 0x1010 - op: "delete" + op: "keep" - name: "Patients Size" group: 0x0010 element: 0x1020 @@ -187,42 +102,46 @@ group: 0x0010 element: 0x1030 op: "keep" -- name: "Patients Address" - group: 0x0010 - element: 0x1040 - op: "delete" -- name: "Medical Alerts" - group: 0x0010 - element: 0x2000 - op: "delete" -- name: "Contrast Allergies" - group: 0x0010 - element: 0x2110 - op: "delete" -- name: "Patient Comments" - group: 0x0010 - element: 
0x4000 - op: "delete" -- name: "Private Creator Data Element" - group: 0x0011 - element: 0x0010 - op: "delete" - name: "Body Part Examined" group: 0x0018 element: 0x0015 op: "keep" +- name: "Scanning Sequence" + group: 0x0018 + element: 0x0020 + op: "keep" +- name: "Scanning Variant" + group: 0x0018 + element: 0x0021 + op: "keep" +- name: "Scan Options" + group: 0x0018 + element: 0x0022 + op: "keep" +- name: "MR Acquisition Type" + group: 0x0018 + element: 0x0023 + op: "keep" +- name: "Slice Thickness" + group: 0x0018 + element: 0x0050 + op: "keep" - name: "kVp" group: 0x0018 element: 0x0060 op: "keep" +- name: "Echo Time" + group: 0x0018 + element: 0x0081 + op: "keep" +- name: "Echo Train Length" + group: 0x0018 + element: 0x0091 + op: "keep" - name: "Software Version" group: 0x0018 element: 0x1020 op: "keep" -- name: "Protocol Name" - group: 0x0018 - element: 0x1030 - op: "delete" - name: "Field Of View Dimension" group: 0x0018 element: 0x1149 @@ -287,6 +206,10 @@ group: 0x0018 element: 0x1720 op: "keep" +- name: "Patient Position" + group: 0x0018 + element: 0x5100 + op: "keep" - name: "View Position" group: 0x0018 element: 0x5101 @@ -375,14 +298,26 @@ group: 0x0020 element: 0x0020 op: "keep" +- name: "Image Position (Patient)" + group: 0x0020 + element: 0x0032 + op: "keep" +- name: "Image Orientation (Patient)" + group: 0x0020 + element: 0x0037 + op: "keep" +- name: "Position Reference Indicator" + group: 0x0020 + element: 0x1040 + op: "keep" +- name: "Patient Orientation Code Sequence" + group: 0x0054 + element: 0x0410 + op: "keep" - name: "Image Laterality" group: 0x0020 element: 0x0062 op: "keep" -- name: "Number Of Study Related Images" - group: 0x0020 - element: 0x1208 - op: "delete" - name: "Samples Per Pixel" group: 0x0028 element: 0x0002 @@ -475,66 +410,10 @@ group: 0x0028 element: 0x3010 op: "keep" -- name: "Current Patient Location" - group: 0x0038 - element: 0x0300 - op: "delete" -- name: "Patient State" - group: 0x0038 - element: 0x0500 - op: "delete" -- name: "Performed Procedure Start Date" - group: 0x0040 - element: 0x0244 - op: "delete" -- name: "Performed Procedure Start Time" - group: 0x0040 - element: 0x0245 - op: "delete" -- name: "Performed Procedure Step ID" - group: 0x0040 - element: 0x0253 - op: "delete" -- name: "Performed Procedure Step Description" - group: 0x0040 - element: 0x0254 - op: "delete" -- name: "Performed Action Item Sequence" - group: 0x0040 - element: 0x0260 - op: "delete" -- name: "Request Attributes Sequence" - group: 0x0040 - element: 0x0275 - op: "delete" -- name: "Acquisition Context Sequence" - group: 0x0040 - element: 0x0555 - op: "delete" -- name: "Confidentiality Code" - group: 0x0040 - element: 0x1008 - op: "delete" -- name: "Private Creator Data Element" - group: 0x0045 - element: 0x0010 - op: "delete" - name: "View Code Sequence" group: 0x0054 element: 0x0220 op: "keep" -- name: "Image Comments" - group: 0x0020 - element: 0x4000 - op: "delete" -- name: "Instance Creator UID" - group: 0x0008 - element: 0x0014 - op: "replace_UID" -- name: "Referenced SOP Instance UID" - group: 0x0008 - element: 0x1155 - op: "replace_UID" - name: "Frame of Reference UID" group: 0x0020 element: 0x0052 @@ -547,6 +426,10 @@ group: 0x0088 element: 0x0140 op: "replace_UID" +- name: "UID" + group: 0x0040 + element: 0xA124 + op: "replace_UID" - name: "Referenced Frame of Reference UID" group: 0x3006 element: 0x0024 @@ -555,47 +438,7 @@ group: 0x3006 element: 0x00C2 op: "replace_UID" -- name: "UID" - group: 0x0040 - element: 0xA124 - op: "replace_UID" -- name: 
"Study Comments [Retired]" - group: 0x0032 - element: 0x4000 - op: "delete" -- name: "Ethnic Group" - group: 0x0010 - element: 0x2160 - op: "delete" -- name: "Physicians Of Record" - group: 0x0008 - element: 0x1048 - op: "delete" -- name: "Name Of Physicians Reading Study" - group: 0x0008 - element: 0x1060 - op: "delete" -- name: "Device Serial Number" - group: 0x0018 - element: 0x1000 - op: "delete" -- name: "Additional Patient History" - group: 0x0010 - element: 0x21b0 - op: "delete" -- name: "Pregnancy Status" - group: 0x0010 - element: 0x21c0 - op: "delete" - name: "Pixel Data" group: 0x7fe0 element: 0x0010 op: "keep" -- name: "Private Creator Data Element" - group: 0x000d - element: 0x0010 - op: "keep" -- name: "UCLH Project Name" - group: 0x000d - element: 0x1001 - op: "keep" diff --git a/projects/configs/tag-operations/xray.yaml b/projects/configs/tag-operations/xray.yaml new file mode 100644 index 000000000..947b0879a --- /dev/null +++ b/projects/configs/tag-operations/xray.yaml @@ -0,0 +1,89 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default configuration to extend base for x-rays + +- name: "Volumetric Properties" + group: 0x0008 + element: 0x9206 + op: "keep" +- name: "Volume Based Calculation Technique" + group: 0x0008 + element: 0x9207 + op: "keep" +- name: "Field Of View Dimension" + group: 0x0018 + element: 0x1149 + op: "keep" +#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/dx-detector/00181149 +#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/dx-detector/00181149 +#https://dicom.innolitics.com/ciods/x-ray-radiofluoroscopic-image/x-ray-acquisition/00181149 +#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/x-ray-acquisition/00181149 +#https://dicom.innolitics.com/ciods/digital-x-ray-image/dx-detector/00181149 +#https://dicom.innolitics.com/ciods/digital-mammography-x-ray-image/dx-detector/00181149 +#https://dicom.innolitics.com/ciods/digital-intra-oral-x-ray-image/dx-detector/00181149 +- name: "Imager Pixel Spacing" + group: 0x0018 + element: 0x1164 + op: "keep" +#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/dx-detector/00181164 +#https://dicom.innolitics.com/ciods/x-ray-radiofluoroscopic-image/x-ray-acquisition/00181164 +#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/x-ray-acquisition/00181164 +#https://dicom.innolitics.com/ciods/digital-x-ray-image/dx-detector/00181164 +#https://dicom.innolitics.com/ciods/digital-mammography-x-ray-image/dx-detector/00181164 +#https://dicom.innolitics.com/ciods/digital-intra-oral-x-ray-image/dx-detector/00181164 +- name: "Grid" + group: 0x0018 + element: 0x1166 + op: "keep" +#https://dicom.innolitics.com/ciods/x-ray-radiofluoroscopic-image/x-ray-acquisition/00181166 +#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/x-ray-acquisition/00181166 +#https://dicom.innolitics.com/ciods/digital-x-ray-image/x-ray-grid/00181166 
+#https://dicom.innolitics.com/ciods/digital-mammography-x-ray-image/x-ray-grid/00181166 +#https://dicom.innolitics.com/ciods/digital-intra-oral-x-ray-image/x-ray-grid/00181166 +#https://dicom.innolitics.com/ciods/x-ray-3d-craniofacial-image/x-ray-3d-craniofacial-acquisition/00189507/00181166 +#https://dicom.innolitics.com/ciods/x-ray-3d-angiographic-image/x-ray-3d-angiographic-acquisition/00189507/00181166 +#https://dicom.innolitics.com/ciods/breast-projection-x-ray-image/breast-projection-x-ray-image-multi-frame-functional-groups/52009229/00189555/00181166 +- name: "Acquisition Device Processing Description" + group: 0x0018 + element: 0x1400 + op: "keep" +#https://dicom.innolitics.com/ciods/x-ray-radiofluoroscopic-image/x-ray-image/00181400 +#https://dicom.innolitics.com/ciods/x-ray-angiographic-image/x-ray-image/00181400 +#https://dicom.innolitics.com/ciods/digital-x-ray-image/dx-image/00181400 +#https://dicom.innolitics.com/ciods/digital-mammography-x-ray-image/dx-image/00181400 +#https://dicom.innolitics.com/ciods/digital-intra-oral-x-ray-image/dx-image/00181400 +#https://dicom.innolitics.com/ciods/x-ray-3d-craniofacial-image/x-ray-3d-craniofacial-image-contributing-sources/00189506/00181400 +#https://dicom.innolitics.com/ciods/x-ray-3d-angiographic-image/x-ray-3d-angiographic-image-contributing-sources/00189506/00181400 +#https://dicom.innolitics.com/ciods/breast-projection-x-ray-image/breast-projection-x-ray-image-multi-frame-functional-groups/52009229/00189412/00181400 +- name: "Pixel Spacing Calibration Type" + group: 0x0028 + element: 0x0a02 + op: "keep" +#RT, X-Ray +- name: "Pixel Spacing Calibration Description" + group: 0x0028 + element: 0x0a04 + op: "keep" +#X-Ray +- name: "Pixel Intensity Relationship" + group: 0x0028 + element: 0x1040 + op: "keep" +#X-Ray +- name: "Pixel Intensity Relationship Sign" + group: 0x0028 + element: 0x1041 + op: "keep" +#X-Ray diff --git a/projects/configs/test-external-user.yaml b/projects/configs/test-external-user.yaml new file mode 100644 index 000000000..ad4734d07 --- /dev/null +++ b/projects/configs/test-external-user.yaml @@ -0,0 +1,34 @@ +# Copyright (c) 2022 University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +project: + name: "test-external-user" + modalities: ["MR"] + +tag_operation_files: + base: + - "base.yaml" #Expected base config file for any project + - "test-external-user.yaml" + - "diffusion-weighted-mri.yaml" + manufacturer_overrides: ["mri-diffusion.yaml"] + +series_filters: + - "localizer" + - "localiser" + - "scout" + - "positioning" + +destination: + dicom: "none" + parquet: "none" diff --git a/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml b/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml index 5e8868963..bec9c9bc3 100644 --- a/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm-dicomweb.yaml @@ -19,8 +19,9 @@ project: tag_operation_files: base: - - "test-extract-uclh-omop-cdm.yaml" - manufacturer_overrides: "manufacturer-overrides/mri-diffusion.yaml" + - "base.yaml" #Expected base config file for any project + - "mri.yaml" + manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] series_filters: - "localizer" diff --git a/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml b/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml new file mode 100644 index 000000000..9db5648d5 --- /dev/null +++ b/projects/configs/test-extract-uclh-omop-cdm-xnat.yaml @@ -0,0 +1,38 @@ +# Copyright (c) 2024 University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +project: + name: "test-extract-uclh-omop-cdm-xnat" + azure_kv_alias: "test" + modalities: ["DX", "CR", "MR"] + +tag_operation_files: + base: + - "base.yaml" #Expected base config file for any project + - "mri.yaml" + manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] + +series_filters: + - "localizer" + - "localiser" + - "scout" + - "positioning" + +destination: + dicom: "xnat" + parquet: "none" + +xnat_destination_options: + overwrite: "none" + destination: "/archive" diff --git a/projects/configs/test-extract-uclh-omop-cdm.yaml b/projects/configs/test-extract-uclh-omop-cdm.yaml index 5a13f6692..7ff106a1f 100644 --- a/projects/configs/test-extract-uclh-omop-cdm.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm.yaml @@ -19,8 +19,10 @@ project: tag_operation_files: base: + - "base.yaml" #Expected base config file for any project + - "mri.yaml" - "test-extract-uclh-omop-cdm.yaml" - manufacturer_overrides: "manufacturer-overrides/mri-diffusion.yaml" + manufacturer_overrides: ["mri-diffusion.yaml"] series_filters: - "localizer" diff --git a/pixl_dcmd/tests/__init__.py b/projects/configs/test-mr-spectroscopy.yaml similarity index 63% rename from pixl_dcmd/tests/__init__.py rename to projects/configs/test-mr-spectroscopy.yaml index a61138626..6571deb1e 100644 --- a/pixl_dcmd/tests/__init__.py +++ b/projects/configs/test-mr-spectroscopy.yaml @@ -11,3 +11,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +project: + name: "test-mr-spectroscopy" + modalities: ["MR"] + +tag_operation_files: + base: + - "base.yaml" + - "mri.yaml" + - "mr-spectroscopy.yaml" + manufacturer_overrides: + - "mri.yaml" + +series_filters: + - "localizer" + - "localiser" + - "scout" + - "positioning" + +destination: + dicom: "none" + parquet: "none" diff --git a/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml b/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml index 2aa75ed31..6b49a09e5 100644 --- a/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml +++ b/projects/configs/uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml @@ -19,6 +19,7 @@ project: tag_operation_files: base: + - "base.yaml" #Expected base config file for any project - "uclh-nasogastric-tube-project-ngt-only-full-dataset.yaml" manufacturer_overrides: null destination: diff --git a/projects/configs/uclh-prostate-mri-validation-dataset.yaml b/projects/configs/uclh-prostate-mri-external-dataset.yaml similarity index 78% rename from projects/configs/uclh-prostate-mri-validation-dataset.yaml rename to projects/configs/uclh-prostate-mri-external-dataset.yaml index ba78bd730..326e94c92 100644 --- a/projects/configs/uclh-prostate-mri-validation-dataset.yaml +++ b/projects/configs/uclh-prostate-mri-external-dataset.yaml @@ -13,14 +13,15 @@ # limitations under the License. project: - name: "uclh-prostate-mri-validation-dataset" + name: "uclh-prostate-mri-external-dataset" modalities: ["MR"] tag_operation_files: base: + - "base.yaml" #Expected base config file for any project - "mri.yaml" - "diffusion-weighted-mri.yaml" - manufacturer_overrides: "manufacturer-overrides/mri-diffusion.yaml" + manufacturer_overrides: ["mri.yaml", "mri-diffusion.yaml"] destination: - dicom: "none" - parquet: "none" + dicom: "ftps" + parquet: "ftps" diff --git a/pytest-pixl/README.md b/pytest-pixl/README.md index 6111973b7..4c4e9666e 100644 --- a/pytest-pixl/README.md +++ b/pytest-pixl/README.md @@ -2,6 +2,17 @@ Installable `pytest` plugin module providing common test fixtures used throughout PIXL. 
+## Installation + +```bash +python -m pip install -e ../pixl_core -e ".[test]" +``` + +## pytest-cov’s engine +``` +COV_CORE_SOURCE=src COV_CORE_CONFIG=.coveragerc COV_CORE_DATAFILE=.coverage.eager pytest --cov=src --cov-append --cov-report=xml --cov-report=term-missing +``` + ## Available fixtures ### `ftps_server` diff --git a/pytest-pixl/pyproject.toml b/pytest-pixl/pyproject.toml index df00c1e77..2f7e1c232 100644 --- a/pytest-pixl/pyproject.toml +++ b/pytest-pixl/pyproject.toml @@ -1,9 +1,7 @@ [project] name = "pytest-pixl" -version = "0.0.1" -authors = [ - { name="PIXL core functionality" }, -] +version = "0.2.0rc0" +authors = [{ name = "PIXL authors" }] description = "Pytest plugin for PIXL" readme = "README.md" requires-python = ">=3.9" @@ -12,27 +10,52 @@ classifiers = [ ] dependencies = [ "pydicom==2.4.4", - "pyftpdlib==1.5.9", - "python-decouple==3.6", - "pyOpenSSL==24.0.0", - "requests==2.31.0", + "pyftpdlib==2.0.1", + "pyOpenSSL==24.3.0", + "python-decouple==3.8", + "pytest-loguru==0.4.0", + "requests==2.32.3", ] [project.optional-dependencies] test = [ - "core", # pixl_core - "pytest==7.4.2", + "core==0.2.0rc0", + "pytest==8.3.4", ] dev = [ - "mypy", - "pre-commit", - "ruff", + "core[dev]==0.2.0rc0", ] [build-system] requires = ["setuptools>=61.0", "setuptools-scm>=8.0"] build-backend = "setuptools.build_meta" + +[tool.coverage.report] +exclude_also = [ + # Don't complain about missing debug-only code: + "def __repr__", + "if self.debug:", + + # Don't complain if tests don't hit defensive assertion code: + "raise AssertionError", + "raise NotImplementedError", + + # Don't complain if non-runnable code isn't run: + "if 0:", + "if __name__ == .__main__.:", + + # Don't complain about abstract methods, they aren't run: + "@(abc\\.)?abstractmethod", + + # Others + "if settings.DEBUG", + "except subprocess.CalledProcessError as exception:", + "if TYPE_CHECKING:", + "if typing.TYPE_CHECKING", + "class .*\\bProtocol\\):", +] + [tool.setuptools.package-data] pytest_pixl = ["*.key", "*.crt"] @@ -43,4 +66,4 @@ extend = "../ruff.toml" "./tests/**" = ["D100"] [project.entry-points.pytest11] -pytest_pixl = "pytest_pixl.plugin" +pytest_pixl = "pytest_pixl.plugin" \ No newline at end of file diff --git a/pytest-pixl/src/pytest_pixl/data/omop-resources/batch_input.csv b/pytest-pixl/src/pytest_pixl/data/omop-resources/batch_input.csv new file mode 100644 index 000000000..b2440897a --- /dev/null +++ b/pytest-pixl/src/pytest_pixl/data/omop-resources/batch_input.csv @@ -0,0 +1,4 @@ +procedure_id,mrn,accession_number,project_name,extract_generated_timestamp,study_date,study_uid,participant_id +1,mrn,123,i-am-a-project,2023-01-01T00:00:00Z,2023-01-01,1.2.3, +2,mrn,234,i-am-a-project,2023-01-01T00:00:00Z,2023-01-01,2.3.4, +3,mrn,345,i-am-a-project,2023-01-01T00:00:00Z,2023-01-01,3.4.5, diff --git a/pytest-pixl/src/pytest_pixl/data/omop-resources/duplicate_input.csv b/pytest-pixl/src/pytest_pixl/data/omop-resources/duplicate_input.csv new file mode 100644 index 000000000..c4b38f497 --- /dev/null +++ b/pytest-pixl/src/pytest_pixl/data/omop-resources/duplicate_input.csv @@ -0,0 +1,7 @@ +procedure_id,mrn,accession_number,project_name,extract_generated_timestamp,study_date,study_uid,participant_id +1,mrn,123,i-am-a-project,2023-01-01T00:00:00Z,2023-01-01,1.2.3, +2,mrn,234,i-am-a-project,2023-01-01T00:00:00Z,2023-01-01,2.3.4, +3,mrn,345,i-am-a-project,2023-01-01T00:00:00Z,2023-01-01,3.4.5, +4,mrn,123,i-am-a-project,2024-01-01T00:00:00Z,2023-01-01,1.2.3, 
+5,mrn,234,i-am-a-project,2024-01-01T00:00:00Z,2023-01-01,2.3.4, +6,mrn,345,i-am-a-project,2024-01-01T00:00:00Z,2023-01-01,3.4.5, diff --git a/pytest-pixl/src/pytest_pixl/data/omop-resources/multiple_projects.csv b/pytest-pixl/src/pytest_pixl/data/omop-resources/multiple_projects.csv new file mode 100644 index 000000000..846c7d213 --- /dev/null +++ b/pytest-pixl/src/pytest_pixl/data/omop-resources/multiple_projects.csv @@ -0,0 +1,7 @@ +procedure_id,mrn,accession_number,project_name,extract_generated_timestamp,study_date,study_uid,participant_id +1,mrn,123,i-am-a-project,2023-01-01T00:00:00Z,2023-01-01,1.2.3, +2,mrn,234,i-am-a-project,2023-01-01T00:00:00Z,2023-01-01,2.3.4, +3,mrn,345,i-am-a-project,2023-01-01T00:00:00Z,2023-01-01,3.4.5, +4,mrn,123,i-am-another-project,2023-01-01T00:00:00Z,2023-01-01,1.2.3, +5,mrn,234,i-am-another-project,2023-01-01T00:00:00Z,2023-01-01,2.3.4, +6,mrn,345,i-am-another-project,2023-01-01T00:00:00Z,2023-01-01,3.4.5, diff --git a/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/private/PROCEDURE_OCCURRENCE_LINKS.parquet b/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/private/PROCEDURE_OCCURRENCE_LINKS.parquet index f4a1d6370..c1c49f80d 100644 Binary files a/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/private/PROCEDURE_OCCURRENCE_LINKS.parquet and b/pytest-pixl/src/pytest_pixl/data/omop-resources/omop/private/PROCEDURE_OCCURRENCE_LINKS.parquet differ diff --git a/pytest-pixl/src/pytest_pixl/data/omop-resources/participant_id.csv b/pytest-pixl/src/pytest_pixl/data/omop-resources/participant_id.csv new file mode 100644 index 000000000..57555de63 --- /dev/null +++ b/pytest-pixl/src/pytest_pixl/data/omop-resources/participant_id.csv @@ -0,0 +1,4 @@ +procedure_id,mrn,accession_number,project_name,extract_generated_timestamp,study_date,study_uid,participant_id +1,mrn,123,i-am-a-project,2023-01-01T00:00:00Z,2023-01-01,1.2.3,AAA00 +2,mrn,234,i-am-a-project,2023-01-01T00:00:00Z,2023-01-01,2.3.4,BBB11 +3,mrn,345,i-am-a-project,2023-01-01T00:00:00Z,2023-01-01,3.4.5,CCC22 diff --git a/pytest-pixl/src/pytest_pixl/data/omop-resources/test.csv b/pytest-pixl/src/pytest_pixl/data/omop-resources/test.csv index b520add67..367736831 100644 --- a/pytest-pixl/src/pytest_pixl/data/omop-resources/test.csv +++ b/pytest-pixl/src/pytest_pixl/data/omop-resources/test.csv @@ -1,2 +1,2 @@ -procedure_id,mrn,accession_number,project_name,extract_generated_timestamp,study_date -0,patient_identifier,123456789,ms-pinpoint-test,01/01/2022 00:01,01/01/2022 00:01 \ No newline at end of file +procedure_id,mrn,accession_number,project_name,extract_generated_timestamp,study_date,study_uid,participant_id +0,patient_identifier,123456789,ms-pinpoint-test,2023-01-01T00:01:00Z,2022-01-01,1.2.3.4.5.6.7.8, diff --git a/pytest-pixl/src/pytest_pixl/helpers.py b/pytest-pixl/src/pytest_pixl/helpers.py index ecbcbf122..dc59b3779 100644 --- a/pytest-pixl/src/pytest_pixl/helpers.py +++ b/pytest-pixl/src/pytest_pixl/helpers.py @@ -39,11 +39,11 @@ def run_subprocess( """ logger.info("Running command {}", cmd) try: - cp = subprocess.run( + cp = subprocess.run( # noqa: S603 input is trusted cmd, check=True, cwd=working_dir, - shell=shell, # noqa: S603 input is trusted + shell=shell, timeout=timeout, capture_output=True, ) diff --git a/pytest-pixl/tests/test_ftpserver_fixture.py b/pytest-pixl/tests/test_ftpserver_fixture.py index de9951e25..9f7833907 100644 --- a/pytest-pixl/tests/test_ftpserver_fixture.py +++ b/pytest-pixl/tests/test_ftpserver_fixture.py @@ -11,11 +11,30 @@ # WITHOUT WARRANTIES OR 
CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os +from pathlib import Path + import pytest +from conftest import TEST_DIR +from pytest_pixl.dicom import _create_default_json +from pytest_pixl.plugin import FtpHostAddress -@pytest.mark.pytester_example_path("tests/samples_for_fixture_tests/test_ftpserver_fixture") +@pytest.mark.pytester_example_path(f"{TEST_DIR}/samples_for_fixture_tests/test_ftpserver_fixture") def test_ftpserver_connection(pytester): """Test whether we can connect to the FTP server fixture""" pytester.copy_example("test_ftpserver_login.py") pytester.runpytest("-k", "test_ftpserver_login") + + +def test_create_default_json_file(): + """Test whether we can create a default JSON file""" + filename_to_create = "test_json_file.json" + _create_default_json(filename_to_create) + assert Path(filename_to_create).exists() + os.remove(filename_to_create) # noqa: PTH107 + + +def test_ftp_host_address(): + """Run FTP on localhost - docker containers do not need to access it""" + assert FtpHostAddress.DOCKERHOST == FtpHostAddress.DOCKERHOST diff --git a/pytest.ini b/pytest.ini index 8096b9b62..be8f67715 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,10 +1,11 @@ [pytest] minversion=6.0 -addopts = -ra -q +addopts = -ra -q --cov --cov-append testpaths = pixl_core/tests cli/tests pixl_export/tests pixl_imaging/tests pixl_dcmd/tests -# pixl_dcmd and hasher conftests clash, so can only add one of the two + pytest-pixl/tests +# pixl_dcmd and hasher conftests clash, so can only add one of the two \ No newline at end of file diff --git a/ruff.toml b/ruff.toml index 99b8cd438..36ba9768c 100644 --- a/ruff.toml +++ b/ruff.toml @@ -48,7 +48,13 @@ lint.mccabe.max-complexity = 18 exclude=["scripts"] [lint.extend-per-file-ignores] -"**/test*/*" = ["PLR2004"] # Magic value used in comparison +"**/test*/*" = [ + "PLR2004", # Magic value used in comparison + "S105" # harcoded password +] +"test/conftest.py" = [ + "E402" # Module level import not at the top of the file +] "hasher/tests/*" = ["ARG001"] # unused function argument "env.py" = ["INP001", "E402", "ERA001"] "alembic/versions/*" = ["D103", "INP001"] diff --git a/schemas/github-issue-forms.json b/schemas/github-issue-forms.json new file mode 100644 index 000000000..815289757 --- /dev/null +++ b/schemas/github-issue-forms.json @@ -0,0 +1,2377 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + + "$id": "https://json.schemastore.org/github-issue-forms.json", + + "$comment": "https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-issue-forms", + + "additionalProperties": true, + + "definitions": { + "type": { + "description": "A form item type\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#keys", + + "type": "string", + + "enum": ["checkboxes", "dropdown", "input", "markdown", "textarea"] + }, + + "id": { + "type": "string", + + "pattern": "^[a-zA-Z0-9_-]+$", + + "examples": ["SampleId"] + }, + + "validations": { + "title": "validation options", + + "type": "object", + + "properties": { + "required": { + "description": "Specify whether require a form item", + + "type": "boolean", + + "default": false + } + }, + + "additionalProperties": false + }, + + "assignee": { + "type": "string", + + "maxLength": 39, + + "pattern": "^[a-zA-Z0-9](-?[a-zA-Z0-9])*$", + + "examples": ["SampleAssignee"] 
+ }, + + "label": { + "type": "string", + + "minLength": 1, + + "examples": ["Sample label"] + }, + + "description": { + "type": "string", + + "default": "", + + "examples": ["Sample description"] + }, + + "placeholder": { + "type": "string", + + "default": "", + + "examples": ["Sample placeholder"] + }, + + "value": { + "type": "string", + + "minLength": 1, + + "examples": ["Sample value"] + }, + + "form_item": { + "title": "form item", + + "description": "A form item\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#about-githubs-form-schema", + + "type": "object", + + "required": ["type"], + + "properties": { + "type": { + "$ref": "#/definitions/type" + } + }, + + "allOf": [ + { + "if": { + "properties": { + "type": { + "const": "markdown" + } + } + }, + + "then": { + "$comment": "For `additionalProperties` to work `type` must also be present here.", + + "title": "markdown", + + "description": "Markdown\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#markdown", + + "type": "object", + + "required": ["type", "attributes"], + + "properties": { + "type": { + "$ref": "#/definitions/type" + }, + + "attributes": { + "title": "markdown attributes", + + "description": "Markdown attributes\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes", + + "type": "object", + + "required": ["value"], + + "properties": { + "value": { + "description": "A markdown code\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes", + + "type": "string", + + "minLength": 1, + + "examples": ["Sample code"] + } + }, + + "additionalProperties": false + } + }, + + "additionalProperties": false + } + }, + + { + "if": { + "properties": { + "type": { + "const": "textarea" + } + } + }, + + "then": { + "$comment": "For `additionalProperties` to work `type` must also be present here.", + + "title": "textarea", + + "description": "Textarea\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#textarea", + + "type": "object", + + "required": ["type", "attributes"], + + "properties": { + "type": { + "$ref": "#/definitions/type" + }, + + "id": { + "$ref": "#/definitions/id", + + "description": "A textarea id\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#keys" + }, + + "attributes": { + "title": "textarea attributes", + + "description": "Textarea attributes\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-1", + + "type": "object", + + "required": ["label"], + + "properties": { + "label": { + "$ref": "#/definitions/label", + + "description": "A short textarea description\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-1" + }, + + "description": { + "$ref": "#/definitions/description", + + "description": "A long textarea description\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-1" + }, + + "placeholder": { + "$ref": "#/definitions/placeholder", + + 
"description": "A textarea placeholder\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-1" + }, + + "value": { + "$ref": "#/definitions/value", + + "description": "A textarea value\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-1" + }, + + "render": { + "description": "A textarea syntax highlighting mode\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-1", + + "type": "string", + + "enum": [ + "1C Enterprise", + + "4D", + + "ABAP CDS", + + "ABAP", + + "ABNF", + + "AFDKO", + + "AGS Script", + + "AIDL", + + "AL", + + "AMPL", + + "ANTLR", + + "API Blueprint", + + "APL", + + "ASL", + + "ASN.1", + + "ASP.NET", + + "ATS", + + "ActionScript", + + "Ada", + + "Alloy", + + "Alpine Abuild", + + "Altium Designer", + + "AngelScript", + + "Ant Build System", + + "ApacheConf", + + "Apex", + + "Apollo Guidance Computer", + + "AppleScript", + + "Arc", + + "AsciiDoc", + + "AspectJ", + + "Assembly", + + "Astro", + + "Asymptote", + + "Augeas", + + "AutoHotkey", + + "AutoIt", + + "AutoIt3", + + "AutoItScript", + + "Avro IDL", + + "Awk", + + "BASIC", + + "Ballerina", + + "Batchfile", + + "Beef", + + "Befunge", + + "BibTeX", + + "Bicep", + + "Bison", + + "BitBake", + + "Blade", + + "BlitzBasic", + + "BlitzMax", + + "Boo", + + "Boogie", + + "Brainfuck", + + "Brightscript", + + "Browserslist", + + "C", + + "C#", + + "C++", + + "C-ObjDump", + + "C2hs Haskell", + + "CIL", + + "CLIPS", + + "CMake", + + "COBOL", + + "CODEOWNERS", + + "COLLADA", + + "CSON", + + "CSS", + + "CSV", + + "CUE", + + "CWeb", + + "Cabal Config", + + "Cabal", + + "Cap'n Proto", + + "Carto", + + "CartoCSS", + + "Ceylon", + + "Chapel", + + "Charity", + + "ChucK", + + "Cirru", + + "Clarion", + + "Classic ASP", + + "Clean", + + "Click", + + "Clojure", + + "Closure Templates", + + "Cloud Firestore Security Rules", + + "CoNLL", + + "CoNLL-U", + + "CoNLL-X", + + "ColdFusion CFC", + + "ColdFusion", + + "Common Lisp", + + "Common Workflow Language", + + "Component Pascal", + + "Containerfile", + + "Cool", + + "Coq", + + "Cpp-ObjDump", + + "Crystal", + + "Csound Document", + + "Csound Score", + + "Csound", + + "Cuda", + + "Cue Sheet", + + "Cycript", + + "Cython", + + "D-ObjDump", + + "DIGITAL Command Language", + + "DM", + + "DTrace", + + "Dafny", + + "Darcs Patch", + + "Dart", + + "DataWeave", + + "Dhall", + + "Diff", + + "Dlang", + + "Dockerfile", + + "Dogescript", + + "Dylan", + + "E", + + "E-mail", + + "EBNF", + + "ECL", + + "ECLiPSe", + + "EJS", + + "EQ", + + "Eagle", + + "Earthly", + + "Easybuild", + + "Ecere Projects", + + "EditorConfig", + + "Eiffel", + + "Elixir", + + "Elm", + + "Emacs Lisp", + + "EmberScript", + + "Erlang", + + "F#", + + "F*", + + "FIGfont", + + "FIGlet Font", + + "FLUX", + + "Factor", + + "Fancy", + + "Fantom", + + "Faust", + + "Fennel", + + "Filebench WML", + + "Filterscript", + + "Fluent", + + "Formatted", + + "Forth", + + "Fortran Free Form", + + "Fortran", + + "FreeBasic", + + "Frege", + + "Futhark", + + "G-code", + + "GAML", + + "GAMS", + + "GAP", + + "GCC Machine Description", + + "GDB", + + "GDScript", + + "GEDCOM", + + "GLSL", + + "GN", + + "Game Maker Language", + + "Gemfile.lock", + + "Genie", + + "Genshi", + + "Gentoo Eclass", + + "Gerber Image", + + "Gettext Catalog", + + "Gherkin", + + "Git Config", + + "Glyph Bitmap Distribution 
Format", + + "Glyph", + + "Gnuplot", + + "Go Checksums", + + "Go Module", + + "Go", + + "Golo", + + "Gosu", + + "Grace", + + "Gradle", + + "Grammatical Framework", + + "Graph Modeling Language", + + "GraphQL", + + "Graphviz (DOT)", + + "Groovy Server Pages", + + "Groovy", + + "HAProxy", + + "HCL", + + "HTML", + + "HTML+ECR", + + "HTML+EEX", + + "HTML+ERB", + + "HTML+PHP", + + "HTML+Razor", + + "HTTP", + + "HXML", + + "Hack", + + "Haml", + + "Handlebars", + + "Harbour", + + "HashiCorp Configuration Language", + + "Haskell", + + "Haxe", + + "HiveQL", + + "HolyC", + + "Hy", + + "IDL", + + "IGOR Pro", + + "IPython Notebook", + + "Idris", + + "Ignore List", + + "ImageJ Macro", + + "Inform 7", + + "Io", + + "Ioke", + + "Isabelle ROOT", + + "Isabelle", + + "J", + + "JAR Manifest", + + "JFlex", + + "JSON with Comments", + + "JSON", + + "JSON5", + + "JSONLD", + + "JSONiq", + + "Jasmin", + + "Java Properties", + + "Java Server Pages", + + "Java", + + "JavaScript", + + "JavaScript+ERB", + + "Jest Snapshot", + + "Jinja", + + "Jison Lex", + + "Jison", + + "Jolie", + + "Jsonnet", + + "Julia", + + "Jupyter Notebook", + + "Kaitai Struct", + + "KakouneScript", + + "KiCad Layout", + + "KiCad Legacy Layout", + + "KiCad Schematic", + + "Kit", + + "Kotlin", + + "Kusto", + + "LFE", + + "LLVM", + + "LOLCODE", + + "LSL", + + "LTspice Symbol", + + "LabVIEW", + + "Lark", + + "Lasso", + + "Lean", + + "Less", + + "Lex", + + "LilyPond", + + "Limbo", + + "Linker Script", + + "Linux Kernel Module", + + "Liquid", + + "Literate Agda", + + "Literate CoffeeScript", + + "Literate Haskell", + + "LiveScript", + + "Logos", + + "Logtalk", + + "LookML", + + "LoomScript", + + "Lua", + + "M", + + "M4", + + "M4Sugar", + + "MATLAB", + + "MAXScript", + + "MLIR", + + "MQL4", + + "MQL5", + + "MTML", + + "MUF", + + "Macaulay2", + + "Makefile", + + "Mako", + + "Markdown", + + "Marko", + + "Mathematica", + + "Max", + + "Mercury", + + "Meson", + + "Metal", + + "Microsoft Developer Studio Project", + + "Microsoft Visual Studio Solution", + + "MiniD", + + "Mirah", + + "Modelica", + + "Modula-2", + + "Modula-3", + + "Module Management System", + + "Monkey", + + "Moocode", + + "MoonScript", + + "Motoko", + + "Motorola 68K Assembly", + + "Muse", + + "Myghty", + + "NASL", + + "NCL", + + "NEON", + + "NPM Config", + + "NSIS", + + "NWScript", + + "Nearley", + + "Nemerle", + + "NeoSnippet", + + "NetLinx", + + "NetLinx+ERB", + + "NetLogo", + + "NewLisp", + + "Nextflow", + + "Nginx", + + "Ninja", + + "Nit", + + "Nix", + + "NumPy", + + "Nunjucks", + + "ObjDump", + + "Object Data Instance Notation", + + "ObjectScript", + + "Objective-C", + + "Objective-C++", + + "Objective-J", + + "Odin", + + "Omgrofl", + + "Opa", + + "Opal", + + "Open Policy Agent", + + "OpenCL", + + "OpenEdge ABL", + + "OpenQASM", + + "OpenRC runscript", + + "OpenSCAD", + + "OpenStep Property List", + + "OpenType Feature File", + + "Org", + + "Ox", + + "Oxygene", + + "Oz", + + "P4", + + "PEG.js", + + "PHP", + + "PLpgSQL", + + "POV-Ray SDL", + + "Pan", + + "Papyrus", + + "Parrot Assembly", + + "Parrot Internal Representation", + + "Parrot", + + "Pascal", + + "Pawn", + + "Pep8", + + "Perl", + + "Pickle", + + "PicoLisp", + + "PigLatin", + + "Pike", + + "PlantUML", + + "Pod 6", + + "Pod", + + "PogoScript", + + "Pony", + + "PostCSS", + + "PostScript", + + "PowerShell", + + "Prisma", + + "Processing", + + "Proguard", + + "Prolog", + + "Promela", + + "Propeller Spin", + + "Protocol Buffer", + + "Protocol Buffers", + + "Public Key", + + "Pug", + + "Puppet", + + "Pure Data", + + "PureBasic", + 
+ "PureScript", + + "Python", + + "Q#", + + "QMake", + + "Qt Script", + + "Quake", + + "R", + + "RAML", + + "RDoc", + + "REALbasic", + + "REXX", + + "RMarkdown", + + "RPC", + + "RPM Spec", + + "Racket", + + "Ragel", + + "Raw token data", + + "ReScript", + + "Readline Config", + + "Reason", + + "Rebol", + + "Record Jar", + + "Red", + + "Redirect Rules", + + "Regular Expression", + + "RenderScript", + + "Rich Text Format", + + "Ring", + + "Riot", + + "RobotFramework", + + "Roff", + + "Rouge", + + "Rscript", + + "Ruby", + + "Rust", + + "SAS", + + "SCSS", + + "SELinux Kernel Policy Language", + + "SELinux Policy", + + "SMT", + + "SPARQL", + + "SQF", + + "SQL", + + "SQLPL", + + "SRecode Template", + + "SSH Config", + + "STON", + + "SVG", + + "SWIG", + + "Sage", + + "SaltStack", + + "Sass", + + "Scala", + + "Scaml", + + "Scheme", + + "Scilab", + + "Self", + + "ShaderLab", + + "Shell", + + "ShellCheck Config", + + "Sieve", + + "Singularity", + + "Slash", + + "Slice", + + "Slim", + + "SmPL", + + "Smalltalk", + + "SnipMate", + + "Solidity", + + "Soong", + + "SourcePawn", + + "Spline Font Database", + + "Squirrel", + + "Stan", + + "Standard ML", + + "Starlark", + + "StringTemplate", + + "Stylus", + + "SubRip Text", + + "SugarSS", + + "SuperCollider", + + "Svelte", + + "Swift", + + "SystemVerilog", + + "TI Program", + + "TLA", + + "TOML", + + "TSQL", + + "TSV", + + "TSX", + + "TXL", + + "Tcl", + + "Tcsh", + + "TeX", + + "Tea", + + "Terra", + + "Texinfo", + + "Text", + + "TextMate Properties", + + "Textile", + + "Thrift", + + "Turing", + + "Turtle", + + "Twig", + + "Type Language", + + "TypeScript", + + "UltiSnip", + + "UltiSnips", + + "Unified Parallel C", + + "Unity3D Asset", + + "Unix Assembly", + + "Uno", + + "UnrealScript", + + "Ur", + + "Ur/Web", + + "UrWeb", + + "V", + + "VBA", + + "VCL", + + "VHDL", + + "Vala", + + "Valve Data Format", + + "Verilog", + + "Vim Help File", + + "Vim Script", + + "Vim Snippet", + + "Visual Basic .NET", + + "Vue", + + "Wavefront Material", + + "Wavefront Object", + + "Web Ontology Language", + + "WebAssembly", + + "WebVTT", + + "Wget Config", + + "Wikitext", + + "Windows Registry Entries", + + "Wollok", + + "World of Warcraft Addon Data", + + "X BitMap", + + "X Font Directory Index", + + "X PixMap", + + "X10", + + "XC", + + "XCompose", + + "XML Property List", + + "XML", + + "XPages", + + "XProc", + + "XQuery", + + "XS", + + "XSLT", + + "Xojo", + + "Xonsh", + + "Xtend", + + "YAML", + + "YANG", + + "YARA", + + "YASnippet", + + "Yacc", + + "ZAP", + + "ZIL", + + "Zeek", + + "ZenScript", + + "Zephir", + + "Zig", + + "Zimpl", + + "abl", + + "abuild", + + "acfm", + + "aconf", + + "actionscript 3", + + "actionscript3", + + "ada2005", + + "ada95", + + "adobe composite font metrics", + + "adobe multiple font metrics", + + "advpl", + + "ags", + + "ahk", + + "altium", + + "amfm", + + "amusewiki", + + "apache", + + "apkbuild", + + "arexx", + + "as3", + + "asm", + + "asp", + + "aspx", + + "aspx-vb", + + "ats2", + + "au3", + + "autoconf", + + "b3d", + + "bash session", + + "bash", + + "bat", + + "batch", + + "bazel", + + "blitz3d", + + "blitzplus", + + "bmax", + + "bplus", + + "bro", + + "bsdmake", + + "byond", + + "bzl", + + "c++-objdump", + + "c2hs", + + "cURL Config", + + "cake", + + "cakescript", + + "cfc", + + "cfm", + + "cfml", + + "chpl", + + "clipper", + + "coccinelle", + + "coffee", + + "coffee-script", + + "coldfusion html", + + "console", + + "cperl", + + "cpp", + + "csharp", + + "csound-csd", + + "csound-orc", + + "csound-sco", + + "cucumber", + + "curlrc", + + 
"cwl", + + "dcl", + + "delphi", + + "desktop", + + "dircolors", + + "django", + + "dosbatch", + + "dosini", + + "dpatch", + + "dtrace-script", + + "eC", + + "ecr", + + "editor-config", + + "edn", + + "eeschema schematic", + + "eex", + + "elisp", + + "emacs muse", + + "emacs", + + "email", + + "eml", + + "erb", + + "fb", + + "fish", + + "flex", + + "foxpro", + + "fsharp", + + "fstar", + + "ftl", + + "fundamental", + + "gf", + + "git-ignore", + + "gitattributes", + + "gitconfig", + + "gitignore", + + "gitmodules", + + "go mod", + + "go sum", + + "go.mod", + + "go.sum", + + "golang", + + "groff", + + "gsp", + + "hbs", + + "heex", + + "help", + + "html+django", + + "html+jinja", + + "html+ruby", + + "htmlbars", + + "htmldjango", + + "hylang", + + "i7", + + "ignore", + + "igor", + + "igorpro", + + "ijm", + + "inc", + + "inform7", + + "inputrc", + + "irc logs", + + "irc", + + "java server page", + + "jq", + + "jruby", + + "js", + + "jsonc", + + "jsp", + + "kak", + + "kakscript", + + "keyvalues", + + "ksy", + + "lassoscript", + + "latex", + + "leex", + + "lhaskell", + + "lhs", + + "lisp", + + "litcoffee", + + "live-script", + + "ls", + + "m2", + + "m68k", + + "mIRC Script", + + "macruby", + + "mail", + + "make", + + "man page", + + "man", + + "man-page", + + "manpage", + + "markojs", + + "max/msp", + + "maxmsp", + + "mbox", + + "mcfunction", + + "mdoc", + + "mediawiki", + + "mf", + + "mma", + + "mumps", + + "mupad", + + "nanorc", + + "nasm", + + "ne-on", + + "nesC", + + "nette object notation", + + "nginx configuration file", + + "nixos", + + "njk", + + "node", + + "npmrc", + + "nroff", + + "nush", + + "nvim", + + "obj-c", + + "obj-c++", + + "obj-j", + + "objc", + + "objc++", + + "objectivec", + + "objectivec++", + + "objectivej", + + "objectpascal", + + "objj", + + "octave", + + "odin-lang", + + "odinlang", + + "oncrpc", + + "ooc", + + "openedge", + + "openrc", + + "osascript", + + "pandoc", + + "pasm", + + "pcbnew", + + "perl-6", + + "perl6", + + "pir", + + "plain text", + + "posh", + + "postscr", + + "pot", + + "pov-ray", + + "povray", + + "progress", + + "protobuf", + + "pwsh", + + "pycon", + + "pyrex", + + "python3", + + "q", + + "ql", + + "qsharp", + + "ragel-rb", + + "ragel-ruby", + + "rake", + + "raw", + + "razor", + + "rb", + + "rbx", + + "reStructuredText", + + "readline", + + "red/system", + + "redirects", + + "regex", + + "regexp", + + "renpy", + + "rhtml", + + "robots txt", + + "robots", + + "robots.txt", + + "rpcgen", + + "rs", + + "rs-274x", + + "rss", + + "rst", + + "rusthon", + + "salt", + + "saltstate", + + "sed", + + "sepolicy", + + "sh", + + "shell-script", + + "shellcheckrc", + + "sml", + + "snippet", + + "sourcemod", + + "soy", + + "specfile", + + "splus", + + "squeak", + + "terraform", + + "tl", + + "tm-properties", + + "troff", + + "ts", + + "udiff", + + "vb .net", + + "vb.net", + + "vb6", + + "vbnet", + + "vdf", + + "vim", + + "vimhelp", + + "viml", + + "visual basic 6", + + "visual basic for applications", + + "visual basic", + + "vlang", + + "wasm", + + "wast", + + "wdl", + + "wgetrc", + + "wiki", + + "winbatch", + + "wisp", + + "wl", + + "wolfram lang", + + "wolfram language", + + "wolfram", + + "wsdl", + + "xBase", + + "xbm", + + "xdr", + + "xhtml", + + "xml+genshi", + + "xml+kid", + + "xpm", + + "xsd", + + "xsl", + + "xten", + + "yas", + + "yml", + + "zsh" + ] + } + }, + + "additionalProperties": false + }, + + "validations": { + "$ref": "#/definitions/validations", + + "title": "textarea validations", + + "description": "Textarea 
validations\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#validations" + } + }, + + "additionalProperties": false + } + }, + + { + "if": { + "properties": { + "type": { + "const": "input" + } + } + }, + + "then": { + "$comment": "For `additionalProperties` to work `type` must also be present here.", + + "title": "input", + + "description": "Input\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#input", + + "type": "object", + + "required": ["type", "attributes"], + + "properties": { + "type": { + "$ref": "#/definitions/type" + }, + + "id": { + "$ref": "#/definitions/id", + + "description": "An input id\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#keys" + }, + + "attributes": { + "title": "input attributes", + + "description": "Input attributes\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-2", + + "type": "object", + + "required": ["label"], + + "properties": { + "label": { + "$ref": "#/definitions/label", + + "description": "A short input description\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-2" + }, + + "description": { + "$ref": "#/definitions/description", + + "description": "A long input description\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-2" + }, + + "placeholder": { + "$ref": "#/definitions/placeholder", + + "description": "An input placeholder\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-2" + }, + + "value": { + "$ref": "#/definitions/value", + + "description": "An input value\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-2" + } + }, + + "additionalProperties": false + }, + + "validations": { + "$ref": "#/definitions/validations", + + "title": "input validations", + + "description": "Input validations\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#validations-1" + } + }, + + "additionalProperties": false + } + }, + + { + "if": { + "properties": { + "type": { + "const": "dropdown" + } + } + }, + + "then": { + "$comment": "For `additionalProperties` to work `type` must also be present here.", + + "title": "dropdown", + + "description": "dropdown\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#dropdown", + + "type": "object", + + "required": ["type", "attributes"], + + "properties": { + "type": { + "$ref": "#/definitions/type" + }, + + "id": { + "$ref": "#/definitions/id", + + "description": "A dropdown id\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#keys" + }, + + "attributes": { + "title": "dropdown attributes", + + "description": "Dropdown attributes\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-3", + 
+ "type": "object", + + "required": ["label", "options"], + + "properties": { + "label": { + "$ref": "#/definitions/label", + + "description": "A short dropdown description\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-3" + }, + + "description": { + "$ref": "#/definitions/description", + + "description": "A long dropdown description\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-3" + }, + + "multiple": { + "description": "Specify whether allow a multiple choices\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-3", + + "type": "boolean", + + "default": false + }, + + "options": { + "description": "Dropdown choices\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-3", + + "type": "array", + + "minItems": 1, + + "uniqueItems": true, + + "items": { + "type": "string", + + "minLength": 1, + + "examples": ["Sample choice"] + } + }, + + "default": { + "description": "Index of the default option\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-3", + + "type": "integer", + + "examples": [0] + } + }, + + "additionalProperties": false + }, + + "validations": { + "$ref": "#/definitions/validations", + + "title": "dropdown validations", + + "description": "Dropdown validations\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#validations-2" + } + }, + + "additionalProperties": false + } + }, + + { + "if": { + "properties": { + "type": { + "const": "checkboxes" + } + } + }, + + "then": { + "$comment": "For `additionalProperties` to work `type` must also be present here.", + + "title": "checkboxes", + + "description": "Checkboxes\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#checkboxes", + + "type": "object", + + "required": ["type", "attributes"], + + "properties": { + "type": { + "$ref": "#/definitions/type" + }, + + "id": { + "$ref": "#/definitions/id", + + "description": "Checkbox list id\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#keys" + }, + + "attributes": { + "title": "checkbox list attributes", + + "description": "Checkbox list attributes\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-4", + + "type": "object", + + "required": ["label", "options"], + + "properties": { + "label": { + "$ref": "#/definitions/label", + + "description": "A short checkbox list description\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-4" + }, + + "description": { + "$ref": "#/definitions/description", + + "description": "A long checkbox list description\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-4" + }, + + "options": { + "description": "Checkbox list 
choices\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-4", + + "type": "array", + + "minItems": 1, + + "items": { + "title": "checkbox list choice", + + "description": "Checkbox list choice\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-4", + + "type": "object", + + "required": ["label"], + + "properties": { + "label": { + "description": "A short checkbox list choice description\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-4", + + "type": "string", + + "minLength": 1, + + "examples": ["Sample label"] + }, + + "required": { + "description": "Specify whether a choice is required\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema#attributes-4", + + "type": "boolean", + + "default": false + } + }, + + "additionalProperties": false + } + } + }, + + "additionalProperties": false + } + }, + + "additionalProperties": false + } + } + ] + } + }, + + "properties": { + "name": { + "description": "An issue template name\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-issue-forms#top-level-syntax", + + "type": "string", + + "minLength": 1, + + "examples": ["Sample name"] + }, + + "description": { + "description": "An issue template description\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-issue-forms#top-level-syntax", + + "type": "string", + + "minLength": 1, + + "examples": ["Sample description"] + }, + + "body": { + "description": "An issue template body\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-issue-forms#top-level-syntax", + + "type": "array", + + "minItems": 1, + + "items": { + "$ref": "#/definitions/form_item" + } + }, + + "assignees": { + "description": "An issue template assignees\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-issue-forms#top-level-syntax", + + "oneOf": [ + { + "$ref": "#/definitions/assignee" + }, + + { + "type": "array", + + "minItems": 1, + + "uniqueItems": true, + + "items": { + "$ref": "#/definitions/assignee" + } + } + ] + }, + + "labels": { + "description": "An issue template labels\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-issue-forms#top-level-syntax", + + "type": "array", + + "minItems": 1, + + "uniqueItems": true, + + "items": { + "type": "string", + + "minLength": 1, + + "examples": [ + "Sample label", + + "bug", + + "documentation", + + "duplicate", + + "enhancement", + + "good first issue", + + "help wanted", + + "invalid", + + "question", + + "wontfix" + ] + } + }, + + "title": { + "description": "An issue template title\nhttps://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-issue-forms#top-level-syntax", + + "type": "string", + + "minLength": 1, + + "examples": ["Sample title", "Bug: ", "Feature: "] + } + }, + + "required": ["name", "description", "body"], + + "title": "GitHub issue forms config file schema", + + "type": "object" +} diff --git a/template_config.yaml b/template_config.yaml index 
f90291a43..223c11fe9 100644 --- a/template_config.yaml +++ b/template_config.yaml @@ -23,5 +23,5 @@ tag_operation_files: # DICOM tag anonymisation operations manufacturer_overrides: none # Manufactuer-dependendent overrides destination: - dicom: "ftps" # alternatives: "dicomweb", "none" + dicom: "ftps" # alternatives: "dicomweb", "xnat", "none" parquet: "ftps" # alternatives: "none" diff --git a/test/.env b/test/.env index e5ff8ec9f..ad9b56d91 100644 --- a/test/.env +++ b/test/.env @@ -1,8 +1,11 @@ ENV=test DEBUG=True -PIXL_DICOM_TRANSFER_TIMEOUT=120 -PIXL_QUERY_TIMEOUT=120 -PIXL_MAX_MESSAGES_IN_FLIGHT=20 +LOG_LEVEL=DEBUG +PIXL_DICOM_TRANSFER_TIMEOUT=100 +PIXL_QUERY_TIMEOUT=20 +CLI_RETRY_SECONDS=90 +PIXL_MAX_MESSAGES_IN_FLIGHT=5 +TZ=Europe/London # PIXL PostgreSQL instance PIXL_DB_HOST=postgres @@ -12,14 +15,6 @@ PIXL_DB_USER=pixl_db_username PIXL_DB_PASSWORD=pixl_db_password SKIP_ALEMBIC=false -# EMAP UDS -EMAP_UDS_HOST=star -EMAP_UDS_PORT=5432 -EMAP_UDS_NAME=emap -EMAP_UDS_USER=postgres -EMAP_UDS_PASSWORD=postgres -EMAP_UDS_SCHEMA_NAME=star - # Exposed ports HASHER_API_PORT=7010 POSTGRES_PORT=7001 @@ -55,21 +50,24 @@ ORTHANC_RAW_RECORD_HEADERS=true ORTHANC_RAW_HEADER_LOG_PATH=/tmp/headers.csv # PIXL Orthanc anon instance +ORTHANC_ANON_URL=http://orthanc-anon:8042 ORTHANC_ANON_USERNAME=orthanc_anon_username ORTHANC_ANON_PASSWORD=orthanc_anon_password ORTHANC_ANON_AE_TITLE=ORTHANCANON ORTHANC_ANON_HTTP_TIMEOUT=60 ENABLE_DICOM_WEB=true ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT=true -PIXL_DICOM_TRANSFER_TIMEOUT=240 STUDY_TIME_OFFSET=0 LOCAL_SALT_VALUE=PIXL # UCVNAQR DICOM node information -VNAQR_AE_TITLE=VNAQR -VNAQR_DICOM_PORT=4242 -VNAQR_IP_ADDR=vna-qr -VNAQR_MODALITY=UCVNAQR +PRIMARY_DICOM_SOURCE_AE_TITLE=PRIMARYQR +PRIMARY_DICOM_SOURCE_PORT=4242 +PRIMARY_DICOM_SOURCE_IP_ADDR=vna-qr +PRIMARY_DICOM_SOURCE_MODALITY=UCPRIMARYQR + +# UCSECONDARYQR DICOM node information +SECONDARY_DICOM_SOURCE_MODALITY=UCSECONDARYQR # RabbitMQ RABBITMQ_HOST=localhost diff --git a/test/README.md b/test/README.md index 15d8f704a..2a3723cca 100644 --- a/test/README.md +++ b/test/README.md @@ -3,14 +3,12 @@ This directory contains a system/integration test that runs locally and aims to test the essential functionality of the full PIXL system. -**Given** a DICOM image in an Orthanc instance (mocked vendor neutral archive, VNA) and a single -patient with the same identifier in a postgres instance (mocked EMAP database, star schema). +**Given** a DICOM image in an Orthanc instance (mocked vendor neutral archive, VNA). **When** a message containing the patient and study identifier is added to the queue and the consumers started. -**Then** a row in the "anon" EMAP data instance of the PIXL postgres instance exists and the DICOM -study exists in the "anon" PIXL Orthanc instance. +**Then** the DICOM study exists in the "anon" PIXL Orthanc instance.
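For reference, the "Then" condition can also be checked by hand against the anonymised Orthanc instance, using the `ORTHANC_ANON_URL`, `ORTHANC_ANON_USERNAME` and `ORTHANC_ANON_PASSWORD` values defined in `test/.env` above. The snippet below is a minimal sketch and not part of this changeset; it assumes the standard Orthanc REST API (where `GET /studies` returns the identifiers of stored studies) and that it runs somewhere `ORTHANC_ANON_URL` resolves, e.g. inside the compose network.

```python
# Sketch only: count the studies currently held by orthanc-anon, using test/.env values.
import os

import requests


def anon_study_ids() -> list[str]:
    """Return the Orthanc identifiers of studies held by the anonymised instance."""
    url = os.environ["ORTHANC_ANON_URL"]  # e.g. http://orthanc-anon:8042
    auth = (os.environ["ORTHANC_ANON_USERNAME"], os.environ["ORTHANC_ANON_PASSWORD"])
    response = requests.get(f"{url}/studies", auth=auth, timeout=10)
    response.raise_for_status()
    return response.json()


if __name__ == "__main__":
    print(f"orthanc-anon currently holds {len(anon_study_ids())} studies")
```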
After setting up your [.secrets.env](../README.md#project-secrets)), you can run the system test with: diff --git a/test/conftest.py b/test/conftest.py index a25b1b70b..79cb460d7 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -14,17 +14,29 @@ """System/E2E test setup""" # ruff: noqa: C408 dict() makes test data easier to read and write +import os from collections.abc import Generator from pathlib import Path from typing import Any +from core.db.models import Base +from sqlalchemy import URL, create_engine +from sqlalchemy.orm import sessionmaker + +# Setting env variables before loading modules +os.environ["PIXL_DB_HOST"] = "localhost" +os.environ["PIXL_DB_PORT"] = "7001" +os.environ["PIXL_DB_USER"] = "pixl_db_username" +os.environ["PIXL_DB_PASSWORD"] = "pixl_db_password" +os.environ["PIXL_DB_NAME"] = "pixl" + import pytest import requests from pytest_pixl.dicom import generate_dicom_dataset from pytest_pixl.ftpserver import PixlFTPServer from pytest_pixl.helpers import run_subprocess from pytest_pixl.plugin import FtpHostAddress -from utils import wait_for_stable_orthanc_anon +from utils import wait_for_images_to_be_exported pytest_plugins = "pytest_pixl" @@ -39,6 +51,7 @@ def host_export_root_dir() -> Path: RESOURCES_DIR = TEST_DIR / "resources" RESOURCES_OMOP_DIR = RESOURCES_DIR / "omop" RESOURCES_OMOP_DICOMWEB_DIR = RESOURCES_DIR / "omop-dicomweb" +SECONDS_TO_WAIT_FOR_EXPORT = 301 def _upload_to_vna(image_filename: Path) -> None: @@ -58,12 +71,6 @@ def _populate_vna(tmp_path_factory: pytest.TempPathFactory) -> None: # more detailed series testing is found in pixl_dcmd tests, but here # we just stick an instance to each study, one of which is expected to be propagated through - # studies are also defined by the StudyID, the StudyInstanceUID - def study_instance_uid(offset: int) -> dict[str, str]: - baseline = "1.3.46.670589.11.38023.5.0.14068.2023012517090166000" - offset_str = f"{offset:04d}" - return dict(StudyInstanceUID=baseline[: -len(offset_str)] + offset_str) - def series_instance_uid(offset: int) -> dict[str, str]: baseline = "1.3.46.670589.11.38023.5.0.7404.2023012517551898153" offset_str = f"{offset:04d}" @@ -78,13 +85,13 @@ def sop_instance_uid(offset: int) -> dict[str, str]: AccessionNumber="AA12345601", PatientID="987654321", StudyID="12340001", - **study_instance_uid(1), + StudyInstanceUID="1.3.6.1.4.1.14519.5.2.1.99.1071.12985477682660597455732044031486", ) study_2 = dict( AccessionNumber="AA12345605", PatientID="987654321", StudyID="12340002", - **study_instance_uid(2), + StudyInstanceUID="1.2.276.0.7230010.3.1.2.929116473.1.1710754859.579485", ) # Series are also defined by the SeriesInstanceUID and SeriesNumber. @@ -137,9 +144,13 @@ def _upload_dicom_instance(dicom_dir: Path, **kwargs: Any) -> None: @pytest.fixture(scope="session") def _setup_pixl_cli(ftps_server: PixlFTPServer, _populate_vna: None) -> Generator: """Run pixl populate/start. 
Cleanup intermediate export dir on exit.""" - run_subprocess(["pixl", "populate", str(RESOURCES_OMOP_DIR.absolute())], TEST_DIR) - # poll here for two minutes to check for imaging to be processed, printing progress - wait_for_stable_orthanc_anon(121, 5, 15, min_instances=3) + run_subprocess( + ["pixl", "populate", "--num-retries", "0", str(RESOURCES_OMOP_DIR.absolute())], + TEST_DIR, + timeout=600, + ) + # poll here to check for imaging to be processed, printing progress + wait_for_images_to_be_exported(SECONDS_TO_WAIT_FOR_EXPORT, 5, 15) yield run_subprocess( [ @@ -157,9 +168,13 @@ def _setup_pixl_cli(ftps_server: PixlFTPServer, _populate_vna: None) -> Generato @pytest.fixture(scope="session") def _setup_pixl_cli_dicomweb(_populate_vna: None) -> Generator: """Run pixl populate/start. Cleanup intermediate export dir on exit.""" - run_subprocess(["pixl", "populate", str(RESOURCES_OMOP_DICOMWEB_DIR.absolute())], TEST_DIR) - # poll here for two minutes to check for imaging to be processed, printing progress - wait_for_stable_orthanc_anon(121, 5, 15, min_instances=3) + run_subprocess( + ["pixl", "populate", "--num-retries", "0", str(RESOURCES_OMOP_DICOMWEB_DIR.absolute())], + TEST_DIR, + timeout=600, + ) + # poll here to check for imaging to be processed, printing progress + wait_for_images_to_be_exported(SECONDS_TO_WAIT_FOR_EXPORT, 5, 15) yield run_subprocess( [ @@ -187,3 +202,28 @@ def _export_patient_data(_setup_pixl_cli) -> None: # type: ignore [no-untyped-d is synchronous (whether that is itself wise is another matter). """ run_subprocess(["pixl", "export-patient-data", str(RESOURCES_OMOP_DIR.absolute())], TEST_DIR) + + +@pytest.fixture(scope="session", autouse=True) +def _cleanup_database() -> Generator: + """ + Remove the test data from the database so we can re-run the tests. + + If the database is not cleaned, the data will not be exported when the + tests are re-run, which results in `wait_for_condition` timing out. + """ + yield + url = URL.create( + drivername="postgresql+psycopg2", + username=os.environ["PIXL_DB_USER"], + password=os.environ["PIXL_DB_PASSWORD"], + host="localhost", + port=os.environ["PIXL_DB_PORT"], + database=os.environ["PIXL_DB_NAME"], + ) + engine = create_engine(url) + PixlSession = sessionmaker(engine) + with PixlSession() as session: + for table in reversed(Base.metadata.sorted_tables): + session.execute(table.delete()) + session.commit() diff --git a/test/docker-compose.yml b/test/docker-compose.yml index 71f186b72..f123709fc 100644 --- a/test/docker-compose.yml +++ b/test/docker-compose.yml @@ -22,13 +22,14 @@ services: image: orthancteam/orthanc:24.3.3 platform: linux/amd64 environment: - ORTHANC_NAME: ${VNAQR_AE_TITLE} + ORTHANC_NAME: ${PRIMARY_DICOM_SOURCE_AE_TITLE} ORTHANC_USERNAME: "orthanc" ORTHANC_PASSWORD: "orthanc" - ORTHANC_AE_TITLE: "VNAQR" + ORTHANC_AE_TITLE: "PRIMARYQR" RAW_AE_TITLE: ${ORTHANC_RAW_AE_TITLE} RAW_DICOM_PORT: "4242" RAW_IP_ADDR: "orthanc-raw" # aka. hostname + TZ: ${TZ:-Europe/London} ports: - "127.0.0.1:4243:4242" - "127.0.0.1:8043:8042" @@ -54,6 +55,7 @@ services: RAW_DICOM_PORT: "4242" RAW_IP_ADDR: "dicomweb-server" # aka. 
hostname DICOM_WEB_PLUGIN_ENABLED: true + TZ: ${TZ:-Europe/London} ports: - "127.0.0.1:4244:4242" - "127.0.0.1:8044:8042" diff --git a/test/resources/omop-dicomweb/private/PROCEDURE_OCCURRENCE_LINKS.parquet b/test/resources/omop-dicomweb/private/PROCEDURE_OCCURRENCE_LINKS.parquet index f4a1d6370..c1c49f80d 100644 Binary files a/test/resources/omop-dicomweb/private/PROCEDURE_OCCURRENCE_LINKS.parquet and b/test/resources/omop-dicomweb/private/PROCEDURE_OCCURRENCE_LINKS.parquet differ diff --git a/test/resources/omop/private/PROCEDURE_OCCURRENCE_LINKS.parquet b/test/resources/omop/private/PROCEDURE_OCCURRENCE_LINKS.parquet index f4a1d6370..c1c49f80d 100644 Binary files a/test/resources/omop/private/PROCEDURE_OCCURRENCE_LINKS.parquet and b/test/resources/omop/private/PROCEDURE_OCCURRENCE_LINKS.parquet differ diff --git a/test/run-system-test.sh b/test/run-system-test.sh index 01cdf3645..da7c71fd7 100755 --- a/test/run-system-test.sh +++ b/test/run-system-test.sh @@ -18,8 +18,11 @@ PACKAGE_DIR="${BIN_DIR%/*}" cd "${PACKAGE_DIR}/test" setup() { + # NOTE: the `pixl dc` command only works when run from the PIXL project root. + # However, doing so will fail unless a valid .env file with all the necessary environment + # variables is present. docker compose --env-file .env -p system-test down --volumes - # + # Note: cannot run as a single docker compose command due to different build contexts docker compose --env-file .env -p system-test up --wait -d --build --remove-orphans # Warning: Requires to be run from the project root diff --git a/test/system_test.py b/test/system_test.py index 6d7ae6171..6fc53b0c0 100644 --- a/test/system_test.py +++ b/test/system_test.py @@ -14,26 +14,28 @@ """Replacement for the 'interesting' bits of the system/E2E test""" from pathlib import Path -from typing import Any import pandas as pd import pydicom import pytest import requests -from core.dicom_tags import DICOM_TAG_PROJECT_NAME from loguru import logger +from pydicom.uid import UID +from pytest_check import check from pytest_pixl.ftpserver import PixlFTPServer from pytest_pixl.helpers import run_subprocess, wait_for_condition pytest_plugins = "pytest_pixl" +SECONDS_TO_WAIT_FOR_CONDITION = 251 + @pytest.fixture() -def expected_studies() -> dict[str, Any]: +def expected_studies() -> dict[int, dict]: """Expected study metadata post-anonymisation.""" return { - "d40f0639105babcdec043f1acf7330a8ebd64e64f13f7d0d4745f0135ddee0cd": { - "procedure_occurrence_id": 4, + 4: { + "original_study_instance_uid": "1.3.46.670589.11.38023.5.0.14068.2023012517090160001", "instances": { # tuple made up of (AccessionNumber, SeriesDescription) # for AA12345601 @@ -41,8 +43,8 @@ ("ANONYMIZED", "AP"), }, }, - "7ff25b0b438d23a31db984f49b0d6ca272104eb3d20c82f30e392cff5446a9c3": { - "procedure_occurrence_id": 5, + 5: { + "original_study_instance_uid": "1.3.46.670589.11.38023.5.0.14068.2023012517090160002", "instances": { # for AA12345605, ("ANONYMIZED", "include123"), @@ -99,15 +101,16 @@ def test_ftps_radiology_linker_upload(self, expected_studies: dict) -> None: TestFtpsUpload.expected_public_parquet_dir / "radiology" / "IMAGE_LINKER.parquet" ) po_col = radiology_linker_data["procedure_occurrence_id"] - for study_id, studies in expected_studies.items(): - expected_po_id = studies["procedure_occurrence_id"] + for procedure_occurrence_id, studies in expected_studies.items(): + expected_po_id = procedure_occurrence_id row = radiology_linker_data[po_col == expected_po_id].iloc[0] - assert 
row.hashed_identifier == study_id + assert UID(row.pseudo_study_uid).is_valid + assert row.pseudo_study_uid != studies["original_study_instance_uid"] assert radiology_linker_data.shape[0] == 2 assert set(radiology_linker_data.columns) == { "procedure_occurrence_id", - "hashed_identifier", + "pseudo_study_uid", } @pytest.mark.usefixtures("_export_patient_data") @@ -128,21 +131,26 @@ def two_zip_files_present() -> bool: wait_for_condition( two_zip_files_present, - seconds_max=121, + seconds_max=SECONDS_TO_WAIT_FOR_CONDITION, seconds_interval=5, seconds_condition_stays_true_for=15, progress_string_fn=zip_file_list, ) assert zip_files + radiology_linker_data = pd.read_parquet( + TestFtpsUpload.expected_public_parquet_dir / "radiology" / "IMAGE_LINKER.parquet" + ) + radiology_linker_data = radiology_linker_data.set_index("pseudo_study_uid") for z in zip_files: unzip_dir = tmp_path_factory.mktemp("unzip_dir", numbered=True) - self._check_dcm_tags_from_zip(z, unzip_dir, expected_studies) + procedure = radiology_linker_data.loc[z.stem]["procedure_occurrence_id"] + logger.info("Checking tags in zip file {} for procedure {}", z, procedure) + self._check_dcm_tags_from_zip(z, unzip_dir, expected_studies[procedure]) def _check_dcm_tags_from_zip( - self, zip_path: Path, unzip_dir: Path, expected_studies: dict + self, zip_path: Path, unzip_dir: Path, expected_study: dict ) -> None: """Check that private tag has survived anonymisation with the correct value.""" - expected_instances = expected_studies[zip_path.stem]["instances"] run_subprocess( ["unzip", zip_path], working_dir=unzip_dir, @@ -156,26 +164,13 @@ def _check_dcm_tags_from_zip( for dcm_file in dicom_in_zip: dcm = pydicom.dcmread(dcm_file) # The actual dicom filename and dir structure isn't checked - should it be? 
- assert dcm.get("PatientID") == zip_path.stem # PatientID stores study id post anon + assert ( + dcm.get("StudyInstanceUID") == zip_path.stem + ) # StudyInstanceUID stores the pseudo study id post anon actual_instances.add((dcm.get("AccessionNumber"), dcm.get("SeriesDescription"))) - block = dcm.private_block( - DICOM_TAG_PROJECT_NAME.group_id, DICOM_TAG_PROJECT_NAME.creator_string - ) - tag_offset = DICOM_TAG_PROJECT_NAME.offset_id - private_tag = block[tag_offset] - assert private_tag is not None - if isinstance(private_tag.value, bytes): - # Allow this for the time being, until it has been investigated - # See https://github.com/UCLH-Foundry/PIXL/issues/363 - logger.error( - "TEMPORARILY IGNORE: tag value {} should be of type str, but is of type bytes", - private_tag.value, - ) - assert private_tag.value.decode() == TestFtpsUpload.project_slug - else: - assert private_tag.value == TestFtpsUpload.project_slug # check the basic info about the instances exactly matches - assert actual_instances == expected_instances + with check: + assert actual_instances == expected_study["instances"] @pytest.mark.usefixtures("_setup_pixl_cli_dicomweb") @@ -201,7 +196,7 @@ def two_studies_present_on_dicomweb() -> bool: wait_for_condition( two_studies_present_on_dicomweb, - seconds_max=121, + seconds_max=SECONDS_TO_WAIT_FOR_CONDITION, seconds_interval=10, progress_string_fn=dicomweb_studies_list, ) diff --git a/test/utils.py b/test/utils.py index 10ac4161d..b512d8d46 100644 --- a/test/utils.py +++ b/test/utils.py @@ -14,48 +14,49 @@ """Utilities for the system test""" -import json -import shlex -import subprocess from functools import partial, update_wrapper +from typing import cast +from core.db.models import Image +from core.db.queries import engine from pytest_pixl.helpers import wait_for_condition +from sqlalchemy import not_ +from sqlalchemy.orm import sessionmaker -def wait_for_stable_orthanc_anon( +def wait_for_images_to_be_exported( seconds_max: int, seconds_interval: int, seconds_condition_stays_true_for: int, - min_instances: int = 3, + min_studies: int = 2, ) -> None: """ - Query the orthanc-anon REST API to check that the correct number of instances - have been received. + Query pixl DB to ensure that images have been processed and exported. 
If they haven't within the time limit, raise a TimeoutError """ - instances = [] + studies: list[Image] = [] - def at_least_n_intances(n_intances: int) -> bool: - nonlocal instances - instances_cmd = shlex.split( - "docker exec system-test-orthanc-anon-1 " - 'curl -u "orthanc_anon_username:orthanc_anon_password" ' - "http://orthanc-anon:8042/instances" - ) - instances_output = subprocess.run(instances_cmd, capture_output=True, check=True, text=True) # noqa: S603 - instances = json.loads(instances_output.stdout) - return len(instances) >= n_intances + def at_least_n_studies_exported(n_studies: int) -> bool: + nonlocal studies - condition = partial(at_least_n_intances, min_instances) - update_wrapper(condition, at_least_n_intances) + PixlSession = sessionmaker(engine) + with PixlSession() as session: + studies = cast( + list[Image], + session.query(Image).filter(not_(Image.exported_at.is_(None))).all(), + ) + return len(studies) >= n_studies - def list_instances() -> str: - return f"Expecting at least {min_instances} instances.\northanc-anon instances: {instances}" + condition = partial(at_least_n_studies_exported, min_studies) + update_wrapper(condition, at_least_n_studies_exported) + + def list_studies() -> str: + return f"Expecting at least {min_studies} studies.\nexported studies: {studies}" wait_for_condition( condition, seconds_max=seconds_max, seconds_interval=seconds_interval, - progress_string_fn=list_instances, + progress_string_fn=list_studies, seconds_condition_stays_true_for=seconds_condition_stays_true_for, )
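Both `wait_for_images_to_be_exported` above and the `wait_for_condition` calls in `test/system_test.py` delegate to `pytest_pixl.helpers.wait_for_condition`, whose implementation is not part of this diff. The sketch below is only an assumed shape, inferred from the call sites and from the TimeoutError mentioned in the docstring; the real helper may differ. It also illustrates why `update_wrapper` is applied to the `partial` above: copying `__name__` onto the wrapped condition lets the helper name the condition that timed out.

```python
# Illustrative sketch of the assumed behaviour of pytest_pixl.helpers.wait_for_condition.
# Parameter names come from the call sites in this changeset; everything else is assumed.
import time
from collections.abc import Callable


def wait_for_condition(
    test_condition: Callable[[], bool],
    *,
    seconds_max: int = 1,
    seconds_interval: int = 1,
    progress_string_fn: Callable[[], str] | None = None,
    seconds_condition_stays_true_for: int | None = None,
) -> None:
    """Poll until the condition holds (and optionally keeps holding), else raise TimeoutError."""
    deadline = time.monotonic() + seconds_max
    while time.monotonic() < deadline:
        if test_condition():
            if seconds_condition_stays_true_for is None:
                return
            # Require the condition to remain true for a grace period before succeeding.
            time.sleep(seconds_condition_stays_true_for)
            if test_condition():
                return
        if progress_string_fn is not None:
            print(progress_string_fn())
        time.sleep(seconds_interval)
    name = getattr(test_condition, "__name__", "condition")
    msg = f"'{name}' was not satisfied within {seconds_max} seconds"
    raise TimeoutError(msg)
```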