Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upload DICOM images with FTPS #226

Merged
merged 45 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
05ff091
Add dummy ftp server
stefpiatek Jan 12, 2024
412326d
Move FTP server to core tests
stefpiatek Jan 12, 2024
7448861
Move FTP server to core tests
stefpiatek Jan 12, 2024
f9ab780
Run core tests from pytest directly
stefpiatek Jan 12, 2024
9dfbe38
Fix typo
stefpiatek Jan 12, 2024
442640b
Spin up docker containers from pytest for core
stefpiatek Jan 12, 2024
d2165ab
Specify `platform` for FTP server Docker
milanmlft Jan 15, 2024
0443faa
Merge branch 'main' into stef/image-upload-sftp
milanmlft Jan 15, 2024
1e64846
Add basic FTP upload functionality
milanmlft Jan 15, 2024
3b28bbd
Rename `file_upload.py` -> `upload.py`
milanmlft Jan 15, 2024
387f177
Switch to different docker image for ftp server
milanmlft Jan 15, 2024
33d4ad5
Generate new SSL certificates for ftp server
milanmlft Jan 15, 2024
7ce2105
Refactor `upload_file()` to return the remote directory and output fi…
milanmlft Jan 15, 2024
34ed7cc
Add basic test for ftp file upload
milanmlft Jan 15, 2024
b0de033
Added interaction with PIXL db and download upload functionality
ruaridhg Jan 16, 2024
8efa95c
Get to point where connections have to be encrypted
stefpiatek Jan 19, 2024
7fb161d
pair switch
stefpiatek Jan 19, 2024
8d626cf
gitignore pixl_core test files and folders created
ruaridhg Jan 19, 2024
7856a56
FTP_TLS in production and FTP for testing
ruaridhg Jan 19, 2024
6131a5d
Use FTP_TLS for testing (#230)
stefpiatek Jan 19, 2024
32079bb
Remove FTP uploaded data after tests
milanmlft Jan 22, 2024
469729b
Test DICOM image uploading with FTPS
milanmlft Jan 22, 2024
567081e
remove timezone info from db
ruaridhg Jan 22, 2024
f40e1ce
Added exception for image already exported
ruaridhg Jan 22, 2024
422fcb2
Tear down data sub dirs after testing
ruaridhg Jan 22, 2024
ff01769
Fix directory teardown in `mounted_data()` fixture`
milanmlft Jan 22, 2024
edb96c7
Revert "Fix directory teardown in `mounted_data()` fixture`"
milanmlft Jan 22, 2024
6f6b449
Make FTP server mounted data directory writable on GHA
milanmlft Jan 22, 2024
d6a04d3
Fix my fix
milanmlft Jan 22, 2024
9f1c67d
😫
milanmlft Jan 22, 2024
c10c843
Maybe this will work?
milanmlft Jan 22, 2024
9b1983d
🤞
milanmlft Jan 22, 2024
aa38ce2
😤
milanmlft Jan 22, 2024
8413c80
Clean up, fix ruff errors
milanmlft Jan 22, 2024
15c8a83
Merge branch 'main' into stef/image-upload-sftp
milanmlft Jan 23, 2024
c5cf602
Remove return from upload_dicom_image()
milanmlft Jan 23, 2024
5d52bc9
Remove return from `upload_content()`, part 2
milanmlft Jan 23, 2024
a0ab2a8
Remove return from `upload_content()`, part 3
milanmlft Jan 23, 2024
9401ea3
Refactor `upload_dicom_image()`, remove `upload_content()`
milanmlft Jan 23, 2024
7c4bac9
Revert scope for `mounted_data()` fixture
milanmlft Jan 23, 2024
b8ff97e
Set return type for `run_containers()` fixture
milanmlft Jan 23, 2024
8e0622c
Renaming and adding docstrings to public module functions
milanmlft Jan 23, 2024
c5a94d3
Extract `_query_existing_image` method
milanmlft Jan 23, 2024
0e62386
Extract global variables in orthanc plugins
milanmlft Jan 23, 2024
93ce4da
Log the `resourceID`
milanmlft Jan 23, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,7 @@ jobs:
- name: Run tests
working-directory: pixl_core/tests
run: |
docker compose up --build --exit-code-from test
docker compose down
pytest
env:
ENV: test

Expand Down Expand Up @@ -235,7 +234,7 @@ jobs:
name: Move cache
run: |
rm -rf /tmp/.buildx-cache
mv /tmp/.buildx-cache-new /tmp/.buildx-cache
mv /tmp/.buildx-cache-new /tmp/.buildx-cache

- name: Init Python
uses: actions/setup-python@v4
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,5 @@ dmypy.json

# project specific files
/exports/
**/test_producer.csv
pixl_core/tests/ftp-server/mounts/data/*
5 changes: 2 additions & 3 deletions cli/tests/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import pytest
from core.database import Extract, Image
from core.patient_queue.message import Message
from dateutil.tz import UTC
from pixl_cli._database import filter_exported_or_add_to_db
from sqlalchemy.orm import Session

Expand All @@ -32,7 +31,7 @@ def _make_message(project_name: str, accession_number: str, mrn: str) -> Message
mrn=mrn,
study_date=STUDY_DATE,
procedure_occurrence_id=1,
omop_es_timestamp=datetime.datetime.now(tz=UTC),
omop_es_timestamp=datetime.datetime.now(tz=datetime.timezone.utc),
)


Expand All @@ -56,7 +55,7 @@ def rows_in_session(db_session) -> Session:
study_date=STUDY_DATE,
mrn="mrn",
extract=extract,
exported_at=datetime.datetime.now(tz=UTC),
exported_at=datetime.datetime.now(tz=datetime.timezone.utc),
)
image_not_exported = Image(
accession_number="234",
Expand Down
29 changes: 29 additions & 0 deletions orthanc/orthanc-anon/plugin/pixl.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

import requests
import yaml
from core import upload
from decouple import config
from pydicom import dcmread

Expand Down Expand Up @@ -156,6 +157,34 @@ def SendViaStow(resourceId):
orthanc.LogError("Failed to send via STOW")


def SendViaFTPS(resourceId: str) -> None:
"""
Makes a POST API call to upload the resource to a dicom-web server
using orthanc credentials as authorisation
"""
ORTHANC_USERNAME = config("ORTHANC_USERNAME")
ORTHANC_PASSWORD = config("ORTHANC_PASSWORD")

milanmlft marked this conversation as resolved.
Show resolved Hide resolved
orthanc_url = "http://localhost:8042"

# Query orthanc-anon for the study
query = f"{orthanc_url}/studies/{resourceId}/archive"
try:
response_study = requests.get(query, auth=(ORTHANC_USERNAME, ORTHANC_PASSWORD), timeout=10)
success_code = 200
if response_study.status_code != success_code:
msg = "Could not download archive of resource '%s'"
raise RuntimeError(msg, resourceId)
except requests.exceptions.RequestException:
orthanc.LogError(f"Failed to query'{resourceId}'")

# get the zip content
zip_content = response_study.content
logging.info("Downloaded data: %s", zip_content)
milanmlft marked this conversation as resolved.
Show resolved Hide resolved

upload.upload_content(zip_content, resourceId)
milanmlft marked this conversation as resolved.
Show resolved Hide resolved


def ShouldAutoRoute():
"""
Checks whether ORTHANC_AUTOROUTE_ANON_TO_AZURE environment variable is
Expand Down
10 changes: 2 additions & 8 deletions pixl_core/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,8 @@ pip install -e .
### Test

```bash
pip install .[test]
pytest -m "not pika"
```
or the full set with
```bash
cd tests
docker compose up --build --exit-code-from test
docker compose down
pip install -e .[test]
pytest
```

## Token buffer
Expand Down
76 changes: 76 additions & 0 deletions pixl_core/src/core/_database.py
milanmlft marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright (c) University College London Hospitals NHS Foundation Trust
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Interaction with the PIXL database."""

from datetime import datetime

from decouple import config
from sqlalchemy import URL, create_engine
from sqlalchemy.orm import sessionmaker

from core.database import Extract, Image

url = URL.create(
drivername="postgresql+psycopg2",
username=config("PIXL_DB_USER", default="None"),
password=config("PIXL_DB_PASSWORD", default="None"),
host=config("PIXL_DB_HOST", default="None"),
port=config("PIXL_DB_PORT", default=1),
database=config("PIXL_DB_NAME", default="None"),
)

engine = create_engine(url)


def get_project_slug_from_db(hashed_value: str) -> str:
PixlSession = sessionmaker(engine)
with PixlSession() as pixl_session, pixl_session.begin():
existing_image = (
pixl_session.query(Image)
.filter(
Image.hashed_identifier == hashed_value,
)
.one()
)
milanmlft marked this conversation as resolved.
Show resolved Hide resolved

if existing_image.exported_at is not None:
msg = "Image already exported"
raise RuntimeError(msg)

existing_extract = (
pixl_session.query(Extract)
.filter(
Extract.extract_id == existing_image.extract_id,
)
.one()
)

return str(existing_extract.slug)


def update_exported_at_and_save(hashed_value: str, date_time: datetime) -> Image:
PixlSession = sessionmaker(engine)
with PixlSession() as pixl_session, pixl_session.begin():
existing_image = (
pixl_session.query(Image)
.filter(
Image.hashed_identifier == hashed_value,
)
.one()
)
existing_image.exported_at = date_time
pixl_session.add(existing_image)

return existing_image
119 changes: 119 additions & 0 deletions pixl_core/src/core/upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Copyright (c) University College London Hospitals NHS Foundation Trust
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Functionality to upload files to an endpoint."""

from __future__ import annotations

import ftplib
import logging
import os
import ssl
from datetime import datetime, timezone
from ftplib import FTP_TLS
from typing import TYPE_CHECKING, Any, BinaryIO

if TYPE_CHECKING:
from pathlib import Path
from socket import socket

from core._database import get_project_slug_from_db, update_exported_at_and_save

logger = logging.getLogger(__name__)


class ImplicitFtpTls(ftplib.FTP_TLS):
"""
FTP_TLS subclass that automatically wraps sockets in SSL to support implicit FTPS.

https://stackoverflow.com/questions/12164470/python-ftp-implicit-tls-connection-issue
"""

def __init__(self, *args: Any, **kwargs: Any) -> None:
"""Create instance from parent class."""
super().__init__(*args, **kwargs)
self._sock: socket | None = None

@property
def sock(self) -> socket | None:
"""Return the socket."""
return self._sock

@sock.setter
def sock(self, value: socket) -> None:
"""When modifying the socket, ensure that it is ssl wrapped."""
if value is not None and not isinstance(value, ssl.SSLSocket):
value = self.context.wrap_socket(value)
self._sock = value


def upload_dicom_image(local_file_path: Path, pseudo_anon_id: str) -> None:
milanmlft marked this conversation as resolved.
Show resolved Hide resolved
"""Top level way to upload an image."""
# Create the remote directory if it doesn't exist
# rename destination to {project-slug}/{study-pseduonymised-id}.zip
remote_directory = get_project_slug_from_db(pseudo_anon_id)

# Store the file using a binary handler
with local_file_path.open("rb") as file_content:
output_path = upload_content(
# wrong directory name, can get that from the image at least
file_content,
remote_dir=remote_directory,
remote_file=f"{pseudo_anon_id}.zip",
)

update_exported_at_and_save(pseudo_anon_id, datetime.now(tz=timezone.utc))
return output_path


def upload_content(content: BinaryIO, *, remote_dir: str, remote_file: str) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably worth making this a private method so that we know we shouldn't be calling it directly

"""Upload local file to directory in ftp server."""
ftp = _connect_to_ftp()

_create_and_set_as_cwd(ftp, remote_dir)

# Store the file using a binary handler
command = f"STOR {remote_file}"
logger.debug("Running %s", command)
ftp.storbinary(command, content)

# Close the FTP connection
ftp.quit()
logger.debug("Finished uploading!")
return f"{remote_dir}/{remote_file}"


def _connect_to_ftp() -> FTP_TLS:
# Set your FTP server details
ftp_host = os.environ["FTP_HOST"]
ftp_port = os.environ["FTP_PORT"] # FTPS usually uses port 21
ftp_user = os.environ["FTP_USER_NAME"]
ftp_password = os.environ["FTP_USER_PASS"]

# Connect to the server and login
ftp = ImplicitFtpTls()
ftp.connect(ftp_host, int(ftp_port))
ftp.login(ftp_user, ftp_password)
ftp.prot_p()
return ftp


def _create_and_set_as_cwd(ftp: FTP_TLS, project_dir: str) -> None:
try:
ftp.cwd(project_dir)
logger.info("'%s' exists on remote ftp, so moving into it", project_dir)
except ftplib.error_perm:
logger.info("creating '%s' on remote ftp and moving into it", project_dir)
# Directory doesn't exist, so create it
ftp.mkd(project_dir)
ftp.cwd(project_dir)
Loading
Loading