diff --git a/examples/BraTS/data_prep/mlcube/mlcube.yaml b/examples/BraTS/data_prep/mlcube/mlcube.yaml index 2cdfcabec..e05b3e184 100644 --- a/examples/BraTS/data_prep/mlcube/mlcube.yaml +++ b/examples/BraTS/data_prep/mlcube/mlcube.yaml @@ -8,7 +8,7 @@ platform: docker: # Image name. - image: mlcommons/fets_data-prep + image: mlcommons/fets_data-prep-v2 # Docker build context relative to $MLCUBE_ROOT. Default is `build`. build_context: "../project" # Docker file name within docker build context, default is `Dockerfile`. diff --git a/examples/BraTS/data_prep/project/sanity_check.py b/examples/BraTS/data_prep/project/sanity_check.py index 68c4cfe0b..5eb0aa6a7 100644 --- a/examples/BraTS/data_prep/project/sanity_check.py +++ b/examples/BraTS/data_prep/project/sanity_check.py @@ -17,6 +17,7 @@ def check_subject_validity(subject_dir): subject_valid = True strings_to_check = [ "_t1.nii.gz", + "_t1c.nii.gz", "_t1ce.nii.gz", "_t2.nii.gz", "_flair.nii.gz", diff --git a/examples/BraTS2024/data_prep/README.md b/examples/BraTS2024/data_prep/README.md new file mode 100644 index 000000000..05ea3b180 --- /dev/null +++ b/examples/BraTS2024/data_prep/README.md @@ -0,0 +1,72 @@ +# BraTS 2024 Data Preparation + +Data preparation MLCube for the raw datasets of: + +* [Meningioma Radiotherapy](https://www.synapse.org/Synapse:syn53708249/wiki/627503) +* [Pathology](https://www.synapse.org/Synapse:syn53708249/wiki/628091) + +## Example raw datasets + +### Radiotherapy + +``` +BraTS-MEN-RT/ +├── BraTS-MEN-RT-xxxx-x +│   ├── BraTS-MEN-RT-xxxx-x_gtv.nii.gz +│   └── BraTS-MEN-RT-xxxx-x_t1c.nii.gz +├── BraTS-MEN-RT-yyyy-y +│   ├── BraTS-MEN-RT-yyyy-y_gtv.nii.gz +│   └── BraTS-MEN-RT-yyyy-y_t1c.nii.gz +└── BraTS-MEN-RT-zzzz-z + ├── BraTS-MEN-RT-zzzz-z_gtv.nii.gz + └── BraTS-MEN-RT-zzzz-z_t1c.nii.gz +``` + +where: +* `*_t1c.nii.gz` are data given to model MLCubes to make their inference +* `*_gtv.nii.gz` are the labels (groundtruth) + +### Pathology + +``` +BraTS-Path/ +├── 
BraTSPath_cohort_xxxxxxx.png +├── BraTSPath_cohort_yyyyyyy.png +├── BraTSPath_cohort_zzzzzzz.png +└── labels.csv +``` + +where: +* `*.png` are data given to model MLCubes to make their inference +* `labels.csv` are the classification labels + +## Example prepared datasets + +### Radiotherapy + +``` +data +├── BraTS-MEN-RT-xxxx-x +│ └── BraTS-MEN-RT-xxxx-x_t1c.nii.gz +├── BraTS-MEN-RT-yyyy-y +│ └── BraTS-MEN-RT-yyyy-y_t1c.nii.gz +└── BraTS-MEN-RT-zzzz-z + └── BraTS-MEN-RT-zzzz-z_t1c.nii.gz + +labels +├── BraTS-MEN-RT-xxxx-x_gtv.nii.gz +├── BraTS-MEN-RT-yyyy-y_gtv.nii.gz +└── BraTS-MEN-RT-zzzz-z_gtv.nii.gz +``` + +### Pathology + +``` +data +├── BraTSPath_cohort_xxxxxxx.png +├── BraTSPath_cohort_yyyyyyy.png +└── BraTSPath_cohort_zzzzzzz.png + +labels +└── labels.csv +``` diff --git a/examples/BraTS2024/data_prep/mlcube/mlcube.yaml b/examples/BraTS2024/data_prep/mlcube/mlcube.yaml new file mode 100644 index 000000000..4cc92c657 --- /dev/null +++ b/examples/BraTS2024/data_prep/mlcube/mlcube.yaml @@ -0,0 +1,46 @@ +name: BraTS2024 Data Preparator Cube +description: BraTS2024 Data Preparator Cube for Tasks 3 and 10 +authors: + - { name: "MLCommons Medical Working Group" } + - { name: "Verena Chung" } + +platform: + accelerator_count: 0 + +docker: + # Image name. + image: ghcr.io/vpchung/brats2024-prep:0.0.1 + # Docker build context relative to $MLCUBE_ROOT. Default is `build`. + build_context: "../project" + # Docker file name within docker build context, default is `Dockerfile`. 
+ build_file: "Dockerfile" + +tasks: + prepare: + parameters: + inputs: + { + data_path: input_data/, + labels_path: input_labels/, + parameters_file: parameters.yaml, + } + outputs: { output_path: data/, output_labels_path: labels/ } + sanity_check: + parameters: + inputs: + { + data_path: data/, + labels_path: labels/, + + parameters_file: parameters.yaml, + } + statistics: + parameters: + inputs: + { + data_path: data/, + labels_path: labels/, + + parameters_file: parameters.yaml, + } + outputs: { output_path: { type: file, default: statistics.yaml } } diff --git a/examples/BraTS2024/data_prep/mlcube/workspace/parameters-path.yaml b/examples/BraTS2024/data_prep/mlcube/workspace/parameters-path.yaml new file mode 100644 index 000000000..48681bd64 --- /dev/null +++ b/examples/BraTS2024/data_prep/mlcube/workspace/parameters-path.yaml @@ -0,0 +1 @@ +task: pathology diff --git a/examples/BraTS2024/data_prep/mlcube/workspace/parameters-rt.yaml b/examples/BraTS2024/data_prep/mlcube/workspace/parameters-rt.yaml new file mode 100644 index 000000000..88b751a43 --- /dev/null +++ b/examples/BraTS2024/data_prep/mlcube/workspace/parameters-rt.yaml @@ -0,0 +1,3 @@ +task: segmentation-radiotherapy +segmentation_modalities: ["t1c"] +label_modality: gtv \ No newline at end of file diff --git a/examples/BraTS2024/data_prep/project/Dockerfile b/examples/BraTS2024/data_prep/project/Dockerfile new file mode 100644 index 000000000..19ce19a37 --- /dev/null +++ b/examples/BraTS2024/data_prep/project/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.9.16-slim + +COPY ./requirements.txt /mlcube_project/requirements.txt + +RUN pip3 install --no-cache-dir -r /mlcube_project/requirements.txt + +ENV LANG C.UTF-8 + +# Create a non-root user. +RUN useradd nonrootuser +USER nonrootuser + +COPY . 
/mlcube_project + +ENTRYPOINT ["python3", "/mlcube_project/mlcube.py"] \ No newline at end of file diff --git a/examples/BraTS2024/data_prep/project/mlcube.py b/examples/BraTS2024/data_prep/project/mlcube.py new file mode 100644 index 000000000..057b7ca1c --- /dev/null +++ b/examples/BraTS2024/data_prep/project/mlcube.py @@ -0,0 +1,51 @@ +"""MLCube handler file""" +import typer +import yaml +from prepare import prepare_dataset +from sanity_check import perform_sanity_checks +from stats import generate_statistics + +app = typer.Typer() + + +@app.command("prepare") +def prepare( + data_path: str = typer.Option(..., "--data_path"), + labels_path: str = typer.Option(..., "--labels_path"), + parameters_file: str = typer.Option(..., "--parameters_file"), + output_path: str = typer.Option(..., "--output_path"), + output_labels_path: str = typer.Option(..., "--output_labels_path"), +): + with open(parameters_file) as f: + parameters = yaml.safe_load(f) + + prepare_dataset(data_path, labels_path, parameters, output_path, output_labels_path) + + +@app.command("sanity_check") +def sanity_check( + data_path: str = typer.Option(..., "--data_path"), + labels_path: str = typer.Option(..., "--labels_path"), + parameters_file: str = typer.Option(..., "--parameters_file"), +): + with open(parameters_file) as f: + parameters = yaml.safe_load(f) + + perform_sanity_checks(data_path, labels_path, parameters) + + +@app.command("statistics") +def statistics( + data_path: str = typer.Option(..., "--data_path"), + labels_path: str = typer.Option(..., "--labels_path"), + parameters_file: str = typer.Option(..., "--parameters_file"), + out_path: str = typer.Option(..., "--output_path"), +): + with open(parameters_file) as f: + parameters = yaml.safe_load(f) + + generate_statistics(data_path, labels_path, parameters, out_path) + + +if __name__ == "__main__": + app() diff --git a/examples/BraTS2024/data_prep/project/prepare.py b/examples/BraTS2024/data_prep/project/prepare.py new file mode 
100644 index 000000000..43d324bbb --- /dev/null +++ b/examples/BraTS2024/data_prep/project/prepare.py @@ -0,0 +1,64 @@ +import os +import random +import shutil +from glob import iglob + +random.seed(7) + + +def __copy_modalities(input_folder, modalities, output_folder): + for file in iglob(os.path.join(input_folder, "*.nii.gz")): + for modality in modalities: + if file.endswith(f"{modality}.nii.gz"): + new_file = os.path.join(output_folder, os.path.basename(file)) + shutil.copyfile(file, new_file) + break + + +def copy_radiotherapy_data( + data_path, labels_path, parameters, output_path, output_labels_path +): + # copy data + modalities = parameters["segmentation_modalities"] + for folder in iglob(os.path.join(data_path, "*/")): + outfolder = os.path.join( + output_path, os.path.basename(os.path.normpath(folder)) + ) + os.makedirs(outfolder, exist_ok=True) + __copy_modalities(folder, modalities, outfolder) + + # copy labels + modality = parameters["label_modality"] + for f in iglob(os.path.join(labels_path, "*")): + if os.path.isdir(f): + __copy_modalities(f, [modality], output_labels_path) + else: + if f.endswith(f"{modality}.nii.gz"): + new_file = os.path.join(output_labels_path, os.path.basename(f)) + shutil.copyfile(f, new_file) + + +def copy_pathology_data(data_path, labels_path, output_path, output_labels_path): + # copy data + for file in iglob(os.path.join(data_path, "*.png")): + new_file = os.path.join(output_path, os.path.basename(file)) + shutil.copyfile(file, new_file) + + # copy labels + for file in iglob(os.path.join(labels_path, "*.csv")): + new_file = os.path.join(output_labels_path, os.path.basename(file)) + shutil.copyfile(file, new_file) + + +def prepare_dataset( + data_path, labels_path, parameters, output_path, output_labels_path +): + task = parameters["task"] + assert task in ["segmentation-radiotherapy", "pathology"], "Invalid task" + os.makedirs(output_path, exist_ok=True) + os.makedirs(output_labels_path, exist_ok=True) + + if task == 
"segmentation-radiotherapy": + copy_radiotherapy_data(data_path, labels_path, parameters, output_path, output_labels_path) + else: + copy_pathology_data(data_path, labels_path, output_path, output_labels_path) diff --git a/examples/BraTS2024/data_prep/project/requirements.txt b/examples/BraTS2024/data_prep/project/requirements.txt new file mode 100644 index 000000000..fd9cc83a4 --- /dev/null +++ b/examples/BraTS2024/data_prep/project/requirements.txt @@ -0,0 +1,4 @@ +pyYAML +typer +numpy +SimpleITK>=2.1.0 diff --git a/examples/BraTS2024/data_prep/project/sanity_check.py b/examples/BraTS2024/data_prep/project/sanity_check.py new file mode 100644 index 000000000..b5ec9e97b --- /dev/null +++ b/examples/BraTS2024/data_prep/project/sanity_check.py @@ -0,0 +1,60 @@ +import os + + +def check_subject_validity_for_segmentation(labels_path, subject_dir, parameters): + modalities = parameters["segmentation_modalities"] + label_modality = parameters["label_modality"] + + # data + strings_to_check = [f"_{modality}.nii.gz" for modality in modalities] + for string in strings_to_check: + if not os.path.isfile( + os.path.join(subject_dir, os.path.basename(subject_dir) + string) + ): + raise ValueError( + f"{os.path.basename(subject_dir)} does not contain all modalities" + ) + + assert len(os.listdir(subject_dir)) == len( + modalities + ), "invalid number of modalities" + + # labels + if not os.path.isfile( + os.path.join( + labels_path, os.path.basename(subject_dir) + f"_{label_modality}.nii.gz" + ) + ): + raise ValueError( + f"{os.path.basename(subject_dir)} does not contain segmentation labels" + ) + + +def check_subject_validity_for_pathology(labels_path, data_path): + # data + if not all(file.endswith("png") for file in os.listdir(data_path)): + raise ValueError( + f"{os.path.basename(data_path)} should only contain PNG files" + ) + + # labels + assert len(os.listdir(labels_path)) == 1, "invalid number of labels file" + if not os.listdir(labels_path)[0].endswith("csv"): + raise 
ValueError( + f"{labels_path} does not contain classification labels in a CSV file" + ) + + +def perform_sanity_checks(data_path, labels_path, parameters): + task = parameters["task"] + + if task == "segmentation-radiotherapy": + data_folders = os.listdir(data_path) + for folder in data_folders: + current_subject = os.path.join(data_path, folder) + assert os.path.isdir(current_subject), "Unexpected file found" + check_subject_validity_for_segmentation( + labels_path, current_subject, parameters + ) + else: + check_subject_validity_for_pathology(labels_path, data_path) diff --git a/examples/BraTS2024/data_prep/project/stats.py b/examples/BraTS2024/data_prep/project/stats.py new file mode 100644 index 000000000..4b7a79e6f --- /dev/null +++ b/examples/BraTS2024/data_prep/project/stats.py @@ -0,0 +1,11 @@ +import os +import yaml + + +def generate_statistics(data_path, labels_path, parameters, out_path): + stats = { + "Number of Subjects": len(os.listdir(data_path)), + } + + with open(out_path, "w") as f: + yaml.dump(stats, f) diff --git a/examples/BraTS2024/dummy_model/README.md b/examples/BraTS2024/dummy_model/README.md new file mode 100644 index 000000000..7eff8e7c4 --- /dev/null +++ b/examples/BraTS2024/dummy_model/README.md @@ -0,0 +1,32 @@ +# BraTS 2024 Dummy Models + +Reference model MLCube for: + +* [Meningioma Radiotherapy](https://www.synapse.org/Synapse:syn53708249/wiki/627503) +* [Pathology](https://www.synapse.org/Synapse:syn53708249/wiki/628091) + +## Example model outputs + +### Radiotherapy + +A single folder with segmentation files, e.g. + +``` +predictions +├── BraTS-MEN-RT-xxxx-x.nii.gz +├── BraTS-MEN-RT-yyyy-y.nii.gz +└── BraTS-MEN-RT-zzzz-z.nii.gz +``` + +### Pathology + +A 2-column CSV with `SubjectID` and `Prediction` as the headers, e.g. + +``` +SubjectID,Prediction +BraTSPath_cohort_xxxxxxx.png,A +BraTSPath_cohort_yyyyyyy.png,B +BraTSPath_cohort_zzzzzzz.png,C +``` + +where `A`, `B`, and `C` are integers from 0 to 5. 
diff --git a/examples/BraTS2024/dummy_model/mlcube/mlcube.yaml b/examples/BraTS2024/dummy_model/mlcube/mlcube.yaml new file mode 100644 index 000000000..9b95122e8 --- /dev/null +++ b/examples/BraTS2024/dummy_model/mlcube/mlcube.yaml @@ -0,0 +1,27 @@ +name: mock-model-brats +description: mock-model-brats +authors: + - { name: MLCommons Medical Working Group } + - { name: Verena Chung } + +platform: + accelerator_count: 0 + +docker: + # Image name + image: ghcr.io/vpchung/brats2024-dummy-model:0.0.1 + # Docker build context relative to $MLCUBE_ROOT. Default is `build`. + build_context: "../project" + # Docker file name within docker build context, default is `Dockerfile`. + build_file: "Dockerfile" + +tasks: + infer: + # Computes predictions on input data + parameters: + inputs: + { + data_path: data/, + parameters_file: parameters.yaml, + } + outputs: { output_path: { type: directory, default: predictions } } diff --git a/examples/BraTS2024/dummy_model/mlcube/workspace/parameters-path.yaml b/examples/BraTS2024/dummy_model/mlcube/workspace/parameters-path.yaml new file mode 100644 index 000000000..48681bd64 --- /dev/null +++ b/examples/BraTS2024/dummy_model/mlcube/workspace/parameters-path.yaml @@ -0,0 +1 @@ +task: pathology diff --git a/examples/BraTS2024/dummy_model/mlcube/workspace/parameters-rt.yaml b/examples/BraTS2024/dummy_model/mlcube/workspace/parameters-rt.yaml new file mode 100644 index 000000000..f23bd73c0 --- /dev/null +++ b/examples/BraTS2024/dummy_model/mlcube/workspace/parameters-rt.yaml @@ -0,0 +1 @@ +task: segmentation-radiotherapy diff --git a/examples/BraTS2024/dummy_model/project/Dockerfile b/examples/BraTS2024/dummy_model/project/Dockerfile new file mode 100644 index 000000000..e4c0ddbb4 --- /dev/null +++ b/examples/BraTS2024/dummy_model/project/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.9.16-slim + +COPY ./requirements.txt /mlcube_project/requirements.txt + +RUN pip3 install --no-cache-dir -r /mlcube_project/requirements.txt + +ENV LANG C.UTF-8 
+ +# Create a non-root user. +RUN useradd nonrootuser +USER nonrootuser + +COPY . /mlcube_project + +ENTRYPOINT ["python3", "/mlcube_project/mlcube.py"] \ No newline at end of file diff --git a/examples/BraTS2024/dummy_model/project/infer.py b/examples/BraTS2024/dummy_model/project/infer.py new file mode 100644 index 000000000..6a2dd5647 --- /dev/null +++ b/examples/BraTS2024/dummy_model/project/infer.py @@ -0,0 +1,17 @@ +import os +import shutil + +from numpy.random import randint +import pandas as pd + + +def run_inference(data_path, parameters, output_path): + task = parameters["task"] + if task == "segmentation-radiotherapy": + for k in os.listdir(data_path): + file = os.path.join(data_path, k, f"{k}_t1c.nii.gz") + shutil.copyfile(file, os.path.join(output_path, f"{k}.nii.gz")) + else: + dummy_predictions = pd.DataFrame({'SubjectID': os.listdir(data_path)}) + dummy_predictions["Prediction"] = randint(6,size=len(os.listdir(data_path))) + dummy_predictions.to_csv(os.path.join(output_path, "predictions.csv"), index=False) diff --git a/examples/BraTS2024/dummy_model/project/mlcube.py b/examples/BraTS2024/dummy_model/project/mlcube.py new file mode 100644 index 000000000..97d7e84f0 --- /dev/null +++ b/examples/BraTS2024/dummy_model/project/mlcube.py @@ -0,0 +1,29 @@ +"""MLCube handler file""" +import typer +import yaml + +from infer import run_inference + +app = typer.Typer() + + +@app.command("infer") +def infer( + data_path: str = typer.Option(..., "--data_path"), + parameters_file: str = typer.Option(..., "--parameters_file"), + output_path: str = typer.Option(..., "--output_path"), +): + with open(parameters_file) as f: + parameters = yaml.safe_load(f) + + run_inference(data_path, parameters, output_path) + + +@app.command("hotfix") +def hotfix(): + # NOOP command for typer to behave correctly. 
DO NOT REMOVE OR MODIFY + pass + + +if __name__ == "__main__": + app() diff --git a/examples/BraTS2024/dummy_model/project/requirements.txt b/examples/BraTS2024/dummy_model/project/requirements.txt new file mode 100644 index 000000000..86c5f9fcf --- /dev/null +++ b/examples/BraTS2024/dummy_model/project/requirements.txt @@ -0,0 +1,4 @@ +typer==0.9.0 +PyYAML==6.0 +pandas +numpy diff --git a/examples/BraTS2024/mock_metrics/Dockerfile b/examples/BraTS2024/mock_metrics/Dockerfile new file mode 100644 index 000000000..1794b7d99 --- /dev/null +++ b/examples/BraTS2024/mock_metrics/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.9.16-slim + +COPY ./requirements.txt /mlcube_project/requirements.txt + +RUN pip3 install --no-cache-dir -r /mlcube_project/requirements.txt + +ENV LANG C.UTF-8 + +# Create a non-root user. +RUN useradd nonrootuser +USER nonrootuser + +COPY . /mlcube_project + +ENTRYPOINT ["python3", "/mlcube_project/mlcube.py"] diff --git a/examples/BraTS2024/mock_metrics/metrics.py b/examples/BraTS2024/mock_metrics/metrics.py new file mode 100644 index 000000000..2bbcaf4e3 --- /dev/null +++ b/examples/BraTS2024/mock_metrics/metrics.py @@ -0,0 +1,93 @@ +""" +Validate model predictions output for BraTS-MEN-RT +and BraTS-Path 2024. 
+""" + +import os +from glob import glob + +import numpy as np +import pandas as pd +import SimpleITK as sitk +import yaml +from cnb_tools import validation_toolkit as vtk + + +def check_image_dims(path): + base_size = np.array([182, 218, 182]) + image = sitk.ReadImage(path) + size_array = np.array(image.GetSize()) + assert (base_size == size_array).all(), ( + "Image size is not [182, 218, 182] for " + path + ) + + +def _check_counts(pred, gold): + if len(pred) != len(gold): + raise ValueError("Predictions number don't match labels") + + if sorted(pred) != sorted(gold): + raise ValueError("Predictions don't match submission criteria") + + +def check_for_postop(labels, predictions): + label_id_len = len("xxxxx-xxx-seg.nii.gz") + subjectids = [ + label[-label_id_len:].replace("-seg", "") for label in os.listdir(labels) + ] + + pred_id_len = len("xxxxx-xxx.nii.gz") + pred_subjectids = [pred[-pred_id_len:] for pred in os.listdir(predictions)] + _check_counts(pred_subjectids, subjectids) + + for pred in os.listdir(predictions): + check_image_dims(os.path.join(predictions, pred)) + + +def check_for_radiotherapy(labels, predictions): + label_id_len = len("xxxx-x_gtv.nii.gz") + subjectids = [ + label[-label_id_len:].replace("_gtv", "") for label in os.listdir(labels) + ] + pred_id_len = len("xxxx-x.nii.gz") + pred_subjectids = [pred[-pred_id_len:] for pred in os.listdir(predictions)] + _check_counts(pred_subjectids, subjectids) + + +def check_for_pathology(parent): + pred_file = glob(os.path.join(parent, "*.csv")) + if len(pred_file) != 1: + raise ValueError("There should only be one predictions CSV file") + + try: + pred = pd.read_csv(pred_file[0], usecols=["SubjectID", "Prediction"]) + except ValueError as exc: + raise ValueError( + "Predictions file should contain two columns: `SubjectID` " + "and `Prediction` (case-sensitive)" + ) from exc + + if vtk.check_duplicate_keys(pred["SubjectID"]) != "": + raise ValueError("Duplicate SubjectIDs found") + + if 
vtk.check_values_range(pred["Prediction"], min_val=0, max_val=5) != "": + raise ValueError("`Prediction` should be integers between 0 and 5") + + if not all(pred["SubjectID"].str.contains(r"BraTSPath_\w+_\d{7}\.png$")): + raise ValueError( + "'SubjectID' values do not contain the filenames in the dataset" + ) + + +def calculate_metrics(labels, predictions, parameters, output_path): + task = parameters["task"] + + if task == "segmentation": + check_for_postop(labels, predictions) + elif task == "segmentation-radiotherapy": + check_for_radiotherapy(labels, predictions) + else: + check_for_pathology(predictions) + + with open(output_path, "w") as f: + yaml.dump({"valid": True}, f) diff --git a/examples/BraTS2024/mock_metrics/mlcube.py b/examples/BraTS2024/mock_metrics/mlcube.py new file mode 100644 index 000000000..59094ad60 --- /dev/null +++ b/examples/BraTS2024/mock_metrics/mlcube.py @@ -0,0 +1,30 @@ +"""MLCube handler file""" + +import typer +import yaml +from metrics import calculate_metrics + +app = typer.Typer() + + +@app.command("evaluate") +def evaluate( + labels: str = typer.Option(..., "--labels"), + predictions: str = typer.Option(..., "--predictions"), + parameters_file: str = typer.Option(..., "--parameters_file"), + output_path: str = typer.Option(..., "--output_path"), +): + with open(parameters_file) as f: + parameters = yaml.safe_load(f) + + calculate_metrics(labels, predictions, parameters, output_path) + + +@app.command("hotfix") +def hotfix(): + # NOOP command for typer to behave correctly. 
DO NOT REMOVE OR MODIFY + pass + + +if __name__ == "__main__": + app() diff --git a/examples/BraTS2024/mock_metrics/requirements.txt b/examples/BraTS2024/mock_metrics/requirements.txt new file mode 100644 index 000000000..845dccf39 --- /dev/null +++ b/examples/BraTS2024/mock_metrics/requirements.txt @@ -0,0 +1,6 @@ +PyYAML~=5.3 +typer +cnb-tools==0.3.2 +pandas +SimpleITK>=2.1.0 +numpy~=1.24.2 \ No newline at end of file diff --git a/examples/BraTS2024/mock_prep/Dockerfile b/examples/BraTS2024/mock_prep/Dockerfile new file mode 100644 index 000000000..1794b7d99 --- /dev/null +++ b/examples/BraTS2024/mock_prep/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.9.16-slim + +COPY ./requirements.txt /mlcube_project/requirements.txt + +RUN pip3 install --no-cache-dir -r /mlcube_project/requirements.txt + +ENV LANG C.UTF-8 + +# Create a non-root user. +RUN useradd nonrootuser +USER nonrootuser + +COPY . /mlcube_project + +ENTRYPOINT ["python3", "/mlcube_project/mlcube.py"] diff --git a/examples/BraTS2024/mock_prep/mlcube.py b/examples/BraTS2024/mock_prep/mlcube.py new file mode 100644 index 000000000..057b7ca1c --- /dev/null +++ b/examples/BraTS2024/mock_prep/mlcube.py @@ -0,0 +1,51 @@ +"""MLCube handler file""" +import typer +import yaml +from prepare import prepare_dataset +from sanity_check import perform_sanity_checks +from stats import generate_statistics + +app = typer.Typer() + + +@app.command("prepare") +def prepare( + data_path: str = typer.Option(..., "--data_path"), + labels_path: str = typer.Option(..., "--labels_path"), + parameters_file: str = typer.Option(..., "--parameters_file"), + output_path: str = typer.Option(..., "--output_path"), + output_labels_path: str = typer.Option(..., "--output_labels_path"), +): + with open(parameters_file) as f: + parameters = yaml.safe_load(f) + + prepare_dataset(data_path, labels_path, parameters, output_path, output_labels_path) + + +@app.command("sanity_check") +def sanity_check( + data_path: str = typer.Option(..., 
"--data_path"), + labels_path: str = typer.Option(..., "--labels_path"), + parameters_file: str = typer.Option(..., "--parameters_file"), +): + with open(parameters_file) as f: + parameters = yaml.safe_load(f) + + perform_sanity_checks(data_path, labels_path, parameters) + + +@app.command("statistics") +def statistics( + data_path: str = typer.Option(..., "--data_path"), + labels_path: str = typer.Option(..., "--labels_path"), + parameters_file: str = typer.Option(..., "--parameters_file"), + out_path: str = typer.Option(..., "--output_path"), +): + with open(parameters_file) as f: + parameters = yaml.safe_load(f) + + generate_statistics(data_path, labels_path, parameters, out_path) + + +if __name__ == "__main__": + app() diff --git a/examples/BraTS2024/mock_prep/prepare.py b/examples/BraTS2024/mock_prep/prepare.py new file mode 100644 index 000000000..4e9a3f31b --- /dev/null +++ b/examples/BraTS2024/mock_prep/prepare.py @@ -0,0 +1,11 @@ +import shutil + + +def prepare_dataset( + data_path, labels_path, parameters, output_path, output_labels_path +): + task = parameters["task"] + assert task in ["segmentation", "segmentation-radiotherapy", "pathology"], "Invalid task" + + shutil.copytree(data_path, output_path, dirs_exist_ok=True) + shutil.copytree(labels_path, output_labels_path, dirs_exist_ok=True) diff --git a/examples/BraTS2024/mock_prep/requirements.txt b/examples/BraTS2024/mock_prep/requirements.txt new file mode 100644 index 000000000..7b986d394 --- /dev/null +++ b/examples/BraTS2024/mock_prep/requirements.txt @@ -0,0 +1,2 @@ +pyYAML +typer diff --git a/examples/BraTS2024/mock_prep/sanity_check.py b/examples/BraTS2024/mock_prep/sanity_check.py new file mode 100644 index 000000000..60407a811 --- /dev/null +++ b/examples/BraTS2024/mock_prep/sanity_check.py @@ -0,0 +1,61 @@ +import os + + +def check_subject_validity_for_segmentation(labels_path, subject_dir, parameters): + modalities = parameters["segmentation_modalities"] + label_modality = 
parameters["label_modality"] + sep = parameters["separator"] + + # data + strings_to_check = [f"{sep}{modality}.nii.gz" for modality in modalities] + for string in strings_to_check: + if not os.path.isfile( + os.path.join(subject_dir, os.path.basename(subject_dir) + string) + ): + raise ValueError( + f"{os.path.basename(subject_dir)} does not contain all modalities" + ) + + assert len(os.listdir(subject_dir)) == len( + modalities + ), "invalid number of modalities" + + # labels + if not os.path.isfile( + os.path.join( + labels_path, os.path.basename(subject_dir) + f"{sep}{label_modality}.nii.gz" + ) + ): + raise ValueError( + f"{os.path.basename(subject_dir)} does not contain segmentation labels" + ) + + +def check_subject_validity_for_pathology(labels_path, data_path): + # data + if not all(file.endswith("png") for file in os.listdir(data_path)): + raise ValueError( + f"{os.path.basename(data_path)} should only contain PNG files" + ) + + # labels + assert len(os.listdir(labels_path)) == 1, "invalid number of labels file" + if not os.listdir(labels_path)[0].endswith("csv"): + raise ValueError( + f"{labels_path} does not contain classification labels in a CSV file" + ) + + +def perform_sanity_checks(data_path, labels_path, parameters): + task = parameters["task"] + + if task.startswith("segmentation"): + data_folders = os.listdir(data_path) + for folder in data_folders: + current_subject = os.path.join(data_path, folder) + assert os.path.isdir(current_subject), "Unexpected file found" + check_subject_validity_for_segmentation( + labels_path, current_subject, parameters + ) + else: + check_subject_validity_for_pathology(labels_path, data_path) diff --git a/examples/BraTS2024/mock_prep/stats.py b/examples/BraTS2024/mock_prep/stats.py new file mode 100644 index 000000000..4b7a79e6f --- /dev/null +++ b/examples/BraTS2024/mock_prep/stats.py @@ -0,0 +1,11 @@ +import os +import yaml + + +def generate_statistics(data_path, labels_path, parameters, out_path): + stats = { + 
"Number of Subjects": len(os.listdir(data_path)), + } + + with open(out_path, "w") as f: + yaml.dump(stats, f)