Merge pull request FeTS-AI#1 from hasan7n/separate-stages-copied-branch

Separate stages copied branch

aristizabal95 authored Aug 28, 2023
2 parents 419733b + 84f8d98 commit 696ce37
Showing 8 changed files with 60 additions and 71 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
@@ -76,9 +76,9 @@ ENV LANG C.UTF-8

RUN mkdir /project/stages

-RUN cp /Front-End/src/applications/*.py /project/stages/
+RUN cp /Front-End/bin/install/appdir/usr/bin/*.py /project/stages/

-RUN cp -R /Front-End/src/applications/data_prep_models /project/stages/data_prep_models
+RUN cp -R /Front-End/bin/install/appdir/usr/bin/data_prep_models /project/stages/data_prep_models

# Hotfix: install more recent version of GaNDLF for metrics generation
RUN pip install git+https://github.com/mlcommons/GaNDLF@616b37bafad8f89d5c816a88f44fa30470601311
4 changes: 2 additions & 2 deletions mlcubes/data_preparation/project/prepare.py
@@ -174,5 +174,5 @@ def setup_argparser():
)

if match_proc.should_run(report):
-    loop.set_description(stage.get_name())
-    report = stage.execute(0, report)
+    loop.set_description(match_proc.get_name())
+    report = match_proc.execute("AAAC_0|2008.03.30", report)
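Note: the new code drives the match stage directly with a hardcoded index, "AAAC_0|2008.03.30", instead of the placeholder 0. Judging by how get_id_tp is used throughout the stages, the index appears to encode a subject ID and a timepoint separated by a pipe. A minimal sketch of that assumption (the real get_id_tp lives in the stages' utilities and may differ):

```python
# Hypothetical reconstruction of get_id_tp, for illustration only
def get_id_tp(index: str) -> tuple[str, str]:
    subject_id, timepoint = index.split("|")
    return subject_id, timepoint

assert get_id_tp("AAAC_0|2008.03.30") == ("AAAC_0", "2008.03.30")
```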
4 changes: 2 additions & 2 deletions mlcubes/data_preparation/project/stages/get_csv.py
@@ -75,14 +75,14 @@ def execute(self, index: Union[str, int], report: pd.DataFrame) -> pd.DataFrame:
    "data_path": tp_out_path,
    "labels_path": "",
}
-if self.csv_processor.subject_timepoint_missing_modalities:
+if f"{id}_{tp}" in self.csv_processor.subject_timepoint_missing_modalities:
    shutil.rmtree(tp_out_path, ignore_errors=True)
    comment = "There are missing modalities. Please check the data"
    report_data["status"] = -1.1
    report_data["status_name"] = "MISSING_MODALITIES"
    report_data["data_path"] = tp_path
    report_data["comment"] = comment
-elif self.csv_processor.subject_timepoint_extra_modalities:
+elif f"{id}_{tp}" in self.csv_processor.subject_timepoint_extra_modalities:
    shutil.rmtree(tp_out_path, ignore_errors=True)
    comment = "There are extra modalities. Please check the data"
    report_data["status"] = -1.2
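Note: this fix turns a truthiness check into a membership check. subject_timepoint_missing_modalities is a collection, so the old `if self.csv_processor.subject_timepoint_missing_modalities:` was true whenever any subject had a problem, flagging every timepoint once a single one was bad. The new code flags only the current subject-timepoint. An illustrative sketch (names and values hypothetical):

```python
# Collection of problematic subject-timepoints (illustrative contents)
missing = {"AAAC_1_2010.01.01": ["t2"]}

id, tp = "AAAC_0", "2008.03.30"

if missing:                  # old check: non-empty -> fires for EVERY subject
    print("flagged (wrongly)")
if f"{id}_{tp}" in missing:  # new check: fires only for the affected one
    print("flagged")
```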
4 changes: 3 additions & 1 deletion mlcubes/data_preparation/project/stages/manual.py
@@ -63,6 +63,8 @@ def execute(self, index: Union[str, int], report: pd.DataFrame) -> pd.DataFrame:
out_path = self.__get_output_path(index)
under_review_path = self.__get_under_review_path(index)
bak_path = self.__get_backup_path(index)
+id, tp = get_id_tp(index)
+final_filename = f"{id}_{tp}_final_seg.nii.gz"
if not os.path.exists(bak_path):
    shutil.copytree(in_path, bak_path)
    set_files_read_only(bak_path)
@@ -73,7 +75,7 @@ def execute(self, index: Union[str, int], report: pd.DataFrame) -> pd.DataFrame:
    f"You may find baseline segmentations inside {in_path}. "
    + f"Please inspect those segmentations and move the best one to {under_review_path}. "
    + "Make the necessary corrections to the generated segmentations with your desired tool, "
-   + f"and once you're done, move the finalized file to {out_path}"
+   + f"and once you're done, move the finalized file to {out_path} with the name {final_filename}."
)

report_data = {
42 changes: 19 additions & 23 deletions mlcubes/data_preparation/project/stages/match.py
@@ -1,7 +1,6 @@
from typing import Union
import os
import yaml
-import json

import pandas as pd
from pandas import DataFrame
@@ -25,12 +24,12 @@ def get_name(self):

def __get_input_path(self, index: Union[str, int]):
    id, tp = get_id_tp(index)
-    path = os.path.join(self.prev_stage_path, id, tp)
+    path = os.path.join(self.prev_stage_path, INTERIM_FOLDER, id, tp)
    return path

def __get_backup_path(self, index: Union[str, int]):
    id, tp = get_id_tp(index)
-    path = os.path.join(self.backup_path, id, tp)
+    path = os.path.join(self.backup_path, id, tp, TUMOR_MASK_FOLDER)
    return path

def __get_output_path(self, index: Union[str, int]):
@@ -71,37 +70,34 @@ def execute(self, index: Union[str, int], report: DataFrame) -> DataFrame:
# TODO: Add the percent of unchanged files, as well as voxel changes
# To the report, as separate columns

+match_output_path = self.__get_output_path(index)
+os.makedirs(match_output_path, exist_ok=True)
# Get the necessary files for match check
id, tp = get_id_tp(index)
-reviewed_filename = f"{id}_{tp}_final_seg.nii.gz"
+reviewed_filename = f"reviewed/{id}_{tp}_final_seg.nii.gz"
reviewed_file = os.path.join(self.__get_input_path(index), reviewed_filename)
-gt_filename = ""  # TODO: How do we know which segmentation to compare against?
+gt_filename = f"{id}_{tp}_tumorMask_fused-voting.nii.gz"
+# TODO: How do we know which segmentation to compare against?
# Should we compare against all segmentations?
# If there's no exact match, which segmentation should we compare metrics with?
ground_truth = os.path.join(self.__get_backup_path(index), gt_filename)

# Prepare the assets for metrics generation
-inputdata_file = os.path.join(self.__get_output_path(index), "inputdata.csv")
-config_file = os.path.join(self.__get_output_path(index), "parameters.yaml")
-data = {"subjectid": id, "prediction": reviewed_file, "target": ground_truth}
-pd.DataFrame(data).to_csv(inputdata_file)
-# TODO: Where do we get this config file?
-# From reading the code, it seems to expect an MLCube parameters.yaml
-# file which was used for training/generating inference
-# That concept breaks here, because we have multiple models running
-# without an accompanying MLCube, and we would need to know which config to use
-# for which model
-# config.yaml can be found inside project/data_prep_models/tumor_segmentation/{model_id}/config.yaml
-config = {"problem_type": "segmentation"}
-with open(config_file, "w") as f:
-    yaml.dump(config, f)
-
-out_file = os.path.join(self.__get_output_path(index), "out.json")
+inputdata_file = os.path.join(match_output_path, "inputdata.csv")
+data = {"subjectid": f"{id}_{tp}", "prediction": reviewed_file, "target": ground_truth}
+pd.DataFrame(data, index=[0]).to_csv(inputdata_file, index=False)
+
+# Read gandlf config file.
+# TODO: what are the requirements of config?
+# TODO: do NOT hardcode the filesystem names used below
+config_file = os.path.join(os.path.dirname(__file__), "data_prep_models/tumor_segmentation/model_0/config.yaml")
+
+out_file = os.path.join(match_output_path, "out.yaml")

# Run the metrics generation logic
generate_metrics.generate_metrics_dict(inputdata_file, config_file, out_file)

# Open the generated metrics
with open(out_file, "r") as f:
-    metrics = json.load(f)
+    metrics = yaml.safe_load(f)
print(metrics)
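Note: generate_metrics.generate_metrics_dict comes from the GaNDLF version pinned in the Dockerfile hotfix above. The output is now written to out.yaml and read with yaml.safe_load, which also tolerates JSON-formatted output, since JSON is a subset of YAML. A sketch of the wiring with hypothetical paths (the import path and exact CSV schema are assumptions to be checked against GaNDLF):

```python
import pandas as pd
import yaml
from GANDLF.cli import generate_metrics  # assumed import path

# One-row CSV matching the columns built in execute() above (paths illustrative)
pd.DataFrame(
    {
        "subjectid": "AAAC_0_2008.03.30",
        "prediction": "reviewed/AAAC_0_2008.03.30_final_seg.nii.gz",
        "target": "AAAC_0_2008.03.30_tumorMask_fused-voting.nii.gz",
    },
    index=[0],
).to_csv("inputdata.csv", index=False)

generate_metrics.generate_metrics_dict("inputdata.csv", "config.yaml", "out.yaml")

with open("out.yaml") as f:
    print(yaml.safe_load(f))
```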
2 changes: 1 addition & 1 deletion src/applications/CreateCSVForDICOMs.py
@@ -113,7 +113,7 @@ def process_timepoint(self, timepoint, subject, subject_dir):
        continue

    for modality_id in MODALITY_ID_DICT[modality_to_check]:
-        if modality_id not in modality_lower:
+        if modality_id != modality_lower:
            continue

        valid_dicom, first_dicom_file = verify_dicom_folder(modality_path)
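Note: the old test accepted a modality whenever modality_id was a substring of the folder name, so an ID like "t1" would also match a "t1ce" folder. Requiring exact equality removes that ambiguity. Illustrative values:

```python
modality_lower = "t1ce"        # folder name, lowercased (illustrative)
print("t1" in modality_lower)  # True  -> old check matched T1 against a T1CE folder
print("t1" != modality_lower)  # True  -> new check correctly skips it
```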
69 changes: 30 additions & 39 deletions src/applications/PrepareDataset.py
@@ -314,11 +314,11 @@ def _run_brain_extraction_using_gandlf(
    else models_to_infer.split(",")
)

-model_counter = 0
images_for_fusion = []
for model_dir in models_to_run:
+    model_id = os.path.basename(model_dir)
    model_output_dir = posixpath.join(
-        base_output_dir, "model_" + str(model_counter)
+        base_output_dir, "brain_extraction_" + str(model_id)
    )
    file_list = os.listdir(model_dir)
    for file in file_list:
@@ -342,18 +342,17 @@
    for modality in modality_outputs:
        modality_output_dir = posixpath.join(model_output_dir_testing, modality)
        files_in_modality = os.listdir(modality_output_dir)
-        for file in files_in_modality:
+        for file in files_in_modality:  # this loop may not be necessary
            if file.endswith(".nii.gz"):
                file_path = posixpath.join(modality_output_dir, file)
                shutil.copyfile(
                    file_path,
                    posixpath.join(
                        base_output_dir,
-                        f"brainMask_{model_counter}_{modality}.nii.gz",
+                        f"brainMask_{model_id}_{modality}.nii.gz",
                    ),
                )
                images_for_fusion.append(sitk.ReadImage(file_path, sitk.sitkUInt8))
-    model_counter += 1

return fuse_images(images_for_fusion, "staple", [0, 1])
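Note: with output names keyed by model_id instead of a running counter, masks from different models can no longer collide or depend on iteration order. The collected per-model masks are then fused with fuse_images(..., "staple", [0, 1]), i.e. a STAPLE consensus over binary labels. A minimal sketch of such a fusion with SimpleITK (fuse_images is this repo's own helper, so its details may differ):

```python
import SimpleITK as sitk

# Consensus over several binary brain masks (paths illustrative)
mask_paths = ["brainMask_model_0_t1.nii.gz", "brainMask_model_1_t1.nii.gz"]
masks = [sitk.ReadImage(p, sitk.sitkUInt8) for p in mask_paths]
fused = sitk.MultiLabelSTAPLE(masks)  # estimates a consensus labeling
sitk.WriteImage(fused, "brainMask_fused.nii.gz")
```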

@@ -379,11 +378,11 @@ def _run_tumor_segmentation_using_gandlf(
df_for_gandlf = pd.DataFrame(columns=GANDLF_DF_COLUMNS)
current_subject = {"SubjectID": subject_id}
channel_idx = 0
+# modality order (trained according to EC): t1,t2,flair,t1c
+modality_order = ["T1", "T2", "FLAIR", "T1GD"]
+# todo: confirm the order for modalities
-for key in MODALITIES_LIST:
-    current_subject = {
-        f"Channel_{channel_idx}": input_oriented_brain_images[key],
-    }
+for key in modality_order:
+    current_subject[f"Channel_{channel_idx}"] = input_oriented_brain_images[key]
    channel_idx += 1
df_for_gandlf = pd.DataFrame(current_subject, index=[0])
data_path = posixpath.join(base_output_dir, TUMOR_FILENAME)
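Note: the old loop rebuilt current_subject from a dict literal on every iteration, which silently dropped SubjectID and all previously added channels, leaving only the last modality. The new loop assigns into the existing dict and fixes the channel order to the one the model was trained with (t1, t2, flair, t1c). The resulting one-row GaNDLF inference CSV would look roughly like this (paths illustrative):

```python
import pandas as pd

row = {
    "SubjectID": "AAAC_0_2008.03.30",
    "Channel_0": "brain_t1.nii.gz",     # T1
    "Channel_1": "brain_t2.nii.gz",     # T2
    "Channel_2": "brain_flair.nii.gz",  # FLAIR
    "Channel_3": "brain_t1gd.nii.gz",   # T1GD
}
df_for_gandlf = pd.DataFrame(row, index=[0])
```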
@@ -398,12 +397,14 @@
    else models_to_infer.split(",")
)

-model_counter = 0
+tumor_masks_to_return = []
images_for_fusion = []
mask_output_dir = posixpath.join(base_output_dir, TUMOR_MASK_FOLDER)
os.makedirs(mask_output_dir, exist_ok=True)
for model_dir in models_to_run:
+    model_id = os.path.basename(model_dir)
    model_output_dir = posixpath.join(
-        base_output_dir, "model_" + str(model_counter)
+        base_output_dir, "tumor_segmentation_" + str(model_id)
    )
    file_list = os.listdir(model_dir)
    for file in file_list:
@@ -428,27 +429,24 @@
    )

    model_output_dir_testing = posixpath.join(model_output_dir, TESTING_FOLDER)
-    subject_model_output_dir = os.listdir(model_output_dir_testing)
-    for subject in subject_model_output_dir:
-        subject_output_dir = posixpath.join(model_output_dir_testing, subject)
-        files_in_modality = os.listdir(subject_output_dir)
-        for file in files_in_modality:
-            if file.endswith(".nii.gz"):
-                file_path = posixpath.join(subject_output_dir, file)
-                shutil.copyfile(
-                    file_path,
-                    posixpath.join(
-                        mask_output_dir,
-                        f"{subject_id}_tumorMask_model-{model_counter}.nii.gz",
-                    ),
-                )
-                images_for_fusion.append(sitk.ReadImage(file_path, sitk.sitkUInt8))
-    model_counter += 1
+    # We expect one subject (one output modality, one file).
+    subject = os.listdir(model_output_dir_testing)[0]
+    subject_output_dir = posixpath.join(model_output_dir_testing, subject)
+    files_in_modality = os.listdir(subject_output_dir)
+    for file in files_in_modality:  # this loop may not be necessary
+        if file.endswith(".nii.gz"):
+            file_path = posixpath.join(subject_output_dir, file)
+            renamed_path = posixpath.join(
+                mask_output_dir,
+                f"{subject_id}_tumorMask_model-{model_id}.nii.gz",
+            )
+            shutil.copyfile(file_path, renamed_path)
+            # Append the renamed path to keep track of model IDs
+            tumor_masks_to_return.append(renamed_path)
+            images_for_fusion.append(sitk.ReadImage(file_path, sitk.sitkUInt8))

tumor_class_list = [0, 1, 2, 3, 4]

-tumor_masks_to_return = images_for_fusion
-
if len(images_for_fusion) > 1:
    for fusion_type in ["staple", "simple", "voting"]:
        fused_mask = fuse_images(images_for_fusion, fusion_type, tumor_class_list)
@@ -739,10 +737,7 @@ def extract_brain(self, row: pd.Series, pbar: tqdm):
for modality in MODALITIES_LIST:
    image = sitk.ReadImage(outputs_reoriented[modality])
    masked_image = sitk.Mask(image, brain_mask)
-    file_to_save = posixpath.join(
-        finalSubjectOutputDir_actual,
-        f"{subject_id_timepoint}_brain_{MODALITY_ID_MAPPING[modality]}.nii.gz",
-    )
+    file_to_save = input_for_tumor_models[modality]
    sitk.WriteImage(masked_image, file_to_save)

# save the screenshot
@@ -783,18 +778,14 @@ def extract_tumor(self, row: pd.Series, pbar: tqdm):
    interimOutputDir_actual,
)

-tumor_mask_idx = 0
for tumor_mask in tumor_masks_for_qc:
+    tumor_mask_id = os.path.basename(tumor_mask).replace(".nii.gz", "")
    # save the screenshot
    _save_screenshot(
        input_for_tumor_models,
-        posixpath.join(
-            interimOutputDir_actual,
-            f"{subject_id_timepoint}_summary_tumor-segmentation_model-{tumor_mask_idx}.png",
-        ),
+        posixpath.join(interimOutputDir_actual, f"{tumor_mask_id}_summary.png"),
        tumor_mask,
    )
-    tumor_mask_idx += 1

with open(self.stdout_log, "a+") as f:
    f.write(f"***\nTumor Masks For QC:\n{tumor_masks_for_qc}\n***")
2 changes: 1 addition & 1 deletion src/applications/setup.py
@@ -72,7 +72,7 @@

urls_for_download = {
    "brain_extraction": "https://upenn.box.com/shared/static/cp5xz726mtb6gwwym8ydcxmw52zfngun",
-    "tumor_segmentation": "https://upenn.box.com/shared/static/hdcb0xqj4z528v3uc9xmfu60p0xtsv62",  # should be changed
+    "tumor_segmentation": "https://storage.googleapis.com/medperf-storage/rano_test_assets/tumor_segmentation.zip",  # should be changed
}

for model in urls_for_download.keys():
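Note: the tumor-segmentation weights now come from a public GCS bucket instead of the Box link, and the line is still marked "should be changed". A minimal sketch of how each archive above might be fetched and unpacked; the repo's actual download logic is not shown in this diff, so treat this as an assumption:

```python
import os
import urllib.request
import zipfile

# urls_for_download is the dict defined in the hunk above
for model, url in urls_for_download.items():
    archive = f"{model}.zip"
    urllib.request.urlretrieve(url, archive)  # download the weights bundle
    with zipfile.ZipFile(archive) as z:
        z.extractall(model)                   # unpack into ./<model>/
    os.remove(archive)
```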
