From f5ad518622d42c94d0c4fdb0ae12ce3fddecb12f Mon Sep 17 00:00:00 2001
From: hasan7n
Date: Tue, 15 Aug 2023 23:32:55 +0000
Subject: [PATCH 01/13] copy data_prep_models from the correct folder

---
 Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 9c4b416b..128d2fad 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -76,9 +76,9 @@ ENV LANG C.UTF-8
 
 RUN mkdir /project/stages
 
-RUN cp /Front-End/src/applications/*.py /project/stages/
+RUN cp /Front-End/bin/install/appdir/usr/bin/*.py /project/stages/
 
-RUN cp -R /Front-End/src/applications/data_prep_models /project/stages/data_prep_models
+RUN cp -R /Front-End/bin/install/appdir/usr/bin/data_prep_models /project/stages/data_prep_models
 
 # Hotfix: install more recent version of GaNDLF for metrics generation
 RUN pip install git+https://github.com/mlcommons/GaNDLF@616b37bafad8f89d5c816a88f44fa30470601311

From ef574ab012f503a9562617fdacc02d427a3f785b Mon Sep 17 00:00:00 2001
From: hasan7n
Date: Tue, 15 Aug 2023 23:34:25 +0000
Subject: [PATCH 02/13] fix bugs related to tumor segmentation

---
 src/applications/PrepareDataset.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/applications/PrepareDataset.py b/src/applications/PrepareDataset.py
index 403ff7e7..93413fb8 100644
--- a/src/applications/PrepareDataset.py
+++ b/src/applications/PrepareDataset.py
@@ -381,9 +381,7 @@ def _run_tumor_segmentation_using_gandlf(
     channel_idx = 0
     # todo: confirm the order for modalities
     for key in MODALITIES_LIST:
-        current_subject = {
-            f"Channel_{channel_idx}": input_oriented_brain_images[key],
-        }
+        current_subject[f"Channel_{channel_idx}"] = input_oriented_brain_images[key]
         channel_idx += 1
     df_for_gandlf = pd.DataFrame(current_subject, index=[0])
     data_path = posixpath.join(base_output_dir, TUMOR_FILENAME)
@@ -401,6 +399,7 @@ def _run_tumor_segmentation_using_gandlf(
     model_counter = 0
     images_for_fusion = []
     mask_output_dir = posixpath.join(base_output_dir, TUMOR_MASK_FOLDER)
+    os.makedirs(mask_output_dir, exist_ok=True)
     for model_dir in models_to_run:
         model_output_dir = posixpath.join(
             base_output_dir, "model_" + str(model_counter)
@@ -447,7 +446,7 @@ def _run_tumor_segmentation_using_gandlf(
 
     tumor_class_list = [0, 1, 2, 3, 4]
 
-    tumor_masks_to_return = images_for_fusion
+    tumor_masks_to_return = []
 
     if len(images_for_fusion) > 1:
         for fusion_type in ["staple", "simple", "voting"]:
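Note on the first hunk of PATCH 02 above: the old loop re-created `current_subject` on every iteration, so only the last modality's `Channel_*` entry survived into the GaNDLF subject table. A minimal, self-contained sketch of the corrected accumulation pattern (the modality names, paths and subject ID below are hypothetical placeholders, not the values used by PrepareDataset.py):

import pandas as pd

# Hypothetical stand-ins for MODALITIES_LIST / input_oriented_brain_images
modalities = ["T1", "T1GD", "T2", "FLAIR"]
images = {m: f"/data/subject_tp/brain_{m.lower()}.nii.gz" for m in modalities}

current_subject = {"SubjectID": "AAAC_0_2008.03.30"}
for channel_idx, key in enumerate(modalities):
    # Add one key per modality instead of rebuilding the dict each time
    current_subject[f"Channel_{channel_idx}"] = images[key]

# One-row DataFrame: scalar values need an explicit index
df_for_gandlf = pd.DataFrame(current_subject, index=[0])
print(list(df_for_gandlf.columns))
# ['SubjectID', 'Channel_0', 'Channel_1', 'Channel_2', 'Channel_3']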
From 2aefcdcf4d47aa94b88f75cdbe60136a7674f352 Mon Sep 17 00:00:00 2001
From: hasan7n
Date: Wed, 16 Aug 2023 21:34:41 +0000
Subject: [PATCH 03/13] modify tumor segmentation model download url

---
 src/applications/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/applications/setup.py b/src/applications/setup.py
index e69169b7..8787e8a6 100644
--- a/src/applications/setup.py
+++ b/src/applications/setup.py
@@ -72,7 +72,7 @@
 
 urls_for_download = {
     "brain_extraction": "https://upenn.box.com/shared/static/cp5xz726mtb6gwwym8ydcxmw52zfngun",
-    "tumor_segmentation": "https://upenn.box.com/shared/static/hdcb0xqj4z528v3uc9xmfu60p0xtsv62", # should be changed
+    "tumor_segmentation": "https://storage.googleapis.com/medperf-storage/rano_test_assets/tumor_segmentation.zip", # should be changed
 }
 
 for model in urls_for_download.keys():

From 3f04385cb1647b94c03f03cb11648d42dd124 Mon Sep 17 00:00:00 2001
From: hasan7n
Date: Wed, 16 Aug 2023 21:36:40 +0000
Subject: [PATCH 04/13] fix the bugfix in prepareDataset

---
 src/applications/PrepareDataset.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/applications/PrepareDataset.py b/src/applications/PrepareDataset.py
index 93413fb8..d2f973ee 100644
--- a/src/applications/PrepareDataset.py
+++ b/src/applications/PrepareDataset.py
@@ -428,6 +428,7 @@ def _run_tumor_segmentation_using_gandlf(
 
         model_output_dir_testing = posixpath.join(model_output_dir, TESTING_FOLDER)
         subject_model_output_dir = os.listdir(model_output_dir_testing)
+        tumor_masks_to_return = []
         for subject in subject_model_output_dir:
             subject_output_dir = posixpath.join(model_output_dir_testing, subject)
             files_in_modality = os.listdir(subject_output_dir)
@@ -441,13 +442,12 @@ def _run_tumor_segmentation_using_gandlf(
                             f"{subject_id}_tumorMask_model-{model_counter}.nii.gz",
                         ),
                     )
+                    tumor_masks_to_return.append(file_path)
                     images_for_fusion.append(sitk.ReadImage(file_path, sitk.sitkUInt8))
         model_counter += 1
 
     tumor_class_list = [0, 1, 2, 3, 4]
 
-    tumor_masks_to_return = []
-
     if len(images_for_fusion) > 1:
         for fusion_type in ["staple", "simple", "voting"]:
             fused_mask = fuse_images(images_for_fusion, fusion_type, tumor_class_list)
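Note on PATCH 02 and PATCH 04 above: the per-model masks collected in `images_for_fusion` are combined by the project's own `fuse_images` helper with "staple", "simple" and "voting" strategies. The sketch below is not that helper; it is only a hedged illustration of the simplest of the three ideas, per-voxel majority voting over integer label masks, using SimpleITK and NumPy:

import numpy as np
import SimpleITK as sitk

def majority_vote(masks, classes):
    """Fuse integer label masks by per-voxel majority vote (illustrative only)."""
    arrays = [sitk.GetArrayFromImage(m) for m in masks]
    stacked = np.stack(arrays)                       # (n_models, z, y, x)
    votes = np.stack([(stacked == c).sum(axis=0) for c in classes])
    fused = np.asarray(classes, dtype=np.uint8)[np.argmax(votes, axis=0)]
    out = sitk.GetImageFromArray(fused)
    out.CopyInformation(masks[0])                    # keep origin/spacing/direction
    return out

# e.g. fused = majority_vote(images_for_fusion, [0, 1, 2, 3, 4])

The real helper also covers the STAPLE and averaging variants; this sketch is only meant to make the `tumor_class_list` argument concrete.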
From d122ae6b889914feea9bd8a749f04e86f0eab1f4 Mon Sep 17 00:00:00 2001
From: hasan7n
Date: Wed, 16 Aug 2023 21:38:27 +0000
Subject: [PATCH 05/13] start fixing matching step (WIP)

---
 mlcubes/data_preparation/project/prepare.py    |  4 +-
 .../data_preparation/project/stages/manual.py  |  4 +-
 .../data_preparation/project/stages/match.py   | 42 +++++++++----------
 3 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/mlcubes/data_preparation/project/prepare.py b/mlcubes/data_preparation/project/prepare.py
index 29bccaca..ca034700 100644
--- a/mlcubes/data_preparation/project/prepare.py
+++ b/mlcubes/data_preparation/project/prepare.py
@@ -174,5 +174,5 @@ def setup_argparser():
     )
 
     if match_proc.should_run(report):
-        loop.set_description(stage.get_name())
-        report = stage.execute(0, report)
+        loop.set_description(match_proc.get_name())
+        report = match_proc.execute("AAAC_0|2008.03.30", report)

diff --git a/mlcubes/data_preparation/project/stages/manual.py b/mlcubes/data_preparation/project/stages/manual.py
index 25d85b4c..2643e4f6 100644
--- a/mlcubes/data_preparation/project/stages/manual.py
+++ b/mlcubes/data_preparation/project/stages/manual.py
@@ -63,6 +63,8 @@ def execute(self, index: Union[str, int], report: pd.DataFrame) -> pd.DataFrame:
         out_path = self.__get_output_path(index)
         under_review_path = self.__get_under_review_path(index)
         bak_path = self.__get_backup_path(index)
+        id, tp = get_id_tp(index)
+        final_filename = f"{id}_{tp}_final_seg.nii.gz"
         if not os.path.exists(bak_path):
             shutil.copytree(in_path, bak_path)
             set_files_read_only(bak_path)
@@ -73,7 +75,7 @@ def execute(self, index: Union[str, int], report: pd.DataFrame) -> pd.DataFrame:
             f"You may find baseline segmentations inside {in_path}. "
             + f"Please inspect those segmentations and move the best one to {under_review_path}. "
             + "Make the necessary corrections to the generated segmentations with your desired tool, "
-            + f"and once you're done, move the finalized file to {out_path}"
+            + f"and once you're done, move the finalized file to {out_path} with the name {final_filename}."
         )
 
         report_data = {

diff --git a/mlcubes/data_preparation/project/stages/match.py b/mlcubes/data_preparation/project/stages/match.py
index 85c98d07..11a37139 100644
--- a/mlcubes/data_preparation/project/stages/match.py
+++ b/mlcubes/data_preparation/project/stages/match.py
@@ -1,7 +1,6 @@
 from typing import Union
 import os
 import yaml
-import json
 
 import pandas as pd
 from pandas import DataFrame
@@ -25,12 +24,12 @@ def get_name(self):
 
     def __get_input_path(self, index: Union[str, int]):
         id, tp = get_id_tp(index)
-        path = os.path.join(self.prev_stage_path, id, tp)
+        path = os.path.join(self.prev_stage_path, INTERIM_FOLDER, id, tp)
         return path
 
     def __get_backup_path(self, index: Union[str, int]):
         id, tp = get_id_tp(index)
-        path = os.path.join(self.backup_path, id, tp)
+        path = os.path.join(self.backup_path, id, tp, TUMOR_MASK_FOLDER)
         return path
 
     def __get_output_path(self, index: Union[str, int]):
@@ -71,37 +70,34 @@ def execute(self, index: Union[str, int], report: DataFrame) -> DataFrame:
 
         # TODO: Add the percent of unchanged files, as well as voxel changes
         # To the report, as separate columns
+        match_output_path = self.__get_output_path(index)
+        os.makedirs(match_output_path, exist_ok=True)
         # Get the necessary files for match check
         id, tp = get_id_tp(index)
-        reviewed_filename = f"{id}_{tp}_final_seg.nii.gz"
+        reviewed_filename = f"reviewed/{id}_{tp}_final_seg.nii.gz"
        reviewed_file = os.path.join(self.__get_input_path(index), reviewed_filename)
-        gt_filename = "" # TODO: How do we know which segmentation to compare against?
+        gt_filename = f"{id}_{tp}_tumorMask_fused-voting.nii.gz"
+        # TODO: How do we know which segmentation to compare against?
         # Should we compare against all segmentations?
         # If there's no exact match, which segmentation should we compare metrics with?
         ground_truth = os.path.join(self.__get_backup_path(index), gt_filename)
 
         # Prepare the assets for metrics generation
-        inputdata_file = os.path.join(self.__get_output_path(index), "inputdata.csv")
-        config_file = os.path.join(self.__get_output_path(index), "parameters.yaml")
-        data = {"subjectid": id, "prediction": reviewed_file, "target": ground_truth}
-        pd.DataFrame(data).to_csv(inputdata_file)
-        # TODO: Where do we get this config file?
-        # From reading the code, it seems to expect an MLCube parameters.yaml
-        # file which was used for training/generating inference
-        # That concept breaks here, because we have multiple models running
-        # without an accompanying MLCube, and we would need to know which config to use
-        # for which model
-
-        # config.yaml can be found inside project/data_prep_models/tumor_segmentation/{model_id}/config.yaml
-        config = {"problem_type": "segmentation"}
-        with open(config_file, "w") as f:
-            yaml.dump(config, f)
-
-        out_file = os.path.join(self.__get_output_path(index), "out.json")
+        inputdata_file = os.path.join(match_output_path, "inputdata.csv")
+        data = {"subjectid": f"{id}_{tp}", "prediction": reviewed_file, "target": ground_truth}
+        pd.DataFrame(data, index=[0]).to_csv(inputdata_file, index=False)
+
+        # Read gandlf config file.
+        # TODO: what are the requirements of config?
+        # TODO: do NOT hardcode the filesystem names used below
+        config_file = os.path.join(os.path.dirname(__file__), "data_prep_models/tumor_segmentation/model_0/config.yaml")
+
+        out_file = os.path.join(match_output_path, "out.yaml")
 
         # Run the metrics generation logic
         generate_metrics.generate_metrics_dict(inputdata_file, config_file, out_file)
 
         # Open the generated metrics
         with open(out_file, "r") as f:
-            metrics = json.load(f)
+            metrics = yaml.safe_load(f)
+        print(metrics)
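Note on the match.py changes in PATCH 05 above: the stage hands GaNDLF's metrics generator a one-row CSV with `subjectid`, `prediction` and `target` columns. A minimal sketch of that input file, with hypothetical paths (the real ones come from __get_input_path/__get_backup_path):

import pandas as pd

# Hypothetical paths; the column names mirror the inputdata.csv built in match.py
row = {
    "subjectid": "AAAC_0_2008.03.30",
    "prediction": "/path/to/reviewed/AAAC_0_2008.03.30_final_seg.nii.gz",
    "target": "/path/to/backup/AAAC_0_2008.03.30_tumorMask_fused-voting.nii.gz",
}
pd.DataFrame(row, index=[0]).to_csv("inputdata.csv", index=False)

# The stage then calls, as in the diff above:
# generate_metrics.generate_metrics_dict("inputdata.csv", config_file, "out.yaml")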
From e56bb5a5e57fff711ce6ae1c32908bde5e9ff119 Mon Sep 17 00:00:00 2001
From: sarthakpati
Date: Fri, 18 Aug 2023 19:51:59 -0400
Subject: [PATCH 06/13] updated link for tumor segmentation model

---
 src/applications/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/applications/setup.py b/src/applications/setup.py
index e69169b7..ccc1e9b0 100644
--- a/src/applications/setup.py
+++ b/src/applications/setup.py
@@ -72,7 +72,7 @@
 
 urls_for_download = {
     "brain_extraction": "https://upenn.box.com/shared/static/cp5xz726mtb6gwwym8ydcxmw52zfngun",
-    "tumor_segmentation": "https://upenn.box.com/shared/static/hdcb0xqj4z528v3uc9xmfu60p0xtsv62", # should be changed
+    "tumor_segmentation": "https://upenn.box.com/shared/static/woiqk6x9ygazst5ofrnfnezuy0aw0tn6", # should be changed
 }
 
 for model in urls_for_download.keys():

From ee06c5193494268cba56e5a8225680aeff687871 Mon Sep 17 00:00:00 2001
From: sarthakpati
Date: Fri, 18 Aug 2023 19:56:19 -0400
Subject: [PATCH 07/13] updated modality order and models

---
 src/applications/PrepareDataset.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/applications/PrepareDataset.py b/src/applications/PrepareDataset.py
index 60f8fe20..578b5d44 100644
--- a/src/applications/PrepareDataset.py
+++ b/src/applications/PrepareDataset.py
@@ -393,8 +393,10 @@ def _run_tumor_segmentation_using_gandlf(
     df_for_gandlf = pd.DataFrame(columns=["SubjectID", "Channel_0"])
     current_subject = {"SubjectID": subject_id}
     channel_idx = 0
+    # modality order (trained according to EC): t1,t2,flair,t1c
+    modality_order = ["T1", "T2", "FLAIR", "T1GD"]
     # todo: confirm the order for modalities
-    for key in modalities_list:
+    for key in modality_order:
         current_subject = {
             f"Channel_{channel_idx}": input_oriented_brain_images[key],
         }
         channel_idx += 1

From 99aefe40ff5895a3f9018ec6c29c47403248e720 Mon Sep 17 00:00:00 2001
From: "Edwards, Brandon"
Date: Tue, 22 Aug 2023 14:49:26 -0700
Subject: [PATCH 08/13] replacing string containment with string non-equality

---
 src/applications/CreateCSVForDICOMs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/applications/CreateCSVForDICOMs.py b/src/applications/CreateCSVForDICOMs.py
index e46b1e55..2484e546 100644
--- a/src/applications/CreateCSVForDICOMs.py
+++ b/src/applications/CreateCSVForDICOMs.py
@@ -113,7 +113,7 @@ def process_timepoint(self, timepoint, subject, subject_dir):
                 continue
 
             for modality_id in MODALITY_ID_DICT[modality_to_check]:
-                if modality_id not in modality_lower:
+                if modality_id != modality_lower:
                     continue
 
                 valid_dicom, first_dicom_file = verify_dicom_folder(modality_path)
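Note on PATCH 08 above: substring containment matches too much, because short modality IDs are substrings of longer ones; exact comparison only accepts the intended folder name. The IDs below are hypothetical examples, not the actual contents of MODALITY_ID_DICT:

# Hypothetical modality IDs
modality_ids = ["t1", "t1c", "t2", "flair"]
folder_name = "t1ce"

print(any(m in folder_name for m in modality_ids))   # True  -- "t1" is a substring of "t1ce"
print(any(m == folder_name for m in modality_ids))   # False -- no exact ID matches "t1ce"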
From 0939398edd3c64ecb6fdb633fec6dc49328b21d0 Mon Sep 17 00:00:00 2001
From: hasan7n
Date: Wed, 23 Aug 2023 13:00:38 +0000
Subject: [PATCH 09/13] use tumor file names from dict

This is not a bugfix; it is just to remove confusion.
---
 src/applications/PrepareDataset.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/applications/PrepareDataset.py b/src/applications/PrepareDataset.py
index 8c635bb6..a4b15be0 100644
--- a/src/applications/PrepareDataset.py
+++ b/src/applications/PrepareDataset.py
@@ -740,10 +740,7 @@ def extract_brain(self, row: pd.Series, pbar: tqdm):
         for modality in MODALITIES_LIST:
             image = sitk.ReadImage(outputs_reoriented[modality])
             masked_image = sitk.Mask(image, brain_mask)
-            file_to_save = posixpath.join(
-                finalSubjectOutputDir_actual,
-                f"{subject_id_timepoint}_brain_{MODALITY_ID_MAPPING[modality]}.nii.gz",
-            )
+            file_to_save = input_for_tumor_models[modality]
             sitk.WriteImage(masked_image, file_to_save)
 
         # save the screenshot
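Note on PATCH 09 above: the brain-extraction step now writes each masked modality to the exact path recorded in `input_for_tumor_models`, so the later tumor-segmentation step reads the same files without re-deriving names. A hedged sketch of that pattern (the paths and file names are made up; `sitk` is SimpleITK):

import SimpleITK as sitk

# Hypothetical mapping playing the role of input_for_tumor_models
input_for_tumor_models = {
    "T1": "/out/AAAC_0_2008.03.30_brain_t1.nii.gz",
    "T2": "/out/AAAC_0_2008.03.30_brain_t2.nii.gz",
}

brain_mask = sitk.ReadImage("/out/brainMask_fused.nii.gz", sitk.sitkUInt8)
for modality, out_path in input_for_tumor_models.items():
    image = sitk.ReadImage(f"/out/reoriented_{modality.lower()}.nii.gz")
    masked = sitk.Mask(image, brain_mask)  # zero out voxels outside the brain mask
    sitk.WriteImage(masked, out_path)      # same path the tumor models will consume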
From 436a4c224f754e1af95e724df155c872946d7cd9 Mon Sep 17 00:00:00 2001
From: hasan7n
Date: Wed, 23 Aug 2023 13:38:36 +0000
Subject: [PATCH 10/13] fix name conventions for extraction models outputs

---
 src/applications/PrepareDataset.py | 55 +++++++++++++-----------------
 1 file changed, 24 insertions(+), 31 deletions(-)

diff --git a/src/applications/PrepareDataset.py b/src/applications/PrepareDataset.py
index a4b15be0..508f6677 100644
--- a/src/applications/PrepareDataset.py
+++ b/src/applications/PrepareDataset.py
@@ -314,11 +314,11 @@ def _run_brain_extraction_using_gandlf(
         else models_to_infer.split(",")
     )
 
-    model_counter = 0
     images_for_fusion = []
     for model_dir in models_to_run:
+        model_id = os.path.basename(model_dir)
         model_output_dir = posixpath.join(
-            base_output_dir, "model_" + str(model_counter)
+            base_output_dir, "brain_extraction_" + str(model_id)
         )
         file_list = os.listdir(model_dir)
         for file in file_list:
@@ -342,18 +342,17 @@ def _run_brain_extraction_using_gandlf(
         for modality in modality_outputs:
             modality_output_dir = posixpath.join(model_output_dir_testing, modality)
             files_in_modality = os.listdir(modality_output_dir)
-            for file in files_in_modality:
+            for file in files_in_modality:  # this loop may not be necessary
                 if file.endswith(".nii.gz"):
                     file_path = posixpath.join(modality_output_dir, file)
                     shutil.copyfile(
                         file_path,
                         posixpath.join(
                             base_output_dir,
-                            f"brainMask_{model_counter}_{modality}.nii.gz",
+                            f"brainMask_{model_id}_{modality}.nii.gz",
                         ),
                     )
                     images_for_fusion.append(sitk.ReadImage(file_path, sitk.sitkUInt8))
-        model_counter += 1
 
     return fuse_images(images_for_fusion, "staple", [0, 1])
 
@@ -398,13 +397,14 @@ def _run_tumor_segmentation_using_gandlf(
         else models_to_infer.split(",")
     )
 
-    model_counter = 0
+    tumor_masks_to_return = []
     images_for_fusion = []
     mask_output_dir = posixpath.join(base_output_dir, TUMOR_MASK_FOLDER)
     os.makedirs(mask_output_dir, exist_ok=True)
     for model_dir in models_to_run:
+        model_id = os.path.basename(model_dir)
         model_output_dir = posixpath.join(
-            base_output_dir, "model_" + str(model_counter)
+            base_output_dir, "tumor_segmentation_" + str(model_id)
         )
         file_list = os.listdir(model_dir)
         for file in file_list:
@@ -429,24 +429,21 @@ def _run_tumor_segmentation_using_gandlf(
         )
 
         model_output_dir_testing = posixpath.join(model_output_dir, TESTING_FOLDER)
-        subject_model_output_dir = os.listdir(model_output_dir_testing)
-        tumor_masks_to_return = []
-        for subject in subject_model_output_dir:
-            subject_output_dir = posixpath.join(model_output_dir_testing, subject)
-            files_in_modality = os.listdir(subject_output_dir)
-            for file in files_in_modality:
-                if file.endswith(".nii.gz"):
-                    file_path = posixpath.join(subject_output_dir, file)
-                    shutil.copyfile(
-                        file_path,
-                        posixpath.join(
-                            mask_output_dir,
-                            f"{subject_id}_tumorMask_model-{model_counter}.nii.gz",
-                        ),
-                    )
-                    tumor_masks_to_return.append(file_path)
-                    images_for_fusion.append(sitk.ReadImage(file_path, sitk.sitkUInt8))
-        model_counter += 1
+        # We expect one subject (one output modality, one file).
+        subject = os.listdir(model_output_dir_testing)[0]
+        subject_output_dir = posixpath.join(model_output_dir_testing, subject)
+        files_in_modality = os.listdir(subject_output_dir)
+        for file in files_in_modality:  # this loop may not be necessary
+            if file.endswith(".nii.gz"):
+                file_path = posixpath.join(subject_output_dir, file)
+                renamed_path = posixpath.join(
+                    mask_output_dir,
+                    f"{subject_id}_tumorMask_model-{model_id}.nii.gz",
+                )
+                shutil.copyfile(file_path, renamed_path)
+                # Append the renamed path to keep track of model IDs
+                tumor_masks_to_return.append(renamed_path)
+                images_for_fusion.append(sitk.ReadImage(file_path, sitk.sitkUInt8))
 
     tumor_class_list = [0, 1, 2, 3, 4]
 
@@ -781,18 +778,14 @@ def extract_tumor(self, row: pd.Series, pbar: tqdm):
             interimOutputDir_actual,
         )
 
-        tumor_mask_idx = 0
         for tumor_mask in tumor_masks_for_qc:
+            tumor_mask_id = os.path.basename(tumor_mask)
             # save the screenshot
            _save_screenshot(
                 input_for_tumor_models,
-                posixpath.join(
-                    interimOutputDir_actual,
-                    f"{subject_id_timepoint}_summary_tumor-segmentation_model-{tumor_mask_idx}.png",
-                ),
+                posixpath.join(interimOutputDir_actual, f"{tumor_mask_id}_summary.png"),
                 tumor_mask,
             )
-            tumor_mask_idx += 1
 
         with open(self.stdout_log, "a+") as f:
             f.write(f"***\nTumor Masks For QC:\n{tumor_masks_for_qc}\n***")
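Note on PATCH 10 above: output masks are now named after a stable `model_id` taken from the model directory, instead of a positional counter whose meaning changes when only a subset of models is run. A small sketch of the resulting naming convention (the directory layout and subject ID below are assumptions for illustration):

import os

# Hypothetical model directories, e.g. as unpacked under data_prep_models/
models_to_run = [
    "/project/stages/data_prep_models/tumor_segmentation/model_0",
    "/project/stages/data_prep_models/tumor_segmentation/model_1",
]

subject_id = "AAAC_0_2008.03.30"
for model_dir in models_to_run:
    model_id = os.path.basename(model_dir)  # stable, independent of iteration order
    print(f"{subject_id}_tumorMask_model-{model_id}.nii.gz")
    # -> AAAC_0_2008.03.30_tumorMask_model-model_0.nii.gz, ..._model-model_1.nii.gz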
From 6afa8ce54ac927d11c4a2f76fcfe5ba4e3e19f53 Mon Sep 17 00:00:00 2001
From: hasan7n
Date: Wed, 23 Aug 2023 13:40:07 +0000
Subject: [PATCH 11/13] revert back to non-openvino model

The model was fine. There was some confusing folder copying that made me
think the model was outputting five files.
---
 src/applications/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/applications/setup.py b/src/applications/setup.py
index ccc1e9b0..8787e8a6 100644
--- a/src/applications/setup.py
+++ b/src/applications/setup.py
@@ -72,7 +72,7 @@
 
 urls_for_download = {
     "brain_extraction": "https://upenn.box.com/shared/static/cp5xz726mtb6gwwym8ydcxmw52zfngun",
-    "tumor_segmentation": "https://upenn.box.com/shared/static/woiqk6x9ygazst5ofrnfnezuy0aw0tn6", # should be changed
+    "tumor_segmentation": "https://storage.googleapis.com/medperf-storage/rano_test_assets/tumor_segmentation.zip", # should be changed
 }
 
 for model in urls_for_download.keys():

From 07ea0f2bd2411c8425628b63513b8955cc04cf61 Mon Sep 17 00:00:00 2001
From: hasan7n
Date: Wed, 23 Aug 2023 14:58:43 +0000
Subject: [PATCH 12/13] remove file extension from tumor mask ID

---
 src/applications/PrepareDataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/applications/PrepareDataset.py b/src/applications/PrepareDataset.py
index 508f6677..791488f8 100644
--- a/src/applications/PrepareDataset.py
+++ b/src/applications/PrepareDataset.py
@@ -779,7 +779,7 @@ def extract_tumor(self, row: pd.Series, pbar: tqdm):
         )
 
         for tumor_mask in tumor_masks_for_qc:
-            tumor_mask_id = os.path.basename(tumor_mask)
+            tumor_mask_id = os.path.basename(tumor_mask).replace(".nii.gz", "")
             # save the screenshot
             _save_screenshot(
                 input_for_tumor_models,

From 2fca4ca391073ae4ca3b6a89b3daec0ccc34576b Mon Sep 17 00:00:00 2001
From: "Edwards, Brandon"
Date: Wed, 23 Aug 2023 17:07:18 -0700
Subject: [PATCH 13/13] missing and extra modalities detected by string for
 subject and timepoint

The CSVCreator's missing- and extra-modalities lists are long-lived
attributes that persist across iterations over subjects and timepoints, so
membership is now checked with the current subject/timepoint string.
---
 mlcubes/data_preparation/project/stages/get_csv.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlcubes/data_preparation/project/stages/get_csv.py b/mlcubes/data_preparation/project/stages/get_csv.py
index 5caa9717..587df0c6 100644
--- a/mlcubes/data_preparation/project/stages/get_csv.py
+++ b/mlcubes/data_preparation/project/stages/get_csv.py
@@ -75,14 +75,14 @@ def execute(self, index: Union[str, int], report: pd.DataFrame) -> pd.DataFrame:
             "data_path": tp_out_path,
             "labels_path": "",
         }
-        if self.csv_processor.subject_timepoint_missing_modalities:
+        if f"{id}_{tp}" in self.csv_processor.subject_timepoint_missing_modalities:
             shutil.rmtree(tp_out_path, ignore_errors=True)
             comment = "There are missing modalities. Please check the data"
             report_data["status"] = -1.1
             report_data["status_name"] = "MISSING_MODALITIES"
             report_data["data_path"] = tp_path
             report_data["comment"] = comment
-        elif self.csv_processor.subject_timepoint_extra_modalities:
+        elif f"{id}_{tp}" in self.csv_processor.subject_timepoint_extra_modalities:
             shutil.rmtree(tp_out_path, ignore_errors=True)
             comment = "There are extra modalities. Please check the data"
             report_data["status"] = -1.2
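Note on PATCH 13 above: the missing/extra-modalities collections live on the CSV processor for the whole run, so checking their truthiness would flag every timepoint processed after the first problematic one. Checking membership of the current f"{id}_{tp}" key restricts the flag to the affected timepoint. A minimal sketch of the failure mode (class and method names are illustrative, not the real CSVCreator API):

class Processor:
    def __init__(self):
        # Long-lived across all subjects/timepoints of a run
        self.subject_timepoint_missing_modalities = []

    def record_missing(self, subject, timepoint):
        self.subject_timepoint_missing_modalities.append(f"{subject}_{timepoint}")

proc = Processor()
proc.record_missing("AAAC_0", "2008.03.30")   # one bad timepoint early in the run

# Truthiness flags *every* later timepoint once anything was recorded:
print(bool(proc.subject_timepoint_missing_modalities))                   # True
# Per-timepoint membership only flags the one that actually had a problem:
print("AAAC_1_2009.01.01" in proc.subject_timepoint_missing_modalities)  # False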