From d90d19e12eda7c504e10f8a9d6e8f012eb8d04c4 Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Thu, 9 May 2024 01:00:02 +0300 Subject: [PATCH 01/14] fix train metrics errors 1. return output of whole batch, not just one item 2. make ground truth & predictions array to take into account `q_samples_per_volume` (the whole dataset size during 1 epoch is equal to len(data) * q_samples_per_volume; so if dataset df contains 100 records and q_samples_per_volume = 10 (by default) and batch size is 4, there would be 250 batches by 4 elements 3. make ground truth take into account that train_dataloader is shuffled. So now ground truth is sorted in the same order as predictions and as train_dataloader. --- GANDLF/compute/step.py | 9 +++------ GANDLF/compute/training_loop.py | 16 +++++++--------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/GANDLF/compute/step.py b/GANDLF/compute/step.py index c36258c47..bdd997a50 100644 --- a/GANDLF/compute/step.py +++ b/GANDLF/compute/step.py @@ -7,7 +7,7 @@ def step( model: torch.nn.Module, image: torch.Tensor, - label: torch.Tensor, + label: Optional[torch.Tensor], params: dict, train: Optional[bool] = True, ) -> Tuple[float, dict, torch.Tensor, torch.Tensor]: @@ -62,7 +62,7 @@ def step( if len(label.shape) > 1: label = torch.squeeze(label, -1) - if not (train) and params["model"]["type"].lower() == "openvino": + if not train and params["model"]["type"].lower() == "openvino": output = torch.from_numpy( model(inputs={params["model"]["IO"][0][0]: image.cpu().numpy()})[ params["model"]["IO"][1][0] @@ -86,12 +86,9 @@ def step( else: loss, metric_output = None, None - if len(output) > 1: - output = output[0] - if params["model"]["dimension"] == 2: - output = torch.unsqueeze(output, -1) if "medcam_enabled" in params and params["medcam_enabled"]: attention_map = torch.unsqueeze(attention_map, -1) + assert len(output) == len(image), f"Error: output({len(output)}) and batch({len(image)}) have different lengths. 
Both should be equal to batch size!" return loss, metric_output, output, attention_map diff --git a/GANDLF/compute/training_loop.py b/GANDLF/compute/training_loop.py index 61e0e6b0f..287d78aea 100644 --- a/GANDLF/compute/training_loop.py +++ b/GANDLF/compute/training_loop.py @@ -79,10 +79,8 @@ def train_network( # get ground truths if calculate_overall_metrics: - ( - ground_truth_array, - predictions_array, - ) = get_ground_truths_and_predictions_tensor(params, "training_data") + ground_truth_array = torch.zeros(len(train_dataloader.dataset), dtype=torch.int) + predictions_array = torch.zeros_like(ground_truth_array) # Set the model to train model.train() for batch_idx, (subject) in enumerate( @@ -117,11 +115,11 @@ def train_network( loss, calculated_metrics, output, _ = step(model, image, label, params) # store predictions for classification if calculate_overall_metrics: - predictions_array[ - batch_idx - * params["batch_size"] : (batch_idx + 1) - * params["batch_size"] - ] = (torch.argmax(output[0], 0).cpu().item()) + batch_idx_slice = slice(batch_idx * params["batch_size"], (batch_idx + 1) * params["batch_size"]) + ground_truth_array[batch_idx_slice] = label.detach().cpu().ravel() + batch_predictions = torch.argmax(output, 1).cpu() + assert len(batch_predictions) == len(label) + predictions_array[batch_idx_slice] = batch_predictions nan_loss = torch.isnan(loss) second_order = ( From ada957795dacf1d679d02d936b0c37bb5eb117a4 Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Tue, 14 May 2024 13:50:35 +0300 Subject: [PATCH 02/14] Output_metrics is filled only for the last weighted avg_type --- GANDLF/metrics/classification.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GANDLF/metrics/classification.py b/GANDLF/metrics/classification.py index ba8fa589f..ccfa507f6 100644 --- a/GANDLF/metrics/classification.py +++ b/GANDLF/metrics/classification.py @@ -69,15 +69,16 @@ def overall_stats(prediction: torch.Tensor, target: torch.Tensor, 
params: dict) # ), } for metric_name, calculator in calculators.items(): + avg_typed_metric_name = f"{metric_name}_{average_type_key}" if metric_name == "aucroc": one_hot_preds = one_hot( prediction.long(), num_classes=params["model"]["num_classes"] ) - output_metrics[metric_name] = get_output_from_calculator( + output_metrics[avg_typed_metric_name] = get_output_from_calculator( one_hot_preds.float(), target, calculator ) else: - output_metrics[metric_name] = get_output_from_calculator( + output_metrics[avg_typed_metric_name] = get_output_from_calculator( prediction, target, calculator ) From 73174c732469d08336f4deade88e654ae1747392 Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Tue, 14 May 2024 22:37:51 +0300 Subject: [PATCH 03/14] Refactored logger To ensure values in csv are always written in the same order as header --- GANDLF/compute/training_loop.py | 36 ++++++++--------- GANDLF/logger.py | 71 ++++++++++++++++----------------- 2 files changed, 52 insertions(+), 55 deletions(-) diff --git a/GANDLF/compute/training_loop.py b/GANDLF/compute/training_loop.py index 287d78aea..c5cafd869 100644 --- a/GANDLF/compute/training_loop.py +++ b/GANDLF/compute/training_loop.py @@ -323,44 +323,44 @@ def training_loop( # datetime object containing current date and time print("Initializing training at :", get_date_time(), flush=True) - calculate_overall_metrics = (params["problem_type"] == "classification") or ( - params["problem_type"] == "regression" - ) + metrics_log = list(params["metrics"]) - # get the overall metrics that are calculated automatically for classification/regression problems - if params["problem_type"] == "regression": - overall_metrics = overall_stats(torch.Tensor([1]), torch.Tensor([1]), params) - elif params["problem_type"] == "classification": - # this is just used to generate the headers for the overall stats - temp_tensor = torch.randint(0, params["model"]["num_classes"], (5,)) - overall_metrics = overall_stats( - 
temp_tensor.to(dtype=torch.int32), temp_tensor.to(dtype=torch.int32), params - ) + calculate_overall_metrics = params["problem_type"] in {"classification", "regression"} - metrics_log = params["metrics"].copy() if calculate_overall_metrics: + # get the overall metrics that are calculated automatically for classification/regression problems + if params["problem_type"] == "regression": + overall_metrics = overall_stats(torch.Tensor([1]), torch.Tensor([1]), params) + elif params["problem_type"] == "classification": + # this is just used to generate the headers for the overall stats + temp_tensor = torch.randint(0, params["model"]["num_classes"], (5,)) + overall_metrics = overall_stats( + temp_tensor.to(dtype=torch.int32), temp_tensor.to(dtype=torch.int32), params + ) + else: + raise NotImplementedError("Problem type not implemented for overall stats") + for metric in overall_metrics: if metric not in metrics_log: - metrics_log[metric] = 0 + metrics_log.append(metric) # Setup a few loggers for tracking train_logger = Logger( logger_csv_filename=os.path.join(output_dir, "logs_training.csv"), metrics=metrics_log, + mode="train", ) valid_logger = Logger( logger_csv_filename=os.path.join(output_dir, "logs_validation.csv"), metrics=metrics_log, + mode="valid", ) if testingDataDefined: test_logger = Logger( logger_csv_filename=os.path.join(output_dir, "logs_testing.csv"), metrics=metrics_log, + mode="test", ) - train_logger.write_header(mode="train") - valid_logger.write_header(mode="valid") - if testingDataDefined: - test_logger.write_header(mode="test") if "medcam" in params: model = medcam.inject( diff --git a/GANDLF/logger.py b/GANDLF/logger.py index bb3168583..f7e15f044 100755 --- a/GANDLF/logger.py +++ b/GANDLF/logger.py @@ -7,39 +7,42 @@ """ import os -from typing import Dict +from typing import Dict, List, Union import torch class Logger: - def __init__(self, logger_csv_filename: str, metrics: Dict[str, float]) -> None: + def __init__(self, logger_csv_filename: str, 
metrics: List[str], mode: str) -> None: """ Logger class to log the training and validation metrics to a csv file. + May append to existing file if headers match; elsewise raises an error. Args: logger_csv_filename (str): Path to a filename where the csv has to be stored. metrics (Dict[str, float]): The metrics to be logged. """ self.filename = logger_csv_filename - self.metrics = metrics + mode = mode.lower() + self.mode = mode.lower() - def write_header(self, mode="train"): - self.csv = open(self.filename, "a") - if os.stat(self.filename).st_size == 0: - mode_lower = mode.lower() - row = "epoch_no," + mode_lower + "_loss," - row += ( - ",".join([mode_lower + "_" + metric for metric in self.metrics]) + "," - ) - row = row[:-1] - row += "\n" - self.csv.write(row) - # else: - # print("Found a pre-existing file for logging, now appending logs to that file!") - self.csv.close() + new_header = ["epoch_no", f"{mode}_loss"] + [f"{mode}_{metric}" for metric in metrics] + + # TODO: do we really need to support appending to existing files? + if os.path.exists(self.filename): + with open(self.filename, "r") as f: + existing_header = f.readline().strip().split(",") + if set(existing_header) != set(new_header): + raise ValueError(f"Logger file {self.filename} error: existing header does not match new header." + f" Existing header: {existing_header}. New header: {new_header}") + self.ordered_header = existing_header + else: + with open(self.filename, "w") as f: + f.write(",".join(new_header) + "\n") + self.ordered_header = new_header def write( - self, epoch_number: int, loss: float, epoch_metrics: Dict[str, float] + self, epoch_number: int, loss: Union[float, torch.Tensor], + epoch_metrics: Dict[str, Union[float, torch.Tensor]] ) -> None: """ Write the epoch number, loss and metrics to the csv file. @@ -49,25 +52,19 @@ def write( loss (float): The loss value. epoch_metrics (Dict[str, float]): The metrics to be logged. 
""" - self.csv = open(self.filename, "a") - row = "" - row += str(epoch_number) + "," + if torch.is_tensor(loss): - row += str(loss.cpu().item()) - else: - row += str(loss) - row += "," + loss = loss.cpu().item() + + row = {"epoch_no": epoch_number, + f"{self.mode}_loss": loss} - for metric in epoch_metrics: - if torch.is_tensor(epoch_metrics[metric]): - row += str(epoch_metrics[metric].cpu().item()) - else: - row += str(epoch_metrics[metric]) - row += "," - row = row[:-1] - self.csv.write(row) - self.csv.write("\n") - self.csv.close() + for metric, metric_val in epoch_metrics.items(): + if torch.is_tensor(metric_val): + metric_val = metric_val.cpu().item() + row[f"{self.mode}_{metric}"] = metric_val - def close(self): - self.csv.close() + with open(self.filename, "a") as f: + line = [row[col] for col in self.ordered_header] + line = [str(x) for x in line] + f.write(",".join(line) + "\n") From 73037366371932bcffbc758ba26305ad1657a139 Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Tue, 14 May 2024 22:46:27 +0300 Subject: [PATCH 04/14] general refactoring & typing --- GANDLF/compute/forward_pass.py | 40 ++++++------------- GANDLF/compute/loss_and_metric.py | 66 ++++++++++++++++--------------- GANDLF/compute/step.py | 4 +- GANDLF/compute/training_loop.py | 65 +++++++++++++++++------------- GANDLF/logger.py | 19 +++++---- GANDLF/metrics/__init__.py | 4 +- GANDLF/metrics/classification.py | 6 ++- GANDLF/metrics/regression.py | 5 ++- GANDLF/utils/generic.py | 8 ++-- GANDLF/utils/tensor.py | 2 +- 10 files changed, 116 insertions(+), 103 deletions(-) diff --git a/GANDLF/compute/forward_pass.py b/GANDLF/compute/forward_pass.py index 9da87b8ff..e910c98d0 100644 --- a/GANDLF/compute/forward_pass.py +++ b/GANDLF/compute/forward_pass.py @@ -1,6 +1,6 @@ import os import pathlib -from typing import Optional, Tuple +from typing import Optional, Tuple, Union import numpy as np import pandas as pd @@ -51,12 +51,14 @@ def validate_network( print("*" * 20) # Initialize a 
few things total_epoch_valid_loss = 0 - total_epoch_valid_metric = {} + total_epoch_valid_metric: dict[str, Union[float, np.array]] = {} average_epoch_valid_metric = {} for metric in params["metrics"]: if "per_label" in metric: - total_epoch_valid_metric[metric] = [] + total_epoch_valid_metric[metric] = np.zeros( + shape=params["model"]["num_classes"] + ) else: total_epoch_valid_metric[metric] = 0 @@ -64,8 +66,7 @@ def validate_network( subject_id_list = [] is_classification = params.get("problem_type") == "classification" calculate_overall_metrics = ( - (params["problem_type"] == "classification") - or (params["problem_type"] == "regression") + params["problem_type"] in {"classification", "regression"} ) and mode == "validation" is_inference = mode == "inference" @@ -193,6 +194,7 @@ def validate_network( if params["save_output"] or is_inference: # we divide by scaling factor here because we multiply by it during loss/metric calculation + # TODO: regression-only, right? outputToWrite += ( str(epoch) + "," @@ -206,23 +208,14 @@ def validate_network( ) if calculate_overall_metrics: + # TODO: that's for classification only. What about regression? 
predictions_array[batch_idx] = ( torch.argmax(pred_output[0], 0).cpu().item() ) # # Non network validation related total_epoch_valid_loss += final_loss.detach().cpu().item() - for metric in final_metric.keys(): - if isinstance(total_epoch_valid_metric[metric], list): - if len(total_epoch_valid_metric[metric]) == 0: - total_epoch_valid_metric[metric] = np.array( - final_metric[metric] - ) - else: - total_epoch_valid_metric[metric] += np.array( - final_metric[metric] - ) - else: - total_epoch_valid_metric[metric] += final_metric[metric] + for metric, metric_val in final_metric.keys(): + total_epoch_valid_metric[metric] += metric_val else: # for segmentation problems OR regression/classification when no label is present grid_sampler = torchio.inference.GridSampler( @@ -386,6 +379,7 @@ def validate_network( # final regression output output_prediction = output_prediction / len(patch_loader) if calculate_overall_metrics: + # TOD: what? regression and argmax? predictions_array[batch_idx] = ( torch.argmax(output_prediction[0], 0).cpu().item() ) @@ -440,17 +434,7 @@ def validate_network( # loss.cpu().data.item() total_epoch_valid_loss += final_loss.cpu().item() for metric in final_metric.keys(): - if isinstance(total_epoch_valid_metric[metric], list): - if len(total_epoch_valid_metric[metric]) == 0: - total_epoch_valid_metric[metric] = np.array( - final_metric[metric] - ) - else: - total_epoch_valid_metric[metric] += np.array( - final_metric[metric] - ) - else: - total_epoch_valid_metric[metric] += final_metric[metric] + total_epoch_valid_metric[metric] += final_metric[metric] if label_ground_truth is not None: if params["verbose"]: diff --git a/GANDLF/compute/loss_and_metric.py b/GANDLF/compute/loss_and_metric.py index e149c08db..36f78560e 100644 --- a/GANDLF/compute/loss_and_metric.py +++ b/GANDLF/compute/loss_and_metric.py @@ -1,5 +1,6 @@ import sys -from typing import Dict, Tuple +import warnings +from typing import Dict, Tuple, Union from GANDLF.losses import 
global_losses_dict from GANDLF.metrics import global_metrics_dict import torch @@ -13,7 +14,7 @@ def get_metric_output( prediction: torch.Tensor, target: torch.Tensor, params: dict, -) -> float: +) -> Union[float, list]: """ This function computes the metric output for a given metric function, prediction and target. @@ -36,6 +37,12 @@ def get_metric_output( if len(temp) > 1: return temp else: + # TODO: this branch is extremely age case and is buggy. + # Overall the case when metric returns a list but of length 1 is very rare. The only case is when + # the metric returns Nx.. tensor (i.e. without aggregation by elements) and batch_size==N==1. This branch + # would definitely fail for such a metrics like + # MulticlassAccuracy(num_classes=3, multidim_average="samplewise") + # Maybe the best solution is to raise an error here if metric is configured to return samplewise results? return metric_output.item() @@ -115,41 +122,38 @@ def get_loss_and_metrics( loss_kld = global_losses_dict["kld"](prediction[2], prediction[3]) loss_cycle = global_losses_dict["mse"](prediction[2], prediction[4], None) loss = 0.01 * loss_kld + loss_reco + 10 * loss_seg + loss_cycle + elif deep_supervision_model: + # this is for models that have deep-supervision + for i, _ in enumerate(prediction): + # loss is calculated based on resampled "soft" labels using a pre-defined weights array + loss += ( + loss_function(prediction[i], ground_truth_resampled[i], params) + * loss_weights[i] + ) else: - if deep_supervision_model: - # this is for models that have deep-supervision - for i, _ in enumerate(prediction): - # loss is calculated based on resampled "soft" labels using a pre-defined weights array - loss += ( - loss_function(prediction[i], ground_truth_resampled[i], params) - * loss_weights[i] - ) - else: - loss = loss_function(prediction, target, params) + loss = loss_function(prediction, target, params) metric_output = {} # Metrics should be a list for metric in params["metrics"]: metric_lower = 
metric.lower() metric_output[metric] = 0 - if metric_lower in global_metrics_dict: - metric_function = global_metrics_dict[metric_lower] - if sdnet_check: - metric_output[metric] = get_metric_output( - metric_function, prediction[0], target.squeeze(-1), params + if metric_lower not in global_metrics_dict: + warnings.warn("WARNING: Could not find the requested metric '" + metric) + continue + + metric_function = global_metrics_dict[metric_lower] + if sdnet_check: + metric_output[metric] = get_metric_output( + metric_function, prediction[0], target.squeeze(-1), params + ) + elif deep_supervision_model: + for i, _ in enumerate(prediction): + metric_output[metric] += get_metric_output( + metric_function, prediction[i], ground_truth_resampled[i], params ) - else: - if deep_supervision_model: - for i, _ in enumerate(prediction): - metric_output[metric] += get_metric_output( - metric_function, - prediction[i], - ground_truth_resampled[i], - params, - ) - - else: - metric_output[metric] = get_metric_output( - metric_function, prediction, target, params - ) + else: + metric_output[metric] = get_metric_output( + metric_function, prediction, target, params + ) return loss, metric_output diff --git a/GANDLF/compute/step.py b/GANDLF/compute/step.py index bdd997a50..151ee0872 100644 --- a/GANDLF/compute/step.py +++ b/GANDLF/compute/step.py @@ -90,5 +90,7 @@ def step( if "medcam_enabled" in params and params["medcam_enabled"]: attention_map = torch.unsqueeze(attention_map, -1) - assert len(output) == len(image), f"Error: output({len(output)}) and batch({len(image)}) have different lengths. Both should be equal to batch size!" + assert len(output) == len( + image + ), f"Error: output({len(output)}) and batch({len(image)}) have different lengths. Both should be equal to batch size!" 
return loss, metric_output, output, attention_map diff --git a/GANDLF/compute/training_loop.py b/GANDLF/compute/training_loop.py index c5cafd869..8f183030d 100644 --- a/GANDLF/compute/training_loop.py +++ b/GANDLF/compute/training_loop.py @@ -1,5 +1,5 @@ import os, time, psutil -from typing import Tuple +from typing import Tuple, Union import pandas as pd import torch from torch.utils.data import DataLoader @@ -22,7 +22,6 @@ version_check, write_training_patches, print_model_summary, - get_ground_truths_and_predictions_tensor, get_model_dict, print_and_format_metrics, ) @@ -59,15 +58,26 @@ def train_network( print("*" * 20) # Initialize a few things total_epoch_train_loss = 0 - total_epoch_train_metric = {} + total_epoch_train_metric: dict[str, Union[float, np.array]] = {} average_epoch_train_metric = {} - calculate_overall_metrics = (params["problem_type"] == "classification") or ( - params["problem_type"] == "regression" - ) + # TODO: calculate metrics for segmentation and other problems. btw what are possible problem types? + calculate_overall_metrics = params["problem_type"] in { + "classification", + "regression", + } + + # get ground truths + if calculate_overall_metrics: + # TODO: for regression / segmentation we need different dtypes + different shape + ground_truth_array = torch.zeros(len(train_dataloader.dataset), dtype=torch.int) + predictions_array = torch.zeros_like(ground_truth_array) for metric in params["metrics"]: + # TODO: can it be per-label for non-classif? 
if "per_label" in metric: - total_epoch_train_metric[metric] = [] + total_epoch_train_metric[metric] = np.zeros( + shape=params["model"]["num_classes"] + ) else: total_epoch_train_metric[metric] = 0 @@ -77,10 +87,6 @@ def train_network( if params["verbose"]: print("Using Automatic mixed precision", flush=True) - # get ground truths - if calculate_overall_metrics: - ground_truth_array = torch.zeros(len(train_dataloader.dataset), dtype=torch.int) - predictions_array = torch.zeros_like(ground_truth_array) # Set the model to train model.train() for batch_idx, (subject) in enumerate( @@ -115,13 +121,18 @@ def train_network( loss, calculated_metrics, output, _ = step(model, image, label, params) # store predictions for classification if calculate_overall_metrics: - batch_idx_slice = slice(batch_idx * params["batch_size"], (batch_idx + 1) * params["batch_size"]) + batch_idx_slice = slice( + batch_idx * params["batch_size"], (batch_idx + 1) * params["batch_size"] + ) + # TODO: label = BATCH_SIZE x 1. What if not? Multiclass? classif - OHE? ground_truth_array[batch_idx_slice] = label.detach().cpu().ravel() + # TODO: output is BATCH_SIZE x N_CLASSES. What if not? 
batch_predictions = torch.argmax(output, 1).cpu() assert len(batch_predictions) == len(label) predictions_array[batch_idx_slice] = batch_predictions nan_loss = torch.isnan(loss) + # loss backward second_order = ( hasattr(optimizer, "is_second_order") and optimizer.is_second_order ) @@ -155,18 +166,8 @@ def train_network( # Non network training related if not nan_loss: total_epoch_train_loss += loss.detach().cpu().item() - for metric in calculated_metrics.keys(): - if isinstance(total_epoch_train_metric[metric], list): - if len(total_epoch_train_metric[metric]) == 0: - total_epoch_train_metric[metric] = np.array( - calculated_metrics[metric] - ) - else: - total_epoch_train_metric[metric] += np.array( - calculated_metrics[metric] - ) - else: - total_epoch_train_metric[metric] += calculated_metrics[metric] + for metric, metric_val in calculated_metrics.items(): + total_epoch_train_metric[metric] += metric_val if params["verbose"]: # For printing information at halftime during an epoch @@ -194,6 +195,9 @@ def train_network( average_epoch_train_metric = overall_stats( predictions_array, ground_truth_array, params ) + # TODO: the following not just prints and formats, but updates the dict also. Clean this code + # 1. average_epoch_train_metric and total_epoch_train_metric are combined + # 2. 
list values in total_epoch_train_metric are converted to strings by some logic (but not in avg_ep_tr_metr) average_epoch_train_metric = print_and_format_metrics( average_epoch_train_metric, total_epoch_train_metric, @@ -325,17 +329,24 @@ def training_loop( metrics_log = list(params["metrics"]) - calculate_overall_metrics = params["problem_type"] in {"classification", "regression"} + calculate_overall_metrics = params["problem_type"] in { + "classification", + "regression", + } if calculate_overall_metrics: # get the overall metrics that are calculated automatically for classification/regression problems if params["problem_type"] == "regression": - overall_metrics = overall_stats(torch.Tensor([1]), torch.Tensor([1]), params) + overall_metrics = overall_stats( + torch.Tensor([1]), torch.Tensor([1]), params + ) elif params["problem_type"] == "classification": # this is just used to generate the headers for the overall stats temp_tensor = torch.randint(0, params["model"]["num_classes"], (5,)) overall_metrics = overall_stats( - temp_tensor.to(dtype=torch.int32), temp_tensor.to(dtype=torch.int32), params + temp_tensor.to(dtype=torch.int32), + temp_tensor.to(dtype=torch.int32), + params, ) else: raise NotImplementedError("Problem type not implemented for overall stats") diff --git a/GANDLF/logger.py b/GANDLF/logger.py index f7e15f044..ef98d5505 100755 --- a/GANDLF/logger.py +++ b/GANDLF/logger.py @@ -25,15 +25,19 @@ def __init__(self, logger_csv_filename: str, metrics: List[str], mode: str) -> N mode = mode.lower() self.mode = mode.lower() - new_header = ["epoch_no", f"{mode}_loss"] + [f"{mode}_{metric}" for metric in metrics] + new_header = ["epoch_no", f"{mode}_loss"] + [ + f"{mode}_{metric}" for metric in metrics + ] # TODO: do we really need to support appending to existing files? 
if os.path.exists(self.filename): with open(self.filename, "r") as f: existing_header = f.readline().strip().split(",") if set(existing_header) != set(new_header): - raise ValueError(f"Logger file {self.filename} error: existing header does not match new header." - f" Existing header: {existing_header}. New header: {new_header}") + raise ValueError( + f"Logger file {self.filename} error: existing header does not match new header." + f" Existing header: {existing_header}. New header: {new_header}" + ) self.ordered_header = existing_header else: with open(self.filename, "w") as f: @@ -41,8 +45,10 @@ def __init__(self, logger_csv_filename: str, metrics: List[str], mode: str) -> N self.ordered_header = new_header def write( - self, epoch_number: int, loss: Union[float, torch.Tensor], - epoch_metrics: Dict[str, Union[float, torch.Tensor]] + self, + epoch_number: int, + loss: Union[float, torch.Tensor], + epoch_metrics: Dict[str, Union[float, torch.Tensor]], ) -> None: """ Write the epoch number, loss and metrics to the csv file. @@ -56,8 +62,7 @@ def write( if torch.is_tensor(loss): loss = loss.cpu().item() - row = {"epoch_no": epoch_number, - f"{self.mode}_loss": loss} + row = {"epoch_no": epoch_number, f"{self.mode}_loss": loss} for metric, metric_val in epoch_metrics.items(): if torch.is_tensor(metric_val): diff --git a/GANDLF/metrics/__init__.py b/GANDLF/metrics/__init__.py index b8de47cf1..1fc21b3fb 100644 --- a/GANDLF/metrics/__init__.py +++ b/GANDLF/metrics/__init__.py @@ -1,6 +1,8 @@ """ All the metrics are to be called from here """ +from typing import Union + from GANDLF.losses.regression import MSE_loss, CEL from .segmentation import ( multi_class_dice, @@ -100,7 +102,7 @@ ] -def overall_stats(predictions, ground_truth, params): +def overall_stats(predictions, ground_truth, params) -> dict[str, Union[float, list]]: """ Generates a dictionary of metrics calculated on the overall predictions and ground truths. 
diff --git a/GANDLF/metrics/classification.py b/GANDLF/metrics/classification.py index ccfa507f6..5ef113fda 100644 --- a/GANDLF/metrics/classification.py +++ b/GANDLF/metrics/classification.py @@ -1,3 +1,5 @@ +from typing import Union + import torch import torchmetrics as tm from torch.nn.functional import one_hot @@ -5,7 +7,9 @@ from GANDLF.utils.generic import determine_classification_task_type -def overall_stats(prediction: torch.Tensor, target: torch.Tensor, params: dict) -> dict: +def overall_stats( + prediction: torch.Tensor, target: torch.Tensor, params: dict +) -> dict[str, Union[float, list]]: """ Generates a dictionary of metrics calculated on the overall prediction and ground truths. diff --git a/GANDLF/metrics/regression.py b/GANDLF/metrics/regression.py index eedbde027..0d84fa2a3 100644 --- a/GANDLF/metrics/regression.py +++ b/GANDLF/metrics/regression.py @@ -1,6 +1,7 @@ """ All the metrics are to be called from here """ +from typing import Union import torch from sklearn.metrics import balanced_accuracy_score @@ -82,7 +83,9 @@ def per_label_accuracy( return balanced_acc_score(prediction, target, params) -def overall_stats(prediction: torch.Tensor, target: torch.Tensor, params: dict) -> dict: +def overall_stats( + prediction: torch.Tensor, target: torch.Tensor, params: dict +) -> dict[str, Union[float, list]]: """ Generates a dictionary of metrics calculated on the overall predictions and ground truths. diff --git a/GANDLF/utils/generic.py b/GANDLF/utils/generic.py index b2fff3cc3..9b94a9cd7 100644 --- a/GANDLF/utils/generic.py +++ b/GANDLF/utils/generic.py @@ -231,6 +231,7 @@ def print_and_format_metrics( Args: cohort_level_metrics (dict): The cohort level metrics calculated from the GANDLF.metrics.overall_stats function. + May be empty dict if not classification/regression. sample_level_metrics (dict): The sample level metrics calculated from separate samples from the dataloader(s). 
metrics_dict_from_parameters (dict): The metrics dictionary to populate. mode (str): The mode of the metrics (train, val, test). @@ -270,11 +271,8 @@ def __update_metric_from_list_to_single_string(input_metrics_dict: dict) -> dict else: to_print = sample_level_metrics[metric] / length_of_dataloader output_metrics_dict[metric] = to_print - for metric in output_metrics_dict.keys(): - print( - " Epoch Final " + mode + " " + metric + " : ", - output_metrics_dict[metric], - ) + for metric, metric_val in output_metrics_dict.items(): + print(" Epoch Final " + mode + " " + metric + " : ", metric_val) output_metrics_dict = __update_metric_from_list_to_single_string( output_metrics_dict ) diff --git a/GANDLF/utils/tensor.py b/GANDLF/utils/tensor.py index 9dfd3cb9c..651b7b35d 100644 --- a/GANDLF/utils/tensor.py +++ b/GANDLF/utils/tensor.py @@ -521,7 +521,7 @@ def get_ground_truths_and_predictions_tensor( def get_output_from_calculator( prediction: torch.Tensor, target: torch.tensor, calculator: torchmetrics.Metric -) -> float: +) -> Union[float, list]: """ Helper function to get the output from a calculator. 
From 36bbfa9a720825a9b1ffc3fdcb20d03dc67fe9aa Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Wed, 15 May 2024 02:08:54 +0300 Subject: [PATCH 05/14] Fix after changing step output shape --- GANDLF/compute/forward_pass.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/GANDLF/compute/forward_pass.py b/GANDLF/compute/forward_pass.py index e910c98d0..c9afefb29 100644 --- a/GANDLF/compute/forward_pass.py +++ b/GANDLF/compute/forward_pass.py @@ -309,7 +309,6 @@ def validate_network( # save outputs if params["problem_type"] == "segmentation": output_prediction = aggregator.get_output_tensor() - output_prediction = output_prediction.unsqueeze(0) if params["save_output"]: img_for_metadata = torchio.ScalarImage( tensor=subject["1"]["data"].squeeze(0), @@ -389,7 +388,7 @@ def validate_network( + "," + subject["subject_id"][0] + "," - + str(output_prediction) + + str(output_prediction[0]) + "\n" ) @@ -401,7 +400,6 @@ def validate_network( n.squeeze(), raw_input=image[i].squeeze(-1) ) - output_prediction = output_prediction.squeeze(-1) if is_inference and is_classification: logits_list.append(output_prediction) subject_id_list.append(subject.get("subject_id")[0]) @@ -412,9 +410,8 @@ def validate_network( if label_ground_truth.shape[0] == 3: label_ground_truth = label_ground_truth[0, ...].unsqueeze(0) # we always want the ground truth to be in the same format as the prediction + # add batch dim label_ground_truth = label_ground_truth.unsqueeze(0) - if label_ground_truth.shape[-1] == 1: - label_ground_truth = label_ground_truth.squeeze(-1) final_loss, final_metric = get_loss_and_metrics( image, label_ground_truth, From 62ffb146637b8716b4e903ff126cb76d0c68f7f8 Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Tue, 21 May 2024 19:43:47 +0300 Subject: [PATCH 06/14] dynamic lists instead of fixed size for handling dynamic batch_size --- GANDLF/compute/forward_pass.py | 6 ++---- GANDLF/compute/training_loop.py | 15 +++++---------- 2 files 
changed, 7 insertions(+), 14 deletions(-) diff --git a/GANDLF/compute/forward_pass.py b/GANDLF/compute/forward_pass.py index c9afefb29..90b32458a 100644 --- a/GANDLF/compute/forward_pass.py +++ b/GANDLF/compute/forward_pass.py @@ -108,10 +108,8 @@ def validate_network( # get ground truths for classification problem, validation set if calculate_overall_metrics: - ( - ground_truth_array, - predictions_array, - ) = get_ground_truths_and_predictions_tensor(params, "validation_data") + ground_truth_array = [] + predictions_array = [] for batch_idx, (subject) in enumerate( tqdm(valid_dataloader, desc="Looping over " + mode + " data") diff --git a/GANDLF/compute/training_loop.py b/GANDLF/compute/training_loop.py index 8f183030d..4880edae1 100644 --- a/GANDLF/compute/training_loop.py +++ b/GANDLF/compute/training_loop.py @@ -68,9 +68,8 @@ def train_network( # get ground truths if calculate_overall_metrics: - # TODO: for regression / segmentation we need different dtypes + different shape - ground_truth_array = torch.zeros(len(train_dataloader.dataset), dtype=torch.int) - predictions_array = torch.zeros_like(ground_truth_array) + ground_truth_array = [] + predictions_array = [] for metric in params["metrics"]: # TODO: can it be per-label for non-classif? @@ -121,15 +120,11 @@ def train_network( loss, calculated_metrics, output, _ = step(model, image, label, params) # store predictions for classification if calculate_overall_metrics: - batch_idx_slice = slice( - batch_idx * params["batch_size"], (batch_idx + 1) * params["batch_size"] - ) - # TODO: label = BATCH_SIZE x 1. What if not? Multiclass? classif - OHE? - ground_truth_array[batch_idx_slice] = label.detach().cpu().ravel() + ground_truth_array.extend(list(label.detach().cpu())) # TODO: output is BATCH_SIZE x N_CLASSES. What if not? 
batch_predictions = torch.argmax(output, 1).cpu() assert len(batch_predictions) == len(label) - predictions_array[batch_idx_slice] = batch_predictions + predictions_array.extend(batch_predictions.tolist()) nan_loss = torch.isnan(loss) # loss backward @@ -193,7 +188,7 @@ def train_network( # get overall stats for classification if calculate_overall_metrics: average_epoch_train_metric = overall_stats( - predictions_array, ground_truth_array, params + torch.Tensor(predictions_array), torch.Tensor(ground_truth_array), params ) # TODO: the following not just prints and formats, but updates the dict also. Clean this code # 1. average_epoch_train_metric and total_epoch_train_metric are combined From 3987439a2a59e1ff9aad364ac0c661cd9dc6414b Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Tue, 21 May 2024 23:45:02 +0300 Subject: [PATCH 07/14] Fix for segmentation --- GANDLF/compute/forward_pass.py | 13 ++++---- GANDLF/compute/step.py | 53 ++++++++++++++++++++------------- GANDLF/compute/training_loop.py | 14 +++++---- GANDLF/logger.py | 2 +- 4 files changed, 50 insertions(+), 32 deletions(-) diff --git a/GANDLF/compute/forward_pass.py b/GANDLF/compute/forward_pass.py index 90b32458a..539a422bd 100644 --- a/GANDLF/compute/forward_pass.py +++ b/GANDLF/compute/forward_pass.py @@ -206,13 +206,12 @@ def validate_network( ) if calculate_overall_metrics: + ground_truth_array.append(label_ground_truth.item()) # TODO: that's for classification only. What about regression? 
- predictions_array[batch_idx] = ( - torch.argmax(pred_output[0], 0).cpu().item() - ) + predictions_array.append(torch.argmax(pred_output[0], 0).cpu().item()) # # Non network validation related total_epoch_valid_loss += final_loss.detach().cpu().item() - for metric, metric_val in final_metric.keys(): + for metric, metric_val in final_metric.items(): total_epoch_valid_metric[metric] += metric_val else: # for segmentation problems OR regression/classification when no label is present @@ -306,7 +305,7 @@ def validate_network( # save outputs if params["problem_type"] == "segmentation": - output_prediction = aggregator.get_output_tensor() + output_prediction = aggregator.get_output_tensor().unsqueeze(0) if params["save_output"]: img_for_metadata = torchio.ScalarImage( tensor=subject["1"]["data"].squeeze(0), @@ -465,7 +464,9 @@ def validate_network( # get overall stats for classification if calculate_overall_metrics: average_epoch_valid_metric = overall_stats( - predictions_array, ground_truth_array, params + torch.Tensor(predictions_array), + torch.Tensor(ground_truth_array), + params, ) average_epoch_valid_metric = print_and_format_metrics( average_epoch_valid_metric, diff --git a/GANDLF/compute/step.py b/GANDLF/compute/step.py index 151ee0872..141ff5890 100644 --- a/GANDLF/compute/step.py +++ b/GANDLF/compute/step.py @@ -1,3 +1,4 @@ +import warnings from typing import Optional, Tuple import torch import psutil @@ -16,8 +17,12 @@ def step( Args: model (torch.nn.Module): The model to process the input image with, it should support appropriate dimensions. - image (torch.Tensor): The input image stack according to requirements. + image (torch.Tensor): The input image stack according to requirements. (B, C, H, W, D) label (torch.Tensor): The input label for the corresponding image tensor. + If segmentation, (B, C, H, W, D); + if classification / regression (not multilabel), (B, 1) + if classif / reg (multilabel), (B, N_LABELS) + params (dict): The parameters dictionary. 
train (Optional[bool], optional): Whether the step is for training or validation. Defaults to True. @@ -44,23 +49,19 @@ def step( if params["problem_type"] == "segmentation": if label.shape[1] == 3: label = label[:, 0, ...].unsqueeze(1) - # this warning should only come up once - if params["print_rgb_label_warning"]: - print( - "WARNING: The label image is an RGB image, only the first channel will be used.", - flush=True, - ) - params["print_rgb_label_warning"] = False + warnings.warn( + "The label image is an RGB image, only the first channel will be used." + ) - if params["model"]["dimension"] == 2: - label = torch.squeeze(label, -1) + assert len(label) == len(image) if params["model"]["dimension"] == 2: - image = torch.squeeze(image, -1) - if "value_keys" in params: - if label is not None: - if len(label.shape) > 1: - label = torch.squeeze(label, -1) + image = image.squeeze(-1) # removing depth + + # for segmentation remove the depth dimension from the label. + # for classification / regression, flattens class / reg label from list (possible in multilabel) to scalar + if label is not None: + label = label.squeeze(-1) if not train and params["model"]["type"].lower() == "openvino": output = torch.from_numpy( @@ -69,17 +70,25 @@ def step( ] ) output = output.to(params["device"]) - else: - if params["model"]["amp"]: - with torch.cuda.amp.autocast(): - output = model(image) - else: + elif params["model"]["amp"]: + with torch.cuda.amp.autocast(): output = model(image) + else: + output = model(image) attention_map = None if "medcam_enabled" in params and params["medcam_enabled"]: output, attention_map = output + if not isinstance(output, torch.Tensor): + warnings.warn( + f"Model output is not a Tensor: {type(output)}. Say, `deep_resunet` and `deep_unet` may return " + f"list of tensors on different scales instead of just one prediction Tensor. However due to " + f"GaNDLF architecture it is expected that models return only one tensor. 
For deep_* models " + f"only the biggeest scale is processed. Use these models with caution till fix is implemented." + ) + output = output[0] + # one-hot encoding of 'label' will probably be needed for segmentation if label is not None: loss, metric_output = get_loss_and_metrics(image, label, output, params) @@ -90,6 +99,10 @@ def step( if "medcam_enabled" in params and params["medcam_enabled"]: attention_map = torch.unsqueeze(attention_map, -1) + if params["model"]["dimension"] == 2 and params["problem_type"] == "segmentation": + # for 2d images where the depth is removed, add it back + output = torch.unsqueeze(output, -1) + assert len(output) == len( image ), f"Error: output({len(output)}) and batch({len(image)}) have different lengths. Both should be equal to batch size!" diff --git a/GANDLF/compute/training_loop.py b/GANDLF/compute/training_loop.py index 4880edae1..7276cec36 100644 --- a/GANDLF/compute/training_loop.py +++ b/GANDLF/compute/training_loop.py @@ -92,21 +92,25 @@ def train_network( tqdm(train_dataloader, desc="Looping over training data") ): optimizer.zero_grad() - image = ( + image = ( # 5D tensor: (B, C, H, W, D) torch.cat( [subject[key][torchio.DATA] for key in params["channel_keys"]], dim=1 ) .float() .to(params["device"]) ) - if "value_keys" in params: + if ( + "value_keys" in params + ): # classification / regression (when label is scalar) or multilabel classif/regression label = torch.cat([subject[key] for key in params["value_keys"]], dim=0) # min is needed because for certain cases, batch size becomes smaller than the total remaining labels label = label.reshape( min(params["batch_size"], len(label)), len(params["value_keys"]) ) else: - label = subject["label"][torchio.DATA] + label = subject["label"][ + torchio.DATA + ] # segmentation; label is (B, C, H, W, D) image label = label.to(params["device"]) if params["save_training"]: @@ -120,11 +124,11 @@ def train_network( loss, calculated_metrics, output, _ = step(model, image, label, 
params) # store predictions for classification if calculate_overall_metrics: - ground_truth_array.extend(list(label.detach().cpu())) + ground_truth_array.extend(label.detach().cpu()) # TODO: output is BATCH_SIZE x N_CLASSES. What if not? batch_predictions = torch.argmax(output, 1).cpu() assert len(batch_predictions) == len(label) - predictions_array.extend(batch_predictions.tolist()) + predictions_array.extend(batch_predictions.detach().cpu()) nan_loss = torch.isnan(loss) # loss backward diff --git a/GANDLF/logger.py b/GANDLF/logger.py index ef98d5505..2562eb17d 100755 --- a/GANDLF/logger.py +++ b/GANDLF/logger.py @@ -70,6 +70,6 @@ def write( row[f"{self.mode}_{metric}"] = metric_val with open(self.filename, "a") as f: - line = [row[col] for col in self.ordered_header] + line = [row.get(col, "") for col in self.ordered_header] line = [str(x) for x in line] f.write(",".join(line) + "\n") From 26b33a90380056916bd11b410c732015b9bbb224 Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Wed, 22 May 2024 01:07:53 +0300 Subject: [PATCH 08/14] a crutch for deep_* and sdnet architectures (that return list) --- GANDLF/compute/forward_pass.py | 2 +- GANDLF/compute/step.py | 31 ++++++++++++++++--------------- GANDLF/compute/training_loop.py | 2 ++ 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/GANDLF/compute/forward_pass.py b/GANDLF/compute/forward_pass.py index 539a422bd..3cddb9f61 100644 --- a/GANDLF/compute/forward_pass.py +++ b/GANDLF/compute/forward_pass.py @@ -376,7 +376,7 @@ def validate_network( output_prediction = output_prediction / len(patch_loader) if calculate_overall_metrics: # TOD: what? regression and argmax? 
- predictions_array[batch_idx] = ( + predictions_array.append( torch.argmax(output_prediction[0], 0).cpu().item() ) if params["save_output"]: diff --git a/GANDLF/compute/step.py b/GANDLF/compute/step.py index 141ff5890..483f89f1c 100644 --- a/GANDLF/compute/step.py +++ b/GANDLF/compute/step.py @@ -1,5 +1,5 @@ import warnings -from typing import Optional, Tuple +from typing import Optional, Tuple, Union import torch import psutil from .loss_and_metric import get_loss_and_metrics @@ -11,14 +11,14 @@ def step( label: Optional[torch.Tensor], params: dict, train: Optional[bool] = True, -) -> Tuple[float, dict, torch.Tensor, torch.Tensor]: +) -> Tuple[float, dict, Union[torch.Tensor, list[torch.Tensor]], torch.Tensor]: """ This function performs a single step of training or validation. Args: model (torch.nn.Module): The model to process the input image with, it should support appropriate dimensions. image (torch.Tensor): The input image stack according to requirements. (B, C, H, W, D) - label (torch.Tensor): The input label for the corresponding image tensor. + label Optional[torch.Tensor]: The input label for the corresponding image tensor. If segmentation, (B, C, H, W, D); if classification / regression (not multilabel), (B, 1) if classif / reg (multilabel), (B, N_LABELS) @@ -27,7 +27,8 @@ def step( train (Optional[bool], optional): Whether the step is for training or validation. Defaults to True. Returns: - Tuple[float, dict, torch.Tensor, torch.Tensor]: The loss, metrics, output, and attention map. + Tuple[float, dict, Union[torch.Tensor, list[torch.Tensor]], torch.Tensor]: The loss, metrics, output, + and attention map. """ if params["verbose"]: if torch.cuda.is_available(): @@ -80,15 +81,6 @@ def step( if "medcam_enabled" in params and params["medcam_enabled"]: output, attention_map = output - if not isinstance(output, torch.Tensor): - warnings.warn( - f"Model output is not a Tensor: {type(output)}. 
Say, `deep_resunet` and `deep_unet` may return " - f"list of tensors on different scales instead of just one prediction Tensor. However due to " - f"GaNDLF architecture it is expected that models return only one tensor. For deep_* models " - f"only the biggeest scale is processed. Use these models with caution till fix is implemented." - ) - output = output[0] - # one-hot encoding of 'label' will probably be needed for segmentation if label is not None: loss, metric_output = get_loss_and_metrics(image, label, output, params) @@ -99,9 +91,18 @@ def step( if "medcam_enabled" in params and params["medcam_enabled"]: attention_map = torch.unsqueeze(attention_map, -1) - if params["model"]["dimension"] == 2 and params["problem_type"] == "segmentation": + if not isinstance(output, torch.Tensor): + warnings.warn( + f"Model output is not a Tensor: {type(output)}. Say, `deep_resunet` and `deep_unet` may return " + f"list of tensors on different scales instead of just one prediction Tensor. However due to " + f"GaNDLF architecture it is expected that models return only one tensor. For deep_* models " + f"only the biggeest scale is processed. Use these models with caution till fix is implemented." + ) + output = output[0] + + if params["model"]["dimension"] == 2: # for 2d images where the depth is removed, add it back - output = torch.unsqueeze(output, -1) + output = output.unsqueeze(-1) assert len(output) == len( image diff --git a/GANDLF/compute/training_loop.py b/GANDLF/compute/training_loop.py index 7276cec36..d4517755a 100644 --- a/GANDLF/compute/training_loop.py +++ b/GANDLF/compute/training_loop.py @@ -124,6 +124,8 @@ def train_network( loss, calculated_metrics, output, _ = step(model, image, label, params) # store predictions for classification if calculate_overall_metrics: + # TODO: smelly code. if segmentation, in some models output may be a list of tensors rather then a one + # tensor. This is not handled here. 
However, `calculate_overall_metrics` is set to False for segmentation ground_truth_array.extend(label.detach().cpu()) # TODO: output is BATCH_SIZE x N_CLASSES. What if not? batch_predictions = torch.argmax(output, 1).cpu() From 71273cee582671ca05b39c969bf07488da6397ca Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Wed, 22 May 2024 12:58:22 +0300 Subject: [PATCH 09/14] turning training dataset shuffle on was turned off as workaround at https://github.com/mlcommons/GaNDLF/pull/870 --- GANDLF/data/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/data/__init__.py b/GANDLF/data/__init__.py index adba1f2c6..6427ccee9 100644 --- a/GANDLF/data/__init__.py +++ b/GANDLF/data/__init__.py @@ -24,7 +24,7 @@ def get_train_loader(params): loader_type="train", ), batch_size=params["batch_size"], - shuffle=False, + shuffle=True, pin_memory=False, # params["pin_memory_dataloader"], # this is going OOM if True - needs investigation ) From d30cf20172d89d103e67bae671cae55c16c6d016 Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Thu, 23 May 2024 10:55:17 +0300 Subject: [PATCH 10/14] Test fix for the case when both label and value_to_pred exist fixes test_train_inference_classification_histology_large_2d (35) --- GANDLF/compute/forward_pass.py | 1 + GANDLF/compute/step.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/GANDLF/compute/forward_pass.py b/GANDLF/compute/forward_pass.py index 3cddb9f61..73d68e4b1 100644 --- a/GANDLF/compute/forward_pass.py +++ b/GANDLF/compute/forward_pass.py @@ -379,6 +379,7 @@ def validate_network( predictions_array.append( torch.argmax(output_prediction[0], 0).cpu().item() ) + ground_truth_array.append(label_ground_truth.item()) if params["save_output"]: outputToWrite += ( str(epoch) diff --git a/GANDLF/compute/step.py b/GANDLF/compute/step.py index 483f89f1c..f588b66ea 100644 --- a/GANDLF/compute/step.py +++ b/GANDLF/compute/step.py @@ -61,7 +61,9 @@ def step( # for 
segmentation remove the depth dimension from the label. # for classification / regression, flattens class / reg label from list (possible in multilabel) to scalar - if label is not None: + # TODO: second condition is crutch - in some cases label is passed as 1-d Tensor (B,) and if Batch size is 1, + # it is squeezed to scalar tensor (0-d) and the future logic fails + if label is not None and len(label.shape) != 1: label = label.squeeze(-1) if not train and params["model"]["type"].lower() == "openvino": @@ -100,7 +102,7 @@ def step( ) output = output[0] - if params["model"]["dimension"] == 2: + if params["model"]["dimension"] == 2 and params["problem_type"] == "segmentation": # for 2d images where the depth is removed, add it back output = output.unsqueeze(-1) From 92c4387cde26689531f153d7177bee734a406e35 Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Thu, 23 May 2024 13:38:22 +0300 Subject: [PATCH 11/14] bugfix when label is not present --- GANDLF/compute/step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/compute/step.py b/GANDLF/compute/step.py index f588b66ea..148d206cf 100644 --- a/GANDLF/compute/step.py +++ b/GANDLF/compute/step.py @@ -54,7 +54,7 @@ def step( "The label image is an RGB image, only the first channel will be used." 
) - assert len(label) == len(image) + assert len(label) == len(image) if params["model"]["dimension"] == 2: image = image.squeeze(-1) # removing depth From d0d25fbbc91d7f3ae3235a0b3e80ada65d1f8787 Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Thu, 6 Jun 2024 17:15:47 +0300 Subject: [PATCH 12/14] Do not assert metric shape; let's take the first evaluated one instead (for one of the classes per-label metrics are not counted, thus the metric shape may differ) --- GANDLF/compute/training_loop.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/GANDLF/compute/training_loop.py b/GANDLF/compute/training_loop.py index d4517755a..bbf24a98d 100644 --- a/GANDLF/compute/training_loop.py +++ b/GANDLF/compute/training_loop.py @@ -74,9 +74,7 @@ def train_network( for metric in params["metrics"]: # TODO: can it be per-label for non-classif? if "per_label" in metric: - total_epoch_train_metric[metric] = np.zeros( - shape=params["model"]["num_classes"] - ) + total_epoch_train_metric[metric] = np.zeros(1) # real shape would be defined during execution else: total_epoch_train_metric[metric] = 0 @@ -168,7 +166,7 @@ def train_network( if not nan_loss: total_epoch_train_loss += loss.detach().cpu().item() for metric, metric_val in calculated_metrics.items(): - total_epoch_train_metric[metric] += metric_val + total_epoch_train_metric[metric] = total_epoch_train_metric[metric] + metric_val if params["verbose"]: # For printing information at halftime during an epoch From ca8a9040f1492c704c67a94d15bb32b51ce7e25a Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Thu, 6 Jun 2024 21:49:13 +0300 Subject: [PATCH 13/14] Blacked --- GANDLF/compute/training_loop.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/GANDLF/compute/training_loop.py b/GANDLF/compute/training_loop.py index bbf24a98d..32b52f188 100644 --- a/GANDLF/compute/training_loop.py +++ b/GANDLF/compute/training_loop.py @@ -74,7 +74,9 @@ def train_network( for metric in 
params["metrics"]: # TODO: can it be per-label for non-classif? if "per_label" in metric: - total_epoch_train_metric[metric] = np.zeros(1) # real shape would be defined during execution + total_epoch_train_metric[metric] = np.zeros( + 1 + ) # real shape would be defined during execution else: total_epoch_train_metric[metric] = 0 @@ -166,7 +168,9 @@ def train_network( if not nan_loss: total_epoch_train_loss += loss.detach().cpu().item() for metric, metric_val in calculated_metrics.items(): - total_epoch_train_metric[metric] = total_epoch_train_metric[metric] + metric_val + total_epoch_train_metric[metric] = ( + total_epoch_train_metric[metric] + metric_val + ) if params["verbose"]: # For printing information at halftime during an epoch From 6b22745d20f87fc450199c38621ec7fdcb7b2d7d Mon Sep 17 00:00:00 2001 From: Viacheslav Kukushkin Date: Thu, 18 Jul 2024 14:52:45 +0300 Subject: [PATCH 14/14] Error correction in validation and testing loops - the same metric error was occurring in the loops in forward_pass.py - now it is fixed - entire epoch completes successfully Implemented by Szymon Mazurek szymon.mazurek@cyfronet.pl --- GANDLF/compute/forward_pass.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/GANDLF/compute/forward_pass.py b/GANDLF/compute/forward_pass.py index 73d68e4b1..be2bff034 100644 --- a/GANDLF/compute/forward_pass.py +++ b/GANDLF/compute/forward_pass.py @@ -56,9 +56,7 @@ def validate_network( for metric in params["metrics"]: if "per_label" in metric: - total_epoch_valid_metric[metric] = np.zeros( - shape=params["model"]["num_classes"] - ) + total_epoch_valid_metric[metric] = np.zeros(1) else: total_epoch_valid_metric[metric] = 0 @@ -212,7 +210,9 @@ def validate_network( # # Non network validation related total_epoch_valid_loss += final_loss.detach().cpu().item() for metric, metric_val in final_metric.items(): - total_epoch_valid_metric[metric] += metric_val + total_epoch_valid_metric[metric] = ( + 
total_epoch_valid_metric[metric] + metric_val + ) else: # for segmentation problems OR regression/classification when no label is present grid_sampler = torchio.inference.GridSampler( @@ -429,7 +429,9 @@ def validate_network( # loss.cpu().data.item() total_epoch_valid_loss += final_loss.cpu().item() for metric in final_metric.keys(): - total_epoch_valid_metric[metric] += final_metric[metric] + total_epoch_valid_metric[metric] = ( + total_epoch_valid_metric[metric] + final_metric[metric] + ) if label_ground_truth is not None: if params["verbose"]: